/*
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.opencms.search.fields;

import org.opencms.search.CmsSearchManager;
import org.opencms.util.CmsStringUtil;

import java.util.Locale;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;

/**
 * An individual field configuration in a Lucene search index.<p>
 *
 * @since 7.0.0
 */
public class CmsLuceneField extends CmsSearchField {

    /** Value of m_displayName if field should not be displayed. */
    public static final String IGNORE_DISPLAY_NAME = "-";

    /** Constant for the "compress" index setting. */
    public static final String STR_COMPRESS = "compress";

    /** Constant for the "no" index setting. */
    public static final String STR_NO = "no";

    /** Constant for the "tokenized" index setting. */
    public static final String STR_TOKENIZED = "tokenized";

    /** Constant for the "untokenized" index setting. */
    public static final String STR_UN_TOKENIZED = "untokenized";

    /** Constant for the "yes" index setting. */
    public static final String STR_YES = "yes";

    /** The serial version UID. */
    private static final long serialVersionUID = -4946013624087640706L;

    /** The special analyzer to use for this field. */
    private transient Analyzer m_analyzer;

    /** Indicates if the content of this field is compressed. */
    private boolean m_compressed;

    /** Indicates if this field should be displayed. */
    private boolean m_displayed;

    /** The display name of the field. */
    private String m_displayName;

    /** The display name set from the configuration. */
    private String m_displayNameForConfiguration;

    /** Indicates if the content of this field should be tokenized. */
    private boolean m_tokenized;

    /** The type used to convert a field to a Solr field. */
    private String m_type;

    /**
     * Creates a new search field configuration.<p>
     */
    public CmsLuceneField() {

        super();
    }

    /**
     * Creates a new search field configuration.<p>
     *
     * The field will be tokenized if it is indexed.
     * The field will not be in the excerpt.
     * There is no default value.<p>
     *
     * @param name the name of the field, see {@link #setName(String)}
     * @param displayName the display name of this field, see {@link #setDisplayName(String)}
     * @param isStored controls if the field is stored, see {@link #setStored(boolean)}
     * @param isIndexed controls if the field is indexed and tokenized, see {@link #setIndexed(boolean)}
     */
    public CmsLuceneField(String name, String displayName, boolean isStored, boolean isIndexed) {

        // tokenized follows indexed, never in the excerpt, no default value
        this(name, displayName, isStored, isIndexed, isIndexed, false, null);
    }

    /**
     * Creates a new search field configuration.<p>
     *
     * @param name the name of the field, see {@link #setName(String)}
     * @param displayName the display name of this field, see {@link #setDisplayName(String)}
     * @param isStored controls if the field is stored, see {@link #setStored(boolean)}
     * @param isCompressed controls if the field is compressed, see {@link #setCompressed(boolean)}
     * @param isIndexed controls if the field is indexed, see {@link #setIndexed(boolean)}
     * @param isTokenized controls if the field is tokenized, see {@link #setTokenized(boolean)}
     * @param isInExcerpt controls if the field is in the excerpt, see {@link #isInExcerptAndStored()}
     * @param analyzer the analyzer to use, see {@link #setAnalyzer(Analyzer)}
     * @param defaultValue the default value for the field, see {@link #setDefaultValue(String)}
     */
    public CmsLuceneField(
        String name,
        String displayName,
        boolean isStored,
        boolean isCompressed,
        boolean isIndexed,
        boolean isTokenized,
        boolean isInExcerpt,
        Analyzer analyzer,
        String defaultValue) {

        super(name, defaultValue);
        setDisplayName(displayName);
        // order matters: setCompressed(true) forces the stored flag back to true
        setStored(isStored);
        setCompressed(isCompressed);
        setIndexed(isIndexed);
        setTokenized(isTokenized);
        setInExcerpt(isInExcerpt);
        setAnalyzer(analyzer);
    }

    /**
     * Creates a new search field configuration that is not compressed and has no special analyzer.<p>
     *
     * @param name the name of the field, see {@link #setName(String)}
     * @param displayName the display name of this field, see {@link #setDisplayName(String)}
     * @param isStored controls if the field is stored, see {@link #setStored(boolean)}
     * @param isIndexed controls if the field is indexed, see {@link #setIndexed(boolean)}
     * @param isTokenized controls if the field is tokenized, see {@link #setTokenized(boolean)}
     * @param isInExcerpt controls if the field is in the excerpt, see {@link #isInExcerptAndStored()}
     * @param defaultValue the default value for the field, see {@link #setDefaultValue(String)}
     */
    public CmsLuceneField(
        String name,
        String displayName,
        boolean isStored,
        boolean isIndexed,
        boolean isTokenized,
        boolean isInExcerpt,
        String defaultValue) {

        this(name, displayName, isStored, false, isIndexed, isTokenized, isInExcerpt, null, defaultValue);
    }

    /**
     * Closes the analyzer, if one has been set.<p>
     */
    public void closeAnalyzer() {

        if (m_analyzer != null) {
            m_analyzer.close();
        }
    }

    /**
     * Creates a field from the configuration and the provided content.<p>
     *
     * The configured name of the field as provided by {@link #getName()} is used.<p>
     *
     * If no valid content is provided (that is the content is either <code>null</code> or
     * only whitespace), the default value of this field is used instead. If there is no
     * default value either, no field is created and <code>null</code> is returned.<p>
     *
     * @param content the content to create the field with
     *
     * @return a field created from the configuration and the provided content, or <code>null</code>
     */
    public Field createField(String content) {

        return createField(getName(), content);
    }

    /**
     * Creates a field with the given name from the configuration and the provided content.<p>
     *
     * If no valid content is provided (that is the content is either <code>null</code> or
     * only whitespace), the default value of this field is used instead. If there is no
     * default value either, no field is created and <code>null</code> is returned.<p>
     *
     * @param name the name of the field to create
     * @param content the content to create the field with
     *
     * @return a field with the given name from the configuration and the provided content, or <code>null</code>
     */
    public Field createField(String name, String content) {

        if (CmsStringUtil.isEmptyOrWhitespaceOnly(content)) {
            // fall back to the configured default value (may be null)
            content = getDefaultValue();
        }
        if (content == null) {
            return null;
        }
        final FieldType fieldType = new FieldType();
        if (isIndexed()) {
            if (isTokenizedAndIndexed()) {
                fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
                fieldType.setTokenized(true);
            } else {
                fieldType.setIndexOptions(IndexOptions.DOCS);
                fieldType.setTokenized(false);
            }
        }
        // the compressed flag only forces the field to be stored here
        fieldType.setStored(isStored() || isCompressed());
        return new Field(name, content, fieldType);
    }

    /**
     * Returns the analyzer used for this field.<p>
     *
     * @return the analyzer used for this field
     */
    public Analyzer getAnalyzer() {

        return m_analyzer;
    }

    /**
     * Returns the display name of the field.<p>
     *
     * If the field is not displayed, {@link #IGNORE_DISPLAY_NAME} is returned.
     * If no display name has been set, the field name is returned.<p>
     *
     * @return the display name of the field
     */
    public String getDisplayName() {

        if (!isDisplayed()) {
            return IGNORE_DISPLAY_NAME;
        }
        if (m_displayName == null) {
            return getName();
        } else {
            return m_displayName;
        }
    }

    /**
     * Returns the displayNameForConfiguration.<p>
     *
     * @return the displayNameForConfiguration
     */
    public String getDisplayNameForConfiguration() {

        return m_displayNameForConfiguration;
    }

    /**
     * Returns the String value state of this field if it is indexed (and possibly tokenized) in the Lucene index.<p>
     *
     * The result is <code>"true"</code> for an indexed and tokenized field, <code>"untokenized"</code>
     * for a field that is indexed only, and <code>"false"</code> otherwise.<p>
     *
     * @return the String value state of this field if it is indexed (and possibly tokenized) in the Lucene index
     *
     * @see #isTokenizedAndIndexed()
     * @see #isIndexed()
     */
    @Override
    public String getIndexed() {

        if (isTokenizedAndIndexed()) {
            return String.valueOf(isTokenizedAndIndexed());
        }
        if (isIndexed()) {
            return STR_UN_TOKENIZED;
        } else {
            return String.valueOf(isIndexed());
        }
    }

    /**
     * Returns the type.<p>
     *
     * @return the type
     */
    public String getType() {

        return m_type;
    }

    /**
     * Returns <code>true</code> if the content of this field is compressed.<p>
     *
     * Setting a field to compressed via {@link #setCompressed(boolean)} also marks it as stored.<p>
     *
     * @return <code>true</code> if the content of this field is compressed
     */
    public boolean isCompressed() {

        return m_compressed;
    }

    /**
     * Returns true if the field should be displayed.<p>
     *
     * @return returns true if the field should be displayed otherwise false
     */
    public boolean isDisplayed() {

        return m_displayed;
    }

    /**
     * Returns <code>true</code> if this fields content is used in the search result excerpt.<p>
     *
     * A field can only be used in the excerpt if it is stored, see {@link #isStored()}.<p>
     *
     * @return <code>true</code> if this fields content is used in the search result excerpt
     *
     * @see #isStored()
     */
    public boolean isInExcerptAndStored() {

        return isInExcerpt() && isStored();
    }

    /**
     * Returns <code>true</code> if the content of this field is tokenized in the Lucene index.<p>
     *
     * Please refer to the Lucene documentation about the concept behind tokenized and untokenized fields.<p>
     *
     * @return <code>true</code> if the content of this field is tokenized in the Lucene index
     */
    public boolean isTokenized() {

        return m_tokenized;
    }

    /**
     * Returns <code>true</code> if the content of this field is tokenized in the Lucene index.<p>
     *
     * A field can only be tokenized if it is also indexed, see {@link #isIndexed()}.<p>
     *
     * Please refer to the Lucene documentation about the concept behind tokenized and untokenized fields.<p>
     *
     * @return <code>true</code> if the content of this field is tokenized in the Lucene index
     *
     * @see #isTokenized()
     * @see #isIndexed()
     */
    public boolean isTokenizedAndIndexed() {

        return m_tokenized && isIndexed();
    }

    /**
     * Sets the analyzer used for this field.<p>
     *
     * @param analyzer the analyzer to set
     */
    public void setAnalyzer(Analyzer analyzer) {

        m_analyzer = analyzer;
    }

    /**
     * Sets the analyzer used for this field.<p>
     *
     * The parameter must be the name of a Lucene {@link Analyzer} class.
     * If the name is empty or only whitespace, the current analyzer is left unchanged.<p>
     *
     * @param analyzerName the analyzer class name to set
     *
     * @throws Exception in case of problems creating the analyzer class instance
     */
    public void setAnalyzer(String analyzerName) throws Exception {

        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(analyzerName)) {
            setAnalyzer(CmsSearchManager.getAnalyzer(analyzerName));
        }
    }

    /**
     * Controls if this field value will be stored compressed or not.<p>
     *
     * If this is set to <code>true</code>, the value for {@link #isStored()} will also
     * be set to <code>true</code>, since compressed fields are always stored.
     * Setting this to <code>false</code> does not change the stored flag.<p>
     *
     * @param compressed if <code>true</code>, the field value will be stored compressed
     */
    public void setCompressed(boolean compressed) {

        m_compressed = compressed;
        if (compressed) {
            setStored(true);
        }
    }

    /**
     * Controls if the field is displayed or not.<p>
     *
     * @param displayed if true the field is displayed
     */
    public void setDisplayed(boolean displayed) {

        m_displayed = displayed;
    }

    /**
     * Sets the display name. If the given name is empty or equals {@link #IGNORE_DISPLAY_NAME}
     * the field is not displayed.<p>
     *
     * @param displayName the display name to set
     */
    public void setDisplayName(String displayName) {

        if (CmsStringUtil.isEmpty(displayName) || (IGNORE_DISPLAY_NAME.equals(displayName))) {
            m_displayName = null;
            setDisplayed(false);
        } else {
            m_displayName = displayName;
            m_displayNameForConfiguration = displayName;
            setDisplayed(true);
        }
    }

    /**
     * Sets the displayNameForConfiguration and applies it as display name,
     * see {@link #setDisplayName(String)}.<p>
     *
     * @param displayNameForConfiguration the displayNameForConfiguration to set
     */
    public void setDisplayNameForConfiguration(String displayNameForConfiguration) {

        m_displayNameForConfiguration = displayNameForConfiguration;
        setDisplayName(displayNameForConfiguration);
    }

    /**
     * Controls if the content of this field is indexed (and possibly tokenized) in the Lucene index from a String parameter.<p>
     *
     * This sets the values for {@link #isIndexed()} as well as {@link #isTokenizedAndIndexed()}.<p>
     *
     * The parameter is trimmed and matched case insensitively. It can have the following values:
     * <ul>
     * <li><b>"true"</b> or <b>"tokenized"</b>: The field is indexed and tokenized.
     * <li><b>"false"</b> or <b>"no"</b>: The field is not indexed and not tokenized.
     * <li><b>"untokenized"</b>: The field is indexed but not tokenized.
     * </ul>
     *
     * A <code>null</code> or unknown value is treated like <b>"false"</b>.<p>
     *
     * @param indexed the index setting to use
     *
     * @see #setIndexed(boolean)
     * @see #setTokenized(boolean)
     */
    public void setIndexed(String indexed) {

        boolean isIndexed = false;
        boolean isTokenized = false;
        if (indexed != null) {
            // Locale.ROOT keeps keyword matching independent of the JVM default locale
            // (e.g. a Turkish locale would lower case "TOKENIZED" to "tokenızed")
            indexed = indexed.trim().toLowerCase(Locale.ROOT);
            if (STR_TOKENIZED.equals(indexed)) {
                isIndexed = true;
                isTokenized = true;
            } else if (STR_UN_TOKENIZED.equals(indexed)) {
                isIndexed = true;
            } else if (STR_NO.equals(indexed)) {
                // "no", both values will be false
            } else {
                // only "true" or "false" remain
                isIndexed = Boolean.parseBoolean(indexed);
                isTokenized = isIndexed;
            }
        }
        setIndexed(isIndexed);
        setTokenized(isTokenized);
    }

    /**
     * Controls if this fields content is used in the search result excerpt.<p>
     *
     * @param excerpt if <code>"true"</code>, then this fields content is used in the search excerpt
     *
     * @see #setInExcerpt(boolean)
     */
    public void setInExcerpt(String excerpt) {

        // parseBoolean treats null like any other non-"true" value, i.e. as false
        setInExcerpt(Boolean.parseBoolean(excerpt));
    }

    /**
     * Controls if the content of this field is stored in the Lucene index from a String parameter.<p>
     *
     * The parameter is trimmed and matched case insensitively. It can have the following values:
     * <ul>
     * <li><b>"compress"</b>: The field is stored and compressed.
     * <li><b>"true"</b> or <b>"yes"</b>: The field is stored but not compressed.
     * <li>any other value or <code>null</code>: The field is not stored.
     * </ul>
     *
     * @param stored the store setting to use
     *
     * @see #setStored(boolean)
     */
    public void setStored(String stored) {

        boolean isStored = false;
        boolean isCompressed = false;
        if (stored != null) {
            // Locale.ROOT keeps keyword matching independent of the JVM default locale
            stored = stored.trim().toLowerCase(Locale.ROOT);
            if (STR_COMPRESS.equals(stored)) {
                isCompressed = true;
                isStored = true;
            } else if (STR_YES.equals(stored)) {
                // "yes", value will be stored but not compressed
                isStored = true;
            } else {
                // only "true" or "false" remain
                isStored = Boolean.parseBoolean(stored);
            }
        }
        setStored(isStored);
        setCompressed(isCompressed);
    }

    /**
     * Controls if the content of this field is tokenized in the Lucene index.<p>
     *
     * Please refer to the Lucene documentation about the concept behind tokenized and untokenized fields.<p>
     *
     * @param tokenized if <code>true</code>, then the field content is tokenized
     *
     * @see #setIndexed(boolean)
     */
    public void setTokenized(boolean tokenized) {

        m_tokenized = tokenized;
    }

    /**
     * Sets the type.<p>
     *
     * @param type the type to set
     */
    public void setType(String type) {

        m_type = type;
    }
}