001/* 002 * File : $Source$ 003 * Date : $Date$ 004 * Version: $Revision$ 005 * 006 * This library is part of OpenCms - 007 * the Open Source Content Management System 008 * 009 * Copyright (C) 2002 - 2009 Alkacon Software (http://www.alkacon.com) 010 * 011 * This library is free software; you can redistribute it and/or 012 * modify it under the terms of the GNU Lesser General Public 013 * License as published by the Free Software Foundation; either 014 * version 2.1 of the License, or (at your option) any later version. 015 * 016 * This library is distributed in the hope that it will be useful, 017 * but WITHOUT ANY WARRANTY; without even the implied warranty of 018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 019 * Lesser General Public License for more details. 020 * 021 * For further information about Alkacon Software, please see the 022 * company website: http://www.alkacon.com 023 * 024 * For further information about OpenCms, please see the 025 * project website: http://www.opencms.org 026 * 027 * You should have received a copy of the GNU Lesser General Public 028 * License along with this library; if not, write to the Free Software 029 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 030 */ 031 032package org.opencms.search.fields; 033 034import org.opencms.util.CmsStringUtil; 035 036import java.io.Serializable; 037import java.util.ArrayList; 038import java.util.List; 039import java.util.Map; 040 041import org.apache.solr.uninverting.UninvertingReader.Type; 042 043/** 044 * A abstract implementation for a search field.<p> 045 * 046 * @since 8.5.0 047 */ 048public class CmsSearchField implements Serializable { 049 050 /** Name of the field that contains the (optional) category of the document (hardcoded). */ 051 public static final String FIELD_CATEGORY = "category"; 052 053 /** Name of the field that usually contains the complete content of the document (optional). */ 054 public static final String FIELD_CONTENT = "content"; 055 056 /** Name of the field that contains the complete extracted content of the document as serialized object (hardcoded). */ 057 public static final String FIELD_CONTENT_BLOB = "contentblob"; 058 059 /** Name of the field that contains the locale of the document. */ 060 public static final String FIELD_CONTENT_LOCALES = "con_locales"; 061 062 /** Name of the field that contains the document content date (hardcoded). */ 063 public static final String FIELD_DATE_CONTENT = "contentdate"; 064 065 /** Name of the field that contains the document creation date (hardcoded). */ 066 public static final String FIELD_DATE_CREATED = "created"; 067 068 /** Name of the field that contains the document creation date for fast lookup (hardcoded). */ 069 public static final String FIELD_DATE_CREATED_LOOKUP = "created_lookup"; 070 071 /** The field name for the expiration date. */ 072 public static final String FIELD_DATE_EXPIRED = "expired"; 073 074 /** Name of the field that contains the document last modification date (hardcoded). */ 075 public static final String FIELD_DATE_LASTMODIFIED = "lastmodified"; 076 077 /** Name of the field that contains the document last modification date for fast lookup (hardcoded). */ 078 public static final String FIELD_DATE_LASTMODIFIED_LOOKUP = "lastmodified_lookup"; 079 080 /** The lookup suffix for date fields. */ 081 public static final String FIELD_DATE_LOOKUP_SUFFIX = "_lookup"; 082 083 /** The field name for the release date. */ 084 public static final String FIELD_DATE_RELEASED = "released"; 085 086 /** The dependency type. */ 087 public static final String FIELD_DEPENDENCY_TYPE = "dependencyType"; 088 089 /** Name of the field that usually contains the value of the "Description" property of the document (optional). */ 090 public static final String FIELD_DESCRIPTION = "description"; 091 092 /** Name of the dynamic exact field. */ 093 public static final String FIELD_DYNAMIC_EXACT = "_exact"; 094 095 /** Name of the dynamic property field (searched properties). */ 096 public static final String FIELD_DYNAMIC_PROPERTIES = "_prop"; 097 098 /** Name of the dynamic property field (non-searched properties). */ 099 public static final String FIELD_DYNAMIC_PROPERTIES_DIRECT = "_dprop"; 100 101 /** The name of the dynamic field that stores the shortened value of the content field in order to save performance. */ 102 public static final String FIELD_EXCERPT = "_excerpt"; 103 104 /** Name of the field that contains the filename. */ 105 public static final String FIELD_FILENAME = "filename"; 106 107 /** Name of the field that contains the documents structure id. */ 108 public static final String FIELD_ID = "id"; 109 110 /** Name of the field that usually contains the value of the "Keywords" property of the document (optional). */ 111 public static final String FIELD_KEYWORDS = "keywords"; 112 113 /** The field name for the link. */ 114 public static final String FIELD_LINK = "link"; 115 116 /** 117 * Name of the field that usually combines all document "meta" information, 118 * that is the values of the "Title", "Keywords" and "Description" properties (optional). 119 */ 120 public static final String FIELD_META = "meta"; 121 122 /** Name of the field that contains the mime type. */ 123 public static final String FIELD_MIMETYPE = "mimetype"; 124 125 /** Name of the field that contains all VFS parent folders of a document (hardcoded). */ 126 public static final String FIELD_PARENT_FOLDERS = "parent-folders"; 127 128 /** Name of the field that contains the document root path in the VFS (hardcoded). */ 129 public static final String FIELD_PATH = "path"; 130 131 /** The prefix used to store dependency fields. */ 132 public static final String FIELD_PREFIX_DEPENDENCY = "dep_"; 133 134 /** The prefix for dynamic fields. */ 135 public static final String FIELD_PREFIX_DYNAMIC = "*_"; 136 137 /** The default text field prefix. */ 138 public static final String FIELD_PREFIX_TEXT = "text_"; 139 140 /** The default string field postfix. */ 141 public static final String FIELD_POSTFIX_STRING = "_s"; 142 143 /** The default (single-valued) date field postfix. */ 144 public static final String FIELD_POSTFIX_DATE = "_dt"; 145 146 /** The default (multi-valued) dates field postfix. */ 147 public static final String FIELD_POSTFIX_DATES = "_dts"; 148 149 /** The default (single-valued) date range field postfix. */ 150 public static final String FIELD_POSTFIX_DATE_RANGE = "_dr"; 151 152 /** The default (multi-valued) date range field postfix. */ 153 public static final String FIELD_POSTFIX_DATE_RANGES = "_drs"; 154 155 /** The default int field postfix. */ 156 public static final String FIELD_POSTFIX_INT = "_i"; 157 158 /** The default local field postfix. */ 159 public static final String FIELD_POSTFIX_LOC = "_loc"; 160 161 /** The default field postfix for alpha-numeric sorting. */ 162 public static final String FIELD_POSTFIX_SORT = "_sort"; 163 164 /** 165 * Name of the field that contains the (optional) document priority, 166 * which can be used to boost the document in the result list (hardcoded). 167 */ 168 public static final String FIELD_PRIORITY = "priority"; 169 170 /** Name of the field that contains the resource locales of the document. */ 171 public static final String FIELD_RESOURCE_LOCALES = "res_locales"; 172 173 /** The name of the score field. */ 174 public static final String FIELD_SCORE = "score"; 175 176 /** Name of the field that contains the searched property value of 'search.exclude'. */ 177 public static final String FIELD_SEARCH_EXCLUDE = "search_exclude"; 178 179 /** Name of the field that usually contains file size. */ 180 public static final String FIELD_SIZE = "size"; 181 182 /** Name of the field that contains the lower-case title, untokenized, for sorting. */ 183 public static final String FIELD_SORT_TITLE = "sort-title"; 184 185 /** Name of the field that contains the resource state. */ 186 public static final String FIELD_STATE = "state"; 187 188 /** Name of the field that contains the file name suffix of the resource. */ 189 public static final String FIELD_SUFFIX = "suffix"; 190 191 /** Name of the field that contains the general text of a resource and also serves as prefix. */ 192 public static final String FIELD_TEXT = "text"; 193 194 /** 195 * Name of the field that usually contains the value of the "Title" property of the document 196 * as a keyword used for sorting and also for retrieving the title text (optional). 197 * 198 * Please note: This field should NOT be used for searching. Use {@link #FIELD_TITLE_UNSTORED} instead.<p> 199 */ 200 public static final String FIELD_TITLE = "title-key"; 201 202 /** 203 * Name of the field that usually contains the value of the "Title" property of the document 204 * in an analyzed form used for searching in the title (optional). 205 */ 206 public static final String FIELD_TITLE_UNSTORED = "title"; 207 208 // TODO: Comments 209 public static final String FIELD_TIMESTAMP = "timestamp"; 210 public static final String FIELD_PATH_HIERARCHY = "path_hierarchy"; 211 public static final String FIELD_CATEGORY_EXACT = "category_exact"; 212 public static final String FIELD_PLACE = "place"; 213 public static final String FIELD_SPELL = "spell"; 214 // TODO: concat those field names; "text" + locale, where needed like content fields or exceprt fields 215 public static final String FIELD_TEXT_EN = "text_en"; 216 public static final String FIELD_TEXT_DE = "text_de"; 217 public static final String FIELD_TEXT_EL = "text_el"; 218 public static final String FIELD_TEXT_ES = "text_es"; 219 public static final String FIELD_TEXT_FR = "text_fr"; 220 public static final String FIELD_TEXT_HU = "text_hu"; 221 public static final String FIELD_TEXT_IT = "text_it"; 222 public static final String FIELD_SEARCH_CHANNEL = "search_channel"; 223 224 /** The field PREFIX of the fields that contain the display title (without locale and postfix "_s"). */ 225 public static final String FIELD_DISPTITLE = "disptitle"; 226 227 /** The field PREFIX of the fields that contain the display order (without locale and postfix "_i"). */ 228 public static final String FIELD_DISPORDER = "disporder"; 229 230 /** Name of the field that contains Geo coordinates. */ 231 public static final String FIELD_GEOCOORDS = "geocoords" + FIELD_POSTFIX_LOC; 232 233 /** The field PREFIX where the start date for the single entry of a serial date entry set is stored. */ 234 public static final String FIELD_INSTANCEDATE = "instancedate"; 235 236 /** The field PREFIX where the end date for the single entry of a serial date entry set is stored. */ 237 public static final String FIELD_INSTANCEDATE_END = "instancedateend"; 238 239 /** The field PREFIX where the date until which the single entry of a serial date entry should be treated as "current" is stored. */ 240 public static final String FIELD_INSTANCEDATE_CURRENT_TILL = "instancedatecurrenttill"; 241 242 /** The field PREFIX where the start date and the end date of the single entry of a serial date entry is stored as a date range. */ 243 public static final String FIELD_INSTANCEDATE_RANGE = "instancedaterange"; 244 245 /** The field where the dates for a serial date are stored. */ 246 public static final String FIELD_SERIESDATES = "seriesdates" + FIELD_POSTFIX_DATES; 247 248 /** The field where the end dates for a serial date are stored. 249 * NOTE: The field is only used during indexing and not stored in the content itself. 250 */ 251 public static final String FIELD_SERIESDATES_END = "seriesdatesend" + FIELD_POSTFIX_DATES; 252 253 /** The field where the dates until when the single serial dates are treated as "current" are stored. 254 * NOTE: The field is only used during indexing and not stored in the content itself. 255 */ 256 public static final String FIELD_SERIESDATES_CURRENT_TILL = "seriesdatescurrenttill" + FIELD_POSTFIX_DATES; 257 258 /** The field where the type of the date series is stored. */ 259 public static final String FIELD_SERIESDATES_TYPE = "seriesdatestype" + FIELD_POSTFIX_STRING; 260 261 /** Name of the field that contains the type of the document. */ 262 public static final String FIELD_TYPE = "type"; 263 264 /** Name of the field that contains the user created. */ 265 public static final String FIELD_USER_CREATED = "userCreated"; 266 267 /** Name of the field that contains the user last modified. */ 268 public static final String FIELD_USER_LAST_MODIFIED = "userLastModified"; 269 270 /** Name of the field that contains the latest version number of the resource. */ 271 public static final String FIELD_VERSION = "version"; 272 273 /** Name of the field that contains the unique Solr id. */ 274 public static final String FIELD_SOLR_ID = "solr_id"; 275 276 /** Serial version UID. */ 277 private static final long serialVersionUID = 3185631015824549119L; 278 279 /** A default value for the field in case the content does not provide the value. */ 280 private String m_defaultValue; 281 282 /** Indicates if this field should be used for generating the excerpt. */ 283 private boolean m_excerpt; 284 285 /** Indicates if the content of this field should be indexed. */ 286 private boolean m_indexed; 287 288 /** The search field mappings. */ 289 private List<I_CmsSearchFieldMapping> m_mappings; 290 291 /** The name of the field. */ 292 private String m_name; 293 294 /** Indicates if the content of this field should be stored. */ 295 private boolean m_stored; 296 297 /** 298 * Creates a new search field.<p> 299 */ 300 public CmsSearchField() { 301 302 m_mappings = new ArrayList<I_CmsSearchFieldMapping>(); 303 } 304 305 /** 306 * Creates a new search field.<p> 307 * 308 * @param name the name of the field, see {@link #setName(String)} 309 * @param defaultValue the default value to use, see {@link #setDefaultValue(String)} 310 * 311 */ 312 public CmsSearchField(String name, String defaultValue) { 313 314 this(); 315 m_name = name; 316 m_defaultValue = defaultValue; 317 } 318 319 /** To allow sorting on a field the field must be added to the map given to {@link org.apache.solr.uninverting.UninvertingReader#wrap(org.apache.lucene.index.DirectoryReader, Map)}. 320 * The method adds all default fields. 321 * @param uninvertingMap the map to which the fields are added. 322 */ 323 public static void addUninvertingMappings(Map<String, Type> uninvertingMap) { 324 325 uninvertingMap.put(FIELD_CATEGORY, Type.SORTED); 326 uninvertingMap.put(FIELD_CONTENT, Type.SORTED); 327 uninvertingMap.put(FIELD_CONTENT_BLOB, Type.SORTED); 328 uninvertingMap.put(FIELD_CONTENT_LOCALES, Type.SORTED); 329 uninvertingMap.put(FIELD_DATE_CONTENT, Type.SORTED); 330 uninvertingMap.put(FIELD_DATE_CREATED, Type.SORTED); 331 uninvertingMap.put(FIELD_DATE_CREATED_LOOKUP, Type.SORTED); 332 uninvertingMap.put(FIELD_DATE_EXPIRED, Type.SORTED); 333 uninvertingMap.put(FIELD_DATE_LASTMODIFIED, Type.SORTED); 334 uninvertingMap.put(FIELD_DATE_LASTMODIFIED_LOOKUP, Type.SORTED); 335 uninvertingMap.put(FIELD_DATE_LOOKUP_SUFFIX, Type.SORTED); 336 uninvertingMap.put(FIELD_DATE_RELEASED, Type.SORTED); 337 uninvertingMap.put(FIELD_DEPENDENCY_TYPE, Type.SORTED); 338 uninvertingMap.put(FIELD_DESCRIPTION, Type.SORTED); 339 uninvertingMap.put(FIELD_DYNAMIC_EXACT, Type.SORTED); 340 uninvertingMap.put(FIELD_DYNAMIC_PROPERTIES, Type.SORTED); 341 uninvertingMap.put(FIELD_EXCERPT, Type.SORTED); 342 uninvertingMap.put(FIELD_FILENAME, Type.SORTED); 343 uninvertingMap.put(FIELD_ID, Type.SORTED); 344 uninvertingMap.put(FIELD_KEYWORDS, Type.SORTED); 345 uninvertingMap.put(FIELD_LINK, Type.SORTED); 346 uninvertingMap.put(FIELD_META, Type.SORTED); 347 uninvertingMap.put(FIELD_MIMETYPE, Type.SORTED); 348 uninvertingMap.put(FIELD_PARENT_FOLDERS, Type.SORTED); 349 uninvertingMap.put(FIELD_PATH, Type.SORTED); 350 uninvertingMap.put(FIELD_PREFIX_DEPENDENCY, Type.SORTED); 351 uninvertingMap.put(FIELD_PREFIX_DYNAMIC, Type.SORTED); 352 uninvertingMap.put(FIELD_PREFIX_TEXT, Type.SORTED); 353 uninvertingMap.put(FIELD_PRIORITY, Type.SORTED); 354 uninvertingMap.put(FIELD_RESOURCE_LOCALES, Type.SORTED); 355 uninvertingMap.put(FIELD_SCORE, Type.SORTED); 356 uninvertingMap.put(FIELD_SEARCH_EXCLUDE, Type.SORTED); 357 uninvertingMap.put(FIELD_SIZE, Type.SORTED); 358 uninvertingMap.put(FIELD_SORT_TITLE, Type.SORTED); 359 uninvertingMap.put(FIELD_STATE, Type.SORTED); 360 uninvertingMap.put(FIELD_SUFFIX, Type.SORTED); 361 uninvertingMap.put(FIELD_TEXT, Type.SORTED); 362 uninvertingMap.put(FIELD_TITLE, Type.SORTED); 363 uninvertingMap.put(FIELD_TITLE_UNSTORED, Type.SORTED); 364 uninvertingMap.put(FIELD_TYPE, Type.SORTED); 365 uninvertingMap.put(FIELD_USER_CREATED, Type.SORTED); 366 uninvertingMap.put(FIELD_USER_LAST_MODIFIED, Type.SORTED); 367 uninvertingMap.put(FIELD_VERSION, Type.SORTED); 368 } 369 370 /** 371 * Adds a new field mapping to the internal list of mappings.<p> 372 * 373 * @param mapping the mapping to add 374 */ 375 public void addMapping(I_CmsSearchFieldMapping mapping) { 376 377 m_mappings.add(mapping); 378 } 379 380 /** 381 * Two fields are equal if the name of the Lucene field is equal.<p> 382 * 383 * @see java.lang.Object#equals(java.lang.Object) 384 */ 385 @Override 386 public boolean equals(Object obj) { 387 388 if ((obj instanceof CmsSearchField)) { 389 return CmsStringUtil.isEqual(m_name, ((CmsSearchField)obj).getName()); 390 } 391 return false; 392 } 393 394 /** 395 * Returns the default value to use if no content for this field was collected.<p> 396 * 397 * In case no default is configured, <code>null</code> is returned.<p> 398 * 399 * @return the default value to use if no content for this field was collected 400 */ 401 public String getDefaultValue() { 402 403 return m_defaultValue; 404 } 405 406 /** 407 * Returns the String value state of this field if it is indexed (and possibly tokenized) in the index.<p> 408 * 409 * <b>IMPORTANT:</b> Not supported by Solr 410 * 411 * @return the String value state of this field if it is indexed (and possibly tokenized) in the index 412 */ 413 public String getIndexed() { 414 415 return null; 416 } 417 418 /** 419 * Returns the mappings for this field.<p> 420 * 421 * @return the mappings for this field 422 */ 423 public List<I_CmsSearchFieldMapping> getMappings() { 424 425 return m_mappings; 426 } 427 428 /** 429 * Returns the name of this field in the Lucene search index.<p> 430 * 431 * @return the name of this field in the Lucene search index 432 */ 433 public String getName() { 434 435 return m_name; 436 } 437 438 /** 439 * The hash code for a field is based only on the field name.<p> 440 * 441 * @see java.lang.Object#hashCode() 442 */ 443 @Override 444 public int hashCode() { 445 446 return m_name == null ? 41 : m_name.hashCode(); 447 } 448 449 /** 450 * Returns the indexed.<p> 451 * 452 * @return the indexed 453 */ 454 public boolean isIndexed() { 455 456 return m_indexed; 457 } 458 459 /** 460 * Returns <code>true</code> if this fields content is used in the search result excerpt.<p> 461 * 462 * @return <code>true</code> if this fields content is used in the search result excerpt 463 * 464 * @see #isStored() 465 */ 466 public boolean isInExcerpt() { 467 468 return m_excerpt; 469 } 470 471 /** 472 * Returns <code>true</code> if the content of this field is stored in the Lucene index.<p> 473 * 474 * Please refer to the Lucene documentation about {@link org.apache.lucene.document.Field.Store} 475 * for the concept behind stored and unstored fields.<p> 476 * 477 * @return <code>true</code> if the content of this field is stored in the Lucene index 478 */ 479 public boolean isStored() { 480 481 return m_stored; 482 } 483 484 /** 485 * Sets the default value to use if no content for this field was collected.<p> 486 * 487 * @param defaultValue the default value to set 488 */ 489 public void setDefaultValue(String defaultValue) { 490 491 if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(defaultValue)) { 492 m_defaultValue = defaultValue.trim(); 493 } else { 494 m_defaultValue = null; 495 } 496 } 497 498 /** 499 * Controls if the content of this field is indexed (and possibly tokenized) in the Lucene index.<p> 500 * 501 * @param indexed the indexed to set 502 */ 503 public void setIndexed(boolean indexed) { 504 505 m_indexed = indexed; 506 } 507 508 /** 509 * Controls if this fields content is used in the search result excerpt.<p> 510 * 511 * @param excerpt if <code>true</code>, then this fields content is used in the search excerpt 512 */ 513 public void setInExcerpt(boolean excerpt) { 514 515 m_excerpt = excerpt; 516 } 517 518 /** 519 * Sets the name of this field in the Lucene search index.<p> 520 * 521 * @param fieldName the name to set 522 */ 523 public void setName(String fieldName) { 524 525 m_name = fieldName; 526 } 527 528 /** 529 * Controls if the content of this field is stored in the Lucene index.<p> 530 * 531 * Please refer to the Lucene documentation about {@link org.apache.lucene.document.Field.Store} 532 * for the concept behind stored and unstored fields.<p> 533 * 534 * @param stored if <code>true</code>, then the field content is stored 535 */ 536 public void setStored(boolean stored) { 537 538 m_stored = stored; 539 } 540 541 /** 542 * @see java.lang.Object#toString() 543 */ 544 @Override 545 public String toString() { 546 547 return getName(); 548 } 549}