001/*
002 * File   : $Source$
003 * Date   : $Date$
004 * Version: $Revision$
005 *
006 * This library is part of OpenCms -
007 * the Open Source Content Management System
008 *
009 * Copyright (C) 2002 - 2009 Alkacon Software (http://www.alkacon.com)
010 *
011 * This library is free software; you can redistribute it and/or
012 * modify it under the terms of the GNU Lesser General Public
013 * License as published by the Free Software Foundation; either
014 * version 2.1 of the License, or (at your option) any later version.
015 *
016 * This library is distributed in the hope that it will be useful,
017 * but WITHOUT ANY WARRANTY; without even the implied warranty of
018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
019 * Lesser General Public License for more details.
020 *
021 * For further information about Alkacon Software, please see the
022 * company website: http://www.alkacon.com
023 *
024 * For further information about OpenCms, please see the
025 * project website: http://www.opencms.org
026 *
027 * You should have received a copy of the GNU Lesser General Public
028 * License along with this library; if not, write to the Free Software
029 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
030 */
031
032package org.opencms.search.fields;
033
034import org.opencms.util.CmsStringUtil;
035
036import java.io.Serializable;
037import java.util.ArrayList;
038import java.util.List;
039import java.util.Map;
040
041import org.apache.solr.uninverting.UninvertingReader.Type;
042
043/**
044 * A abstract implementation for a search field.<p>
045 *
046 * @since 8.5.0
047 */
048public class CmsSearchField implements Serializable {
049
050    /** Name of the field that contains the (optional) category of the document (hardcoded). */
051    public static final String FIELD_CATEGORY = "category";
052
053    /** Name of the field that usually contains the complete content of the document (optional). */
054    public static final String FIELD_CONTENT = "content";
055
056    /** Name of the field that contains the complete extracted content of the document as serialized object (hardcoded). */
057    public static final String FIELD_CONTENT_BLOB = "contentblob";
058
059    /** Name of the field that contains the locale of the document. */
060    public static final String FIELD_CONTENT_LOCALES = "con_locales";
061
062    /** Name of the field that contains the document content date (hardcoded). */
063    public static final String FIELD_DATE_CONTENT = "contentdate";
064
065    /** Name of the field that contains the document creation date (hardcoded). */
066    public static final String FIELD_DATE_CREATED = "created";
067
068    /** Name of the field that contains the document creation date for fast lookup (hardcoded). */
069    public static final String FIELD_DATE_CREATED_LOOKUP = "created_lookup";
070
071    /** The field name for the expiration date. */
072    public static final String FIELD_DATE_EXPIRED = "expired";
073
074    /** Name of the field that contains the document last modification date (hardcoded). */
075    public static final String FIELD_DATE_LASTMODIFIED = "lastmodified";
076
077    /** Name of the field that contains the document last modification date for fast lookup (hardcoded). */
078    public static final String FIELD_DATE_LASTMODIFIED_LOOKUP = "lastmodified_lookup";
079
080    /** The lookup suffix for date fields. */
081    public static final String FIELD_DATE_LOOKUP_SUFFIX = "_lookup";
082
083    /** The field name for the release date. */
084    public static final String FIELD_DATE_RELEASED = "released";
085
086    /** The dependency type. */
087    public static final String FIELD_DEPENDENCY_TYPE = "dependencyType";
088
089    /** Name of the field that usually contains the value of the "Description" property of the document (optional). */
090    public static final String FIELD_DESCRIPTION = "description";
091
092    /** Name of the dynamic exact field. */
093    public static final String FIELD_DYNAMIC_EXACT = "_exact";
094
095    /** Name of the dynamic property field (searched properties). */
096    public static final String FIELD_DYNAMIC_PROPERTIES = "_prop";
097
098    /** Name of the dynamic property field (non-searched properties). */
099    public static final String FIELD_DYNAMIC_PROPERTIES_DIRECT = "_dprop";
100
101    /** The name of the dynamic field that stores the shortened value of the content field in order to save performance. */
102    public static final String FIELD_EXCERPT = "_excerpt";
103
104    /** Name of the field that contains the filename. */
105    public static final String FIELD_FILENAME = "filename";
106
107    /** Name of the field that contains the documents structure id. */
108    public static final String FIELD_ID = "id";
109
110    /** Name of the field that usually contains the value of the "Keywords" property of the document (optional). */
111    public static final String FIELD_KEYWORDS = "keywords";
112
113    /** The field name for the link. */
114    public static final String FIELD_LINK = "link";
115
116    /**
117     * Name of the field that usually combines all document "meta" information,
118     * that is the values of the "Title", "Keywords" and "Description" properties (optional).
119     */
120    public static final String FIELD_META = "meta";
121
122    /** Name of the field that contains the mime type. */
123    public static final String FIELD_MIMETYPE = "mimetype";
124
125    /** Name of the field that contains all VFS parent folders of a document (hardcoded). */
126    public static final String FIELD_PARENT_FOLDERS = "parent-folders";
127
128    /** Name of the field that contains the document root path in the VFS (hardcoded). */
129    public static final String FIELD_PATH = "path";
130
131    /** The prefix used to store dependency fields. */
132    public static final String FIELD_PREFIX_DEPENDENCY = "dep_";
133
134    /** The prefix for dynamic fields. */
135    public static final String FIELD_PREFIX_DYNAMIC = "*_";
136
137    /** The default text field prefix. */
138    public static final String FIELD_PREFIX_TEXT = "text_";
139
140    /** The default string field postfix. */
141    public static final String FIELD_POSTFIX_STRING = "_s";
142
143    /** The default (single-valued) date field postfix. */
144    public static final String FIELD_POSTFIX_DATE = "_dt";
145
146    /** The default (multi-valued) dates field postfix. */
147    public static final String FIELD_POSTFIX_DATES = "_dts";
148
149    /** The default (single-valued) date range field postfix. */
150    public static final String FIELD_POSTFIX_DATE_RANGE = "_dr";
151
152    /** The default (multi-valued) date range field postfix. */
153    public static final String FIELD_POSTFIX_DATE_RANGES = "_drs";
154
155    /** The default int field postfix. */
156    public static final String FIELD_POSTFIX_INT = "_i";
157
158    /** The default local field postfix. */
159    public static final String FIELD_POSTFIX_LOC = "_loc";
160
161    /** The default field postfix for alpha-numeric sorting. */
162    public static final String FIELD_POSTFIX_SORT = "_sort";
163
164    /**
165     * Name of the field that contains the (optional) document priority,
166     * which can be used to boost the document in the result list (hardcoded).
167     */
168    public static final String FIELD_PRIORITY = "priority";
169
170    /** Name of the field that contains the resource locales of the document. */
171    public static final String FIELD_RESOURCE_LOCALES = "res_locales";
172
173    /** The name of the score field. */
174    public static final String FIELD_SCORE = "score";
175
176    /** Name of the field that contains the searched property value of 'search.exclude'. */
177    public static final String FIELD_SEARCH_EXCLUDE = "search_exclude";
178
179    /** Name of the field that usually contains file size. */
180    public static final String FIELD_SIZE = "size";
181
182    /** Name of the field that contains the lower-case title, untokenized, for sorting. */
183    public static final String FIELD_SORT_TITLE = "sort-title";
184
185    /** Name of the field that contains the resource state. */
186    public static final String FIELD_STATE = "state";
187
188    /** Name of the field that contains the file name suffix of the resource. */
189    public static final String FIELD_SUFFIX = "suffix";
190
191    /** Name of the field that contains the general text of a resource and also serves as prefix. */
192    public static final String FIELD_TEXT = "text";
193
194    /**
195     * Name of the field that usually contains the value of the "Title" property of the document
196     * as a keyword used for sorting and also for retrieving the title text (optional).
197     *
198     * Please note: This field should NOT be used for searching. Use {@link #FIELD_TITLE_UNSTORED} instead.<p>
199     */
200    public static final String FIELD_TITLE = "title-key";
201
202    /**
203     * Name of the field that usually contains the value of the "Title" property of the document
204     * in an analyzed form used for searching in the title (optional).
205     */
206    public static final String FIELD_TITLE_UNSTORED = "title";
207
208    // TODO: Comments
209    public static final String FIELD_TIMESTAMP = "timestamp";
210    public static final String FIELD_PATH_HIERARCHY = "path_hierarchy";
211    public static final String FIELD_CATEGORY_EXACT = "category_exact";
212    public static final String FIELD_PLACE = "place";
213    public static final String FIELD_SPELL = "spell";
214    // TODO: concat those field names; "text" + locale, where needed like content fields or exceprt fields
215    public static final String FIELD_TEXT_EN = "text_en";
216    public static final String FIELD_TEXT_DE = "text_de";
217    public static final String FIELD_TEXT_EL = "text_el";
218    public static final String FIELD_TEXT_ES = "text_es";
219    public static final String FIELD_TEXT_FR = "text_fr";
220    public static final String FIELD_TEXT_HU = "text_hu";
221    public static final String FIELD_TEXT_IT = "text_it";
222    public static final String FIELD_SEARCH_CHANNEL = "search_channel";
223
224    /** The field PREFIX of the fields that contain the display title (without locale and postfix "_s"). */
225    public static final String FIELD_DISPTITLE = "disptitle";
226
227    /** The field PREFIX of the fields that contain the display order (without locale and postfix "_i"). */
228    public static final String FIELD_DISPORDER = "disporder";
229
230    /** Name of the field that contains Geo coordinates. */
231    public static final String FIELD_GEOCOORDS = "geocoords" + FIELD_POSTFIX_LOC;
232
233    /** The field PREFIX where the start date for the single entry of a serial date entry set is stored. */
234    public static final String FIELD_INSTANCEDATE = "instancedate";
235
236    /** The field PREFIX where the end date for the single entry of a serial date entry set is stored. */
237    public static final String FIELD_INSTANCEDATE_END = "instancedateend";
238
239    /** The field PREFIX where the date until which the single entry of a serial date entry should be treated as "current" is stored. */
240    public static final String FIELD_INSTANCEDATE_CURRENT_TILL = "instancedatecurrenttill";
241
242    /** The field PREFIX where the start date and the end date of the single entry of a serial date entry is stored as a date range. */
243    public static final String FIELD_INSTANCEDATE_RANGE = "instancedaterange";
244
245    /** The field where the dates for a serial date are stored. */
246    public static final String FIELD_SERIESDATES = "seriesdates" + FIELD_POSTFIX_DATES;
247
248    /** The field where the end dates for a serial date are stored.
249     *  NOTE: The field is only used during indexing and not stored in the content itself.
250     */
251    public static final String FIELD_SERIESDATES_END = "seriesdatesend" + FIELD_POSTFIX_DATES;
252
253    /** The field where the dates until when the single serial dates are treated as "current" are stored.
254     *  NOTE: The field is only used during indexing and not stored in the content itself.
255     */
256    public static final String FIELD_SERIESDATES_CURRENT_TILL = "seriesdatescurrenttill" + FIELD_POSTFIX_DATES;
257
258    /** The field where the type of the date series is stored. */
259    public static final String FIELD_SERIESDATES_TYPE = "seriesdatestype" + FIELD_POSTFIX_STRING;
260
261    /** Name of the field that contains the type of the document. */
262    public static final String FIELD_TYPE = "type";
263
264    /** Name of the field that contains the user created. */
265    public static final String FIELD_USER_CREATED = "userCreated";
266
267    /** Name of the field that contains the user last modified. */
268    public static final String FIELD_USER_LAST_MODIFIED = "userLastModified";
269
270    /** Name of the field that contains the latest version number of the resource. */
271    public static final String FIELD_VERSION = "version";
272
273    /** Name of the field that contains the unique Solr id. */
274    public static final String FIELD_SOLR_ID = "solr_id";
275
276    /** Serial version UID. */
277    private static final long serialVersionUID = 3185631015824549119L;
278
279    /** A default value for the field in case the content does not provide the value. */
280    private String m_defaultValue;
281
282    /** Indicates if this field should be used for generating the excerpt. */
283    private boolean m_excerpt;
284
285    /** Indicates if the content of this field should be indexed. */
286    private boolean m_indexed;
287
288    /** The search field mappings. */
289    private List<I_CmsSearchFieldMapping> m_mappings;
290
291    /** The name of the field. */
292    private String m_name;
293
294    /** Indicates if the content of this field should be stored. */
295    private boolean m_stored;
296
297    /**
298     * Creates a new search field.<p>
299     */
300    public CmsSearchField() {
301
302        m_mappings = new ArrayList<I_CmsSearchFieldMapping>();
303    }
304
305    /**
306     * Creates a new search field.<p>
307     *
308     * @param name the name of the field, see {@link #setName(String)}
309     * @param defaultValue the default value to use, see {@link #setDefaultValue(String)}
310     *
311     */
312    public CmsSearchField(String name, String defaultValue) {
313
314        this();
315        m_name = name;
316        m_defaultValue = defaultValue;
317    }
318
319    /** To allow sorting on a field the field must be added to the map given to {@link org.apache.solr.uninverting.UninvertingReader#wrap(org.apache.lucene.index.DirectoryReader, Map)}.
320     *  The method adds all default fields.
321     * @param uninvertingMap the map to which the fields are added.
322     */
323    public static void addUninvertingMappings(Map<String, Type> uninvertingMap) {
324
325        uninvertingMap.put(FIELD_CATEGORY, Type.SORTED);
326        uninvertingMap.put(FIELD_CONTENT, Type.SORTED);
327        uninvertingMap.put(FIELD_CONTENT_BLOB, Type.SORTED);
328        uninvertingMap.put(FIELD_CONTENT_LOCALES, Type.SORTED);
329        uninvertingMap.put(FIELD_DATE_CONTENT, Type.SORTED);
330        uninvertingMap.put(FIELD_DATE_CREATED, Type.SORTED);
331        uninvertingMap.put(FIELD_DATE_CREATED_LOOKUP, Type.SORTED);
332        uninvertingMap.put(FIELD_DATE_EXPIRED, Type.SORTED);
333        uninvertingMap.put(FIELD_DATE_LASTMODIFIED, Type.SORTED);
334        uninvertingMap.put(FIELD_DATE_LASTMODIFIED_LOOKUP, Type.SORTED);
335        uninvertingMap.put(FIELD_DATE_LOOKUP_SUFFIX, Type.SORTED);
336        uninvertingMap.put(FIELD_DATE_RELEASED, Type.SORTED);
337        uninvertingMap.put(FIELD_DEPENDENCY_TYPE, Type.SORTED);
338        uninvertingMap.put(FIELD_DESCRIPTION, Type.SORTED);
339        uninvertingMap.put(FIELD_DYNAMIC_EXACT, Type.SORTED);
340        uninvertingMap.put(FIELD_DYNAMIC_PROPERTIES, Type.SORTED);
341        uninvertingMap.put(FIELD_EXCERPT, Type.SORTED);
342        uninvertingMap.put(FIELD_FILENAME, Type.SORTED);
343        uninvertingMap.put(FIELD_ID, Type.SORTED);
344        uninvertingMap.put(FIELD_KEYWORDS, Type.SORTED);
345        uninvertingMap.put(FIELD_LINK, Type.SORTED);
346        uninvertingMap.put(FIELD_META, Type.SORTED);
347        uninvertingMap.put(FIELD_MIMETYPE, Type.SORTED);
348        uninvertingMap.put(FIELD_PARENT_FOLDERS, Type.SORTED);
349        uninvertingMap.put(FIELD_PATH, Type.SORTED);
350        uninvertingMap.put(FIELD_PREFIX_DEPENDENCY, Type.SORTED);
351        uninvertingMap.put(FIELD_PREFIX_DYNAMIC, Type.SORTED);
352        uninvertingMap.put(FIELD_PREFIX_TEXT, Type.SORTED);
353        uninvertingMap.put(FIELD_PRIORITY, Type.SORTED);
354        uninvertingMap.put(FIELD_RESOURCE_LOCALES, Type.SORTED);
355        uninvertingMap.put(FIELD_SCORE, Type.SORTED);
356        uninvertingMap.put(FIELD_SEARCH_EXCLUDE, Type.SORTED);
357        uninvertingMap.put(FIELD_SIZE, Type.SORTED);
358        uninvertingMap.put(FIELD_SORT_TITLE, Type.SORTED);
359        uninvertingMap.put(FIELD_STATE, Type.SORTED);
360        uninvertingMap.put(FIELD_SUFFIX, Type.SORTED);
361        uninvertingMap.put(FIELD_TEXT, Type.SORTED);
362        uninvertingMap.put(FIELD_TITLE, Type.SORTED);
363        uninvertingMap.put(FIELD_TITLE_UNSTORED, Type.SORTED);
364        uninvertingMap.put(FIELD_TYPE, Type.SORTED);
365        uninvertingMap.put(FIELD_USER_CREATED, Type.SORTED);
366        uninvertingMap.put(FIELD_USER_LAST_MODIFIED, Type.SORTED);
367        uninvertingMap.put(FIELD_VERSION, Type.SORTED);
368    }
369
370    /**
371     * Adds a new field mapping to the internal list of mappings.<p>
372     *
373     * @param mapping the mapping to add
374     */
375    public void addMapping(I_CmsSearchFieldMapping mapping) {
376
377        m_mappings.add(mapping);
378    }
379
380    /**
381     * Two fields are equal if the name of the Lucene field is equal.<p>
382     *
383     * @see java.lang.Object#equals(java.lang.Object)
384     */
385    @Override
386    public boolean equals(Object obj) {
387
388        if ((obj instanceof CmsSearchField)) {
389            return CmsStringUtil.isEqual(m_name, ((CmsSearchField)obj).getName());
390        }
391        return false;
392    }
393
394    /**
395     * Returns the default value to use if no content for this field was collected.<p>
396     *
397     * In case no default is configured, <code>null</code> is returned.<p>
398     *
399     * @return the default value to use if no content for this field was collected
400     */
401    public String getDefaultValue() {
402
403        return m_defaultValue;
404    }
405
406    /**
407     * Returns the String value state of this field if it is indexed (and possibly tokenized) in the index.<p>
408     *
409     * <b>IMPORTANT:</b> Not supported by Solr
410     *
411     * @return the String value state of this field if it is indexed (and possibly tokenized) in the index
412     */
413    public String getIndexed() {
414
415        return null;
416    }
417
418    /**
419     * Returns the mappings for this field.<p>
420     *
421     * @return the mappings for this field
422     */
423    public List<I_CmsSearchFieldMapping> getMappings() {
424
425        return m_mappings;
426    }
427
428    /**
429     * Returns the name of this field in the Lucene search index.<p>
430     *
431     * @return the name of this field in the Lucene search index
432     */
433    public String getName() {
434
435        return m_name;
436    }
437
438    /**
439     * The hash code for a field is based only on the field name.<p>
440     *
441     * @see java.lang.Object#hashCode()
442     */
443    @Override
444    public int hashCode() {
445
446        return m_name == null ? 41 : m_name.hashCode();
447    }
448
449    /**
450     * Returns the indexed.<p>
451     *
452     * @return the indexed
453     */
454    public boolean isIndexed() {
455
456        return m_indexed;
457    }
458
459    /**
460     * Returns <code>true</code> if this fields content is used in the search result excerpt.<p>
461     *
462     * @return <code>true</code> if this fields content is used in the search result excerpt
463     *
464     * @see #isStored()
465     */
466    public boolean isInExcerpt() {
467
468        return m_excerpt;
469    }
470
471    /**
472     * Returns <code>true</code> if the content of this field is stored in the Lucene index.<p>
473     *
474     * Please refer to the Lucene documentation about {@link org.apache.lucene.document.Field.Store}
475     * for the concept behind stored and unstored fields.<p>
476     *
477     * @return <code>true</code> if the content of this field is stored in the Lucene index
478     */
479    public boolean isStored() {
480
481        return m_stored;
482    }
483
484    /**
485     * Sets the default value to use if no content for this field was collected.<p>
486     *
487     * @param defaultValue the default value to set
488     */
489    public void setDefaultValue(String defaultValue) {
490
491        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(defaultValue)) {
492            m_defaultValue = defaultValue.trim();
493        } else {
494            m_defaultValue = null;
495        }
496    }
497
498    /**
499     * Controls if the content of this field is indexed (and possibly tokenized) in the Lucene index.<p>
500     *
501     * @param indexed the indexed to set
502     */
503    public void setIndexed(boolean indexed) {
504
505        m_indexed = indexed;
506    }
507
508    /**
509     * Controls if this fields content is used in the search result excerpt.<p>
510     *
511     * @param excerpt if <code>true</code>, then this fields content is used in the search excerpt
512     */
513    public void setInExcerpt(boolean excerpt) {
514
515        m_excerpt = excerpt;
516    }
517
518    /**
519     * Sets the name of this field in the Lucene search index.<p>
520     *
521     * @param fieldName the name to set
522     */
523    public void setName(String fieldName) {
524
525        m_name = fieldName;
526    }
527
528    /**
529     * Controls if the content of this field is stored in the Lucene index.<p>
530     *
531     * Please refer to the Lucene documentation about {@link org.apache.lucene.document.Field.Store}
532     * for the concept behind stored and unstored fields.<p>
533     *
534     * @param stored if <code>true</code>, then the field content is stored
535     */
536    public void setStored(boolean stored) {
537
538        m_stored = stored;
539    }
540
541    /**
542     * @see java.lang.Object#toString()
543     */
544    @Override
545    public String toString() {
546
547        return getName();
548    }
549}