001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.search.fields;
029
030import org.opencms.search.CmsSearchManager;
031import org.opencms.util.CmsStringUtil;
032
033import org.apache.lucene.analysis.Analyzer;
034import org.apache.lucene.document.Field;
035import org.apache.lucene.document.FieldType;
036import org.apache.lucene.index.IndexOptions;
037
038/**
039 * An individual field configuration in a Lucene search index.<p>
040 *
041 * @since 7.0.0
042 */
043public class CmsLuceneField extends CmsSearchField {
044
045    /** Value of m_displayName if field should not be displayed. */
046    public static final String IGNORE_DISPLAY_NAME = "-";
047
048    /** Constant for the "compress" index setting. */
049    public static final String STR_COMPRESS = "compress";
050
051    /** Constant for the "no" index setting. */
052    public static final String STR_NO = "no";
053
054    /** Constant for the "tokenized" index setting. */
055    public static final String STR_TOKENIZED = "tokenized";
056
057    /** Constant for the "untokenized" index setting. */
058    public static final String STR_UN_TOKENIZED = "untokenized";
059
060    /** Constant for the "yes" index setting. */
061    public static final String STR_YES = "yes";
062
063    /** The serial version UID. */
064    private static final long serialVersionUID = -4946013624087640706L;
065
066    /** The special analyzer to use for this field. */
067    private transient Analyzer m_analyzer;
068
069    /** Indicates if the content of this field is compressed. */
070    private boolean m_compressed;
071
072    /** Indicates if this field should be displayed. */
073    private boolean m_displayed;
074
075    /** The display name of the field. */
076    private String m_displayName;
077
078    /** The display name set from the configuration. */
079    private String m_displayNameForConfiguration;
080
081    /** Indicates if the content of this field should be tokenized. */
082    private boolean m_tokenized;
083
084    /** The type used to convert a field to a Solr field. */
085    private String m_type;
086
087    /**
088     * Creates a new search field configuration.<p>
089     */
090    public CmsLuceneField() {
091
092        super();
093    }
094
095    /**
096     * Creates a new search field configuration.<p>
097     *
098     * The field will be tokenized if it is indexed.
099     * The field will not be in the excerpt.
100     * There is no default value.<p>
101     *
102     * @param name the name of the field, see {@link #setName(String)}
103     * @param displayName the display name of this field, see {@link #setDisplayName(String)}
104     * @param isStored controls if the field is stored and in the excerpt, see {@link #setStored(boolean)}
105     * @param isIndexed controls if the field is indexed and tokenized, see {@link #setIndexed(boolean)}
106     */
107    public CmsLuceneField(String name, String displayName, boolean isStored, boolean isIndexed) {
108
109        this(name, displayName, isStored, isIndexed, isIndexed, false, null);
110    }
111
112    /**
113     * Creates a new search field configuration.<p>
114     *
115     * @param name the name of the field, see {@link #setName(String)}
116     * @param displayName the display name of this field, see {@link #setDisplayName(String)}
117     * @param isStored controls if the field is stored, see {@link #setStored(boolean)}
118     * @param isCompressed controls if the filed is compressed, see {@link #setCompressed(boolean)}
119     * @param isIndexed controls if the field is indexed, see {@link #setIndexed(boolean)}
120     * @param isTokenized controls if the field is tokenized, see {@link #setStored(boolean)}
121     * @param isInExcerpt controls if the field is in the excerpt, see {@link #isInExcerptAndStored()}
122     * @param analyzer the analyzer to use, see {@link #setAnalyzer(Analyzer)}
123     * @param defaultValue the default value for the field, see {@link #setDefaultValue(String)}
124     */
125    public CmsLuceneField(
126        String name,
127        String displayName,
128        boolean isStored,
129        boolean isCompressed,
130        boolean isIndexed,
131        boolean isTokenized,
132        boolean isInExcerpt,
133        Analyzer analyzer,
134        String defaultValue) {
135
136        super(name, defaultValue);
137        setDisplayName(displayName);
138        setStored(isStored);
139        setCompressed(isCompressed);
140        setIndexed(isIndexed);
141        setTokenized(isTokenized);
142        setInExcerpt(isInExcerpt);
143        setAnalyzer(analyzer);
144    }
145
146    /**
147     * Creates a new search field configuration.<p>
148     *
149     * @param name the name of the field, see {@link #setName(String)}
150     * @param displayName the display name of this field, see {@link #setDisplayName(String)}
151     * @param isStored controls if the field is stored, see {@link #setStored(boolean)}
152     * @param isIndexed controls if the field is indexed, see {@link #setIndexed(boolean)}
153     * @param isTokenized controls if the field is tokenized, see {@link #setStored(boolean)}
154     * @param isInExcerpt controls if the field is in the excerpt, see {@link #isInExcerptAndStored()}
155     * @param defaultValue the default value for the field, see {@link #setDefaultValue(String)}
156     */
157    public CmsLuceneField(
158        String name,
159        String displayName,
160        boolean isStored,
161        boolean isIndexed,
162        boolean isTokenized,
163        boolean isInExcerpt,
164        String defaultValue) {
165
166        this(name, displayName, isStored, false, isIndexed, isTokenized, isInExcerpt, null, defaultValue);
167    }
168
169    /**
170     * Closes the analyzer.<p>
171     */
172    public void closeAnalyzer() {
173
174        if (m_analyzer != null) {
175            m_analyzer.close();
176        }
177    }
178
179    /**
180     * Creates a field from the configuration and the provided content.<p>
181     *
182     * The configured name of the field as provided by {@link #getName()} is used.<p>
183     *
184     * If no valid content is provided (that is the content is either <code>null</code> or
185     * only whitespace), then no field is created and <code>null</code> is returned.<p>
186     *
187     * @param content the content to create the field with
188     *
189     * @return a field created from the configuration and the provided content
190     */
191    public Field createField(String content) {
192
193        return createField(getName(), content);
194    }
195
196    /**
197     * Creates a field with the given name from the configuration and the provided content.<p>
198     *
199     * If no valid content is provided (that is the content is either <code>null</code> or
200     * only whitespace), then no field is created and <code>null</code> is returned.<p>
201     *
202     * @param name the name of the field to create
203     * @param content the content to create the field with
204     *
205     * @return a field with the given name from the configuration and the provided content
206     */
207    public Field createField(String name, String content) {
208
209        if (CmsStringUtil.isEmptyOrWhitespaceOnly(content)) {
210            content = getDefaultValue();
211        }
212        if (content != null) {
213            final FieldType ft = new FieldType();
214            if (isIndexed()) {
215                if (isTokenizedAndIndexed()) {
216                    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
217                    ft.setTokenized(true);
218                } else {
219                    ft.setIndexOptions(IndexOptions.DOCS);
220                    ft.setTokenized(false);
221                }
222            }
223            ft.setStored(isStored() || isCompressed());
224            Field result = new Field(name, content, ft);
225            return result;
226        }
227        return null;
228    }
229
230    /**
231     * Returns the analyzer used for this field.<p>
232     *
233     * @return the analyzer used for this field
234     */
235    public Analyzer getAnalyzer() {
236
237        return m_analyzer;
238    }
239
240    /**
241     * Returns the display name of the field.<p>
242     *
243     * @return the display name of the field
244     */
245    public String getDisplayName() {
246
247        if (!isDisplayed()) {
248            return IGNORE_DISPLAY_NAME;
249        }
250        if (m_displayName == null) {
251            return getName();
252        } else {
253            return m_displayName;
254        }
255    }
256
257    /**
258     * Returns the displayNameForConfiguration.<p>
259     *
260     * @return the displayNameForConfiguration
261     */
262    public String getDisplayNameForConfiguration() {
263
264        return m_displayNameForConfiguration;
265    }
266
267    /**
268     * Returns the String value state of this field if it is indexed (and possibly tokenized) in the Lucene index.<p>
269     *
270     * @return the String value state of this field if it is indexed (and possibly tokenized) in the Lucene index
271     *
272     * @see #isTokenizedAndIndexed()
273     * @see #isIndexed()
274     */
275    @Override
276    public String getIndexed() {
277
278        if (isTokenizedAndIndexed()) {
279            return String.valueOf(isTokenizedAndIndexed());
280        }
281        if (isIndexed()) {
282            return STR_UN_TOKENIZED;
283        } else {
284            return String.valueOf(isIndexed());
285        }
286    }
287
288    /**
289     * Returns the type.<p>
290     *
291     * @return the type
292     */
293    public String getType() {
294
295        return m_type;
296    }
297
298    /**
299     * Returns <code>true</code> if the content of this field is compressed.<p>
300     *
301     * If the field is compressed, it must also be stored, this means
302     * {@link #isStored()} will always return <code>true</code> for compressed fields.<p>
303     *
304     * @return <code>true</code> if the content of this field is compressed
305     */
306    public boolean isCompressed() {
307
308        return m_compressed;
309    }
310
311    /**
312     * Returns true if the field should be displayed.<p>
313     *
314     * @return returns true if the field should be displayed otherwise false
315     */
316    public boolean isDisplayed() {
317
318        return m_displayed;
319    }
320
321    /**
322     * Returns <code>true</code> if this fields content is used in the search result excerpt.<p>
323     *
324     * A field can only be used in the excerpt if it is stored, see {@link #isStored()}.<p>
325     *
326     * @return <code>true</code> if this fields content is used in the search result excerpt
327     *
328     * @see #isStored()
329     */
330    public boolean isInExcerptAndStored() {
331
332        return isInExcerpt() && isStored();
333    }
334
335    /**
336     * Returns <code>true</code> if the content of this field is tokenized in the Lucene index.<p>
337     *
338     * Please refer to the Lucene documentation about the concept behind tokenized and untokenized fields.<p>
339     *
340     * @return <code>true</code> if the content of this field is tokenized in the Lucene index
341     */
342    public boolean isTokenized() {
343
344        return m_tokenized;
345    }
346
347    /**
348     * Returns <code>true</code> if the content of this field is tokenized in the Lucene index.<p>
349     *
350     * A field can only be tokenized if it is also indexed, see {@link #isIndexed()}.<p>
351     *
352     * Please refer to the Lucene documentation about the concept behind tokenized and untokenized fields.<p>
353     *
354     * @return <code>true</code> if the content of this field is tokenized in the Lucene index
355     *
356     * @see #isStored()
357     * @see #isIndexed()
358     */
359    public boolean isTokenizedAndIndexed() {
360
361        return m_tokenized && isIndexed();
362    }
363
364    /**
365     * Sets the analyzer used for this field.<p>
366     *
367     * @param analyzer the analyzer to set
368     */
369    public void setAnalyzer(Analyzer analyzer) {
370
371        m_analyzer = analyzer;
372    }
373
374    /**
375     * Sets the analyzer used for this field.<p>
376     *
377     * The parameter must be a name of a class the implements the Lucene {@link Analyzer} interface.
378     *
379     * @param analyzerName the analyzer class name to set
380     *
381     * @throws Exception in case of problems creating the analyzer class instance
382     */
383    public void setAnalyzer(String analyzerName) throws Exception {
384
385        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(analyzerName)) {
386            setAnalyzer(CmsSearchManager.getAnalyzer(analyzerName));
387        }
388    }
389
390    /**
391     * Controls if this field value will be stored compressed or not.<p>
392     *
393     * If this is set to <code>true</code>, the value for {@link #isStored()} will also
394     * be set to <code>true</code>, since compressed fields are always stored.<p>
395     *
396     * @param compressed if <code>true</code>, the field value will be stored compressed
397     */
398    public void setCompressed(boolean compressed) {
399
400        m_compressed = compressed;
401        if (compressed) {
402            setStored(true);
403        }
404    }
405
406    /**
407     * Controls if the field is displayed or not.<p>
408     *
409     * @param displayed if true the field is displayed
410     */
411    public void setDisplayed(boolean displayed) {
412
413        m_displayed = displayed;
414    }
415
416    /**
417     * Sets the display name. If the given name equals IGNORE_DISPLAY_NAME the field is not displayed.<p>
418     *
419     * @param displayName the display name to set
420     */
421    public void setDisplayName(String displayName) {
422
423        if (CmsStringUtil.isEmpty(displayName) || (IGNORE_DISPLAY_NAME.equals(displayName))) {
424            m_displayName = null;
425            setDisplayed(false);
426        } else {
427            m_displayName = displayName;
428            m_displayNameForConfiguration = displayName;
429            setDisplayed(true);
430        }
431    }
432
433    /**
434     * Sets the displayNameForConfiguration.<p>
435     *
436     * @param displayNameForConfiguration the displayNameForConfiguration to set
437     */
438    public void setDisplayNameForConfiguration(String displayNameForConfiguration) {
439
440        m_displayNameForConfiguration = displayNameForConfiguration;
441        setDisplayName(displayNameForConfiguration);
442    }
443
444    /**
445     * Controls if the content of this field is indexed (and possibly tokenized) in the Lucene index from a String parameter.<p>
446     *
447     * This sets the values for {@link #isIndexed()} as well as {@link #isTokenizedAndIndexed()}.<p>
448     *
449     * The parameter can have the following values:
450     * <ul>
451     * <li><b>"true"</b> or <b>"tokenized"</b>: The field is indexed and tokenized.
452     * <li><b>"false"</b> or <b>"no"</b>: The field is not indexed and not tokenized.
453     * <li><b>"untokenized"</b>: The field is indexed but not tokenized.
454     * </ul>
455     *
456     * @param indexed the index setting to use
457     *
458     * @see #setIndexed(boolean)
459     * @see #setTokenized(boolean)
460     */
461    public void setIndexed(String indexed) {
462
463        boolean isIndexed = false;
464        boolean isTokenized = false;
465        if (indexed != null) {
466            indexed = indexed.trim().toLowerCase();
467            if (STR_TOKENIZED.equals(indexed)) {
468                isIndexed = true;
469                isTokenized = true;
470            } else if (STR_UN_TOKENIZED.equals(indexed)) {
471                isIndexed = true;
472            } else if (STR_NO.equals(indexed)) {
473                // "no", both values will be false
474            } else {
475                // only "true" or "false" remain
476                isIndexed = Boolean.valueOf(indexed).booleanValue();
477                isTokenized = isIndexed;
478            }
479        }
480        setIndexed(isIndexed);
481        setTokenized(isTokenized);
482    }
483
484    /**
485     * Controls if this fields content is used in the search result excerpt.<p>
486     *
487     * @param excerpt if <code>"true"</code>, then this fields content is used in the search excerpt
488     *
489     * @see #setInExcerpt(boolean)
490     */
491    public void setInExcerpt(String excerpt) {
492
493        setInExcerpt(Boolean.valueOf(String.valueOf(excerpt)).booleanValue());
494    }
495
496    /**
497     * Controls if the content of this field is stored in the Lucene index from a String parameter.<p>
498     *
499     * @param stored if <code>"true"</code>, then the field content is stored
500     *
501     * @see #setStored(boolean)
502     */
503    public void setStored(String stored) {
504
505        boolean isStored = false;
506        boolean isCompressed = false;
507        if (stored != null) {
508            stored = stored.trim().toLowerCase();
509            if (STR_COMPRESS.equals(stored)) {
510                isCompressed = true;
511                isStored = true;
512            } else if (STR_YES.equals(stored)) {
513                // "yes", value will be stored but not compressed
514                isStored = true;
515            } else {
516                // only "true" or "false" remain
517                isStored = Boolean.valueOf(stored).booleanValue();
518            }
519        }
520        setStored(isStored);
521        setCompressed(isCompressed);
522    }
523
524    /**
525     * Controls if the content of this field is tokenized in the Lucene index.<p>
526     *
527     * Please refer to the Lucene documentation about the concept behind tokenized and untokenized fields.<p>
528     *
529     * @param tokenized if <code>true</code>, then the field content is tokenized
530     *
531     * @see #setStored(boolean)
532     */
533    public void setTokenized(boolean tokenized) {
534
535        m_tokenized = tokenized;
536    }
537
538    /**
539     * Sets the type.<p>
540     *
541     * @param type the type to set
542     */
543    public void setType(String type) {
544
545        m_type = type;
546    }
547}