001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.search.fields;
029
030import org.opencms.file.CmsPropertyDefinition;
031
032import java.util.ArrayList;
033import java.util.HashMap;
034import java.util.HashSet;
035import java.util.Iterator;
036import java.util.List;
037import java.util.Map;
038import java.util.Set;
039
040import org.apache.lucene.analysis.Analyzer;
041import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
042import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
043
044/**
045 * Describes a configuration of fields that are used in building a search index.<p>
046 *
047 * @since 7.0.0
048 */
049public class CmsLuceneFieldConfiguration extends CmsSearchFieldConfiguration {
050
051    /**
052     * The default for the standard search configuration.<p>
053     *
054     * This defines the default that is used in case no "standard" field configuration
055     * is defined in <code>opencms-search.xml</code>.<p>
056     */
057    public static final CmsLuceneFieldConfiguration DEFAULT_STANDARD = createStandardConfiguration();
058
059    /** The description for the standard field configuration. */
060    public static final String STR_STANDARD_DESCRIPTION = "The standard OpenCms search index field configuration.";
061
062    /** The fields that will be returned by a regular search (all stored and not lazy fields). */
063    private static Set<String> m_returnFields = new HashSet<String>();
064
065    /** The serial version id. */
066    private static final long serialVersionUID = 8011265789649614792L;
067
068    static {
069        m_returnFields.add(CmsSearchField.FIELD_CATEGORY);
070        m_returnFields.add(CmsSearchField.FIELD_DATE_CONTENT);
071        m_returnFields.add(CmsSearchField.FIELD_DATE_CREATED);
072        m_returnFields.add(CmsSearchField.FIELD_DATE_EXPIRED);
073        m_returnFields.add(CmsSearchField.FIELD_DATE_LASTMODIFIED);
074        m_returnFields.add(CmsSearchField.FIELD_DATE_RELEASED);
075        m_returnFields.add(CmsSearchField.FIELD_PARENT_FOLDERS);
076        m_returnFields.add(CmsSearchField.FIELD_PATH);
077        m_returnFields.add(CmsSearchField.FIELD_SUFFIX);
078        m_returnFields.add(CmsSearchField.FIELD_TYPE);
079    }
080
081    /** Contains all names of the fields that are used in the excerpt. */
082    private List<String> m_excerptFieldNames;
083
084    /** The field added flag. */
085    private boolean m_fieldAdded;
086
087    /**
088     * Creates the default standard search configuration.<p>
089     *
090     * This defines the default that is used in case no "standard" field configuration
091     * is defined in <code>opencms-search.xml</code>.<p>
092     *
093     * @return the default standard search configuration
094     */
095    private static CmsLuceneFieldConfiguration createStandardConfiguration() {
096
097        CmsLuceneFieldConfiguration result = new CmsLuceneFieldConfiguration();
098        result.setName(STR_STANDARD);
099        result.setDescription(STR_STANDARD_DESCRIPTION);
100
101        CmsLuceneField field;
102        // content mapping, store as compressed value
103        field = new CmsLuceneField(
104            CmsSearchField.FIELD_CONTENT,
105            "%(key.field.content)",
106            true,
107            true,
108            true,
109            true,
110            true,
111            null,
112            null);
113        field.addMapping(new CmsSearchFieldMapping(CmsSearchFieldMappingType.CONTENT, null, true));
114        result.addField(field);
115
116        // title mapping as a keyword
117        field = new CmsLuceneField(
118            CmsSearchField.FIELD_TITLE,
119            CmsLuceneField.IGNORE_DISPLAY_NAME,
120            true,
121            true,
122            false,
123            false,
124            null);
125        field.addMapping(
126            new CmsSearchFieldMapping(CmsSearchFieldMappingType.PROPERTY, CmsPropertyDefinition.PROPERTY_TITLE, true));
127        result.addField(field);
128
129        // title mapping as indexed field
130        field = new CmsLuceneField(CmsSearchField.FIELD_TITLE_UNSTORED, "%(key.field.title)", false, true);
131        field.addMapping(
132            new CmsSearchFieldMapping(CmsSearchFieldMappingType.PROPERTY, CmsPropertyDefinition.PROPERTY_TITLE, true));
133        result.addField(field);
134
135        // mapping of "Keywords" property to search field with the same name
136        field = new CmsLuceneField(CmsSearchField.FIELD_KEYWORDS, "%(key.field.keywords)", true, true);
137        field.addMapping(
138            new CmsSearchFieldMapping(
139                CmsSearchFieldMappingType.PROPERTY,
140                CmsPropertyDefinition.PROPERTY_KEYWORDS,
141                true));
142        result.addField(field);
143
144        // mapping of "Description" property to search field with the same name
145        field = new CmsLuceneField(CmsSearchField.FIELD_DESCRIPTION, "%(key.field.description)", true, true);
146        field.addMapping(
147            new CmsSearchFieldMapping(
148                CmsSearchFieldMappingType.PROPERTY,
149                CmsPropertyDefinition.PROPERTY_DESCRIPTION,
150                true));
151        result.addField(field);
152
153        // "meta" field is a combination of "Title", "Keywords" and "Description" properties
154        field = new CmsLuceneField(CmsSearchField.FIELD_META, "%(key.field.meta)", false, true);
155        field.addMapping(
156            new CmsSearchFieldMapping(CmsSearchFieldMappingType.PROPERTY, CmsPropertyDefinition.PROPERTY_TITLE, true));
157        field.addMapping(
158            new CmsSearchFieldMapping(
159                CmsSearchFieldMappingType.PROPERTY,
160                CmsPropertyDefinition.PROPERTY_KEYWORDS,
161                true));
162        field.addMapping(
163            new CmsSearchFieldMapping(
164                CmsSearchFieldMappingType.PROPERTY,
165                CmsPropertyDefinition.PROPERTY_DESCRIPTION,
166                true));
167        result.addField(field);
168
169        return result;
170    }
171
172    /**
173     *
174     * @see org.opencms.search.fields.CmsSearchFieldConfiguration#addField(org.opencms.search.fields.CmsSearchField)
175     */
176    @Override
177    public void addField(CmsSearchField field) {
178
179        super.addField(field);
180        m_fieldAdded = true;
181    }
182
183    /**
184     * Returns an analyzer that wraps the given base analyzer with the analyzers of this individual field configuration.<p>
185     *
186     * @param analyzer the base analyzer to wrap
187     *
188     * @return an analyzer that wraps the given base analyzer with the analyzers of this individual field configuration
189     */
190    public Analyzer getAnalyzer(Analyzer analyzer) {
191
192        // parent folder and last modified lookup fields must use whitespace analyzer
193        WhitespaceAnalyzer ws = new WhitespaceAnalyzer();
194        Map<String, Analyzer> analyzers = new HashMap<String, Analyzer>();
195        // first make map the default hard coded fields
196        analyzers.put(CmsSearchField.FIELD_PARENT_FOLDERS, ws);
197        analyzers.put(CmsSearchField.FIELD_CATEGORY, ws);
198        analyzers.put(CmsSearchField.FIELD_DATE_LASTMODIFIED_LOOKUP, ws);
199        analyzers.put(CmsSearchField.FIELD_DATE_CREATED_LOOKUP, ws);
200
201        for (CmsLuceneField field : getLuceneFields()) {
202            Analyzer fieldAnalyzer = field.getAnalyzer();
203            if (fieldAnalyzer != null) {
204                // this field has an individual analyzer configured
205                analyzers.put(field.getName(), fieldAnalyzer);
206            }
207        }
208        // return the individual field configured analyzer
209        return new PerFieldAnalyzerWrapper(analyzer, analyzers);
210    }
211
212    /**
213     * Returns a list of all field names (Strings) that are used in generating the search excerpt.<p>
214     *
215     * @return a list of all field names (Strings) that are used in generating the search excerpt
216     */
217    public List<String> getExcerptFieldNames() {
218
219        if (m_excerptFieldNames == null) {
220            // lazy initialize the field names
221            m_excerptFieldNames = new ArrayList<String>();
222            Iterator<CmsSearchField> i = getFields().iterator();
223            while (i.hasNext()) {
224                CmsLuceneField field = (CmsLuceneField)i.next();
225                if (field.isInExcerptAndStored()) {
226                    m_excerptFieldNames.add(field.getName());
227                }
228            }
229        }
230
231        // create a copy of the list to prevent changes in other classes
232        return new ArrayList<String>(m_excerptFieldNames);
233    }
234
235    /**
236     * Returns the field names used for the excerpt generation.<p>
237     *
238     * @return the field names used for the excerpt generation
239     */
240    public Set<String> getExcerptFields() {
241
242        return new HashSet<String>(getExcerptFieldNames());
243    }
244
245    /**
246     * Returns a list of the concrete Lucene search fields.<p>
247     *
248     * @return a list of lucene search fields
249     */
250    public List<CmsLuceneField> getLuceneFields() {
251
252        List<CmsLuceneField> result = new ArrayList<CmsLuceneField>();
253        for (CmsSearchField field : getFields()) {
254            if (field instanceof CmsLuceneField) {
255                result.add((CmsLuceneField)field);
256            }
257        }
258        return result;
259    }
260
261    /**
262     * Returns the field names used for a regular result.<p>
263     *
264     * @return the field names used for a regular result
265     */
266    public Set<String> getReturnFields() {
267
268        if (m_fieldAdded) {
269            for (CmsSearchField field : getLuceneFields()) {
270                if (field.isStored() && !LAZY_FIELDS.contains(field.getName())) {
271                    m_returnFields.add(field.getName());
272                }
273            }
274        }
275        m_fieldAdded = false;
276        return m_returnFields;
277    }
278}