001/*
002 * File   : $Source$
003 * Date   : $Date$
004 * Version: $Revision$
005 *
006 * This library is part of OpenCms -
007 * the Open Source Content Management System
008 *
009 * Copyright (C) 2002 - 2009 Alkacon Software (http://www.alkacon.com)
010 *
011 * This library is free software; you can redistribute it and/or
012 * modify it under the terms of the GNU Lesser General Public
013 * License as published by the Free Software Foundation; either
014 * version 2.1 of the License, or (at your option) any later version.
015 *
016 * This library is distributed in the hope that it will be useful,
017 * but WITHOUT ANY WARRANTY; without even the implied warranty of
018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
019 * Lesser General Public License for more details.
020 *
021 * For further information about Alkacon Software, please see the
022 * company website: http://www.alkacon.com
023 *
024 * For further information about OpenCms, please see the
025 * project website: http://www.opencms.org
026 *
027 * You should have received a copy of the GNU Lesser General Public
028 * License along with this library; if not, write to the Free Software
029 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
030 */
031
032package org.opencms.search.solr;
033
034import org.opencms.ade.containerpage.CmsDetailOnlyContainerUtil;
035import org.opencms.configuration.I_CmsXmlConfiguration;
036import org.opencms.file.CmsFile;
037import org.opencms.file.CmsObject;
038import org.opencms.file.CmsProperty;
039import org.opencms.file.CmsPropertyDefinition;
040import org.opencms.file.CmsResource;
041import org.opencms.file.types.CmsResourceTypeJsp;
042import org.opencms.file.types.CmsResourceTypeXmlContainerPage;
043import org.opencms.file.types.CmsResourceTypeXmlContent;
044import org.opencms.file.types.CmsResourceTypeXmlPage;
045import org.opencms.i18n.CmsLocaleManager;
046import org.opencms.loader.CmsResourceManager;
047import org.opencms.main.CmsException;
048import org.opencms.main.CmsLog;
049import org.opencms.main.OpenCms;
050import org.opencms.search.CmsSearchIndexSource;
051import org.opencms.search.CmsSearchUtil;
052import org.opencms.search.I_CmsSearchDocument;
053import org.opencms.search.documents.CmsDocumentDependency;
054import org.opencms.search.extractors.I_CmsExtractionResult;
055import org.opencms.search.fields.CmsLuceneField;
056import org.opencms.search.fields.CmsSearchField;
057import org.opencms.search.fields.CmsSearchFieldConfiguration;
058import org.opencms.search.fields.CmsSearchFieldMapping;
059import org.opencms.search.fields.CmsSearchFieldMappingType;
060import org.opencms.search.fields.I_CmsSearchFieldMapping;
061import org.opencms.util.CmsStringUtil;
062import org.opencms.util.CmsVfsUtil;
063import org.opencms.xml.CmsXmlContentDefinition;
064import org.opencms.xml.containerpage.CmsContainerElementBean;
065import org.opencms.xml.containerpage.CmsContainerPageBean;
066import org.opencms.xml.containerpage.CmsXmlContainerPage;
067import org.opencms.xml.containerpage.CmsXmlContainerPageFactory;
068import org.opencms.xml.content.I_CmsXmlContentHandler;
069
070import java.util.ArrayList;
071import java.util.Arrays;
072import java.util.Collection;
073import java.util.Collections;
074import java.util.Date;
075import java.util.HashMap;
076import java.util.List;
077import java.util.Locale;
078import java.util.Map;
079import java.util.Set;
080
081import org.apache.commons.logging.Log;
082import org.apache.solr.common.SolrInputDocument;
083
084/**
085 * The search field implementation for Solr.<p>
086 *
087 * @since 8.5.0
088 */
089public class CmsSolrFieldConfiguration extends CmsSearchFieldConfiguration {
090
091    /** The log object for this class. */
092    private static final Log LOG = CmsLog.getLog(CmsSolrFieldConfiguration.class);
093
094    /** The content locale for the indexed document is stored in order to save performance. */
095    private Collection<Locale> m_contentLocales;
096
    /** The Solr fields of this configuration, mapped by their field name. */
098    private Map<String, CmsSolrField> m_solrFields = new HashMap<String, CmsSolrField>();
099
100    /**
101     * Default constructor.<p>
102     */
103    public CmsSolrFieldConfiguration() {
104
105        super();
106    }
107
108    /**
109     * Adds the additional fields to the configuration, if they are not null.<p>
110     *
111     * @param additionalFields the additional fields to add
112     */
113    public void addAdditionalFields(List<CmsSolrField> additionalFields) {
114
115        if (additionalFields != null) {
116            for (CmsSolrField solrField : additionalFields) {
117                m_solrFields.put(solrField.getName(), solrField);
118            }
119        }
120    }
121
122    /**
123     * Returns all configured Solr fields.<p>
124     *
125     * @return all configured Solr fields
126     */
127    public Map<String, CmsSolrField> getSolrFields() {
128
129        return Collections.unmodifiableMap(m_solrFields);
130    }
131
132    /**
133     * @see org.opencms.search.fields.CmsSearchFieldConfiguration#init()
134     */
135    @Override
136    public void init() {
137
138        super.init();
139        addAdditionalFields();
140    }
141
142    /**
143     * @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendAdditionalValuesToDcoument(org.opencms.search.I_CmsSearchDocument, org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.extractors.I_CmsExtractionResult, java.util.List, java.util.List)
144     */
145    @Override
146    protected I_CmsSearchDocument appendAdditionalValuesToDcoument(
147        I_CmsSearchDocument document,
148        CmsObject cms,
149        CmsResource resource,
150        I_CmsExtractionResult extractionResult,
151        List<CmsProperty> properties,
152        List<CmsProperty> propertiesSearched) {
153
154        String mimeType = OpenCms.getResourceManager().getMimeType(resource.getName(), null);
155        if (mimeType != null) {
156            document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_MIMETYPE), mimeType);
157        }
158
159        document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_FILENAME), resource.getName());
160
161        document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_VERSION), "" + resource.getVersion());
162
163        try {
164            if (CmsResourceTypeXmlContent.isXmlContent(resource)) {
165                I_CmsXmlContentHandler handler = CmsXmlContentDefinition.getContentHandlerForResource(cms, resource);
166                if ((handler != null) && handler.isContainerPageOnly()) {
167                    if (document.getDocument() instanceof SolrInputDocument) {
168                        SolrInputDocument doc = (SolrInputDocument)document.getDocument();
169                        doc.removeField(CmsSearchField.FIELD_SEARCH_EXCLUDE);
170                    } else {
171                        //TODO: Warning - but should not happen.
172                    }
173                    document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_SEARCH_EXCLUDE), "true");
174                }
175            }
176        } catch (CmsException e) {
177            LOG.error(e.getMessage(), e);
178        }
179
180        List<String> searchExcludeOptions = document.getMultivaluedFieldAsStringList(
181            CmsSearchField.FIELD_SEARCH_EXCLUDE);
182        if ((searchExcludeOptions == null) || searchExcludeOptions.isEmpty()) {
183            document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_SEARCH_EXCLUDE), "false");
184        }
185        if (resource.getRootPath().startsWith("/system")
186            || (CmsResourceTypeJsp.getJSPTypeId() == resource.getTypeId())) {
187            document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_SEARCH_CHANNEL), "gallery");
188        } else {
189            document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_SEARCH_CHANNEL), "content");
190        }
191
192        document = appendFieldsForListSortOptions(document);
193
194        document = appendFieldsForListSearch(document, cms, resource);
195
196        if (resource.getRootPath().startsWith(OpenCms.getSiteManager().getSharedFolder())
197            || (null != OpenCms.getSiteManager().getSiteRoot(resource.getRootPath()))) {
198            appendSpellFields(document);
199        }
200
201        return document;
202    }
203
204    /**
205     * @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendDates(org.opencms.search.I_CmsSearchDocument, org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.extractors.I_CmsExtractionResult, java.util.List, java.util.List)
206     */
207    @Override
208    protected I_CmsSearchDocument appendDates(
209        I_CmsSearchDocument document,
210        CmsObject cms,
211        CmsResource resource,
212        I_CmsExtractionResult extractionResult,
213        List<CmsProperty> properties,
214        List<CmsProperty> propertiesSearched) {
215
216        document.addDateField(CmsSearchField.FIELD_DATE_CREATED, resource.getDateCreated(), false);
217        document.addDateField(CmsSearchField.FIELD_DATE_LASTMODIFIED, resource.getDateLastModified(), false);
218        document.addDateField(CmsSearchField.FIELD_DATE_CONTENT, resource.getDateContent(), false);
219        document.addDateField(CmsSearchField.FIELD_DATE_RELEASED, resource.getDateReleased(), false);
220        document.addDateField(CmsSearchField.FIELD_DATE_EXPIRED, resource.getDateExpired(), false);
221
222        return document;
223    }
224
225    /**
226     * @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendFieldMapping(org.opencms.search.I_CmsSearchDocument, org.opencms.search.fields.CmsSearchField, org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.extractors.I_CmsExtractionResult, java.util.List, java.util.List)
227     */
228    @Override
229    protected I_CmsSearchDocument appendFieldMapping(
230        I_CmsSearchDocument document,
231        CmsSearchField sfield,
232        CmsObject cms,
233        CmsResource resource,
234        I_CmsExtractionResult extractionResult,
235        List<CmsProperty> properties,
236        List<CmsProperty> propertiesSearched) {
237
238        CmsSolrField field = (CmsSolrField)sfield;
239        try {
240            StringBuffer text = new StringBuffer();
241            for (I_CmsSearchFieldMapping mapping : field.getMappings()) {
242                // loop over the mappings of the given field
243                if (extractionResult != null) {
244                    String mapResult = null;
245                    if ((field.getLocale() != null) && mapping.getType().equals(CmsSearchFieldMappingType.CONTENT)) {
246                        // this is a localized content field, try to retrieve the localized content extraction
247                        mapResult = extractionResult.getContent(field.getLocale());
248                        if (mapResult == null) {
249                            // no localized content extracted
250                            if (!(CmsResourceTypeXmlContent.isXmlContent(resource)
251                                || CmsResourceTypeXmlPage.isXmlPage(resource))) {
252                                // the resource is no XML content nor an XML page
253                                if ((m_contentLocales != null) && m_contentLocales.contains(field.getLocale())) {
254                                    // the resource to get the extracted content for has the locale of this field,
255                                    // so store the extraction content into this field
256                                    mapResult = extractionResult.getContent();
257                                }
258                            }
259                        }
260                    } else {
261                        // this is not a localized content field, just perform the regular mapping
262                        mapResult = mapping.getStringValue(
263                            cms,
264                            resource,
265                            extractionResult,
266                            properties,
267                            propertiesSearched);
268                    }
269                    if (text.length() > 0) {
270                        text.append('\n');
271                    }
272                    if (mapResult != null) {
273                        text.append(mapResult);
274                    } else if (mapping.getDefaultValue() != null) {
275                        // no mapping result found, but a default is configured
276                        text.append(mapping.getDefaultValue());
277                    }
278                } else if (mapping.getStringValue(
279                    cms,
280                    resource,
281                    extractionResult,
282                    properties,
283                    propertiesSearched) != null) {
284                        String value = mapping.getStringValue(
285                            cms,
286                            resource,
287                            extractionResult,
288                            properties,
289                            propertiesSearched);
290                        if (value != null) {
291                            document.addSearchField(field, value);
292                        }
293                    }
294            }
295            if ((text.length() <= 0) && (field.getDefaultValue() != null)) {
296                text.append(field.getDefaultValue());
297            }
298            if (text.length() > 0) {
299                document.addSearchField(field, text.toString());
300            }
301        } catch (Exception e) {
302            // nothing to do just log
303            LOG.error(e.getLocalizedMessage(), e);
304        }
305        return document;
306    }
307
308    /**
309     * @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendFieldMappings(org.opencms.search.I_CmsSearchDocument, org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.extractors.I_CmsExtractionResult, java.util.List, java.util.List)
310     */
311    @Override
312    protected I_CmsSearchDocument appendFieldMappings(
313        I_CmsSearchDocument document,
314        CmsObject cms,
315        CmsResource resource,
316        I_CmsExtractionResult extractionResult,
317        List<CmsProperty> properties,
318        List<CmsProperty> propertiesSearched) {
319
320        List<String> systemFields = new ArrayList<String>();
321        // append field mappings directly stored in the extraction result
322        if (null != extractionResult) {
323            Map<String, String> fieldMappings = extractionResult.getFieldMappings();
324            for (String fieldName : fieldMappings.keySet()) {
325                String value = fieldMappings.get(fieldName);
326                CmsSolrField f = new CmsSolrField(fieldName, null, null, null);
327                document.addSearchField(f, value);
328                systemFields.add(fieldName);
329            }
330        }
331
332        Set<CmsSearchField> mappedFields = getXSDMappings(cms, resource);
333        if (mappedFields != null) {
334            for (CmsSearchField field : mappedFields) {
335                if (!systemFields.contains(field.getName())) {
336                    document = appendFieldMapping(
337                        document,
338                        field,
339                        cms,
340                        resource,
341                        extractionResult,
342                        properties,
343                        propertiesSearched);
344                } else {
345                    LOG.error(
346                        Messages.get().getBundle().key(
347                            Messages.LOG_SOLR_ERR_MAPPING_TO_INTERNALLY_USED_FIELD_2,
348                            resource.getRootPath(),
349                            field.getName()));
350                }
351            }
352        }
353
354        // add field mappings from elements of a container page
355        if (CmsResourceTypeXmlContainerPage.isContainerPage(resource)) {
356            document = appendFieldMappingsFromElementsOnThePage(document, cms, resource, systemFields);
357        } else {
358            try {
359                for (CmsResource detailOnlyPage : CmsDetailOnlyContainerUtil.getDetailOnlyResources(cms, resource)) {
360                    try {
361                        document = appendFieldMappingsFromElementsOnThePage(
362                            document,
363                            cms,
364                            detailOnlyPage,
365                            systemFields);
366                    } catch (Throwable t) {
367                        LOG.warn(
368                            Messages.get().getBundle().key(
369                                Messages.LOG_SOLR_WARN_DETAIL_ONLY_PAGE_MAPPINGS_FOR_PAGE_2,
370                                null == resource ? "null" : resource.getRootPath(),
371                                null == detailOnlyPage ? "null" : detailOnlyPage.getRootPath()),
372                            t);
373                    }
374                }
375            } catch (Throwable t) {
376                LOG.warn(
377                    Messages.get().getBundle().key(
378                        Messages.LOG_SOLR_WARN_DETAIL_ONLY_PAGE_MAPPINGS_1,
379                        null == resource ? "null" : resource.getRootPath()),
380                    t);
381            }
382        }
383
384        for (CmsSolrField field : m_solrFields.values()) {
385            document = appendFieldMapping(
386                document,
387                field,
388                cms,
389                resource,
390                extractionResult,
391                properties,
392                propertiesSearched);
393        }
394
395        return document;
396    }
397
398    /**
399     * Adds search fields from elements on a container page to a container page's document.
400     * @param document The document for the container page
401     * @param cms The current CmsObject
402     * @param resource The resource of the container page
403     * @param systemFields The list of field names for fields where mappings to should be discarded, since these fields are used system internally.
404     * @return the manipulated document
405     */
406    protected I_CmsSearchDocument appendFieldMappingsFromElementsOnThePage(
407        I_CmsSearchDocument document,
408        CmsObject cms,
409        CmsResource resource,
410        List<String> systemFields) {
411
412        try {
413            CmsFile file = cms.readFile(resource);
414            CmsXmlContainerPage containerPage = CmsXmlContainerPageFactory.unmarshal(cms, file);
415            CmsContainerPageBean containerBean = containerPage.getContainerPage(cms);
416            if (containerBean != null) {
417                for (CmsContainerElementBean element : containerBean.getElements()) {
418                    element.initResource(cms);
419                    CmsResource elemResource = element.getResource();
420                    Set<CmsSearchField> mappedFields = getXSDMappingsForPage(cms, elemResource);
421                    if (mappedFields != null) {
422
423                        for (CmsSearchField field : mappedFields) {
424                            if (!systemFields.contains(field.getName())) {
425                                try {
426                                    I_CmsExtractionResult extractionResult = CmsSolrDocumentXmlContent.extractXmlContent(
427                                        cms,
428                                        elemResource,
429                                        getIndex());
430                                    document = appendFieldMapping(
431                                        document,
432                                        field,
433                                        cms,
434                                        elemResource,
435                                        extractionResult,
436                                        cms.readPropertyObjects(resource, false),
437                                        cms.readPropertyObjects(resource, true));
438                                } catch (Exception e) {
439                                    LOG.error(
440                                        Messages.get().getBundle().key(
441                                            Messages.LOG_SOLR_ERR_MAPPING_UNREADABLE_CONTENT_3,
442                                            elemResource.getRootPath(),
443                                            field.getName(),
444                                            resource.getRootPath()),
445                                        e);
446                                }
447                            } else {
448                                LOG.error(
449                                    Messages.get().getBundle().key(
450                                        Messages.LOG_SOLR_ERR_MAPPING_TO_INTERNALLY_USED_FIELD_3,
451                                        elemResource.getRootPath(),
452                                        field.getName(),
453                                        resource.getRootPath()));
454                            }
455                        }
456                    }
457                }
458            }
459        } catch (CmsException e) {
460            // Should be thrown if element on the page does not exist anymore - this is possible, but not necessarily an error.
461            // Hence, just notice it in the debug log.
462            if (LOG.isDebugEnabled()) {
463                LOG.debug(e.getLocalizedMessage(), e);
464            }
465        }
466        return document;
467    }
468
469    /**
470     * @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendLocales(org.opencms.search.I_CmsSearchDocument, org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.extractors.I_CmsExtractionResult, java.util.List, java.util.List)
471     */
472    @Override
473    protected I_CmsSearchDocument appendLocales(
474        I_CmsSearchDocument document,
475        CmsObject cms,
476        CmsResource resource,
477        I_CmsExtractionResult extraction,
478        List<CmsProperty> properties,
479        List<CmsProperty> propertiesSearched) {
480
481        // append the resource locales
482        Collection<Locale> resourceLocales = new ArrayList<Locale>();
483        if ((extraction != null) && (!extraction.getLocales().isEmpty())) {
484
485            CmsResourceManager resMan = OpenCms.getResourceManager();
486            resourceLocales = extraction.getLocales();
487            boolean isGroup = false;
488            for (String groupType : Arrays.asList(
489                CmsResourceTypeXmlContainerPage.GROUP_CONTAINER_TYPE_NAME,
490                CmsResourceTypeXmlContainerPage.INHERIT_CONTAINER_TYPE_NAME)) {
491                if (resMan.matchResourceType(groupType, resource.getTypeId())) {
492                    isGroup = true;
493                    break;
494                }
495            }
496            if (isGroup) {
497                // groups are locale independent, so they have to have *all* locales so they are found for each one
498                m_contentLocales = OpenCms.getLocaleManager().getAvailableLocales();
499            } else {
500                m_contentLocales = resourceLocales;
501            }
502        } else {
503            // For all other resources add all default locales
504            resourceLocales = OpenCms.getLocaleManager().getDefaultLocales(cms, resource);
505
506            /*
507             * A problem is likely to arise when dealing with multilingual fields:
508             * Only values extracted from XML resources are written into the Solr locale-aware fields (e.g.
509             * "title_<locale>_s"), therefore sorting by them will not work as non-XML (unilingual) resources extract
510             * the information by the resource property facility and will not write to an Solr locale-aware field.
511             *
512             * The following code is used to fix this behavior, at least for "Title".
513             */
514
515            // Check all passed properties for "Title"...
516            for (final CmsProperty prop : propertiesSearched) {
517                if (prop.getName().equals(CmsPropertyDefinition.PROPERTY_TITLE)) {
518                    final String value = prop.getValue();
519
520                    // Write a Solr locale-aware field for every locale the system supports...
521                    final List<Locale> availableLocales = OpenCms.getLocaleManager().getAvailableLocales();
522                    for (final Locale locale : availableLocales) {
523                        final String lang = locale.getLanguage();
524                        // Don't proceed if a field has already written for this locale.
525                        if (!resourceLocales.contains(lang)) {
526                            final String effFieldName = CmsSearchFieldConfiguration.getLocaleExtendedName(
527                                CmsSearchField.FIELD_TITLE_UNSTORED,
528                                locale) + "_s";
529
530                            final CmsSolrField f = new CmsSolrField(effFieldName, null, null, null);
531                            document.addSearchField(f, value);
532                        }
533                    }
534                }
535            }
536            m_contentLocales = getContentLocales(cms, resource, extraction);
537        }
538
539        document.addResourceLocales(resourceLocales);
540        document.addContentLocales(m_contentLocales);
541
542        // append document dependencies if configured
543        if (hasLocaleDependencies()) {
544            CmsDocumentDependency dep = CmsDocumentDependency.load(cms, resource);
545            ((CmsSolrDocument)document).addDocumentDependency(cms, dep);
546        }
547        return document;
548    }
549
550    /**
551     * @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendProperties(org.opencms.search.I_CmsSearchDocument, org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.extractors.I_CmsExtractionResult, java.util.List, java.util.List)
552     */
553    @Override
554    protected I_CmsSearchDocument appendProperties(
555        I_CmsSearchDocument document,
556        CmsObject cms,
557        CmsResource resource,
558        I_CmsExtractionResult extraction,
559        List<CmsProperty> properties,
560        List<CmsProperty> propertiesSearched) {
561
562        for (CmsProperty prop : propertiesSearched) {
563            if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(prop.getValue())) {
564                String value = CmsSearchUtil.stripHtmlFromPropertyIfNecessary(prop.getName(), prop.getValue());
565                document.addSearchField(
566                    new CmsSolrField(prop.getName() + CmsSearchField.FIELD_DYNAMIC_PROPERTIES, null, null, null),
567                    value);
568
569                // Also write the property using the dynamic field '_s' in order to prevent tokenization
570                // of the property. The resulting field is named '<property>_prop_s'.
571                document.addSearchField(
572                    new CmsSolrField(prop.getName() + CmsSearchField.FIELD_DYNAMIC_PROPERTIES + "_s", null, null, null),
573                    value);
574            }
575        }
576
577        for (CmsProperty prop : properties) {
578            if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(prop.getValue())) {
579                String value = CmsSearchUtil.stripHtmlFromPropertyIfNecessary(prop.getName(), prop.getValue());
580                document.addSearchField(
581                    new CmsSolrField(prop.getName() + CmsSearchField.FIELD_DYNAMIC_PROPERTIES_DIRECT, null, null, null),
582                    value);
583
584                // Also write the property using the dynamic field '_s' in order to prevent tokenization
585                // of the property. The resulting field is named '<property>_prop_nosearch_s'.
586                document.addSearchField(
587                    new CmsSolrField(
588                        prop.getName() + CmsSearchField.FIELD_DYNAMIC_PROPERTIES_DIRECT + "_s",
589                        null,
590                        null,
591                        null),
592                    value);
593            }
594        }
595        return document;
596    }
597
598    /**
599     * Retrieves the locales for an content, that is whether an XML content nor an XML page.<p>
600     *
601     * Uses following strategy:
602     * <ul>
603     * <li>first by file name</li>
604     * <li>then by detection and</li>
605     * <li>otherwise take the first configured default locale for this resource</li>
606     * </ul>
607     *
608     * @param cms the current CmsObject
609     * @param resource the resource to get the content locales for
610     * @param extraction the extraction result
611     *
612     * @return the determined locales for the given resource
613     */
614    protected List<Locale> getContentLocales(CmsObject cms, CmsResource resource, I_CmsExtractionResult extraction) {
615
616        // try to detect locale by filename
617        Locale detectedLocale = CmsStringUtil.getLocaleForName(resource.getRootPath());
618        if (!OpenCms.getLocaleManager().getAvailableLocales(cms, resource).contains(detectedLocale)) {
619            detectedLocale = null;
620        }
621        // try to detect locale by language detector
622        if (getIndex().isLanguageDetection()
623            && (detectedLocale == null)
624            && (extraction != null)
625            && (extraction.getContent() != null)) {
626            detectedLocale = CmsStringUtil.getLocaleForText(extraction.getContent());
627        }
628        // take the detected locale or use the first configured default locale for this resource
629        List<Locale> result = new ArrayList<Locale>();
630        if (detectedLocale != null) {
631            // take the found locale
632            result.add(detectedLocale);
633        } else {
634
635            // take all locales set via locale-available or the configured default locales as fall-back for this resource
636            result.addAll(OpenCms.getLocaleManager().getAvailableLocales(cms, resource));
637            LOG.debug(Messages.get().getBundle().key(Messages.LOG_LANGUAGE_DETECTION_FAILED_1, resource));
638        }
639        return result;
640    }
641
642    /**
643     * Returns the search field mappings declared within the XSD.<p>
644     *
645     * @param cms the CmsObject
646     * @param resource the resource
647     *
648     * @return the fields to map
649     */
650    protected Set<CmsSearchField> getXSDMappings(CmsObject cms, CmsResource resource) {
651
652        try {
653            if (CmsResourceTypeXmlContent.isXmlContent(resource)) {
654                I_CmsXmlContentHandler handler = CmsXmlContentDefinition.getContentHandlerForResource(cms, resource);
655                if ((handler != null) && !handler.getSearchFields().isEmpty()) {
656                    return handler.getSearchFields();
657                }
658            }
659        } catch (CmsException e) {
660            LOG.error(e.getMessage(), e);
661        }
662        return null;
663    }
664
665    /**
666     * Returns the search field mappings declared within the XSD that should be applied to the container page.<p>
667     *
668     * @param cms the CmsObject
669     * @param resource the resource
670     *
671     * @return the fields to map
672     */
673    protected Set<CmsSearchField> getXSDMappingsForPage(CmsObject cms, CmsResource resource) {
674
675        try {
676            if (CmsResourceTypeXmlContent.isXmlContent(resource)) {
677                I_CmsXmlContentHandler handler = CmsXmlContentDefinition.getContentHandlerForResource(cms, resource);
678                if ((handler != null) && !handler.getSearchFieldsForPage().isEmpty()) {
679                    return handler.getSearchFieldsForPage();
680                }
681            }
682        } catch (CmsException e) {
683            LOG.error(e.getMessage(), e);
684        }
685        return null;
686    }
687
688    /**
689     * Adds additional fields to this field configuration.<p>
690     */
691    private void addAdditionalFields() {
692
693        /*
694         * Add fields from opencms-search.xml (Lucene fields)
695         */
696        for (CmsSearchField field : getFields()) {
697            if (field instanceof CmsLuceneField) {
698                CmsSolrField newSolrField = new CmsSolrField((CmsLuceneField)field);
699                m_solrFields.put(newSolrField.getName(), newSolrField);
700            }
701        }
702
703        /*
704         * Add the content fields (multiple for contents with more than one locale)
705         */
706        // add the content_<locale> fields to this configuration
707        CmsSolrField solrField = new CmsSolrField(CmsSearchField.FIELD_CONTENT, null, null, null);
708        solrField.addMapping(
709            new CmsSearchFieldMapping(CmsSearchFieldMappingType.CONTENT, CmsSearchField.FIELD_CONTENT));
710        m_solrFields.put(solrField.getName(), solrField);
711        for (Locale locale : OpenCms.getLocaleManager().getAvailableLocales()) {
712            solrField = new CmsSolrField(
713                CmsSearchFieldConfiguration.getLocaleExtendedName(CmsSearchField.FIELD_CONTENT, locale),
714                Collections.singletonList(locale.toString() + CmsSearchField.FIELD_EXCERPT),
715                locale,
716                null);
717            solrField.addMapping(
718                new CmsSearchFieldMapping(CmsSearchFieldMappingType.CONTENT, CmsSearchField.FIELD_CONTENT));
719            m_solrFields.put(solrField.getName(), solrField);
720        }
721
722        /*
723         * Fields filled within appendFields
724         */
725        CmsSolrField sfield = new CmsSolrField(CmsSearchField.FIELD_MIMETYPE, null, null, null);
726        m_solrFields.put(sfield.getName(), sfield);
727
728        sfield = new CmsSolrField(CmsSearchField.FIELD_FILENAME, null, null, null);
729        m_solrFields.put(sfield.getName(), sfield);
730
731        sfield = new CmsSolrField(CmsSearchField.FIELD_VERSION, null, null, null);
732        m_solrFields.put(sfield.getName(), sfield);
733
734        sfield = new CmsSolrField(CmsSearchField.FIELD_SEARCH_CHANNEL, null, null, null);
735        m_solrFields.put(sfield.getName(), sfield);
736
737        /*
738         * Fields with mapping
739         */
740        sfield = new CmsSolrField(CmsSearchField.FIELD_STATE, null, null, null);
741        CmsSearchFieldMapping map = new CmsSearchFieldMapping(
742            CmsSearchFieldMappingType.ATTRIBUTE,
743            CmsSearchField.FIELD_STATE);
744        sfield.addMapping(map);
745        m_solrFields.put(sfield.getName(), sfield);
746
747        sfield = new CmsSolrField(CmsSearchField.FIELD_USER_LAST_MODIFIED, null, null, null);
748        map = new CmsSearchFieldMapping(CmsSearchFieldMappingType.ATTRIBUTE, CmsSearchField.FIELD_USER_LAST_MODIFIED);
749        sfield.addMapping(map);
750        m_solrFields.put(sfield.getName(), sfield);
751
752        sfield = new CmsSolrField(CmsSearchField.FIELD_USER_CREATED, null, null, null);
753        map = new CmsSearchFieldMapping(CmsSearchFieldMappingType.ATTRIBUTE, CmsSearchField.FIELD_USER_CREATED);
754        sfield.addMapping(map);
755        m_solrFields.put(sfield.getName(), sfield);
756
757        sfield = new CmsSolrField(CmsSearchField.FIELD_META, null, null, null);
758        map = new CmsSearchFieldMapping(CmsSearchFieldMappingType.PROPERTY, CmsPropertyDefinition.PROPERTY_TITLE);
759        sfield.addMapping(map);
760        map = new CmsSearchFieldMapping(CmsSearchFieldMappingType.PROPERTY, CmsPropertyDefinition.PROPERTY_DESCRIPTION);
761        sfield.addMapping(map);
762        map = new CmsSearchFieldMapping(CmsSearchFieldMappingType.ATTRIBUTE, I_CmsXmlConfiguration.A_NAME);
763        sfield.addMapping(map);
764        m_solrFields.put(sfield.getName(), sfield);
765
766        sfield = new CmsSolrField(CmsSearchField.FIELD_SEARCH_EXCLUDE, null, null, null);
767        map = new CmsSearchFieldMapping(
768            CmsSearchFieldMappingType.PROPERTY_SEARCH,
769            CmsPropertyDefinition.PROPERTY_SEARCH_EXCLUDE);
770        sfield.addMapping(map);
771        m_solrFields.put(sfield.getName(), sfield);
772
773    }
774
775    /**
776     * Adds multiple fields to the document that are used to search in by the list app.
777     *
778     * <p>The fields are:
779     * <ul>
780     *  <li>description_{locale}</li>
781     *  <li>keywords_{locale}</li>
782     * </ul>
783     * for each of the locales the document is available in.</p>
784     *
785     * @param document the document to index with all other fields already added.
786     * @param cms the current context
787     * @param resource the resource that is indexed
788     * @param properties the direct properties of the resource
789     * @return the document extended by the fields used by the list.
790     */
791
792    private I_CmsSearchDocument appendFieldsForListSearch(
793        I_CmsSearchDocument document,
794        CmsObject cms,
795        CmsResource resource) {
796
797        List<String> locales = document.getMultivaluedFieldAsStringList(CmsSearchField.FIELD_CONTENT_LOCALES);
798        for (String locale : locales) {
799            fillLocalizedFieldWithPropertyFallbacks(
800                cms,
801                document,
802                resource,
803                locale,
804                CmsSearchField.FIELD_DESCRIPTION,
805                CmsPropertyDefinition.PROPERTY_DESCRIPTION);
806            fillLocalizedFieldWithPropertyFallbacks(
807                cms,
808                document,
809                resource,
810                locale,
811                CmsSearchField.FIELD_KEYWORDS,
812                CmsPropertyDefinition.PROPERTY_KEYWORDS);
813        }
814
815        return document;
816
817    }
818
819    /**
820     * Adds multiple fields to the document that are used for the sort options in the list app.
821     *
822     * <p>The fields are:
823     * <ul>
824     *  <li>instancedate_dt</li>
825     *  <li>instancedatecurrenttill_dt</li>
826     *  <li>instancedaterange_dr</li>
827     *  <li>disptitle_s</li>
828     *  <li>disporder_i</li>
829     * </ul>
830     * and localized versions for each content locale.</p>
831     *
832     * @param document the document to index with all other fields already added.
833     * @return the document extended by the fields used by the list.
834     */
835    private I_CmsSearchDocument appendFieldsForListSortOptions(I_CmsSearchDocument document) {
836
837        // add non-localized fields
838        // add instance date
839        String fieldName = CmsSearchField.FIELD_INSTANCEDATE + CmsSearchField.FIELD_POSTFIX_DATE;
840        Date instanceDate = document.getFieldValueAsDate(fieldName);
841        if ((null == instanceDate) || (instanceDate.getTime() == 0)) {
842            String instanceDateCopyField = document.getFieldValueAsString(
843                CmsPropertyDefinition.PROPERTY_INSTANCEDATE_COPYFIELD + CmsSearchField.FIELD_DYNAMIC_PROPERTIES);
844            if (null != instanceDateCopyField) {
845                instanceDate = document.getFieldValueAsDate(instanceDateCopyField);
846            }
847            if ((null == instanceDate) || (instanceDate.getTime() == 0)) {
848                instanceDate = document.getFieldValueAsDate(CmsSearchField.FIELD_DATE_RELEASED);
849            }
850            if ((null == instanceDate) || (instanceDate.getTime() == 0)) {
851                instanceDate = document.getFieldValueAsDate(CmsSearchField.FIELD_DATE_LASTMODIFIED);
852            }
853            document.addDateField(fieldName, instanceDate.getTime(), false);
854        }
855        // Set instancedaterange_dr
856        fieldName = CmsSearchField.FIELD_INSTANCEDATE_RANGE + CmsSearchField.FIELD_POSTFIX_DATE_RANGE;
857        String instanceDateString = document.getFieldValueAsString(
858            CmsSearchField.FIELD_INSTANCEDATE + CmsSearchField.FIELD_POSTFIX_DATE);
859        String instanceDateRangeString = "[" + instanceDateString + " TO " + instanceDateString + "]";
860        ((SolrInputDocument)document.getDocument()).setField(fieldName, instanceDateRangeString);
861        // Set instancedatecurrenttill_dt to instancedate_dt if not set yet
862        fieldName = CmsSearchField.FIELD_INSTANCEDATE_CURRENT_TILL + CmsSearchField.FIELD_POSTFIX_DATE;
863        Date instanceDateCurrentTill = document.getFieldValueAsDate(fieldName);
864        if ((null == instanceDateCurrentTill) || (instanceDateCurrentTill.getTime() == 0)) {
865            document.addDateField(fieldName, instanceDate.getTime(), false);
866        }
867        // add disp-title field
868        fieldName = CmsSearchField.FIELD_DISPTITLE + CmsSearchField.FIELD_POSTFIX_SORT;
869        String dispTitle = document.getFieldValueAsString(fieldName);
870        if (null == dispTitle) {
871            dispTitle = document.getFieldValueAsString(
872                CmsPropertyDefinition.PROPERTY_TITLE + CmsSearchField.FIELD_DYNAMIC_PROPERTIES_DIRECT);
873            if (null == dispTitle) {
874                dispTitle = document.getFieldValueAsString(CmsSearchField.FIELD_FILENAME);
875            }
876            document.addSearchField(new CmsSolrField(fieldName, null, null, null), dispTitle);
877        }
878
879        // add disp-order field
880        fieldName = CmsSearchField.FIELD_DISPORDER + CmsSearchField.FIELD_POSTFIX_INT;
881        String dispOrder = document.getFieldValueAsString(fieldName);
882        if (null == dispOrder) {
883            dispOrder = document.getFieldValueAsString(
884                CmsPropertyDefinition.PROPERTY_DISPLAY_ORDER + CmsSearchField.FIELD_DYNAMIC_PROPERTIES);
885            if (null != dispOrder) {
886                try {
887                    int o = Integer.parseInt(dispOrder);
888                    dispOrder = String.valueOf(o);
889                } catch (NullPointerException | NumberFormatException e) {
890                    LOG.warn(
891                        "Property "
892                            + CmsPropertyDefinition.PROPERTY_DISPLAY_ORDER
893                            + " contains not a valid integer number.");
894                    dispOrder = "0";
895                }
896            } else {
897                dispOrder = "0";
898            }
899            document.addSearchField(new CmsSolrField(fieldName, null, null, null), dispOrder);
900        }
901
902        // add localized fields
903        for (String locale : document.getMultivaluedFieldAsStringList(CmsSearchField.FIELD_CONTENT_LOCALES)) {
904            // instance date
905            fieldName = CmsSearchField.FIELD_INSTANCEDATE + "_" + locale + CmsSearchField.FIELD_POSTFIX_DATE;
906            Date localeInstanceDate = document.getFieldValueAsDate(fieldName);
907            if ((null == localeInstanceDate) || (localeInstanceDate.getTime() == 0)) {
908                localeInstanceDate = instanceDate;
909                document.addDateField(fieldName, localeInstanceDate.getTime(), false);
910            }
911            // instance date range
912            fieldName = CmsSearchField.FIELD_INSTANCEDATE_RANGE
913                + "_"
914                + locale
915                + CmsSearchField.FIELD_POSTFIX_DATE_RANGE;
916            String localeInstanceDateString = document.getFieldValueAsString(
917                CmsSearchField.FIELD_INSTANCEDATE + "_" + locale + CmsSearchField.FIELD_POSTFIX_DATE);
918            String localeInstanceDateRangeString = "["
919                + localeInstanceDateString
920                + " TO "
921                + localeInstanceDateString
922                + "]";
923            ((SolrInputDocument)document.getDocument()).setField(fieldName, localeInstanceDateRangeString);
924            // Set instancedatecurrenttill_dt to instancedate_dt if not set yet
925            fieldName = CmsSearchField.FIELD_INSTANCEDATE_CURRENT_TILL
926                + "_"
927                + locale
928                + CmsSearchField.FIELD_POSTFIX_DATE;
929            Date localeInstanceDateCurrentTill = document.getFieldValueAsDate(fieldName);
930            if ((null == localeInstanceDateCurrentTill) || (localeInstanceDateCurrentTill.getTime() == 0)) {
931                document.addDateField(fieldName, localeInstanceDate.getTime(), false);
932            }
933            // disp-title field for title display and sorting
934            fieldName = CmsSearchField.FIELD_DISPTITLE + "_" + locale + CmsSearchField.FIELD_POSTFIX_SORT;
935            if (null == document.getFieldValueAsString(fieldName)) {
936                String localizedTitle = document.getFieldValueAsString(
937                    CmsPropertyDefinition.PROPERTY_TITLE
938                        + "_"
939                        + locale
940                        + CmsSearchField.FIELD_DYNAMIC_PROPERTIES_DIRECT);
941                document.addSearchField(
942                    new CmsSolrField(fieldName, null, null, null),
943                    null == localizedTitle ? dispTitle : localizedTitle);
944            }
945            // disp-order field
946            fieldName = CmsSearchField.FIELD_DISPORDER + "_" + locale + CmsSearchField.FIELD_POSTFIX_INT;
947            if (null == document.getFieldValueAsString(fieldName)) {
948                String localizedOrder = document.getFieldValueAsString(
949                    CmsPropertyDefinition.PROPERTY_DISPLAY_ORDER
950                        + "_"
951                        + locale
952                        + CmsSearchField.FIELD_DYNAMIC_PROPERTIES);
953                if (null != localizedOrder) {
954                    try {
955                        int o = Integer.parseInt(localizedOrder);
956                        localizedOrder = String.valueOf(o);
957                    } catch (NullPointerException | NumberFormatException e) {
958                        LOG.warn(
959                            "Property "
960                                + CmsPropertyDefinition.PROPERTY_DISPLAY_ORDER
961                                + "_"
962                                + locale
963                                + " contains not a valid integer number.");
964                    }
965                }
966                document.addSearchField(
967                    new CmsSolrField(fieldName, null, null, null),
968                    null == localizedOrder ? dispOrder : localizedOrder);
969            }
970        }
971
972        return document;
973    }
974
975    /**
976     * Copy the content and the title property of the document to a spell field / a language specific spell field.
977     * @param document the document that gets extended by the spell fields.
978     */
979    private void appendSpellFields(I_CmsSearchDocument document) {
980
981        /*
982         * Add the content fields (multiple for contents with more than one locale)
983         */
984        // add the content_<locale> fields to this configuration
985        String title = document.getFieldValueAsString(
986            CmsPropertyDefinition.PROPERTY_TITLE + CmsSearchField.FIELD_DYNAMIC_PROPERTIES_DIRECT);
987        document.addSearchField(
988            new CmsSolrField(CmsSearchField.FIELD_SPELL, null, null, null),
989            document.getFieldValueAsString(CmsSearchField.FIELD_CONTENT) + "\n" + title);
990        for (Locale locale : OpenCms.getLocaleManager().getAvailableLocales()) {
991            document.addSearchField(
992                new CmsSolrField(locale + "_" + CmsSearchField.FIELD_SPELL, null, locale, null),
993                document.getFieldValueAsString(
994                    CmsSearchFieldConfiguration.getLocaleExtendedName(CmsSearchField.FIELD_CONTENT, locale))
995                    + "\n"
996                    + title);
997        }
998    }
999
1000    /**
1001     * Fills the field with the name extended by "_{locale}" with the property value, if the field is not already present in the document.
1002     *
1003     * If the localized property is not present, the default property value is used to get the value.
1004     * Properties set on the indexed resource itself are always preferred. If no suitable property is present, the parent properties are used when given.
1005     *
1006     * @param cms the current context
1007     * @param document the document to add the field to.
1008     * @param resource the currently indexed resource.
1009     * @param locale the locale to add the field for.
1010     * @param fieldName the name of the field to add (without locale postfix)
1011     * @param propertyName the property name of the property to get the value from (without locale postfix)
1012     */
1013    private void fillLocalizedFieldWithPropertyFallbacks(
1014        CmsObject cms,
1015        I_CmsSearchDocument document,
1016        CmsResource resource,
1017        String locale,
1018        String fieldName,
1019        String propertyName) {
1020
1021        Locale l = CmsLocaleManager.getLocale(locale);
1022        String localeExtendedFieldName = getLocaleExtendedName(fieldName, locale);
1023
1024        if (!document.getFieldNames().contains(localeExtendedFieldName)) {
1025            String value = CmsVfsUtil.readPropertyValueWithFolderFallbackForDefaultFiles(
1026                cms,
1027                resource,
1028                propertyName,
1029                l);
1030            if (value != null) {
1031                document.addSearchField(new CmsSolrField(localeExtendedFieldName, null, null, null), value);
1032            }
1033        }
1034
1035    }
1036
1037    /**
1038     * Returns <code>true</code> if at least one of the index sources uses a VFS indexer that is able
1039     * to index locale dependent resources.<p>
1040     *
1041     * TODO This should be improved somehow
1042     *
1043     * @return <code>true</code> if this field configuration should resolve locale dependencies
1044     */
1045    private boolean hasLocaleDependencies() {
1046
1047        for (CmsSearchIndexSource source : getIndex().getSources()) {
1048            if (source.getIndexer().isLocaleDependenciesEnable()) {
1049                return true;
1050            }
1051        }
1052        return false;
1053    }
1054}