001/*
002 * File   : $Source$
003 * Date   : $Date$
004 * Version: $Revision$
005 *
006 * This library is part of OpenCms -
007 * the Open Source Content Management System
008 *
009 * Copyright (C) 2002 - 2009 Alkacon Software (https://www.alkacon.com)
010 *
011 * This library is free software; you can redistribute it and/or
012 * modify it under the terms of the GNU Lesser General Public
013 * License as published by the Free Software Foundation; either
014 * version 2.1 of the License, or (at your option) any later version.
015 *
016 * This library is distributed in the hope that it will be useful,
017 * but WITHOUT ANY WARRANTY; without even the implied warranty of
018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
019 * Lesser General Public License for more details.
020 *
021 * For further information about Alkacon Software, please see the
022 * company website: https://www.alkacon.com
023 *
024 * For further information about OpenCms, please see the
025 * project website: https://www.opencms.org
026 *
027 * You should have received a copy of the GNU Lesser General Public
028 * License along with this library; if not, write to the Free Software
029 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
030 */
031
032package org.opencms.search.solr;
033
034import org.opencms.ade.containerpage.CmsDetailOnlyContainerUtil;
035import org.opencms.configuration.I_CmsXmlConfiguration;
036import org.opencms.file.CmsFile;
037import org.opencms.file.CmsObject;
038import org.opencms.file.CmsProperty;
039import org.opencms.file.CmsPropertyDefinition;
040import org.opencms.file.CmsResource;
041import org.opencms.file.types.CmsResourceTypeJsp;
042import org.opencms.file.types.CmsResourceTypeXmlContainerPage;
043import org.opencms.file.types.CmsResourceTypeXmlContent;
044import org.opencms.file.types.CmsResourceTypeXmlPage;
045import org.opencms.i18n.CmsLocaleManager;
046import org.opencms.loader.CmsResourceManager;
047import org.opencms.main.CmsException;
048import org.opencms.main.CmsLog;
049import org.opencms.main.OpenCms;
050import org.opencms.search.CmsSearchIndexSource;
051import org.opencms.search.CmsSearchUtil;
052import org.opencms.search.I_CmsSearchDocument;
053import org.opencms.search.documents.CmsDocumentDependency;
054import org.opencms.search.extractors.I_CmsExtractionResult;
055import org.opencms.search.fields.CmsLuceneField;
056import org.opencms.search.fields.CmsSearchField;
057import org.opencms.search.fields.CmsSearchFieldConfiguration;
058import org.opencms.search.fields.CmsSearchFieldMapping;
059import org.opencms.search.fields.CmsSearchFieldMappingType;
060import org.opencms.search.fields.I_CmsSearchFieldMapping;
061import org.opencms.util.CmsStringUtil;
062import org.opencms.util.CmsVfsUtil;
063import org.opencms.xml.CmsXmlContentDefinition;
064import org.opencms.xml.containerpage.CmsContainerElementBean;
065import org.opencms.xml.containerpage.CmsContainerPageBean;
066import org.opencms.xml.containerpage.CmsXmlContainerPage;
067import org.opencms.xml.containerpage.CmsXmlContainerPageFactory;
068import org.opencms.xml.content.I_CmsXmlContentHandler;
069
070import java.util.ArrayList;
071import java.util.Arrays;
072import java.util.Collection;
073import java.util.Collections;
074import java.util.Date;
075import java.util.HashMap;
076import java.util.List;
077import java.util.Locale;
078import java.util.Map;
079import java.util.Set;
080
081import org.apache.commons.logging.Log;
082import org.apache.solr.common.SolrInputDocument;
083
084/**
085 * The search field implementation for Solr.<p>
086 *
087 * @since 8.5.0
088 */
089public class CmsSolrFieldConfiguration extends CmsSearchFieldConfiguration {
090
    /** The log object for this class. */
    private static final Log LOG = CmsLog.getLog(CmsSolrFieldConfiguration.class);

    /**
     * The content locales determined for the resource that is currently indexed.<p>
     *
     * The value is assigned in <code>appendLocales</code> and read in <code>appendFieldMapping</code>
     * when a localized content field has no localized extraction.<p>
     *
     * NOTE(review): this is per-document state kept in an instance field - confirm that a
     * configuration instance is never used by several indexing threads at the same time.
     */
    private Collection<Locale> m_contentLocales;

    /** The Solr fields of this configuration, keyed by their field name. */
    private Map<String, CmsSolrField> m_solrFields = new HashMap<String, CmsSolrField>();
099
100    /**
101     * Default constructor.<p>
102     */
103    public CmsSolrFieldConfiguration() {
104
105        super();
106    }
107
108    /**
109     * Adds the additional fields to the configuration, if they are not null.<p>
110     *
111     * @param additionalFields the additional fields to add
112     */
113    public void addAdditionalFields(List<CmsSolrField> additionalFields) {
114
115        if (additionalFields != null) {
116            for (CmsSolrField solrField : additionalFields) {
117                m_solrFields.put(solrField.getName(), solrField);
118            }
119        }
120    }
121
122    /**
123     * Returns all configured Solr fields.<p>
124     *
125     * @return all configured Solr fields
126     */
127    public Map<String, CmsSolrField> getSolrFields() {
128
129        return Collections.unmodifiableMap(m_solrFields);
130    }
131
    /**
     * Initializes this configuration and registers the Solr fields afterwards.<p>
     *
     * @see org.opencms.search.fields.CmsSearchFieldConfiguration#init()
     */
    @Override
    public void init() {

        // the super class is initialized first, since the private addAdditionalFields() reads getFields()
        super.init();
        // converts the configured Lucene fields to Solr fields and adds the content fields
        addAdditionalFields();
    }
141
142    /**
143     * @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendAdditionalValuesToDcoument(org.opencms.search.I_CmsSearchDocument, org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.extractors.I_CmsExtractionResult, java.util.List, java.util.List)
144     */
145    @Override
146    protected I_CmsSearchDocument appendAdditionalValuesToDcoument(
147        I_CmsSearchDocument document,
148        CmsObject cms,
149        CmsResource resource,
150        I_CmsExtractionResult extractionResult,
151        List<CmsProperty> properties,
152        List<CmsProperty> propertiesSearched) {
153
154        String mimeType = OpenCms.getResourceManager().getMimeType(resource.getName(), null);
155        if (mimeType != null) {
156            document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_MIMETYPE), mimeType);
157        }
158
159        document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_FILENAME), resource.getName());
160
161        document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_VERSION), "" + resource.getVersion());
162
163        try {
164            if (CmsResourceTypeXmlContent.isXmlContent(resource)) {
165                I_CmsXmlContentHandler handler = CmsXmlContentDefinition.getContentHandlerForResource(cms, resource);
166                if ((handler != null) && handler.isContainerPageOnly()) {
167                    if (document.getDocument() instanceof SolrInputDocument) {
168                        SolrInputDocument doc = (SolrInputDocument)document.getDocument();
169                        doc.removeField(CmsSearchField.FIELD_SEARCH_EXCLUDE);
170                    } else {
171                        //TODO: Warning - but should not happen.
172                    }
173                    document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_SEARCH_EXCLUDE), "true");
174                }
175            }
176        } catch (CmsException e) {
177            LOG.error(e.getMessage(), e);
178        }
179
180        List<String> searchExcludeOptions = document.getMultivaluedFieldAsStringList(
181            CmsSearchField.FIELD_SEARCH_EXCLUDE);
182        if ((searchExcludeOptions == null) || searchExcludeOptions.isEmpty()) {
183            document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_SEARCH_EXCLUDE), "false");
184        }
185        if (resource.getRootPath().startsWith("/system")
186            || (CmsResourceTypeJsp.getJSPTypeId() == resource.getTypeId())) {
187            document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_SEARCH_CHANNEL), "gallery");
188        } else {
189            document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_SEARCH_CHANNEL), "content");
190        }
191
192        document = appendFieldsForListSortOptions(document);
193
194        document = appendFieldsForListSearch(document, cms, resource);
195
196        if (resource.getRootPath().startsWith(OpenCms.getSiteManager().getSharedFolder())
197            || (null != OpenCms.getSiteManager().getSiteRoot(resource.getRootPath()))) {
198            appendSpellFields(document);
199        }
200
201        document = getIndex().applyDocumentTransformation(
202            document,
203            cms,
204            resource,
205            extractionResult,
206            properties,
207            propertiesSearched);
208
209        return document;
210    }
211
    /**
     * Appends the date fields of the resource (created, last modified, content, released and
     * expired) to the document.<p>
     *
     * @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendDates(org.opencms.search.I_CmsSearchDocument, org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.extractors.I_CmsExtractionResult, java.util.List, java.util.List)
     */
    @Override
    protected I_CmsSearchDocument appendDates(
        I_CmsSearchDocument document,
        CmsObject cms,
        CmsResource resource,
        I_CmsExtractionResult extractionResult,
        List<CmsProperty> properties,
        List<CmsProperty> propertiesSearched) {

        // NOTE(review): the meaning of the trailing 'false' argument is defined by
        // I_CmsSearchDocument#addDateField, which is not visible here - confirm before changing it
        document.addDateField(CmsSearchField.FIELD_DATE_CREATED, resource.getDateCreated(), false);
        document.addDateField(CmsSearchField.FIELD_DATE_LASTMODIFIED, resource.getDateLastModified(), false);
        document.addDateField(CmsSearchField.FIELD_DATE_CONTENT, resource.getDateContent(), false);
        document.addDateField(CmsSearchField.FIELD_DATE_RELEASED, resource.getDateReleased(), false);
        document.addDateField(CmsSearchField.FIELD_DATE_EXPIRED, resource.getDateExpired(), false);

        return document;
    }
232
233    /**
234     * @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendFieldMapping(org.opencms.search.I_CmsSearchDocument, org.opencms.search.fields.CmsSearchField, org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.extractors.I_CmsExtractionResult, java.util.List, java.util.List)
235     */
236    @Override
237    protected I_CmsSearchDocument appendFieldMapping(
238        I_CmsSearchDocument document,
239        CmsSearchField sfield,
240        CmsObject cms,
241        CmsResource resource,
242        I_CmsExtractionResult extractionResult,
243        List<CmsProperty> properties,
244        List<CmsProperty> propertiesSearched) {
245
246        CmsSolrField field = (CmsSolrField)sfield;
247        try {
248            StringBuffer text = new StringBuffer();
249            for (I_CmsSearchFieldMapping mapping : field.getMappings()) {
250                // loop over the mappings of the given field
251                if (extractionResult != null) {
252                    String mapResult = null;
253                    if ((field.getLocale() != null) && mapping.getType().equals(CmsSearchFieldMappingType.CONTENT)) {
254                        // this is a localized content field, try to retrieve the localized content extraction
255                        mapResult = extractionResult.getContent(field.getLocale());
256                        if (mapResult == null) {
257                            // no localized content extracted
258                            if (!(CmsResourceTypeXmlContent.isXmlContent(resource)
259                                || CmsResourceTypeXmlPage.isXmlPage(resource))) {
260                                // the resource is no XML content nor an XML page
261                                if ((m_contentLocales != null) && m_contentLocales.contains(field.getLocale())) {
262                                    // the resource to get the extracted content for has the locale of this field,
263                                    // so store the extraction content into this field
264                                    mapResult = extractionResult.getContent();
265                                }
266                            }
267                        }
268                    } else {
269                        // this is not a localized content field, just perform the regular mapping
270                        mapResult = mapping.getStringValue(
271                            cms,
272                            resource,
273                            extractionResult,
274                            properties,
275                            propertiesSearched);
276                    }
277                    if (text.length() > 0) {
278                        text.append('\n');
279                    }
280                    if (mapResult != null) {
281                        text.append(mapResult);
282                    } else if (mapping.getDefaultValue() != null) {
283                        // no mapping result found, but a default is configured
284                        text.append(mapping.getDefaultValue());
285                    }
286                } else if (mapping.getStringValue(
287                    cms,
288                    resource,
289                    extractionResult,
290                    properties,
291                    propertiesSearched) != null) {
292                        String value = mapping.getStringValue(
293                            cms,
294                            resource,
295                            extractionResult,
296                            properties,
297                            propertiesSearched);
298                        if (value != null) {
299                            document.addSearchField(field, value);
300                        }
301                    }
302            }
303            if ((text.length() <= 0) && (field.getDefaultValue() != null)) {
304                text.append(field.getDefaultValue());
305            }
306            if (text.length() > 0) {
307                document.addSearchField(field, text.toString());
308            }
309        } catch (Exception e) {
310            // nothing to do just log
311            LOG.error(e.getLocalizedMessage(), e);
312        }
313        return document;
314    }
315
316    /**
317     * @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendFieldMappings(org.opencms.search.I_CmsSearchDocument, org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.extractors.I_CmsExtractionResult, java.util.List, java.util.List)
318     */
319    @Override
320    protected I_CmsSearchDocument appendFieldMappings(
321        I_CmsSearchDocument document,
322        CmsObject cms,
323        CmsResource resource,
324        I_CmsExtractionResult extractionResult,
325        List<CmsProperty> properties,
326        List<CmsProperty> propertiesSearched) {
327
328        List<String> systemFields = new ArrayList<String>();
329        // append field mappings directly stored in the extraction result
330        if (null != extractionResult) {
331            Map<String, String> fieldMappings = extractionResult.getFieldMappings();
332            for (String fieldName : fieldMappings.keySet()) {
333                String value = fieldMappings.get(fieldName);
334                CmsSolrField f = new CmsSolrField(fieldName, null, null, null);
335                document.addSearchField(f, value);
336                systemFields.add(fieldName);
337            }
338        }
339
340        Set<CmsSearchField> mappedFields = getXSDMappings(cms, resource);
341        if (mappedFields != null) {
342            for (CmsSearchField field : mappedFields) {
343                if (!systemFields.contains(field.getName())) {
344                    document = appendFieldMapping(
345                        document,
346                        field,
347                        cms,
348                        resource,
349                        extractionResult,
350                        properties,
351                        propertiesSearched);
352                } else {
353                    LOG.error(
354                        Messages.get().getBundle().key(
355                            Messages.LOG_SOLR_ERR_MAPPING_TO_INTERNALLY_USED_FIELD_2,
356                            resource.getRootPath(),
357                            field.getName()));
358                }
359            }
360        }
361
362        // add field mappings from elements of a container page
363        if (CmsResourceTypeXmlContainerPage.isContainerPage(resource)) {
364            document = appendFieldMappingsFromElementsOnThePage(document, cms, resource, systemFields);
365        } else {
366            try {
367                for (CmsResource detailOnlyPage : CmsDetailOnlyContainerUtil.getDetailOnlyResources(cms, resource)) {
368                    try {
369                        document = appendFieldMappingsFromElementsOnThePage(
370                            document,
371                            cms,
372                            detailOnlyPage,
373                            systemFields);
374                    } catch (Throwable t) {
375                        LOG.warn(
376                            Messages.get().getBundle().key(
377                                Messages.LOG_SOLR_WARN_DETAIL_ONLY_PAGE_MAPPINGS_FOR_PAGE_2,
378                                null == resource ? "null" : resource.getRootPath(),
379                                null == detailOnlyPage ? "null" : detailOnlyPage.getRootPath()),
380                            t);
381                    }
382                }
383            } catch (Throwable t) {
384                LOG.warn(
385                    Messages.get().getBundle().key(
386                        Messages.LOG_SOLR_WARN_DETAIL_ONLY_PAGE_MAPPINGS_1,
387                        null == resource ? "null" : resource.getRootPath()),
388                    t);
389            }
390        }
391
392        for (CmsSolrField field : m_solrFields.values()) {
393            document = appendFieldMapping(
394                document,
395                field,
396                cms,
397                resource,
398                extractionResult,
399                properties,
400                propertiesSearched);
401        }
402
403        return document;
404    }
405
406    /**
407     * Adds search fields from elements on a container page to a container page's document.
408     * @param document The document for the container page
409     * @param cms The current CmsObject
410     * @param resource The resource of the container page
411     * @param systemFields The list of field names for fields where mappings to should be discarded, since these fields are used system internally.
412     * @return the manipulated document
413     */
414    protected I_CmsSearchDocument appendFieldMappingsFromElementsOnThePage(
415        I_CmsSearchDocument document,
416        CmsObject cms,
417        CmsResource resource,
418        List<String> systemFields) {
419
420        try {
421            CmsFile file = cms.readFile(resource);
422            CmsXmlContainerPage containerPage = CmsXmlContainerPageFactory.unmarshal(cms, file);
423            CmsContainerPageBean containerBean = containerPage.getContainerPage(cms);
424            if (containerBean != null) {
425                for (CmsContainerElementBean element : containerBean.getElements()) {
426                    element.initResource(cms);
427                    CmsResource elemResource = element.getResource();
428                    Set<CmsSearchField> mappedFields = getXSDMappingsForPage(cms, elemResource);
429                    if (mappedFields != null) {
430
431                        for (CmsSearchField field : mappedFields) {
432                            if (!systemFields.contains(field.getName())) {
433                                try {
434                                    I_CmsExtractionResult extractionResult = CmsSolrDocumentXmlContent.extractXmlContent(
435                                        cms,
436                                        elemResource,
437                                        getIndex());
438                                    document = appendFieldMapping(
439                                        document,
440                                        field,
441                                        cms,
442                                        elemResource,
443                                        extractionResult,
444                                        cms.readPropertyObjects(resource, false),
445                                        cms.readPropertyObjects(resource, true));
446                                } catch (Exception e) {
447                                    LOG.error(
448                                        Messages.get().getBundle().key(
449                                            Messages.LOG_SOLR_ERR_MAPPING_UNREADABLE_CONTENT_3,
450                                            elemResource.getRootPath(),
451                                            field.getName(),
452                                            resource.getRootPath()),
453                                        e);
454                                }
455                            } else {
456                                LOG.error(
457                                    Messages.get().getBundle().key(
458                                        Messages.LOG_SOLR_ERR_MAPPING_TO_INTERNALLY_USED_FIELD_3,
459                                        elemResource.getRootPath(),
460                                        field.getName(),
461                                        resource.getRootPath()));
462                            }
463                        }
464                    }
465                }
466            }
467        } catch (CmsException e) {
468            // Should be thrown if element on the page does not exist anymore - this is possible, but not necessarily an error.
469            // Hence, just notice it in the debug log.
470            if (LOG.isDebugEnabled()) {
471                LOG.debug(e.getLocalizedMessage(), e);
472            }
473        }
474        return document;
475    }
476
477    /**
478     * @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendLocales(org.opencms.search.I_CmsSearchDocument, org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.extractors.I_CmsExtractionResult, java.util.List, java.util.List)
479     */
480    @Override
481    protected I_CmsSearchDocument appendLocales(
482        I_CmsSearchDocument document,
483        CmsObject cms,
484        CmsResource resource,
485        I_CmsExtractionResult extraction,
486        List<CmsProperty> properties,
487        List<CmsProperty> propertiesSearched) {
488
489        // append the resource locales
490        Collection<Locale> resourceLocales = new ArrayList<Locale>();
491        if ((extraction != null) && (!extraction.getLocales().isEmpty())) {
492
493            CmsResourceManager resMan = OpenCms.getResourceManager();
494            resourceLocales = extraction.getLocales();
495            boolean isGroup = false;
496            for (String groupType : Arrays.asList(
497                CmsResourceTypeXmlContainerPage.GROUP_CONTAINER_TYPE_NAME,
498                CmsResourceTypeXmlContainerPage.INHERIT_CONTAINER_TYPE_NAME)) {
499                if (resMan.matchResourceType(groupType, resource.getTypeId())) {
500                    isGroup = true;
501                    break;
502                }
503            }
504            if (isGroup) {
505                // groups are locale independent, so they have to have *all* locales so they are found for each one
506                m_contentLocales = OpenCms.getLocaleManager().getAvailableLocales();
507            } else {
508                m_contentLocales = resourceLocales;
509            }
510        } else {
511            // For all other resources add all default locales
512            resourceLocales = OpenCms.getLocaleManager().getDefaultLocales(cms, resource);
513
514            /*
515             * A problem is likely to arise when dealing with multilingual fields:
516             * Only values extracted from XML resources are written into the Solr locale-aware fields (e.g.
517             * "title_<locale>_s"), therefore sorting by them will not work as non-XML (unilingual) resources extract
518             * the information by the resource property facility and will not write to an Solr locale-aware field.
519             *
520             * The following code is used to fix this behavior, at least for "Title".
521             */
522
523            // Check all passed properties for "Title"...
524            for (final CmsProperty prop : propertiesSearched) {
525                if (prop.getName().equals(CmsPropertyDefinition.PROPERTY_TITLE)) {
526                    final String value = prop.getValue();
527
528                    // Write a Solr locale-aware field for every locale the system supports...
529                    final List<Locale> availableLocales = OpenCms.getLocaleManager().getAvailableLocales();
530                    for (final Locale locale : availableLocales) {
531                        final String lang = locale.getLanguage();
532                        // Don't proceed if a field has already written for this locale.
533                        if (!resourceLocales.contains(lang)) {
534                            final String effFieldName = CmsSearchFieldConfiguration.getLocaleExtendedName(
535                                CmsSearchField.FIELD_TITLE_UNSTORED,
536                                locale) + "_s";
537
538                            final CmsSolrField f = new CmsSolrField(effFieldName, null, null, null);
539                            document.addSearchField(f, value);
540                        }
541                    }
542                }
543            }
544            m_contentLocales = getContentLocales(cms, resource, extraction);
545        }
546
547        document.addResourceLocales(resourceLocales);
548        document.addContentLocales(m_contentLocales);
549
550        // append document dependencies if configured
551        if (hasLocaleDependencies()) {
552            CmsDocumentDependency dep = CmsDocumentDependency.load(cms, resource);
553            ((CmsSolrDocument)document).addDocumentDependency(cms, dep);
554        }
555        return document;
556    }
557
558    /**
559     * @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendProperties(org.opencms.search.I_CmsSearchDocument, org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.extractors.I_CmsExtractionResult, java.util.List, java.util.List)
560     */
561    @Override
562    protected I_CmsSearchDocument appendProperties(
563        I_CmsSearchDocument document,
564        CmsObject cms,
565        CmsResource resource,
566        I_CmsExtractionResult extraction,
567        List<CmsProperty> properties,
568        List<CmsProperty> propertiesSearched) {
569
570        for (CmsProperty prop : propertiesSearched) {
571            if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(prop.getValue())) {
572                String value = CmsSearchUtil.stripHtmlFromPropertyIfNecessary(prop.getName(), prop.getValue());
573                document.addSearchField(
574                    new CmsSolrField(prop.getName() + CmsSearchField.FIELD_DYNAMIC_PROPERTIES, null, null, null),
575                    value);
576
577                // Also write the property using the dynamic field '_s' in order to prevent tokenization
578                // of the property. The resulting field is named '<property>_prop_s'.
579                document.addSearchField(
580                    new CmsSolrField(prop.getName() + CmsSearchField.FIELD_DYNAMIC_PROPERTIES + "_s", null, null, null),
581                    value);
582            }
583        }
584
585        for (CmsProperty prop : properties) {
586            if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(prop.getValue())) {
587                String value = CmsSearchUtil.stripHtmlFromPropertyIfNecessary(prop.getName(), prop.getValue());
588                document.addSearchField(
589                    new CmsSolrField(prop.getName() + CmsSearchField.FIELD_DYNAMIC_PROPERTIES_DIRECT, null, null, null),
590                    value);
591
592                // Also write the property using the dynamic field '_s' in order to prevent tokenization
593                // of the property. The resulting field is named '<property>_prop_nosearch_s'.
594                document.addSearchField(
595                    new CmsSolrField(
596                        prop.getName() + CmsSearchField.FIELD_DYNAMIC_PROPERTIES_DIRECT + "_s",
597                        null,
598                        null,
599                        null),
600                    value);
601            }
602        }
603        return document;
604    }
605
606    /**
607     * Retrieves the locales for an content, that is whether an XML content nor an XML page.<p>
608     *
609     * Uses following strategy:
610     * <ul>
611     * <li>first by file name</li>
612     * <li>then by detection and</li>
613     * <li>otherwise take the first configured default locale for this resource</li>
614     * </ul>
615     *
616     * @param cms the current CmsObject
617     * @param resource the resource to get the content locales for
618     * @param extraction the extraction result
619     *
620     * @return the determined locales for the given resource
621     */
622    protected List<Locale> getContentLocales(CmsObject cms, CmsResource resource, I_CmsExtractionResult extraction) {
623
624        // try to detect locale by filename
625        Locale detectedLocale = CmsStringUtil.getLocaleForName(resource.getRootPath());
626        if (!OpenCms.getLocaleManager().getAvailableLocales(cms, resource).contains(detectedLocale)) {
627            detectedLocale = null;
628        }
629        // try to detect locale by language detector
630        if (getIndex().isLanguageDetection()
631            && (detectedLocale == null)
632            && (extraction != null)
633            && (extraction.getContent() != null)) {
634            detectedLocale = CmsStringUtil.getLocaleForText(extraction.getContent());
635        }
636        // take the detected locale or use the first configured default locale for this resource
637        List<Locale> result = new ArrayList<Locale>();
638        if (detectedLocale != null) {
639            // take the found locale
640            result.add(detectedLocale);
641        } else {
642
643            // take all locales set via locale-available or the configured default locales as fall-back for this resource
644            result.addAll(OpenCms.getLocaleManager().getAvailableLocales(cms, resource));
645            LOG.debug(Messages.get().getBundle().key(Messages.LOG_LANGUAGE_DETECTION_FAILED_1, resource));
646        }
647        return result;
648    }
649
650    /**
651     * Returns the search field mappings declared within the XSD.<p>
652     *
653     * @param cms the CmsObject
654     * @param resource the resource
655     *
656     * @return the fields to map
657     */
658    protected Set<CmsSearchField> getXSDMappings(CmsObject cms, CmsResource resource) {
659
660        try {
661            if (CmsResourceTypeXmlContent.isXmlContent(resource)) {
662                I_CmsXmlContentHandler handler = CmsXmlContentDefinition.getContentHandlerForResource(cms, resource);
663                if ((handler != null) && !handler.getSearchFields().isEmpty()) {
664                    return handler.getSearchFields();
665                }
666            }
667        } catch (CmsException e) {
668            LOG.error(e.getMessage(), e);
669        }
670        return null;
671    }
672
673    /**
674     * Returns the search field mappings declared within the XSD that should be applied to the container page.<p>
675     *
676     * @param cms the CmsObject
677     * @param resource the resource
678     *
679     * @return the fields to map
680     */
681    protected Set<CmsSearchField> getXSDMappingsForPage(CmsObject cms, CmsResource resource) {
682
683        try {
684            if (CmsResourceTypeXmlContent.isXmlContent(resource)) {
685                I_CmsXmlContentHandler handler = CmsXmlContentDefinition.getContentHandlerForResource(cms, resource);
686                if ((handler != null) && !handler.getSearchFieldsForPage().isEmpty()) {
687                    return handler.getSearchFieldsForPage();
688                }
689            }
690        } catch (CmsException e) {
691            LOG.error(e.getMessage(), e);
692        }
693        return null;
694    }
695
696    /**
697     * Adds additional fields to this field configuration.<p>
698     */
699    private void addAdditionalFields() {
700
701        /*
702         * Add fields from opencms-search.xml (Lucene fields)
703         */
704        for (CmsSearchField field : getFields()) {
705            if (field instanceof CmsLuceneField) {
706                CmsSolrField newSolrField = new CmsSolrField((CmsLuceneField)field);
707                m_solrFields.put(newSolrField.getName(), newSolrField);
708            }
709        }
710
711        /*
712         * Add the content fields (multiple for contents with more than one locale)
713         */
714        // add the content_<locale> fields to this configuration
715        CmsSolrField solrField = new CmsSolrField(CmsSearchField.FIELD_CONTENT, null, null, null);
716        solrField.addMapping(
717            new CmsSearchFieldMapping(CmsSearchFieldMappingType.CONTENT, CmsSearchField.FIELD_CONTENT));
718        m_solrFields.put(solrField.getName(), solrField);
719        for (Locale locale : OpenCms.getLocaleManager().getAvailableLocales()) {
720            solrField = new CmsSolrField(
721                CmsSearchFieldConfiguration.getLocaleExtendedName(CmsSearchField.FIELD_CONTENT, locale),
722                Collections.singletonList(locale.toString() + CmsSearchField.FIELD_EXCERPT),
723                locale,
724                null);
725            solrField.addMapping(
726                new CmsSearchFieldMapping(CmsSearchFieldMappingType.CONTENT, CmsSearchField.FIELD_CONTENT));
727            m_solrFields.put(solrField.getName(), solrField);
728        }
729
730        /*
731         * Fields filled within appendFields
732         */
733        CmsSolrField sfield = new CmsSolrField(CmsSearchField.FIELD_MIMETYPE, null, null, null);
734        m_solrFields.put(sfield.getName(), sfield);
735
736        sfield = new CmsSolrField(CmsSearchField.FIELD_FILENAME, null, null, null);
737        m_solrFields.put(sfield.getName(), sfield);
738
739        sfield = new CmsSolrField(CmsSearchField.FIELD_VERSION, null, null, null);
740        m_solrFields.put(sfield.getName(), sfield);
741
742        sfield = new CmsSolrField(CmsSearchField.FIELD_SEARCH_CHANNEL, null, null, null);
743        m_solrFields.put(sfield.getName(), sfield);
744
745        /*
746         * Fields with mapping
747         */
748        sfield = new CmsSolrField(CmsSearchField.FIELD_STATE, null, null, null);
749        CmsSearchFieldMapping map = new CmsSearchFieldMapping(
750            CmsSearchFieldMappingType.ATTRIBUTE,
751            CmsSearchField.FIELD_STATE);
752        sfield.addMapping(map);
753        m_solrFields.put(sfield.getName(), sfield);
754
755        sfield = new CmsSolrField(CmsSearchField.FIELD_USER_LAST_MODIFIED, null, null, null);
756        map = new CmsSearchFieldMapping(CmsSearchFieldMappingType.ATTRIBUTE, CmsSearchField.FIELD_USER_LAST_MODIFIED);
757        sfield.addMapping(map);
758        m_solrFields.put(sfield.getName(), sfield);
759
760        sfield = new CmsSolrField(CmsSearchField.FIELD_USER_CREATED, null, null, null);
761        map = new CmsSearchFieldMapping(CmsSearchFieldMappingType.ATTRIBUTE, CmsSearchField.FIELD_USER_CREATED);
762        sfield.addMapping(map);
763        m_solrFields.put(sfield.getName(), sfield);
764
765        sfield = new CmsSolrField(CmsSearchField.FIELD_META, null, null, null);
766        map = new CmsSearchFieldMapping(CmsSearchFieldMappingType.PROPERTY, CmsPropertyDefinition.PROPERTY_TITLE);
767        sfield.addMapping(map);
768        map = new CmsSearchFieldMapping(CmsSearchFieldMappingType.PROPERTY, CmsPropertyDefinition.PROPERTY_DESCRIPTION);
769        sfield.addMapping(map);
770        map = new CmsSearchFieldMapping(CmsSearchFieldMappingType.ATTRIBUTE, I_CmsXmlConfiguration.A_NAME);
771        sfield.addMapping(map);
772        m_solrFields.put(sfield.getName(), sfield);
773
774        sfield = new CmsSolrField(CmsSearchField.FIELD_SEARCH_EXCLUDE, null, null, null);
775        map = new CmsSearchFieldMapping(
776            CmsSearchFieldMappingType.PROPERTY_SEARCH,
777            CmsPropertyDefinition.PROPERTY_SEARCH_EXCLUDE);
778        sfield.addMapping(map);
779        m_solrFields.put(sfield.getName(), sfield);
780
781    }
782
783    /**
784     * Adds multiple fields to the document that are used to search in by the list app.
785     *
786     * <p>The fields are:
787     * <ul>
788     *  <li>description_{locale}</li>
789     *  <li>keywords_{locale}</li>
790     * </ul>
791     * for each of the locales the document is available in.</p>
792     *
793     * @param document the document to index with all other fields already added.
794     * @param cms the current context
795     * @param resource the resource that is indexed
796     * @param properties the direct properties of the resource
797     * @return the document extended by the fields used by the list.
798     */
799
800    private I_CmsSearchDocument appendFieldsForListSearch(
801        I_CmsSearchDocument document,
802        CmsObject cms,
803        CmsResource resource) {
804
805        List<String> locales = document.getMultivaluedFieldAsStringList(CmsSearchField.FIELD_CONTENT_LOCALES);
806        for (String locale : locales) {
807            fillLocalizedFieldWithPropertyFallbacks(
808                cms,
809                document,
810                resource,
811                locale,
812                CmsSearchField.FIELD_DESCRIPTION,
813                CmsPropertyDefinition.PROPERTY_DESCRIPTION);
814            fillLocalizedFieldWithPropertyFallbacks(
815                cms,
816                document,
817                resource,
818                locale,
819                CmsSearchField.FIELD_KEYWORDS,
820                CmsPropertyDefinition.PROPERTY_KEYWORDS);
821        }
822
823        return document;
824
825    }
826
827    /**
828     * Adds multiple fields to the document that are used for the sort options in the list app.
829     *
830     * <p>The fields are:
831     * <ul>
832     *  <li>instancedate_dt</li>
833     *  <li>instancedatecurrenttill_dt</li>
834     *  <li>instancedaterange_dr</li>
835     *  <li>disptitle_s</li>
836     *  <li>disporder_i</li>
837     * </ul>
838     * and localized versions for each content locale.</p>
839     *
840     * @param document the document to index with all other fields already added.
841     * @return the document extended by the fields used by the list.
842     */
843    private I_CmsSearchDocument appendFieldsForListSortOptions(I_CmsSearchDocument document) {
844
845        // add non-localized fields
846        // add instance date
847        String fieldName = CmsSearchField.FIELD_INSTANCEDATE + CmsSearchField.FIELD_POSTFIX_DATE;
848        Date instanceDate = document.getFieldValueAsDate(fieldName);
849        if ((null == instanceDate) || (instanceDate.getTime() == 0)) {
850            String instanceDateCopyField = document.getFieldValueAsString(
851                CmsPropertyDefinition.PROPERTY_INSTANCEDATE_COPYFIELD + CmsSearchField.FIELD_DYNAMIC_PROPERTIES);
852            if (null != instanceDateCopyField) {
853                instanceDate = document.getFieldValueAsDate(instanceDateCopyField);
854            }
855            if ((null == instanceDate) || (instanceDate.getTime() == 0)) {
856                instanceDate = document.getFieldValueAsDate(CmsSearchField.FIELD_DATE_RELEASED);
857            }
858            if ((null == instanceDate) || (instanceDate.getTime() == 0)) {
859                instanceDate = document.getFieldValueAsDate(CmsSearchField.FIELD_DATE_LASTMODIFIED);
860            }
861            document.addDateField(fieldName, instanceDate.getTime(), false);
862        }
863        // Set instancedaterange_dr
864        fieldName = CmsSearchField.FIELD_INSTANCEDATE_RANGE + CmsSearchField.FIELD_POSTFIX_DATE_RANGE;
865        String instanceDateString = document.getFieldValueAsString(
866            CmsSearchField.FIELD_INSTANCEDATE + CmsSearchField.FIELD_POSTFIX_DATE);
867        String instanceDateRangeString = "[" + instanceDateString + " TO " + instanceDateString + "]";
868        ((SolrInputDocument)document.getDocument()).setField(fieldName, instanceDateRangeString);
869        // Set instancedatecurrenttill_dt to instancedate_dt if not set yet
870        fieldName = CmsSearchField.FIELD_INSTANCEDATE_CURRENT_TILL + CmsSearchField.FIELD_POSTFIX_DATE;
871        Date instanceDateCurrentTill = document.getFieldValueAsDate(fieldName);
872        if ((null == instanceDateCurrentTill) || (instanceDateCurrentTill.getTime() == 0)) {
873            document.addDateField(fieldName, instanceDate.getTime(), false);
874        }
875        // add disp-title field
876        fieldName = CmsSearchField.FIELD_DISPTITLE + CmsSearchField.FIELD_POSTFIX_SORT;
877        String dispTitle = document.getFieldValueAsString(fieldName);
878        if (null == dispTitle) {
879            dispTitle = document.getFieldValueAsString(
880                CmsPropertyDefinition.PROPERTY_TITLE + CmsSearchField.FIELD_DYNAMIC_PROPERTIES_DIRECT);
881            if (null == dispTitle) {
882                dispTitle = document.getFieldValueAsString(CmsSearchField.FIELD_FILENAME);
883            }
884            document.addSearchField(new CmsSolrField(fieldName, null, null, null), dispTitle);
885        }
886
887        // add disp-order field
888        fieldName = CmsSearchField.FIELD_DISPORDER + CmsSearchField.FIELD_POSTFIX_INT;
889        String dispOrder = document.getFieldValueAsString(fieldName);
890        if (null == dispOrder) {
891            dispOrder = document.getFieldValueAsString(
892                CmsPropertyDefinition.PROPERTY_DISPLAY_ORDER + CmsSearchField.FIELD_DYNAMIC_PROPERTIES);
893            if (null != dispOrder) {
894                try {
895                    int o = Integer.parseInt(dispOrder);
896                    dispOrder = String.valueOf(o);
897                } catch (NullPointerException | NumberFormatException e) {
898                    LOG.warn(
899                        "Property "
900                            + CmsPropertyDefinition.PROPERTY_DISPLAY_ORDER
901                            + " contains not a valid integer number.");
902                    dispOrder = "0";
903                }
904            } else {
905                dispOrder = "0";
906            }
907            document.addSearchField(new CmsSolrField(fieldName, null, null, null), dispOrder);
908        }
909
910        // add localized fields
911        for (String locale : document.getMultivaluedFieldAsStringList(CmsSearchField.FIELD_CONTENT_LOCALES)) {
912            // instance date
913            fieldName = CmsSearchField.FIELD_INSTANCEDATE + "_" + locale + CmsSearchField.FIELD_POSTFIX_DATE;
914            Date localeInstanceDate = document.getFieldValueAsDate(fieldName);
915            if ((null == localeInstanceDate) || (localeInstanceDate.getTime() == 0)) {
916                localeInstanceDate = instanceDate;
917                document.addDateField(fieldName, localeInstanceDate.getTime(), false);
918            }
919            // instance date range
920            fieldName = CmsSearchField.FIELD_INSTANCEDATE_RANGE
921                + "_"
922                + locale
923                + CmsSearchField.FIELD_POSTFIX_DATE_RANGE;
924            String localeInstanceDateString = document.getFieldValueAsString(
925                CmsSearchField.FIELD_INSTANCEDATE + "_" + locale + CmsSearchField.FIELD_POSTFIX_DATE);
926            String localeInstanceDateRangeString = "["
927                + localeInstanceDateString
928                + " TO "
929                + localeInstanceDateString
930                + "]";
931            ((SolrInputDocument)document.getDocument()).setField(fieldName, localeInstanceDateRangeString);
932            // Set instancedatecurrenttill_dt to instancedate_dt if not set yet
933            fieldName = CmsSearchField.FIELD_INSTANCEDATE_CURRENT_TILL
934                + "_"
935                + locale
936                + CmsSearchField.FIELD_POSTFIX_DATE;
937            Date localeInstanceDateCurrentTill = document.getFieldValueAsDate(fieldName);
938            if ((null == localeInstanceDateCurrentTill) || (localeInstanceDateCurrentTill.getTime() == 0)) {
939                document.addDateField(fieldName, localeInstanceDate.getTime(), false);
940            }
941            // disp-title field for title display and sorting
942            fieldName = CmsSearchField.FIELD_DISPTITLE + "_" + locale + CmsSearchField.FIELD_POSTFIX_SORT;
943            if (null == document.getFieldValueAsString(fieldName)) {
944                String localizedTitle = document.getFieldValueAsString(
945                    CmsPropertyDefinition.PROPERTY_TITLE
946                        + "_"
947                        + locale
948                        + CmsSearchField.FIELD_DYNAMIC_PROPERTIES_DIRECT);
949                document.addSearchField(
950                    new CmsSolrField(fieldName, null, null, null),
951                    null == localizedTitle ? dispTitle : localizedTitle);
952            }
953            // disp-order field
954            fieldName = CmsSearchField.FIELD_DISPORDER + "_" + locale + CmsSearchField.FIELD_POSTFIX_INT;
955            if (null == document.getFieldValueAsString(fieldName)) {
956                String localizedOrder = document.getFieldValueAsString(
957                    CmsPropertyDefinition.PROPERTY_DISPLAY_ORDER
958                        + "_"
959                        + locale
960                        + CmsSearchField.FIELD_DYNAMIC_PROPERTIES);
961                if (null != localizedOrder) {
962                    try {
963                        int o = Integer.parseInt(localizedOrder);
964                        localizedOrder = String.valueOf(o);
965                    } catch (NullPointerException | NumberFormatException e) {
966                        LOG.warn(
967                            "Property "
968                                + CmsPropertyDefinition.PROPERTY_DISPLAY_ORDER
969                                + "_"
970                                + locale
971                                + " contains not a valid integer number.");
972                    }
973                }
974                document.addSearchField(
975                    new CmsSolrField(fieldName, null, null, null),
976                    null == localizedOrder ? dispOrder : localizedOrder);
977            }
978        }
979
980        return document;
981    }
982
983    /**
984     * Copy the content and the title property of the document to a spell field / a language specific spell field.
985     * @param document the document that gets extended by the spell fields.
986     */
987    private void appendSpellFields(I_CmsSearchDocument document) {
988
989        /*
990         * Add the content fields (multiple for contents with more than one locale)
991         */
992        // add the content_<locale> fields to this configuration
993        String title = document.getFieldValueAsString(
994            CmsPropertyDefinition.PROPERTY_TITLE + CmsSearchField.FIELD_DYNAMIC_PROPERTIES_DIRECT);
995        document.addSearchField(
996            new CmsSolrField(CmsSearchField.FIELD_SPELL, null, null, null),
997            document.getFieldValueAsString(CmsSearchField.FIELD_CONTENT) + "\n" + title);
998        for (Locale locale : OpenCms.getLocaleManager().getAvailableLocales()) {
999            document.addSearchField(
1000                new CmsSolrField(locale + "_" + CmsSearchField.FIELD_SPELL, null, locale, null),
1001                document.getFieldValueAsString(
1002                    CmsSearchFieldConfiguration.getLocaleExtendedName(CmsSearchField.FIELD_CONTENT, locale))
1003                    + "\n"
1004                    + title);
1005        }
1006    }
1007
1008    /**
1009     * Fills the field with the name extended by "_{locale}" with the property value, if the field is not already present in the document.
1010     *
1011     * If the localized property is not present, the default property value is used to get the value.
1012     * Properties set on the indexed resource itself are always preferred. If no suitable property is present, the parent properties are used when given.
1013     *
1014     * @param cms the current context
1015     * @param document the document to add the field to.
1016     * @param resource the currently indexed resource.
1017     * @param locale the locale to add the field for.
1018     * @param fieldName the name of the field to add (without locale postfix)
1019     * @param propertyName the property name of the property to get the value from (without locale postfix)
1020     */
1021    private void fillLocalizedFieldWithPropertyFallbacks(
1022        CmsObject cms,
1023        I_CmsSearchDocument document,
1024        CmsResource resource,
1025        String locale,
1026        String fieldName,
1027        String propertyName) {
1028
1029        Locale l = CmsLocaleManager.getLocale(locale);
1030        String localeExtendedFieldName = getLocaleExtendedName(fieldName, locale);
1031
1032        if (!document.getFieldNames().contains(localeExtendedFieldName)) {
1033            String value = CmsVfsUtil.readPropertyValueWithFolderFallbackForDefaultFiles(
1034                cms,
1035                resource,
1036                propertyName,
1037                l);
1038            if (value != null) {
1039                document.addSearchField(new CmsSolrField(localeExtendedFieldName, null, null, null), value);
1040            }
1041        }
1042
1043    }
1044
1045    /**
1046     * Returns <code>true</code> if at least one of the index sources uses a VFS indexer that is able
1047     * to index locale dependent resources.<p>
1048     *
1049     * TODO This should be improved somehow
1050     *
1051     * @return <code>true</code> if this field configuration should resolve locale dependencies
1052     */
1053    private boolean hasLocaleDependencies() {
1054
1055        for (CmsSearchIndexSource source : getIndex().getSources()) {
1056            if (source.getIndexer().isLocaleDependenciesEnable()) {
1057                return true;
1058            }
1059        }
1060        return false;
1061    }
1062}