001/*
002 * File   : $Source$
003 * Date   : $Date$
004 * Version: $Revision$
005 *
006 * This library is part of OpenCms -
007 * the Open Source Content Management System
008 *
009 * Copyright (C) 2002 - 2009 Alkacon Software (http://www.alkacon.com)
010 *
011 * This library is free software; you can redistribute it and/or
012 * modify it under the terms of the GNU Lesser General Public
013 * License as published by the Free Software Foundation; either
014 * version 2.1 of the License, or (at your option) any later version.
015 *
016 * This library is distributed in the hope that it will be useful,
017 * but WITHOUT ANY WARRANTY; without even the implied warranty of
018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
019 * Lesser General Public License for more details.
020 *
021 * For further information about Alkacon Software, please see the
022 * company website: http://www.alkacon.com
023 *
024 * For further information about OpenCms, please see the
025 * project website: http://www.opencms.org
026 *
027 * You should have received a copy of the GNU Lesser General Public
028 * License along with this library; if not, write to the Free Software
029 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
030 */
031
032package org.opencms.search.solr;
033
034import org.opencms.acacia.shared.I_CmsSerialDateValue;
035import org.opencms.ade.containerpage.CmsDetailOnlyContainerUtil;
036import org.opencms.file.CmsFile;
037import org.opencms.file.CmsObject;
038import org.opencms.file.CmsPropertyDefinition;
039import org.opencms.file.CmsResource;
040import org.opencms.file.types.CmsResourceTypeXmlContent;
041import org.opencms.i18n.CmsLocaleManager;
042import org.opencms.main.CmsException;
043import org.opencms.main.CmsLog;
044import org.opencms.main.OpenCms;
045import org.opencms.search.CmsIndexException;
046import org.opencms.search.CmsSearchUtil;
047import org.opencms.search.I_CmsSearchIndex;
048import org.opencms.search.documents.A_CmsVfsDocument;
049import org.opencms.search.documents.CmsIndexNoContentException;
050import org.opencms.search.documents.Messages;
051import org.opencms.search.extractors.CmsExtractionResult;
052import org.opencms.search.extractors.I_CmsExtractionResult;
053import org.opencms.search.fields.CmsSearchField;
054import org.opencms.search.fields.CmsSearchFieldConfiguration;
055import org.opencms.search.galleries.CmsGalleryNameMacroResolver;
056import org.opencms.util.CmsStringUtil;
057import org.opencms.util.CmsUUID;
058import org.opencms.widgets.serialdate.CmsSerialDateBeanFactory;
059import org.opencms.widgets.serialdate.CmsSerialDateValue;
060import org.opencms.widgets.serialdate.I_CmsSerialDateBean;
061import org.opencms.xml.A_CmsXmlDocument;
062import org.opencms.xml.CmsXmlContentDefinition;
063import org.opencms.xml.CmsXmlUtils;
064import org.opencms.xml.content.CmsXmlContent;
065import org.opencms.xml.content.CmsXmlContentFactory;
066import org.opencms.xml.content.I_CmsContentValueAdjustment;
067import org.opencms.xml.content.I_CmsXmlContentHandler;
068import org.opencms.xml.types.CmsXmlDateTimeValue;
069import org.opencms.xml.types.CmsXmlHtmlValue;
070import org.opencms.xml.types.CmsXmlNestedContentDefinition;
071import org.opencms.xml.types.CmsXmlSerialDateValue;
072import org.opencms.xml.types.I_CmsXmlContentValue;
073import org.opencms.xml.types.I_CmsXmlContentValue.CmsSearchContentConfig;
074import org.opencms.xml.types.I_CmsXmlSchemaType;
075
076import java.util.ArrayList;
077import java.util.Collections;
078import java.util.HashMap;
079import java.util.HashSet;
080import java.util.LinkedHashMap;
081import java.util.List;
082import java.util.Locale;
083import java.util.Map;
084import java.util.Set;
085import java.util.function.Consumer;
086
087import org.apache.commons.logging.Log;
088
089import com.google.common.collect.Sets;
090
091/**
092 * Special document text extraction factory for Solr index.<p>
093 *
094 * @since 8.5.0
095 */
096public class CmsSolrDocumentXmlContent extends A_CmsVfsDocument {
097
098    /**
099     * The gallery name is determined by resolving the macros in a string which can either come from a field mapped
100     * to the gallery name, or the title, or from default values for those fields. This class is used to select the
101     * value to use and performs the macro substitution.
102     */
103    private static class GalleryNameChooser {
104
105        /** CMS context for this instance. */
106        private CmsObject m_cms;
107
108        /** Current XML content. */
109        private A_CmsXmlDocument m_content;
110
111        /** Default value of field mapped to gallery name. */
112        private String m_defaultGalleryNameValue;
113
114        /** Default value of field mapped to title. */
115        private String m_defaultTitleValue;
116
117        /** Current locale. */
118        private Locale m_locale;
119
120        /** Content value mapped to Description property. */
121        private String m_mappedDescriptionValue;
122
123        /** Content value mapped to gallery description. */
124        private String m_mappedGalleryDescriptionValue;
125
126        /** Content value mapped to gallery name. */
127        private String m_mappedGalleryNameValue;
128
129        /** Content value mapped to title. */
130        private String m_mappedTitleValue;
131
132        /**
133         * Creates a new instance.<p>
134         *
135         * @param cms the CMS context
136         * @param content the XML content
137         * @param locale the locale in the XML content
138         */
139        public GalleryNameChooser(CmsObject cms, A_CmsXmlDocument content, Locale locale) {
140
141            m_cms = cms;
142            m_content = content;
143            m_locale = locale;
144        }
145
146        /**
147         * Selects the description displayed in the gallery.<p>
148         *
149         * This method assumes that all the available values have been set via the setters of this class.
150         *
151         * @return the description
152         *
153         * @throws CmsException of something goes wrong
154         */
155        public String getDescription() throws CmsException {
156
157            return getDescription(m_locale);
158        }
159
160        /**
161            * Selects the description displayed in the gallery.<p>
162            *
163            * This method assumes that all the available values have been set via the setters of this class.
164            *
165            * @param locale the locale to get the description in
166            *
167            * @return the description
168            *
169            * @throws CmsException of something goes wrong
170            */
171        public String getDescription(Locale locale) throws CmsException {
172
173            String result = null;
174            for (String resultCandidateWithMacros : new String[] {
175                m_mappedGalleryDescriptionValue,
176                m_mappedDescriptionValue}) {
177                if (!CmsStringUtil.isEmptyOrWhitespaceOnly(resultCandidateWithMacros)) {
178                    CmsGalleryNameMacroResolver resolver = new CmsGalleryNameMacroResolver(m_cms, m_content, locale);
179                    result = resolver.resolveMacros(resultCandidateWithMacros);
180                    return result;
181                }
182            }
183            result = m_cms.readPropertyObject(
184                m_content.getFile(),
185                CmsPropertyDefinition.PROPERTY_DESCRIPTION,
186                false).getValue();
187            return result;
188        }
189
190        /**
191         * Selects the gallery name.<p>
192         *
193         * This method assumes that all the available values have been set via the setters of this class.
194         *
195         * @return the gallery name
196         *
197         * @throws CmsException of something goes wrong
198         */
199        public String getGalleryName() throws CmsException {
200
201            return getGalleryName(m_locale);
202        }
203
204        /**
205        * Selects the gallery name.<p>
206        *
207        * This method assumes that all the available values have been set via the setters of this class.
208        *
209        * @param locale the locale to get the gallery name in
210        *
211        * @return the gallery name
212        *
213        * @throws CmsException of something goes wrong
214        */
215        public String getGalleryName(Locale locale) throws CmsException {
216
217            String result = null;
218            for (String resultCandidateWithMacros : new String[] {
219                // Prioritize gallery name over title, and actual content values over defaults
220                m_mappedGalleryNameValue,
221                m_defaultGalleryNameValue,
222                m_mappedTitleValue,
223                m_defaultTitleValue}) {
224                if (!CmsStringUtil.isEmptyOrWhitespaceOnly(resultCandidateWithMacros)) {
225                    CmsGalleryNameMacroResolver resolver = new CmsGalleryNameMacroResolver(m_cms, m_content, locale);
226                    result = resolver.resolveMacros(resultCandidateWithMacros);
227                    return result;
228                }
229            }
230            result = m_cms.readPropertyObject(
231                m_content.getFile(),
232                CmsPropertyDefinition.PROPERTY_TITLE,
233                false).getValue();
234            return result;
235        }
236
237        /**
238         * Sets the defaultGalleryNameValue.<p>
239         *
240         * @param defaultGalleryNameValue the defaultGalleryNameValue to set
241         */
242        public void setDefaultGalleryNameValue(String defaultGalleryNameValue) {
243
244            m_defaultGalleryNameValue = defaultGalleryNameValue;
245        }
246
247        /**
248         * Sets the defaultTitleValue.<p>
249         *
250         * @param defaultTitleValue the defaultTitleValue to set
251         */
252        public void setDefaultTitleValue(String defaultTitleValue) {
253
254            m_defaultTitleValue = defaultTitleValue;
255        }
256
257        /**
258         * Sets the mapped description value.<p>
259         *
260         * @param mappedDescriptionValue the mappedDescriptionValue to set
261         */
262        public void setMappedDescriptionValue(String mappedDescriptionValue) {
263
264            m_mappedDescriptionValue = mappedDescriptionValue;
265        }
266
267        /**
268         * Sets the name from a value mapped via 'galleryDescription'.
269         *
270         * @param mappedGalleryDescriptionValue the value that has been mapped
271         */
272        public void setMappedGalleryDescriptionValue(String mappedGalleryDescriptionValue) {
273
274            m_mappedGalleryDescriptionValue = mappedGalleryDescriptionValue;
275        }
276
277        /**
278         * Sets the mappedGalleryNameValue.<p>
279         *
280         * @param mappedGalleryNameValue the mappedGalleryNameValue to set
281         */
282        public void setMappedGalleryNameValue(String mappedGalleryNameValue) {
283
284            m_mappedGalleryNameValue = mappedGalleryNameValue;
285        }
286
287        /**
288         * Sets the mappedTitleValue.<p>
289         *
290         * @param mappedTitleValue the mappedTitleValue to set
291         */
292        public void setMappedTitleValue(String mappedTitleValue) {
293
294            m_mappedTitleValue = mappedTitleValue;
295        }
296    }
297
298    /** Mapping name used to indicate that the value should be used for the gallery description. */
299    public static final String MAPPING_GALLERY_DESCRIPTION = "galleryDescription";
300
301    /** Mapping name used to indicate that the value should be used for the gallery name. */
302    public static final String MAPPING_GALLERY_NAME = "galleryName";
303
304    /** The log object for this class. */
305    private static final Log LOG = CmsLog.getLog(CmsSolrDocumentXmlContent.class);
306
307    /**
308     * Public constructor.<p>
309     *
310     * @param name the name for the document type
311     */
312    public CmsSolrDocumentXmlContent(String name) {
313
314        super(name);
315    }
316
317    /**
318     * Collects a list of all possible XPaths for a content definition.<p>
319     *
320     * @param cms the CMS context to use
321     * @param def the content definition
322     * @param path the path of the given content definition
323     * @param result the set used to collect the XPaths
324     */
325    public static void collectSchemaXpathsForSimpleValues(
326        CmsObject cms,
327        CmsXmlContentDefinition def,
328        String path,
329        Set<String> result) {
330
331        List<I_CmsXmlSchemaType> nestedTypes = def.getTypeSequence();
332        for (I_CmsXmlSchemaType nestedType : nestedTypes) {
333            String subPath = path + "/" + nestedType.getName();
334            if (nestedType instanceof CmsXmlNestedContentDefinition) {
335                CmsXmlContentDefinition nestedDef = ((CmsXmlNestedContentDefinition)nestedType).getNestedContentDefinition();
336                collectSchemaXpathsForSimpleValues(cms, nestedDef, subPath, result);
337            } else {
338                result.add(subPath);
339            }
340        }
341    }
342
343    /**
344     * Extracts the content of a single XML content resource.<p>
345     *
346     * @param cms the cms context
347     * @param resource the resource
348     * @param index the used index
349     *
350     * @return the extraction result
351     *
352     * @throws CmsException in case reading or unmarshalling the content fails
353     */
354    public static CmsExtractionResult extractXmlContent(CmsObject cms, CmsResource resource, I_CmsSearchIndex index)
355    throws CmsException {
356
357        return extractXmlContent(cms, resource, index, null);
358    }
359
360    /**
361     * Extracts the content of a single XML content resource.<p>
362     *
363     * @param cms the cms context
364     * @param resource the resource
365     * @param index the used index
366     * @param forceLocale if set, only the content values for the given locale will be extracted
367     *
368     * @return the extraction result
369     *
370     * @throws CmsException in case reading or unmarshalling the content fails
371     */
372    public static CmsExtractionResult extractXmlContent(
373        CmsObject cms,
374        CmsResource resource,
375        I_CmsSearchIndex index,
376        Locale forceLocale)
377    throws CmsException {
378
379        return extractXmlContent(cms, resource, index, forceLocale, null);
380    }
381
382    /**
383     * Extracts the content of a single XML content resource.<p>
384     *
385     * @param cms the cms context
386     * @param resource the resource
387     * @param index the used index
388     * @param forceLocale if set, only the content values for the given locale will be extracted
389     * @param alreadyExtracted keeps track of ids of contents which have already been extracted
390     *
391     * @return the extraction result
392     *
393     * @throws CmsException in case reading or unmarshalling the content fails
394     */
395    public static CmsExtractionResult extractXmlContent(
396        CmsObject cms,
397        CmsResource resource,
398        I_CmsSearchIndex index,
399        Locale forceLocale,
400        Set<CmsUUID> alreadyExtracted)
401    throws CmsException {
402
403        return extractXmlContent(cms, resource, index, forceLocale, alreadyExtracted, content -> {
404            /*do nothing with the content*/});
405
406    }
407
408    /**
409     * Extracts the content of a single XML content resource.<p>
410     *
411     * @param cms the cms context
412     * @param resource the resource
413     * @param index the used index
414     * @param forceLocale if set, only the content values for the given locale will be extracted
415     * @param alreadyExtracted keeps track of ids of contents which have already been extracted
416     * @param contentConsumer gets called with the unmarshalled content object
417     *
418     * @return the extraction result
419     *
420     * @throws CmsException in case reading or unmarshalling the content fails
421     */
422    public static CmsExtractionResult extractXmlContent(
423        CmsObject cms,
424        CmsResource resource,
425        I_CmsSearchIndex index,
426        Locale forceLocale,
427        Set<CmsUUID> alreadyExtracted,
428        Consumer<A_CmsXmlDocument> contentConsumer)
429    throws CmsException {
430
431        if (null == alreadyExtracted) {
432            alreadyExtracted = Collections.emptySet();
433        }
434        // un-marshal the content
435        CmsFile file = cms.readFile(resource);
436        if (file.getLength() <= 0) {
437            throw new CmsIndexNoContentException(
438                Messages.get().container(Messages.ERR_NO_CONTENT_1, resource.getRootPath()));
439        }
440        A_CmsXmlDocument xmlContent = CmsXmlContentFactory.unmarshal(cms, file);
441        if (contentConsumer != null) {
442            contentConsumer.accept(xmlContent);
443        }
444
445        // initialize some variables
446        Map<Locale, LinkedHashMap<String, String>> items = new HashMap<Locale, LinkedHashMap<String, String>>();
447        Map<String, String> fieldMappings = new HashMap<String, String>();
448        List<Locale> contentLocales = forceLocale != null
449        ? Collections.singletonList(forceLocale)
450        : xmlContent.getLocales();
451        Locale resourceLocale = index.getLocaleForResource(cms, resource, contentLocales);
452
453        LinkedHashMap<String, String> localeItems = null;
454        GalleryNameChooser galleryNameChooser = null;
455        // loop over the locales of the content
456        for (Locale locale : contentLocales) {
457            galleryNameChooser = new GalleryNameChooser(cms, xmlContent, locale);
458            localeItems = new LinkedHashMap<String, String>();
459            StringBuffer textContent = new StringBuffer();
460            // store the locales of the content as space separated field
461            // loop over the available element paths of the current content locale
462            List<String> paths = xmlContent.getNames(locale);
463            for (String xpath : paths) {
464
465                // try to get the value extraction for the current element path
466                String extracted = null;
467                I_CmsXmlContentValue value = xmlContent.getValue(xpath, locale);
468                try {
469                    //the new DatePointField.createField dose not support milliseconds
470                    if (value instanceof CmsXmlDateTimeValue) {
471                        extracted = CmsSearchUtil.getDateAsIso8601(((CmsXmlDateTimeValue)value).getDateTimeValue());
472                    } else {
473                        extracted = value.getPlainText(cms);
474                        if (CmsStringUtil.isEmptyOrWhitespaceOnly(extracted)
475                            && value.isSimpleType()
476                            && !(value instanceof CmsXmlHtmlValue)) {
477                            // no text value for simple type, so take the string value as item
478                            // prevent this for elements of type "OpenCmsHtml", since this causes problematic values
479                            // being indexed, e.g., <iframe ...></iframe>
480                            // TODO: Why is this special handling needed at all???
481                            extracted = value.getStringValue(cms);
482                        }
483                    }
484                } catch (Exception e) {
485                    // it can happen that a exception is thrown while extracting a single value
486                    LOG.warn(Messages.get().container(Messages.LOG_EXTRACT_VALUE_2, xpath, resource), e);
487                }
488
489                // put the extraction to the items and to the textual content
490                if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(extracted)) {
491                    localeItems.put(xpath, extracted);
492                }
493                CmsSearchContentConfig searchContentConfig = xmlContent.getHandler().getSearchContentConfig(value);
494                switch (searchContentConfig.getSearchContentType()) {
495                    case TRUE:
496                        if (null != searchContentConfig.getAdjustmentClass()) {
497                            Class<I_CmsContentValueAdjustment> adjustmentClass;
498                            try {
499                                //We cast by purpose and catch the exception if we fail.
500                                adjustmentClass = (Class<I_CmsContentValueAdjustment>)Class.forName(
501                                    searchContentConfig.getAdjustmentClass());
502                                I_CmsContentValueAdjustment adjustment = adjustmentClass.getConstructor().newInstance();
503                                String adjustedValue = adjustment.getAdjustedValue(
504                                    cms,
505                                    xmlContent,
506                                    locale,
507                                    xpath,
508                                    extracted);
509                                if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(adjustedValue)) {
510                                    textContent.append(adjustedValue);
511                                    textContent.append('\n');
512                                }
513                                break;
514                            } catch (Throwable t) {
515                                String logMessage = "Cannot adjust value via configured class in searchsetting for \""
516                                    + value.getPath()
517                                    + "\" in content \""
518                                    + resource.getRootPath()
519                                    + "\". Using the unadjusted value.";
520                                if (LOG.isDebugEnabled()) {
521                                    LOG.debug(logMessage, t);
522                                } else {
523                                    LOG.error(logMessage);
524                                }
525                            }
526                        }
527                        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(extracted)) {
528                            textContent.append(extracted);
529                            textContent.append('\n');
530                        }
531                        break;
532                    case CONTENT:
533                        // TODO: Potentially extend to allow for indexing of non-xml-contents as well.
534                        String potentialLinkValue = value.getStringValue(cms);
535                        try {
536                            if ((null != potentialLinkValue)
537                                && !potentialLinkValue.isEmpty()
538                                && cms.existsResource(potentialLinkValue)) {
539                                CmsResource linkedRes = cms.readResource(potentialLinkValue);
540                                if (CmsResourceTypeXmlContent.isXmlContent(linkedRes)
541                                    && !alreadyExtracted.contains(linkedRes.getStructureId())) {
542                                    Set<CmsUUID> newAlreadyExtracted = new HashSet<>(alreadyExtracted);
543                                    newAlreadyExtracted.add(resource.getStructureId());
544                                    I_CmsExtractionResult exRes = CmsSolrDocumentXmlContent.extractXmlContent(
545                                        cms,
546                                        linkedRes,
547                                        index,
548                                        locale,
549                                        newAlreadyExtracted);
550                                    String exContent = exRes.getContent(locale);
551                                    if ((exContent != null) && !exContent.trim().isEmpty()) {
552                                        textContent.append(exContent.trim());
553                                        textContent.append('\n');
554                                        break; // Success - we break here to not repeatedly programm a warning.
555                                    }
556                                }
557                            }
558                            if (LOG.isInfoEnabled()) {
559                                LOG.info(
560                                    "When indexing resource "
561                                        + resource.getRootPath()
562                                        + ", the elements value "
563                                        + value.getPath()
564                                        + " in locale "
565                                        + locale
566                                        + " does not contain a link to an XML content. Hence, the linked element's content is not added to the content indexed for the resource itself.");
567                            }
568                        } catch (Throwable t) {
569                            LOG.error(
570                                "Failed to add content of resource (site path) "
571                                    + potentialLinkValue
572                                    + " to content of resource (root path) "
573                                    + resource.getRootPath()
574                                    + " when indexing the resource for locale "
575                                    + locale
576                                    + ". Skipping this content part.",
577                                t);
578                        }
579                        break;
580                    default:
581                        // we do not index the content element for the content field.
582                        break;
583                }
584
585                List<String> mappings = xmlContent.getHandler().getMappings(value.getPath());
586                if (mappings.size() > 0) {
587                    // mappings are defined, lets check if we have mappings that interest us
588                    for (String mapping : mappings) {
589                        if (mapping.startsWith(I_CmsXmlContentHandler.MAPTO_PROPERTY)) {
590                            // this is a property mapping
591                            String propertyName = mapping.substring(mapping.lastIndexOf(':') + 1);
592                            if (CmsPropertyDefinition.PROPERTY_TITLE.equals(propertyName)
593                                || CmsPropertyDefinition.PROPERTY_DESCRIPTION.equals(propertyName)) {
594
595                                if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(extracted)) {
596                                    if (CmsPropertyDefinition.PROPERTY_TITLE.equals(propertyName)) {
597                                        galleryNameChooser.setMappedTitleValue(extracted);
598                                    } else {
599                                        // if field is not title, it must be description
600                                        galleryNameChooser.setMappedDescriptionValue(extracted);
601                                    }
602                                }
603                            }
604                        } else if (mapping.equals(MAPPING_GALLERY_NAME)) {
605                            galleryNameChooser.setMappedGalleryNameValue(value.getPlainText(cms));
606                        } else if (mapping.equals(MAPPING_GALLERY_DESCRIPTION)) {
607                            galleryNameChooser.setMappedGalleryDescriptionValue(value.getPlainText(cms));
608                        }
609                    }
610                }
611                if (value instanceof CmsXmlSerialDateValue) {
612                    if ((null != extracted) && !extracted.isEmpty()) {
613                        I_CmsSerialDateValue serialDateValue = new CmsSerialDateValue(extracted);
614                        I_CmsSerialDateBean serialDateBean = CmsSerialDateBeanFactory.createSerialDateBean(
615                            serialDateValue);
616                        if (null != serialDateBean) {
617                            StringBuffer values = new StringBuffer();
618                            StringBuffer endValues = new StringBuffer();
619                            StringBuffer currentTillValues = new StringBuffer();
620                            for (Long eventDate : serialDateBean.getDatesAsLong()) {
621                                values.append("\n").append(eventDate.toString());
622                                long endDate = null != serialDateBean.getEventDuration()
623                                ? eventDate.longValue() + serialDateBean.getEventDuration().longValue()
624                                : eventDate.longValue();
625                                endValues.append("\n").append(Long.toString(endDate));
626                                // Special treatment for events that end at 00:00:
627                                // To not show them at the day after they ended, one millisecond is removed from the end time
628                                // for the "currenttill"-time
629                                currentTillValues.append("\n").append(
630                                    serialDateValue.isCurrentTillEnd()
631                                    ? Long.valueOf(
632                                        serialDateValue.endsAtMidNight() && (endDate > eventDate.longValue())
633                                        ? endDate - 1L
634                                        : endDate)
635                                    : eventDate);
636                            }
637                            fieldMappings.put(CmsSearchField.FIELD_SERIESDATES, values.substring(1));
638                            fieldMappings.put(CmsSearchField.FIELD_SERIESDATES_END, endValues.substring(1));
639                            fieldMappings.put(
640                                CmsSearchField.FIELD_SERIESDATES_CURRENT_TILL,
641                                currentTillValues.substring(1));
642                            fieldMappings.put(
643                                CmsSearchField.FIELD_SERIESDATES_TYPE,
644                                serialDateValue.getDateType().toString());
645                        } else {
646                            LOG.warn(
647                                "Serial date value \""
648                                    + value.getStringValue(cms)
649                                    + "\" at element \""
650                                    + value.getPath()
651                                    + "\" is invalid. No dates are indexed for resource \""
652                                    + resource.getRootPath()
653                                    + "\".");
654                        }
655                    }
656                }
657            }
658
659            Set<String> xpaths = Sets.newHashSet();
660            collectSchemaXpathsForSimpleValues(cms, xmlContent.getContentDefinition(), "", xpaths);
661            for (String xpath : xpaths) {
662                // mappings always are stored with indexes, so we add them to the xpath
663                List<String> mappings = xmlContent.getHandler().getMappings(CmsXmlUtils.createXpath(xpath, 1));
664                for (String mapping : mappings) {
665
666                    if (mapping.equals(MAPPING_GALLERY_NAME)
667                        || mapping.equals(
668                            I_CmsXmlContentHandler.MAPTO_PROPERTY + CmsPropertyDefinition.PROPERTY_TITLE)) {
669                        String defaultValue = xmlContent.getHandler().getDefault(
670                            cms,
671                            xmlContent.getFile(),
672                            null,
673                            xpath,
674                            locale);
675                        if (mapping.equals(MAPPING_GALLERY_NAME)) {
676                            galleryNameChooser.setDefaultGalleryNameValue(defaultValue);
677                        } else {
678                            galleryNameChooser.setDefaultTitleValue(defaultValue);
679                        }
680                    }
681                }
682            }
683
684            final String galleryTitleFieldKey = CmsSearchFieldConfiguration.getLocaleExtendedName(
685                CmsSearchField.FIELD_TITLE_UNSTORED,
686                locale) + "_s";
687            final String galleryNameValue = galleryNameChooser.getGalleryName();
688            fieldMappings.put(galleryTitleFieldKey, galleryNameValue);
689            fieldMappings.put(
690                CmsSearchFieldConfiguration.getLocaleExtendedName(CmsSearchField.FIELD_DESCRIPTION, locale) + "_s",
691                galleryNameChooser.getDescription());
692
693            // handle the textual content
694            if (textContent.length() > 0) {
695                // add the textual content with a localized key to the items
696                //String key = CmsSearchFieldConfiguration.getLocaleExtendedName(CmsSearchField.FIELD_CONTENT, locale);
697                //items.put(key, textContent.toString());
698                // use the default locale of this resource as general text content for the extraction result
699                localeItems.put(I_CmsExtractionResult.ITEM_CONTENT, textContent.toString());
700            }
701            items.put(locale, localeItems);
702        }
703        // if the content is locale independent, it should have only one content locale, but that should be indexed for all available locales.
704        // TODO: One could think of different indexing behavior, i.e., index only for getDefaultLocales(cms,resource)
705        //       But using getAvailableLocales(cms,resource) does not work, because locale-available is set to "en" for all that content.
706        if ((xmlContent instanceof CmsXmlContent) && ((CmsXmlContent)xmlContent).isLocaleIndependent()) {
707            if (forceLocale != null) {
708                items.put(forceLocale, localeItems);
709            } else {
710                for (Locale l : OpenCms.getLocaleManager().getAvailableLocales()) {
711                    items.put(l, localeItems);
712                    if (null != galleryNameChooser) {
713                        final String galleryTitleFieldKey = CmsSearchFieldConfiguration.getLocaleExtendedName(
714                            CmsSearchField.FIELD_TITLE_UNSTORED,
715                            l) + "_s";
716                        fieldMappings.put(galleryTitleFieldKey, galleryNameChooser.getGalleryName(l));
717                        fieldMappings.put(
718                            CmsSearchFieldConfiguration.getLocaleExtendedName(CmsSearchField.FIELD_DESCRIPTION, l)
719                                + "_s",
720                            galleryNameChooser.getDescription(l));
721                    }
722                }
723            }
724        }
725        // add the locales that have been indexed for this document as item and return the extraction result
726        // fieldMappings.put(CmsSearchField.FIELD_RESOURCE_LOCALES, locales.toString().trim());
727        return new CmsExtractionResult(resourceLocale, items, fieldMappings);
728
729    }
730
731    /**
732     * @see org.opencms.search.documents.CmsDocumentXmlContent#extractContent(org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.I_CmsSearchIndex)
733     */
734    @Override
735    public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, I_CmsSearchIndex index)
736    throws CmsException {
737
738        logContentExtraction(resource, index);
739
740        try {
741            I_CmsExtractionResult result = null;
742            List<I_CmsExtractionResult> ex = new ArrayList<I_CmsExtractionResult>();
743            for (CmsResource detailContainers : CmsDetailOnlyContainerUtil.getDetailOnlyResources(cms, resource)) {
744                CmsSolrDocumentContainerPage containerpageExtractor = new CmsSolrDocumentContainerPage("");
745                String localeTemp = detailContainers.getRootPath();
746                localeTemp = CmsResource.getParentFolder(localeTemp);
747                localeTemp = CmsResource.getName(localeTemp);
748                localeTemp = localeTemp.substring(0, localeTemp.length() - 1);
749                Locale locale = CmsLocaleManager.getLocale(localeTemp);
750                if (CmsDetailOnlyContainerUtil.useSingleLocaleDetailContainers(
751                    OpenCms.getSiteManager().getSiteRoot(resource.getRootPath()))
752                    && locale.equals(CmsLocaleManager.getDefaultLocale())) {
753                    // in case of single locale detail containers do not force the locale
754                    locale = null;
755                }
756                I_CmsExtractionResult containersExtractionResult = containerpageExtractor.extractContent(
757                    cms,
758                    detailContainers,
759                    index,
760                    locale);
761                // only use the locales of the resource itself, not the ones of the detail containers page
762                containersExtractionResult.getContentItems().remove(CmsSearchField.FIELD_RESOURCE_LOCALES);
763
764                ex.add(containersExtractionResult);
765            }
766            result = extractXmlContent(cms, resource, index);
767            result = result.merge(ex);
768            return result;
769
770        } catch (Throwable t) {
771            throw new CmsIndexException(Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource), t);
772        }
773    }
774
775    /**
776     * Solr index content is stored in multiple languages, so the result is NOT locale dependent.<p>
777     *
778     * @see org.opencms.search.documents.I_CmsDocumentFactory#isLocaleDependend()
779     */
780    public boolean isLocaleDependend() {
781
782        return false;
783    }
784
785    /**
786     * @see org.opencms.search.documents.I_CmsDocumentFactory#isUsingCache()
787     */
788    @Override
789    public boolean isOnlyDependentOnContent() {
790
791        return false;
792    }
793
794    /**
795     * @see org.opencms.search.documents.I_CmsDocumentFactory#isUsingCache()
796     */
797    public boolean isUsingCache() {
798
799        return false;
800    }
801}