001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.search.solr;
029
030import org.opencms.ade.configuration.CmsADEConfigData;
031import org.opencms.ade.configuration.CmsFormatterUtils;
032import org.opencms.file.CmsFile;
033import org.opencms.file.CmsObject;
034import org.opencms.file.CmsResource;
035import org.opencms.main.CmsException;
036import org.opencms.main.CmsLog;
037import org.opencms.main.OpenCms;
038import org.opencms.search.CmsIndexException;
039import org.opencms.search.I_CmsSearchIndex;
040import org.opencms.search.documents.Messages;
041import org.opencms.search.extractors.CmsExtractionResult;
042import org.opencms.search.extractors.I_CmsExtractionResult;
043import org.opencms.xml.containerpage.CmsContainerBean;
044import org.opencms.xml.containerpage.CmsContainerElementBean;
045import org.opencms.xml.containerpage.CmsContainerPageBean;
046import org.opencms.xml.containerpage.CmsFormatterConfiguration;
047import org.opencms.xml.containerpage.CmsXmlContainerPage;
048import org.opencms.xml.containerpage.CmsXmlContainerPageFactory;
049import org.opencms.xml.containerpage.I_CmsFormatterBean;
050
051import java.util.ArrayList;
052import java.util.HashMap;
053import java.util.LinkedHashMap;
054import java.util.List;
055import java.util.Locale;
056import java.util.Map;
057
058import org.apache.commons.logging.Log;
059
060/**
061 * Lucene document factory class to extract index data from a resource
062 * of type <code>CmsResourceTypeContainerPage</code>.<p>
063 *
064 * @since 8.5.0
065 */
066public class CmsSolrDocumentContainerPage extends CmsSolrDocumentXmlContent {
067
068    /** The log object for this class. */
069    private static final Log LOG = CmsLog.getLog(CmsSolrDocumentContainerPage.class);
070
071    /** The solr document type name for xml-contents. */
072    public static final String TYPE_CONTAINERPAGE_SOLR = "containerpage-solr";
073
074    /**
075     * Creates a new instance of this lucene document factory.<p>
076     *
077     * @param name name of the document type
078     */
079    public CmsSolrDocumentContainerPage(String name) {
080
081        super(name);
082    }
083
084    /**
085     * Returns the raw text content of a VFS resource of type <code>CmsResourceTypeContainerPage</code>.<p>
086     *
087     * @see org.opencms.search.documents.I_CmsSearchExtractor#extractContent(CmsObject, CmsResource, I_CmsSearchIndex)
088     */
089    @Override
090    public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, I_CmsSearchIndex index)
091    throws CmsException {
092
093        return extractContent(cms, resource, index, null);
094    }
095
096    /**
097     * Extracts the content of a given index resource according to the resource file type and the
098     * configuration of the given index.<p>
099     *
100     * @param cms the cms object
101     * @param resource the resource to extract the content from
102     * @param index the index to extract the content for
103     * @param forceLocale if set, only the content values for the given locale will be extracted
104     *
105     * @return the extracted content of the resource
106     *
107     * @throws CmsException if something goes wrong
108     */
109    public I_CmsExtractionResult extractContent(
110        CmsObject cms,
111        CmsResource resource,
112        I_CmsSearchIndex index,
113        Locale forceLocale)
114    throws CmsException {
115
116        logContentExtraction(resource, index);
117        I_CmsExtractionResult ex = null;
118        try {
119            CmsFile file = readFile(cms, resource);
120            CmsXmlContainerPage containerPage = CmsXmlContainerPageFactory.unmarshal(cms, file);
121
122            List<I_CmsExtractionResult> all = new ArrayList<I_CmsExtractionResult>();
123            CmsContainerPageBean containerBean = containerPage.getContainerPage(cms);
124            if (containerBean != null) {
125                for (Map.Entry<String, CmsContainerBean> entry : containerBean.getContainers().entrySet()) {
126                    String containerName = entry.getKey();
127                    for (CmsContainerElementBean element : entry.getValue().getElements()) {
128                        // check all elements in this container
129                        // get the formatter configuration for this element
130                        try {
131                            element.initResource(cms);
132                            CmsResource elementResource = element.getResource();
133                            if (!(cms.readProject(index.getProject()).isOnlineProject()
134                                && elementResource.isExpired(System.currentTimeMillis()))) {
135                                CmsADEConfigData adeConfig = OpenCms.getADEManager().lookupConfigurationWithCache(
136                                    cms,
137                                    file.getRootPath());
138                                CmsFormatterConfiguration formatters = adeConfig.getFormatters(
139                                    cms,
140                                    element.getResource());
141                                boolean shouldExtractElement = false;
142                                if ((formatters != null)
143                                    && (element.getFormatterId() != null)
144                                    && (formatters.isSearchContent(element.getFormatterId())
145                                        || adeConfig.isSearchContentFormatter(element.getFormatterId()))) {
146                                    // the content of this element must be included for the container page
147                                    shouldExtractElement = true;
148                                } else if (formatters != null) {
149                                    String key = CmsFormatterUtils.getFormatterKey(containerName, element);
150                                    I_CmsFormatterBean formatter = adeConfig.findFormatter(key);
151                                    if (formatter != null) {
152                                        shouldExtractElement = formatter.isSearchContent();
153                                    }
154                                }
155                                if (LOG.isDebugEnabled()) {
156                                    LOG.debug(
157                                        "Should extract element "
158                                            + element.getResource().getRootPath()
159                                            + ": "
160                                            + shouldExtractElement);
161                                }
162                                if (shouldExtractElement) {
163                                    all.add(
164                                        CmsSolrDocumentXmlContent.extractXmlContent(
165                                            cms,
166                                            elementResource,
167                                            index,
168                                            forceLocale));
169                                }
170
171                            }
172                        } catch (Exception e) {
173                            LOG.debug(
174                                Messages.get().getBundle().key(
175                                    Messages.LOG_SKIPPING_CONTAINERPAGE_ELEMENT_WITH_UNREADABLE_RESOURCE_2,
176                                    file.getRootPath(),
177                                    element.getId()),
178                                e);
179                        }
180                    }
181                }
182            }
183            // we have to overwrite the resource and content locales with the one from this container page
184            // TODO: Is this really the wanted behavior? It seems to be done like this before.
185            Map<String, String> fieldMappings = new HashMap<String, String>(1);
186            // Add to each container page the contents in all available locales,
187            // in case one containerpage is used in multiple languages.
188            List<Locale> localesAvailable = OpenCms.getLocaleManager().getAvailableLocales(cms, resource);
189            Map<Locale, LinkedHashMap<String, String>> multilingualValues = new HashMap<Locale, LinkedHashMap<String, String>>(
190                localesAvailable.size());
191            for (Locale localeAvailable : localesAvailable) {
192                multilingualValues.put(localeAvailable, new LinkedHashMap<String, String>());
193            }
194            Locale locale = forceLocale != null
195            ? forceLocale
196            : index.getLocaleForResource(cms, resource, containerPage.getLocales());
197            ex = new CmsExtractionResult(locale, multilingualValues, fieldMappings);
198            ex = ex.merge(all);
199            return ex;
200        } catch (Exception e) {
201            throw new CmsIndexException(
202                Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()),
203                e);
204        }
205    }
206
207    /**
208     * @see org.opencms.search.documents.I_CmsDocumentFactory#isLocaleDependend()
209     */
210    @Override
211    public boolean isLocaleDependend() {
212
213        return true;
214    }
215
216    /**
217     * @see org.opencms.search.documents.I_CmsDocumentFactory#isUsingCache()
218     */
219    @Override
220    public boolean isUsingCache() {
221
222        return true;
223    }
224}