001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.search.documents;
029
030import org.opencms.ade.configuration.CmsADEConfigData;
031import org.opencms.ade.configuration.CmsFormatterUtils;
032import org.opencms.file.CmsFile;
033import org.opencms.file.CmsObject;
034import org.opencms.file.CmsResource;
035import org.opencms.main.CmsException;
036import org.opencms.main.CmsLog;
037import org.opencms.main.OpenCms;
038import org.opencms.search.CmsIndexException;
039import org.opencms.search.I_CmsSearchDocument;
040import org.opencms.search.I_CmsSearchIndex;
041import org.opencms.search.extractors.CmsExtractionResult;
042import org.opencms.search.extractors.I_CmsExtractionResult;
043import org.opencms.util.CmsStringUtil;
044import org.opencms.xml.A_CmsXmlDocument;
045import org.opencms.xml.containerpage.CmsContainerBean;
046import org.opencms.xml.containerpage.CmsContainerElementBean;
047import org.opencms.xml.containerpage.CmsContainerPageBean;
048import org.opencms.xml.containerpage.CmsFormatterConfiguration;
049import org.opencms.xml.containerpage.CmsXmlContainerPage;
050import org.opencms.xml.containerpage.CmsXmlContainerPageFactory;
051import org.opencms.xml.containerpage.I_CmsFormatterBean;
052import org.opencms.xml.content.CmsXmlContentFactory;
053import org.opencms.xml.types.I_CmsXmlContentValue;
054
055import java.util.LinkedHashMap;
056import java.util.List;
057import java.util.Locale;
058import java.util.Map;
059
060import org.apache.commons.logging.Log;
061
062/**
063 * Lucene document factory class to extract index data from a resource
064 * of type <code>CmsResourceTypeContainerPage</code>.<p>
065 *
066 * @since 8.0
067 */
068public class CmsDocumentContainerPage extends A_CmsVfsDocument {
069
070    /** The log object for this class. */
071    private static final Log LOG = CmsLog.getLog(CmsDocumentContainerPage.class);
072
073    /**
074     * Creates a new instance of this lucene document factory.<p>
075     *
076     * @param name name of the document type
077     */
078    public CmsDocumentContainerPage(String name) {
079
080        super(name);
081    }
082
083    /**
084     * Generates a new lucene document instance from contents of the given resource for the provided index.<p>
085     *
086     * For container pages, we must not cache based on the container page content age,
087     * since the content of the included elements may change any time.
088     */
089    @Override
090    public I_CmsSearchDocument createDocument(CmsObject cms, CmsResource resource, I_CmsSearchIndex index)
091    throws CmsException {
092
093        // extract the content from the resource
094        I_CmsExtractionResult content = null;
095
096        if (index.isExtractingContent()) {
097            // do full text content extraction only if required
098
099            try {
100                content = extractContent(cms, resource, index);
101            } catch (Exception e) {
102                // text extraction failed for document - continue indexing meta information only
103                LOG.error(Messages.get().getBundle().key(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()), e);
104            }
105        }
106
107        // create the Lucene document according to the index field configuration
108        return index.getFieldConfiguration().createDocument(cms, resource, index, content);
109    }
110
111    /**
112     * Returns the raw text content of a VFS resource of type <code>CmsResourceTypeContainerPage</code>.<p>
113     *
114     * @see org.opencms.search.documents.I_CmsSearchExtractor#extractContent(CmsObject, CmsResource, I_CmsSearchIndex)
115     */
116    public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, I_CmsSearchIndex index)
117    throws CmsException {
118
119        logContentExtraction(resource, index);
120        try {
121            CmsFile file = readFile(cms, resource);
122            CmsXmlContainerPage containerPage = CmsXmlContainerPageFactory.unmarshal(cms, file);
123            Locale locale = index.getLocaleForResource(cms, resource, null);
124
125            // initialize return values
126            StringBuffer content = new StringBuffer();
127            LinkedHashMap<String, String> items = new LinkedHashMap<String, String>();
128
129            CmsContainerPageBean containerBean = containerPage.getContainerPage(cms);
130            for (Map.Entry<String, CmsContainerBean> entry : containerBean.getContainers().entrySet()) {
131                for (CmsContainerElementBean element : entry.getValue().getElements()) {
132                    // check all elements in this container
133
134                    // get the formatter configuration for this element
135                    element.initResource(cms);
136                    CmsADEConfigData adeConfig = OpenCms.getADEManager().lookupConfigurationWithCache(
137                        cms,
138                        file.getRootPath());
139                    CmsFormatterConfiguration formatters = adeConfig.getFormatters(cms, element.getResource());
140
141                    boolean foundFormatterWithSearchContentByKey = false;
142                    String formatterKey = CmsFormatterUtils.getFormatterKey(entry.getValue().getName(), element);
143                    if (formatterKey != null) {
144                        I_CmsFormatterBean formatter = adeConfig.findFormatter(formatterKey);
145                        if (formatter != null) {
146                            foundFormatterWithSearchContentByKey = true;
147                        }
148                    }
149                    if (foundFormatterWithSearchContentByKey
150                        || formatters.isSearchContent(element.getFormatterId())
151                        || adeConfig.isSearchContentFormatter(element.getFormatterId())) {
152                        // the content of this element must be included for the container page
153
154                        element.initResource(cms);
155                        CmsFile elementFile = readFile(cms, element.getResource());
156                        A_CmsXmlDocument elementContent = CmsXmlContentFactory.unmarshal(cms, elementFile);
157                        List<String> elementNames = elementContent.getNames(locale);
158                        for (String xpath : elementNames) {
159                            // xpath will have the form "Text[1]" or "Nested[1]/Text[1]"
160                            I_CmsXmlContentValue value = elementContent.getValue(xpath, locale);
161                            if (value.getContentDefinition().getContentHandler().isSearchable(value)) {
162                                // the content value is searchable
163                                String extracted = value.getPlainText(cms);
164                                if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(extracted)) {
165                                    items.put(elementFile.getRootPath() + "/" + xpath, extracted);
166                                    content.append(extracted);
167                                    content.append('\n');
168                                }
169                            }
170                        }
171                    }
172                }
173            }
174
175            return new CmsExtractionResult(content.toString(), items);
176
177        } catch (Exception e) {
178            throw new CmsIndexException(
179                Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()),
180                e);
181        }
182    }
183
184    /**
185     * @see org.opencms.search.documents.I_CmsDocumentFactory#isLocaleDependend()
186     */
187    public boolean isLocaleDependend() {
188
189        return true;
190    }
191
192    /**
193     * @see org.opencms.search.documents.I_CmsDocumentFactory#isUsingCache()
194     */
195    public boolean isUsingCache() {
196
197        return true;
198    }
199}