001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software GmbH & Co. KG, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.search.solr; 029 030import org.opencms.ade.configuration.CmsADEConfigData; 031import org.opencms.ade.configuration.CmsFormatterUtils; 032import org.opencms.file.CmsFile; 033import org.opencms.file.CmsObject; 034import org.opencms.file.CmsResource; 035import org.opencms.main.CmsException; 036import org.opencms.main.CmsLog; 037import org.opencms.main.OpenCms; 038import org.opencms.search.CmsIndexException; 039import org.opencms.search.I_CmsSearchIndex; 040import org.opencms.search.documents.Messages; 041import org.opencms.search.extractors.CmsExtractionResult; 042import org.opencms.search.extractors.I_CmsExtractionResult; 043import org.opencms.xml.containerpage.CmsContainerBean; 044import org.opencms.xml.containerpage.CmsContainerElementBean; 045import org.opencms.xml.containerpage.CmsContainerPageBean; 046import org.opencms.xml.containerpage.CmsFormatterConfiguration; 047import org.opencms.xml.containerpage.CmsXmlContainerPage; 048import org.opencms.xml.containerpage.CmsXmlContainerPageFactory; 049import org.opencms.xml.containerpage.I_CmsFormatterBean; 050 051import java.util.ArrayList; 052import java.util.HashMap; 053import java.util.LinkedHashMap; 054import java.util.List; 055import java.util.Locale; 056import java.util.Map; 057 058import org.apache.commons.logging.Log; 059 060/** 061 * Lucene document factory class to extract index data from a resource 062 * of type <code>CmsResourceTypeContainerPage</code>.<p> 063 * 064 * @since 8.5.0 065 */ 066public class CmsSolrDocumentContainerPage extends CmsSolrDocumentXmlContent { 067 068 /** The log object for this class. */ 069 private static final Log LOG = CmsLog.getLog(CmsSolrDocumentContainerPage.class); 070 071 /** The solr document type name for xml-contents. */ 072 public static final String TYPE_CONTAINERPAGE_SOLR = "containerpage-solr"; 073 074 /** 075 * Creates a new instance of this lucene document factory.<p> 076 * 077 * @param name name of the document type 078 */ 079 public CmsSolrDocumentContainerPage(String name) { 080 081 super(name); 082 } 083 084 /** 085 * Returns the raw text content of a VFS resource of type <code>CmsResourceTypeContainerPage</code>.<p> 086 * 087 * @see org.opencms.search.documents.I_CmsSearchExtractor#extractContent(CmsObject, CmsResource, I_CmsSearchIndex) 088 */ 089 @Override 090 public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, I_CmsSearchIndex index) 091 throws CmsException { 092 093 return extractContent(cms, resource, index, null); 094 } 095 096 /** 097 * Extracts the content of a given index resource according to the resource file type and the 098 * configuration of the given index.<p> 099 * 100 * @param cms the cms object 101 * @param resource the resource to extract the content from 102 * @param index the index to extract the content for 103 * @param forceLocale if set, only the content values for the given locale will be extracted 104 * 105 * @return the extracted content of the resource 106 * 107 * @throws CmsException if something goes wrong 108 */ 109 public I_CmsExtractionResult extractContent( 110 CmsObject cms, 111 CmsResource resource, 112 I_CmsSearchIndex index, 113 Locale forceLocale) 114 throws CmsException { 115 116 logContentExtraction(resource, index); 117 I_CmsExtractionResult ex = null; 118 try { 119 CmsFile file = readFile(cms, resource); 120 CmsXmlContainerPage containerPage = CmsXmlContainerPageFactory.unmarshal(cms, file); 121 122 List<I_CmsExtractionResult> all = new ArrayList<I_CmsExtractionResult>(); 123 CmsContainerPageBean containerBean = containerPage.getContainerPage(cms); 124 if (containerBean != null) { 125 for (Map.Entry<String, CmsContainerBean> entry : containerBean.getContainers().entrySet()) { 126 String containerName = entry.getKey(); 127 for (CmsContainerElementBean element : entry.getValue().getElements()) { 128 // check all elements in this container 129 // get the formatter configuration for this element 130 try { 131 element.initResource(cms); 132 CmsResource elementResource = element.getResource(); 133 if (!(cms.readProject(index.getProject()).isOnlineProject() 134 && elementResource.isExpired(System.currentTimeMillis()))) { 135 CmsADEConfigData adeConfig = OpenCms.getADEManager().lookupConfigurationWithCache( 136 cms, 137 file.getRootPath()); 138 CmsFormatterConfiguration formatters = adeConfig.getFormatters( 139 cms, 140 element.getResource()); 141 boolean shouldExtractElement = false; 142 if ((formatters != null) 143 && (element.getFormatterId() != null) 144 && (formatters.isSearchContent(element.getFormatterId()) 145 || adeConfig.isSearchContentFormatter(element.getFormatterId()))) { 146 // the content of this element must be included for the container page 147 shouldExtractElement = true; 148 } else if (formatters != null) { 149 String key = CmsFormatterUtils.getFormatterKey(containerName, element); 150 I_CmsFormatterBean formatter = adeConfig.findFormatter(key); 151 if (formatter != null) { 152 shouldExtractElement = formatter.isSearchContent(); 153 } 154 } 155 if (LOG.isDebugEnabled()) { 156 LOG.debug( 157 "Should extract element " 158 + element.getResource().getRootPath() 159 + ": " 160 + shouldExtractElement); 161 } 162 if (shouldExtractElement) { 163 all.add( 164 CmsSolrDocumentXmlContent.extractXmlContent( 165 cms, 166 elementResource, 167 index, 168 forceLocale)); 169 } 170 171 } 172 } catch (Exception e) { 173 LOG.debug( 174 Messages.get().getBundle().key( 175 Messages.LOG_SKIPPING_CONTAINERPAGE_ELEMENT_WITH_UNREADABLE_RESOURCE_2, 176 file.getRootPath(), 177 element.getId()), 178 e); 179 } 180 } 181 } 182 } 183 // we have to overwrite the resource and content locales with the one from this container page 184 // TODO: Is this really the wanted behavior? It seems to be done like this before. 185 Map<String, String> fieldMappings = new HashMap<String, String>(1); 186 // Add to each container page the contents in all available locales, 187 // in case one containerpage is used in multiple languages. 188 List<Locale> localesAvailable = OpenCms.getLocaleManager().getAvailableLocales(cms, resource); 189 Map<Locale, LinkedHashMap<String, String>> multilingualValues = new HashMap<Locale, LinkedHashMap<String, String>>( 190 localesAvailable.size()); 191 for (Locale localeAvailable : localesAvailable) { 192 multilingualValues.put(localeAvailable, new LinkedHashMap<String, String>()); 193 } 194 Locale locale = forceLocale != null 195 ? forceLocale 196 : index.getLocaleForResource(cms, resource, containerPage.getLocales()); 197 ex = new CmsExtractionResult(locale, multilingualValues, fieldMappings); 198 ex = ex.merge(all); 199 return ex; 200 } catch (Exception e) { 201 throw new CmsIndexException( 202 Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()), 203 e); 204 } 205 } 206 207 /** 208 * @see org.opencms.search.documents.I_CmsDocumentFactory#isLocaleDependend() 209 */ 210 @Override 211 public boolean isLocaleDependend() { 212 213 return true; 214 } 215 216}