001/* 002 * File : $Source$ 003 * Date : $Date$ 004 * Version: $Revision$ 005 * 006 * This library is part of OpenCms - 007 * the Open Source Content Management System 008 * 009 * Copyright (C) 2002 - 2009 Alkacon Software (http://www.alkacon.com) 010 * 011 * This library is free software; you can redistribute it and/or 012 * modify it under the terms of the GNU Lesser General Public 013 * License as published by the Free Software Foundation; either 014 * version 2.1 of the License, or (at your option) any later version. 015 * 016 * This library is distributed in the hope that it will be useful, 017 * but WITHOUT ANY WARRANTY; without even the implied warranty of 018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 019 * Lesser General Public License for more details. 020 * 021 * For further information about Alkacon Software, please see the 022 * company website: http://www.alkacon.com 023 * 024 * For further information about OpenCms, please see the 025 * project website: http://www.opencms.org 026 * 027 * You should have received a copy of the GNU Lesser General Public 028 * License along with this library; if not, write to the Free Software 029 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 030 */ 031 032package org.opencms.search.solr; 033 034import org.opencms.acacia.shared.I_CmsSerialDateValue; 035import org.opencms.ade.containerpage.CmsDetailOnlyContainerUtil; 036import org.opencms.file.CmsFile; 037import org.opencms.file.CmsObject; 038import org.opencms.file.CmsPropertyDefinition; 039import org.opencms.file.CmsResource; 040import org.opencms.file.types.CmsResourceTypeXmlContent; 041import org.opencms.i18n.CmsLocaleManager; 042import org.opencms.main.CmsException; 043import org.opencms.main.CmsLog; 044import org.opencms.main.OpenCms; 045import org.opencms.search.CmsIndexException; 046import org.opencms.search.CmsSearchUtil; 047import org.opencms.search.I_CmsSearchIndex; 048import org.opencms.search.documents.A_CmsVfsDocument; 049import org.opencms.search.documents.CmsIndexNoContentException; 050import org.opencms.search.documents.Messages; 051import org.opencms.search.extractors.CmsExtractionResult; 052import org.opencms.search.extractors.I_CmsExtractionResult; 053import org.opencms.search.fields.CmsSearchField; 054import org.opencms.search.fields.CmsSearchFieldConfiguration; 055import org.opencms.search.galleries.CmsGalleryNameMacroResolver; 056import org.opencms.util.CmsStringUtil; 057import org.opencms.util.CmsUUID; 058import org.opencms.widgets.serialdate.CmsSerialDateBeanFactory; 059import org.opencms.widgets.serialdate.CmsSerialDateValue; 060import org.opencms.widgets.serialdate.I_CmsSerialDateBean; 061import org.opencms.xml.A_CmsXmlDocument; 062import org.opencms.xml.CmsXmlContentDefinition; 063import org.opencms.xml.CmsXmlUtils; 064import org.opencms.xml.content.CmsXmlContent; 065import org.opencms.xml.content.CmsXmlContentFactory; 066import org.opencms.xml.content.I_CmsXmlContentHandler; 067import org.opencms.xml.types.CmsXmlDateTimeValue; 068import org.opencms.xml.types.CmsXmlHtmlValue; 069import org.opencms.xml.types.CmsXmlNestedContentDefinition; 070import org.opencms.xml.types.CmsXmlSerialDateValue; 071import org.opencms.xml.types.I_CmsXmlContentValue; 072import org.opencms.xml.types.I_CmsXmlSchemaType; 073 074import java.util.ArrayList; 075import java.util.Collections; 076import java.util.HashMap; 077import java.util.HashSet; 078import java.util.LinkedHashMap; 079import java.util.List; 080import java.util.Locale; 081import java.util.Map; 082import java.util.Set; 083import java.util.function.Consumer; 084 085import org.apache.commons.logging.Log; 086 087import com.google.common.collect.Sets; 088 089/** 090 * Special document text extraction factory for Solr index.<p> 091 * 092 * @since 8.5.0 093 */ 094public class CmsSolrDocumentXmlContent extends A_CmsVfsDocument { 095 096 /** 097 * The gallery name is determined by resolving the macros in a string which can either come from a field mapped 098 * to the gallery name, or the title, or from default values for those fields. This class is used to select the 099 * value to use and performs the macro substitution. 100 */ 101 private static class GalleryNameChooser { 102 103 /** CMS context for this instance. */ 104 private CmsObject m_cms; 105 106 /** Current XML content. */ 107 private A_CmsXmlDocument m_content; 108 109 /** Default value of field mapped to gallery name. */ 110 private String m_defaultGalleryNameValue; 111 112 /** Default value of field mapped to title. */ 113 private String m_defaultTitleValue; 114 115 /** Current locale. */ 116 private Locale m_locale; 117 118 /** Content value mapped to Description property. */ 119 private String m_mappedDescriptionValue; 120 121 /** Content value mapped to gallery description. */ 122 private String m_mappedGalleryDescriptionValue; 123 124 /** Content value mapped to gallery name. */ 125 private String m_mappedGalleryNameValue; 126 127 /** Content value mapped to title. */ 128 private String m_mappedTitleValue; 129 130 /** 131 * Creates a new instance.<p> 132 * 133 * @param cms the CMS context 134 * @param content the XML content 135 * @param locale the locale in the XML content 136 */ 137 public GalleryNameChooser(CmsObject cms, A_CmsXmlDocument content, Locale locale) { 138 139 m_cms = cms; 140 m_content = content; 141 m_locale = locale; 142 } 143 144 /** 145 * Selects the description displayed in the gallery.<p> 146 * 147 * This method assumes that all the available values have been set via the setters of this class. 148 * 149 * @return the description 150 * 151 * @throws CmsException of something goes wrong 152 */ 153 public String getDescription() throws CmsException { 154 155 return getDescription(m_locale); 156 } 157 158 /** 159 * Selects the description displayed in the gallery.<p> 160 * 161 * This method assumes that all the available values have been set via the setters of this class. 162 * 163 * @param locale the locale to get the description in 164 * 165 * @return the description 166 * 167 * @throws CmsException of something goes wrong 168 */ 169 public String getDescription(Locale locale) throws CmsException { 170 171 String result = null; 172 for (String resultCandidateWithMacros : new String[] { 173 m_mappedGalleryDescriptionValue, 174 m_mappedDescriptionValue}) { 175 if (!CmsStringUtil.isEmptyOrWhitespaceOnly(resultCandidateWithMacros)) { 176 CmsGalleryNameMacroResolver resolver = new CmsGalleryNameMacroResolver(m_cms, m_content, locale); 177 result = resolver.resolveMacros(resultCandidateWithMacros); 178 return result; 179 } 180 } 181 result = m_cms.readPropertyObject( 182 m_content.getFile(), 183 CmsPropertyDefinition.PROPERTY_DESCRIPTION, 184 false).getValue(); 185 return result; 186 } 187 188 /** 189 * Selects the gallery name.<p> 190 * 191 * This method assumes that all the available values have been set via the setters of this class. 192 * 193 * @return the gallery name 194 * 195 * @throws CmsException of something goes wrong 196 */ 197 public String getGalleryName() throws CmsException { 198 199 return getGalleryName(m_locale); 200 } 201 202 /** 203 * Selects the gallery name.<p> 204 * 205 * This method assumes that all the available values have been set via the setters of this class. 206 * 207 * @param locale the locale to get the gallery name in 208 * 209 * @return the gallery name 210 * 211 * @throws CmsException of something goes wrong 212 */ 213 public String getGalleryName(Locale locale) throws CmsException { 214 215 String result = null; 216 for (String resultCandidateWithMacros : new String[] { 217 // Prioritize gallery name over title, and actual content values over defaults 218 m_mappedGalleryNameValue, 219 m_defaultGalleryNameValue, 220 m_mappedTitleValue, 221 m_defaultTitleValue}) { 222 if (!CmsStringUtil.isEmptyOrWhitespaceOnly(resultCandidateWithMacros)) { 223 CmsGalleryNameMacroResolver resolver = new CmsGalleryNameMacroResolver(m_cms, m_content, locale); 224 result = resolver.resolveMacros(resultCandidateWithMacros); 225 return result; 226 } 227 } 228 result = m_cms.readPropertyObject( 229 m_content.getFile(), 230 CmsPropertyDefinition.PROPERTY_TITLE, 231 false).getValue(); 232 return result; 233 } 234 235 /** 236 * Sets the defaultGalleryNameValue.<p> 237 * 238 * @param defaultGalleryNameValue the defaultGalleryNameValue to set 239 */ 240 public void setDefaultGalleryNameValue(String defaultGalleryNameValue) { 241 242 m_defaultGalleryNameValue = defaultGalleryNameValue; 243 } 244 245 /** 246 * Sets the defaultTitleValue.<p> 247 * 248 * @param defaultTitleValue the defaultTitleValue to set 249 */ 250 public void setDefaultTitleValue(String defaultTitleValue) { 251 252 m_defaultTitleValue = defaultTitleValue; 253 } 254 255 /** 256 * Sets the mapped description value.<p> 257 * 258 * @param mappedDescriptionValue the mappedDescriptionValue to set 259 */ 260 public void setMappedDescriptionValue(String mappedDescriptionValue) { 261 262 m_mappedDescriptionValue = mappedDescriptionValue; 263 } 264 265 /** 266 * Sets the name from a value mapped via 'galleryDescription'. 267 * 268 * @param mappedGalleryDescriptionValue the value that has been mapped 269 */ 270 public void setMappedGalleryDescriptionValue(String mappedGalleryDescriptionValue) { 271 272 m_mappedGalleryDescriptionValue = mappedGalleryDescriptionValue; 273 } 274 275 /** 276 * Sets the mappedGalleryNameValue.<p> 277 * 278 * @param mappedGalleryNameValue the mappedGalleryNameValue to set 279 */ 280 public void setMappedGalleryNameValue(String mappedGalleryNameValue) { 281 282 m_mappedGalleryNameValue = mappedGalleryNameValue; 283 } 284 285 /** 286 * Sets the mappedTitleValue.<p> 287 * 288 * @param mappedTitleValue the mappedTitleValue to set 289 */ 290 public void setMappedTitleValue(String mappedTitleValue) { 291 292 m_mappedTitleValue = mappedTitleValue; 293 } 294 } 295 296 /** Mapping name used to indicate that the value should be used for the gallery description. */ 297 public static final String MAPPING_GALLERY_DESCRIPTION = "galleryDescription"; 298 299 /** Mapping name used to indicate that the value should be used for the gallery name. */ 300 public static final String MAPPING_GALLERY_NAME = "galleryName"; 301 302 /** The log object for this class. */ 303 private static final Log LOG = CmsLog.getLog(CmsSolrDocumentXmlContent.class); 304 305 /** 306 * Public constructor.<p> 307 * 308 * @param name the name for the document type 309 */ 310 public CmsSolrDocumentXmlContent(String name) { 311 312 super(name); 313 } 314 315 /** 316 * Collects a list of all possible XPaths for a content definition.<p> 317 * 318 * @param cms the CMS context to use 319 * @param def the content definition 320 * @param path the path of the given content definition 321 * @param result the set used to collect the XPaths 322 */ 323 public static void collectSchemaXpathsForSimpleValues( 324 CmsObject cms, 325 CmsXmlContentDefinition def, 326 String path, 327 Set<String> result) { 328 329 List<I_CmsXmlSchemaType> nestedTypes = def.getTypeSequence(); 330 for (I_CmsXmlSchemaType nestedType : nestedTypes) { 331 String subPath = path + "/" + nestedType.getName(); 332 if (nestedType instanceof CmsXmlNestedContentDefinition) { 333 CmsXmlContentDefinition nestedDef = ((CmsXmlNestedContentDefinition)nestedType).getNestedContentDefinition(); 334 collectSchemaXpathsForSimpleValues(cms, nestedDef, subPath, result); 335 } else { 336 result.add(subPath); 337 } 338 } 339 } 340 341 /** 342 * Extracts the content of a single XML content resource.<p> 343 * 344 * @param cms the cms context 345 * @param resource the resource 346 * @param index the used index 347 * 348 * @return the extraction result 349 * 350 * @throws CmsException in case reading or unmarshalling the content fails 351 */ 352 public static CmsExtractionResult extractXmlContent(CmsObject cms, CmsResource resource, I_CmsSearchIndex index) 353 throws CmsException { 354 355 return extractXmlContent(cms, resource, index, null); 356 } 357 358 /** 359 * Extracts the content of a single XML content resource.<p> 360 * 361 * @param cms the cms context 362 * @param resource the resource 363 * @param index the used index 364 * @param forceLocale if set, only the content values for the given locale will be extracted 365 * 366 * @return the extraction result 367 * 368 * @throws CmsException in case reading or unmarshalling the content fails 369 */ 370 public static CmsExtractionResult extractXmlContent( 371 CmsObject cms, 372 CmsResource resource, 373 I_CmsSearchIndex index, 374 Locale forceLocale) 375 throws CmsException { 376 377 return extractXmlContent(cms, resource, index, forceLocale, null); 378 } 379 380 /** 381 * Extracts the content of a single XML content resource.<p> 382 * 383 * @param cms the cms context 384 * @param resource the resource 385 * @param index the used index 386 * @param forceLocale if set, only the content values for the given locale will be extracted 387 * 388 * @return the extraction result 389 * 390 * @throws CmsException in case reading or unmarshalling the content fails 391 */ 392 public static CmsExtractionResult extractXmlContent( 393 CmsObject cms, 394 CmsResource resource, 395 I_CmsSearchIndex index, 396 Locale forceLocale, 397 Set<CmsUUID> alreadyExtracted) 398 throws CmsException { 399 400 return extractXmlContent( 401 cms, 402 resource, 403 index, 404 forceLocale, 405 alreadyExtracted, 406 content -> {/*do nothing with the content*/}); 407 408 } 409 410 /** 411 * Extracts the content of a single XML content resource.<p> 412 * 413 * @param cms the cms context 414 * @param resource the resource 415 * @param index the used index 416 * @param forceLocale if set, only the content values for the given locale will be extracted 417 * @param alreadyExtracted keeps track of ids of contents which have already been extracted 418 * @param contentConsumer gets called with the unmarshalled content object 419 * 420 * @return the extraction result 421 * 422 * @throws CmsException in case reading or unmarshalling the content fails 423 */ 424 public static CmsExtractionResult extractXmlContent( 425 CmsObject cms, 426 CmsResource resource, 427 I_CmsSearchIndex index, 428 Locale forceLocale, 429 Set<CmsUUID> alreadyExtracted, 430 Consumer<A_CmsXmlDocument> contentConsumer) 431 throws CmsException { 432 433 if (null == alreadyExtracted) { 434 alreadyExtracted = Collections.emptySet(); 435 } 436 // un-marshal the content 437 CmsFile file = cms.readFile(resource); 438 if (file.getLength() <= 0) { 439 throw new CmsIndexNoContentException( 440 Messages.get().container(Messages.ERR_NO_CONTENT_1, resource.getRootPath())); 441 } 442 A_CmsXmlDocument xmlContent = CmsXmlContentFactory.unmarshal(cms, file); 443 if (contentConsumer != null) { 444 contentConsumer.accept(xmlContent); 445 } 446 447 // initialize some variables 448 Map<Locale, LinkedHashMap<String, String>> items = new HashMap<Locale, LinkedHashMap<String, String>>(); 449 Map<String, String> fieldMappings = new HashMap<String, String>(); 450 List<Locale> contentLocales = forceLocale != null 451 ? Collections.singletonList(forceLocale) 452 : xmlContent.getLocales(); 453 Locale resourceLocale = index.getLocaleForResource(cms, resource, contentLocales); 454 455 LinkedHashMap<String, String> localeItems = null; 456 GalleryNameChooser galleryNameChooser = null; 457 // loop over the locales of the content 458 for (Locale locale : contentLocales) { 459 galleryNameChooser = new GalleryNameChooser(cms, xmlContent, locale); 460 localeItems = new LinkedHashMap<String, String>(); 461 StringBuffer textContent = new StringBuffer(); 462 // store the locales of the content as space separated field 463 // loop over the available element paths of the current content locale 464 List<String> paths = xmlContent.getNames(locale); 465 for (String xpath : paths) { 466 467 // try to get the value extraction for the current element path 468 String extracted = null; 469 I_CmsXmlContentValue value = xmlContent.getValue(xpath, locale); 470 try { 471 //the new DatePointField.createField dose not support milliseconds 472 if (value instanceof CmsXmlDateTimeValue) { 473 extracted = CmsSearchUtil.getDateAsIso8601(((CmsXmlDateTimeValue)value).getDateTimeValue()); 474 } else { 475 extracted = value.getPlainText(cms); 476 if (CmsStringUtil.isEmptyOrWhitespaceOnly(extracted) 477 && value.isSimpleType() 478 && !(value instanceof CmsXmlHtmlValue)) { 479 // no text value for simple type, so take the string value as item 480 // prevent this for elements of type "OpenCmsHtml", since this causes problematic values 481 // being indexed, e.g., <iframe ...></iframe> 482 // TODO: Why is this special handling needed at all??? 483 extracted = value.getStringValue(cms); 484 } 485 } 486 } catch (Exception e) { 487 // it can happen that a exception is thrown while extracting a single value 488 LOG.warn(Messages.get().container(Messages.LOG_EXTRACT_VALUE_2, xpath, resource), e); 489 } 490 491 // put the extraction to the items and to the textual content 492 if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(extracted)) { 493 localeItems.put(xpath, extracted); 494 } 495 switch (xmlContent.getHandler().getSearchContentType(value)) { 496 case TRUE: 497 if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(extracted)) { 498 textContent.append(extracted); 499 textContent.append('\n'); 500 } 501 break; 502 case CONTENT: 503 // TODO: Potentially extend to allow for indexing of non-xml-contents as well. 504 String potentialLinkValue = value.getStringValue(cms); 505 try { 506 if ((null != potentialLinkValue) 507 && !potentialLinkValue.isEmpty() 508 && cms.existsResource(potentialLinkValue)) { 509 CmsResource linkedRes = cms.readResource(potentialLinkValue); 510 if (CmsResourceTypeXmlContent.isXmlContent(linkedRes) 511 && !alreadyExtracted.contains(linkedRes.getStructureId())) { 512 Set<CmsUUID> newAlreadyExtracted = new HashSet<>(alreadyExtracted); 513 newAlreadyExtracted.add(resource.getStructureId()); 514 I_CmsExtractionResult exRes = CmsSolrDocumentXmlContent.extractXmlContent( 515 cms, 516 linkedRes, 517 index, 518 locale, 519 newAlreadyExtracted); 520 String exContent = exRes.getContent(locale); 521 if ((exContent != null) && !exContent.trim().isEmpty()) { 522 textContent.append(exContent.trim()); 523 textContent.append('\n'); 524 break; // Success - we break here to not repeatedly programm a warning. 525 } 526 } 527 } 528 if (LOG.isInfoEnabled()) { 529 LOG.info( 530 "When indexing resource " 531 + resource.getRootPath() 532 + ", the elements value " 533 + value.getPath() 534 + " in locale " 535 + locale 536 + " does not contain a link to an XML content. Hence, the linked element's content is not added to the content indexed for the resource itself."); 537 } 538 } catch (Throwable t) { 539 LOG.error( 540 "Failed to add content of resource (site path) " 541 + potentialLinkValue 542 + " to content of resource (root path) " 543 + resource.getRootPath() 544 + " when indexing the resource for locale " 545 + locale 546 + ". Skipping this content part.", 547 t); 548 } 549 break; 550 default: 551 // we do not index the content element for the content field. 552 break; 553 } 554 555 List<String> mappings = xmlContent.getHandler().getMappings(value.getPath()); 556 if (mappings.size() > 0) { 557 // mappings are defined, lets check if we have mappings that interest us 558 for (String mapping : mappings) { 559 if (mapping.startsWith(I_CmsXmlContentHandler.MAPTO_PROPERTY)) { 560 // this is a property mapping 561 String propertyName = mapping.substring(mapping.lastIndexOf(':') + 1); 562 if (CmsPropertyDefinition.PROPERTY_TITLE.equals(propertyName) 563 || CmsPropertyDefinition.PROPERTY_DESCRIPTION.equals(propertyName)) { 564 565 if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(extracted)) { 566 if (CmsPropertyDefinition.PROPERTY_TITLE.equals(propertyName)) { 567 galleryNameChooser.setMappedTitleValue(extracted); 568 } else { 569 // if field is not title, it must be description 570 galleryNameChooser.setMappedDescriptionValue(extracted); 571 } 572 } 573 } 574 } else if (mapping.equals(MAPPING_GALLERY_NAME)) { 575 galleryNameChooser.setMappedGalleryNameValue(value.getPlainText(cms)); 576 } else if (mapping.equals(MAPPING_GALLERY_DESCRIPTION)) { 577 galleryNameChooser.setMappedGalleryDescriptionValue(value.getPlainText(cms)); 578 } 579 } 580 } 581 if (value instanceof CmsXmlSerialDateValue) { 582 if ((null != extracted) && !extracted.isEmpty()) { 583 I_CmsSerialDateValue serialDateValue = new CmsSerialDateValue(extracted); 584 I_CmsSerialDateBean serialDateBean = CmsSerialDateBeanFactory.createSerialDateBean( 585 serialDateValue); 586 if (null != serialDateBean) { 587 StringBuffer values = new StringBuffer(); 588 StringBuffer endValues = new StringBuffer(); 589 StringBuffer currentTillValues = new StringBuffer(); 590 for (Long eventDate : serialDateBean.getDatesAsLong()) { 591 values.append("\n").append(eventDate.toString()); 592 long endDate = null != serialDateBean.getEventDuration() 593 ? eventDate.longValue() + serialDateBean.getEventDuration().longValue() 594 : eventDate.longValue(); 595 endValues.append("\n").append(Long.toString(endDate)); 596 // Special treatment for events that end at 00:00: 597 // To not show them at the day after they ended, one millisecond is removed from the end time 598 // for the "currenttill"-time 599 currentTillValues.append("\n").append( 600 serialDateValue.isCurrentTillEnd() 601 ? Long.valueOf( 602 serialDateValue.endsAtMidNight() && (endDate > eventDate.longValue()) 603 ? endDate - 1L 604 : endDate) 605 : eventDate); 606 } 607 fieldMappings.put(CmsSearchField.FIELD_SERIESDATES, values.substring(1)); 608 fieldMappings.put(CmsSearchField.FIELD_SERIESDATES_END, endValues.substring(1)); 609 fieldMappings.put( 610 CmsSearchField.FIELD_SERIESDATES_CURRENT_TILL, 611 currentTillValues.substring(1)); 612 fieldMappings.put( 613 CmsSearchField.FIELD_SERIESDATES_TYPE, 614 serialDateValue.getDateType().toString()); 615 } else { 616 LOG.warn( 617 "Serial date value \"" 618 + value.getStringValue(cms) 619 + "\" at element \"" 620 + value.getPath() 621 + "\" is invalid. No dates are indexed for resource \"" 622 + resource.getRootPath() 623 + "\"."); 624 } 625 } 626 } 627 } 628 629 Set<String> xpaths = Sets.newHashSet(); 630 collectSchemaXpathsForSimpleValues(cms, xmlContent.getContentDefinition(), "", xpaths); 631 for (String xpath : xpaths) { 632 // mappings always are stored with indexes, so we add them to the xpath 633 List<String> mappings = xmlContent.getHandler().getMappings(CmsXmlUtils.createXpath(xpath, 1)); 634 for (String mapping : mappings) { 635 636 if (mapping.equals(MAPPING_GALLERY_NAME) 637 || mapping.equals( 638 I_CmsXmlContentHandler.MAPTO_PROPERTY + CmsPropertyDefinition.PROPERTY_TITLE)) { 639 String defaultValue = xmlContent.getHandler().getDefault( 640 cms, 641 xmlContent.getFile(), 642 null, 643 xpath, 644 locale); 645 if (mapping.equals(MAPPING_GALLERY_NAME)) { 646 galleryNameChooser.setDefaultGalleryNameValue(defaultValue); 647 } else { 648 galleryNameChooser.setDefaultTitleValue(defaultValue); 649 } 650 } 651 } 652 } 653 654 final String galleryTitleFieldKey = CmsSearchFieldConfiguration.getLocaleExtendedName( 655 CmsSearchField.FIELD_TITLE_UNSTORED, 656 locale) + "_s"; 657 final String galleryNameValue = galleryNameChooser.getGalleryName(); 658 fieldMappings.put(galleryTitleFieldKey, galleryNameValue); 659 fieldMappings.put( 660 CmsSearchFieldConfiguration.getLocaleExtendedName(CmsSearchField.FIELD_DESCRIPTION, locale) + "_s", 661 galleryNameChooser.getDescription()); 662 663 // handle the textual content 664 if (textContent.length() > 0) { 665 // add the textual content with a localized key to the items 666 //String key = CmsSearchFieldConfiguration.getLocaleExtendedName(CmsSearchField.FIELD_CONTENT, locale); 667 //items.put(key, textContent.toString()); 668 // use the default locale of this resource as general text content for the extraction result 669 localeItems.put(I_CmsExtractionResult.ITEM_CONTENT, textContent.toString()); 670 } 671 items.put(locale, localeItems); 672 } 673 // if the content is locale independent, it should have only one content locale, but that should be indexed for all available locales. 674 // TODO: One could think of different indexing behavior, i.e., index only for getDefaultLocales(cms,resource) 675 // But using getAvailableLocales(cms,resource) does not work, because locale-available is set to "en" for all that content. 676 if ((xmlContent instanceof CmsXmlContent) && ((CmsXmlContent)xmlContent).isLocaleIndependent()) { 677 if (forceLocale != null) { 678 items.put(forceLocale, localeItems); 679 } else { 680 for (Locale l : OpenCms.getLocaleManager().getAvailableLocales()) { 681 items.put(l, localeItems); 682 if (null != galleryNameChooser) { 683 final String galleryTitleFieldKey = CmsSearchFieldConfiguration.getLocaleExtendedName( 684 CmsSearchField.FIELD_TITLE_UNSTORED, 685 l) + "_s"; 686 fieldMappings.put(galleryTitleFieldKey, galleryNameChooser.getGalleryName(l)); 687 fieldMappings.put( 688 CmsSearchFieldConfiguration.getLocaleExtendedName(CmsSearchField.FIELD_DESCRIPTION, l) 689 + "_s", 690 galleryNameChooser.getDescription(l)); 691 } 692 } 693 } 694 } 695 // add the locales that have been indexed for this document as item and return the extraction result 696 // fieldMappings.put(CmsSearchField.FIELD_RESOURCE_LOCALES, locales.toString().trim()); 697 return new CmsExtractionResult(resourceLocale, items, fieldMappings); 698 699 } 700 701 /** 702 * @see org.opencms.search.documents.CmsDocumentXmlContent#extractContent(org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.I_CmsSearchIndex) 703 */ 704 @Override 705 public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, I_CmsSearchIndex index) 706 throws CmsException { 707 708 logContentExtraction(resource, index); 709 710 try { 711 I_CmsExtractionResult result = null; 712 List<I_CmsExtractionResult> ex = new ArrayList<I_CmsExtractionResult>(); 713 for (CmsResource detailContainers : CmsDetailOnlyContainerUtil.getDetailOnlyResources(cms, resource)) { 714 CmsSolrDocumentContainerPage containerpageExtractor = new CmsSolrDocumentContainerPage(""); 715 String localeTemp = detailContainers.getRootPath(); 716 localeTemp = CmsResource.getParentFolder(localeTemp); 717 localeTemp = CmsResource.getName(localeTemp); 718 localeTemp = localeTemp.substring(0, localeTemp.length() - 1); 719 Locale locale = CmsLocaleManager.getLocale(localeTemp); 720 if (CmsDetailOnlyContainerUtil.useSingleLocaleDetailContainers( 721 OpenCms.getSiteManager().getSiteRoot(resource.getRootPath())) 722 && locale.equals(CmsLocaleManager.getDefaultLocale())) { 723 // in case of single locale detail containers do not force the locale 724 locale = null; 725 } 726 I_CmsExtractionResult containersExtractionResult = containerpageExtractor.extractContent( 727 cms, 728 detailContainers, 729 index, 730 locale); 731 // only use the locales of the resource itself, not the ones of the detail containers page 732 containersExtractionResult.getContentItems().remove(CmsSearchField.FIELD_RESOURCE_LOCALES); 733 734 ex.add(containersExtractionResult); 735 } 736 result = extractXmlContent(cms, resource, index); 737 result = result.merge(ex); 738 return result; 739 740 } catch (Throwable t) { 741 throw new CmsIndexException(Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource), t); 742 } 743 } 744 745 /** 746 * Solr index content is stored in multiple languages, so the result is NOT locale dependent.<p> 747 * 748 * @see org.opencms.search.documents.I_CmsDocumentFactory#isLocaleDependend() 749 */ 750 public boolean isLocaleDependend() { 751 752 return false; 753 } 754 755 /** 756 * @see org.opencms.search.documents.I_CmsDocumentFactory#isUsingCache() 757 */ 758 public boolean isUsingCache() { 759 760 return true; 761 } 762}