/*
 * File   : $Source$
 * Date   : $Date$
 * Version: $Revision$
 *
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (C) 2002 - 2009 Alkacon Software (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package org.opencms.search.solr;

import org.opencms.acacia.shared.I_CmsSerialDateValue;
import org.opencms.ade.containerpage.CmsDetailOnlyContainerUtil;
import org.opencms.file.CmsFile;
import org.opencms.file.CmsObject;
import org.opencms.file.CmsPropertyDefinition;
import org.opencms.file.CmsResource;
import org.opencms.file.types.CmsResourceTypeXmlContent;
import org.opencms.i18n.CmsLocaleManager;
import org.opencms.main.CmsException;
import org.opencms.main.CmsLog;
import org.opencms.main.OpenCms;
import org.opencms.search.CmsIndexException;
import org.opencms.search.CmsSearchUtil;
import org.opencms.search.I_CmsSearchIndex;
import org.opencms.search.documents.A_CmsVfsDocument;
import org.opencms.search.documents.CmsIndexNoContentException;
import org.opencms.search.documents.Messages;
import org.opencms.search.extractors.CmsExtractionResult;
import org.opencms.search.extractors.I_CmsExtractionResult;
import org.opencms.search.fields.CmsSearchField;
import org.opencms.search.fields.CmsSearchFieldConfiguration;
import org.opencms.search.galleries.CmsGalleryNameMacroResolver;
import org.opencms.util.CmsStringUtil;
import org.opencms.util.CmsUUID;
import org.opencms.widgets.serialdate.CmsSerialDateBeanFactory;
import org.opencms.widgets.serialdate.CmsSerialDateValue;
import org.opencms.widgets.serialdate.I_CmsSerialDateBean;
import org.opencms.xml.A_CmsXmlDocument;
import org.opencms.xml.CmsXmlContentDefinition;
import org.opencms.xml.CmsXmlUtils;
import org.opencms.xml.content.CmsXmlContent;
import org.opencms.xml.content.CmsXmlContentFactory;
import org.opencms.xml.content.I_CmsContentValueAdjustment;
import org.opencms.xml.content.I_CmsXmlContentHandler;
import org.opencms.xml.types.CmsXmlDateTimeValue;
import org.opencms.xml.types.CmsXmlHtmlValue;
import org.opencms.xml.types.CmsXmlNestedContentDefinition;
import org.opencms.xml.types.CmsXmlSerialDateValue;
import org.opencms.xml.types.I_CmsXmlContentValue;
import org.opencms.xml.types.I_CmsXmlContentValue.CmsSearchContentConfig;
import org.opencms.xml.types.I_CmsXmlSchemaType;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.function.Consumer;

import org.apache.commons.logging.Log;

import com.google.common.collect.Sets;

/**
 * Special document text extraction factory for Solr index.<p>
 *
 * @since 8.5.0
 */
public class CmsSolrDocumentXmlContent extends A_CmsVfsDocument {

    /**
     * The gallery name is determined by resolving the macros in a string which can either come from a field mapped
     * to the gallery name, or the title, or from default values for those fields. This class is used to select the
     * value to use and performs the macro substitution.
     */
    private static class GalleryNameChooser {

        /** CMS context for this instance. */
        private final CmsObject m_cms;

        /** Current XML content. */
        private final A_CmsXmlDocument m_content;

        /** Default value of field mapped to gallery name. */
        private String m_defaultGalleryNameValue;

        /** Default value of field mapped to title. */
        private String m_defaultTitleValue;

        /** Current locale. */
        private final Locale m_locale;

        /** Content value mapped to Description property. */
        private String m_mappedDescriptionValue;

        /** Content value mapped to gallery description. */
        private String m_mappedGalleryDescriptionValue;

        /** Content value mapped to gallery name. */
        private String m_mappedGalleryNameValue;

        /** Content value mapped to title. */
        private String m_mappedTitleValue;

        /**
         * Creates a new instance.<p>
         *
         * @param cms the CMS context
         * @param content the XML content
         * @param locale the locale in the XML content
         */
        public GalleryNameChooser(CmsObject cms, A_CmsXmlDocument content, Locale locale) {

            m_cms = cms;
            m_content = content;
            m_locale = locale;
        }

        /**
         * Selects the description displayed in the gallery, using the locale this instance was created with.<p>
         *
         * This method assumes that all the available values have been set via the setters of this class.
         *
         * @return the description
         *
         * @throws CmsException if something goes wrong
         */
        public String getDescription() throws CmsException {

            return getDescription(m_locale);
        }

        /**
         * Selects the description displayed in the gallery.<p>
         *
         * The value mapped to the gallery description takes precedence over the value mapped to the
         * Description property; if both are empty, the Description property of the content file is read.<p>
         *
         * This method assumes that all the available values have been set via the setters of this class.
         *
         * @param locale the locale to get the description in
         *
         * @return the description
         *
         * @throws CmsException if something goes wrong
         */
        public String getDescription(Locale locale) throws CmsException {

            return resolveFirstNonEmpty(
                locale,
                CmsPropertyDefinition.PROPERTY_DESCRIPTION,
                m_mappedGalleryDescriptionValue,
                m_mappedDescriptionValue);
        }

        /**
         * Selects the gallery name, using the locale this instance was created with.<p>
         *
         * This method assumes that all the available values have been set via the setters of this class.
         *
         * @return the gallery name
         *
         * @throws CmsException if something goes wrong
         */
        public String getGalleryName() throws CmsException {

            return getGalleryName(m_locale);
        }

        /**
         * Selects the gallery name.<p>
         *
         * This method assumes that all the available values have been set via the setters of this class.
         *
         * @param locale the locale to get the gallery name in
         *
         * @return the gallery name
         *
         * @throws CmsException if something goes wrong
         */
        public String getGalleryName(Locale locale) throws CmsException {

            // Prioritize gallery name over title, and actual content values over defaults
            return resolveFirstNonEmpty(
                locale,
                CmsPropertyDefinition.PROPERTY_TITLE,
                m_mappedGalleryNameValue,
                m_defaultGalleryNameValue,
                m_mappedTitleValue,
                m_defaultTitleValue);
        }

        /**
         * Sets the defaultGalleryNameValue.<p>
         *
         * @param defaultGalleryNameValue the defaultGalleryNameValue to set
         */
        public void setDefaultGalleryNameValue(String defaultGalleryNameValue) {

            m_defaultGalleryNameValue = defaultGalleryNameValue;
        }

        /**
         * Sets the defaultTitleValue.<p>
         *
         * @param defaultTitleValue the defaultTitleValue to set
         */
        public void setDefaultTitleValue(String defaultTitleValue) {

            m_defaultTitleValue = defaultTitleValue;
        }

        /**
         * Sets the mapped description value.<p>
         *
         * @param mappedDescriptionValue the mappedDescriptionValue to set
         */
        public void setMappedDescriptionValue(String mappedDescriptionValue) {

            m_mappedDescriptionValue = mappedDescriptionValue;
        }

        /**
         * Sets the description from a value mapped via 'galleryDescription'.<p>
         *
         * @param mappedGalleryDescriptionValue the value that has been mapped
         */
        public void setMappedGalleryDescriptionValue(String mappedGalleryDescriptionValue) {

            m_mappedGalleryDescriptionValue = mappedGalleryDescriptionValue;
        }

        /**
         * Sets the mappedGalleryNameValue.<p>
         *
         * @param mappedGalleryNameValue the mappedGalleryNameValue to set
         */
        public void setMappedGalleryNameValue(String mappedGalleryNameValue) {

            m_mappedGalleryNameValue = mappedGalleryNameValue;
        }

        /**
         * Sets the mappedTitleValue.<p>
         *
         * @param mappedTitleValue the mappedTitleValue to set
         */
        public void setMappedTitleValue(String mappedTitleValue) {

            m_mappedTitleValue = mappedTitleValue;
        }

        /**
         * Resolves the macros in the first candidate that is neither empty nor whitespace only; if there is
         * no such candidate, the value of the given property is read (without search) from the content file.<p>
         *
         * @param locale the locale used for the macro resolution
         * @param fallbackProperty the name of the property to read if no candidate is usable
         * @param candidates the candidate strings (possibly containing macros) in order of priority
         *
         * @return the resolved value, or the property value
         *
         * @throws CmsException if something goes wrong
         */
        private String resolveFirstNonEmpty(Locale locale, String fallbackProperty, String... candidates)
        throws CmsException {

            for (String candidateWithMacros : candidates) {
                if (!CmsStringUtil.isEmptyOrWhitespaceOnly(candidateWithMacros)) {
                    CmsGalleryNameMacroResolver resolver = new CmsGalleryNameMacroResolver(m_cms, m_content, locale);
                    return resolver.resolveMacros(candidateWithMacros);
                }
            }
            return m_cms.readPropertyObject(m_content.getFile(), fallbackProperty, false).getValue();
        }
    }

    /** Mapping name used to indicate that the value should be used for the gallery description. */
    public static final String MAPPING_GALLERY_DESCRIPTION = "galleryDescription";

    /** Mapping name used to indicate that the value should be used for the gallery name. */
    public static final String MAPPING_GALLERY_NAME = "galleryName";

    /** The log object for this class. */
    private static final Log LOG = CmsLog.getLog(CmsSolrDocumentXmlContent.class);

    /**
     * Public constructor.<p>
     *
     * @param name the name for the document type
     */
    public CmsSolrDocumentXmlContent(String name) {

        super(name);
    }

    /**
     * Collects a list of all possible XPaths for a content definition.<p>
     *
     * Nested content definitions are descended into recursively; only the paths of non-nested types
     * are added to the result.<p>
     *
     * @param cms the CMS context to use
     * @param def the content definition
     * @param path the path of the given content definition
     * @param result the set used to collect the XPaths
     */
    public static void collectSchemaXpathsForSimpleValues(
        CmsObject cms,
        CmsXmlContentDefinition def,
        String path,
        Set<String> result) {

        for (I_CmsXmlSchemaType nestedType : def.getTypeSequence()) {
            String subPath = path + "/" + nestedType.getName();
            if (nestedType instanceof CmsXmlNestedContentDefinition) {
                CmsXmlContentDefinition nestedDef = ((CmsXmlNestedContentDefinition)nestedType).getNestedContentDefinition();
                collectSchemaXpathsForSimpleValues(cms, nestedDef, subPath, result);
            } else {
                result.add(subPath);
            }
        }
    }

    /**
     * Extracts the content of a single XML content resource.<p>
     *
     * @param cms the cms context
     * @param resource the resource
     * @param index the used index
     *
     * @return the extraction result
     *
     * @throws CmsException in case reading or unmarshalling the content fails
     */
    public static CmsExtractionResult extractXmlContent(CmsObject cms, CmsResource resource, I_CmsSearchIndex index)
    throws CmsException {

        return extractXmlContent(cms, resource, index, null);
    }

    /**
     * Extracts the content of a single XML content resource.<p>
     *
     * @param cms the cms context
     * @param resource the resource
     * @param index the used index
     * @param forceLocale if set, only the content values for the given locale will be extracted
     *
     * @return the extraction result
     *
     * @throws CmsException in case reading or unmarshalling the content fails
     */
    public static CmsExtractionResult extractXmlContent(
        CmsObject cms,
        CmsResource resource,
        I_CmsSearchIndex index,
        Locale forceLocale)
    throws CmsException {

        return extractXmlContent(cms, resource, index, forceLocale, null);
    }

    /**
     * Extracts the content of a single XML content resource.<p>
     *
     * @param cms the cms context
     * @param resource the resource
     * @param index the used index
     * @param forceLocale if set, only the content values for the given locale will be extracted
     * @param alreadyExtracted keeps track of ids of contents which have already been extracted
     *
     * @return the extraction result
     *
     * @throws CmsException in case reading or unmarshalling the content fails
     */
    public static CmsExtractionResult extractXmlContent(
        CmsObject cms,
        CmsResource resource,
        I_CmsSearchIndex index,
        Locale forceLocale,
        Set<CmsUUID> alreadyExtracted)
    throws CmsException {

        return extractXmlContent(cms, resource, index, forceLocale, alreadyExtracted, content -> {
            /*do nothing with the content*/});

    }

    /**
     * Extracts the content of a single XML content resource.<p>
     *
     * For every content locale (or only <code>forceLocale</code>, if given) the values of all element paths are
     * collected as items, the searchable text is assembled according to the search content configuration of
     * each value, and gallery name / description as well as serial date field mappings are computed.<p>
     *
     * @param cms the cms context
     * @param resource the resource
     * @param index the used index
     * @param forceLocale if set, only the content values for the given locale will be extracted
     * @param alreadyExtracted keeps track of ids of contents which have already been extracted
     *        (may be <code>null</code>, which is treated as an empty set)
     * @param contentConsumer gets called with the unmarshalled content object (may be <code>null</code>)
     *
     * @return the extraction result
     *
     * @throws CmsException in case reading or unmarshalling the content fails
     */
    public static CmsExtractionResult extractXmlContent(
        CmsObject cms,
        CmsResource resource,
        I_CmsSearchIndex index,
        Locale forceLocale,
        Set<CmsUUID> alreadyExtracted,
        Consumer<A_CmsXmlDocument> contentConsumer)
    throws CmsException {

        if (null == alreadyExtracted) {
            alreadyExtracted = Collections.emptySet();
        }
        // un-marshal the content
        CmsFile file = cms.readFile(resource);
        if (file.getLength() <= 0) {
            throw new CmsIndexNoContentException(
                Messages.get().container(Messages.ERR_NO_CONTENT_1, resource.getRootPath()));
        }
        A_CmsXmlDocument xmlContent = CmsXmlContentFactory.unmarshal(cms, file);
        if (contentConsumer != null) {
            contentConsumer.accept(xmlContent);
        }

        // initialize some variables
        Map<Locale, LinkedHashMap<String, String>> items = new HashMap<>();
        Map<String, String> fieldMappings = new HashMap<>();
        List<Locale> contentLocales = forceLocale != null
        ? Collections.singletonList(forceLocale)
        : xmlContent.getLocales();
        Locale resourceLocale = index.getLocaleForResource(cms, resource, contentLocales);

        // both are needed after the loop for the handling of locale independent contents
        LinkedHashMap<String, String> localeItems = null;
        GalleryNameChooser galleryNameChooser = null;
        // loop over the locales of the content
        for (Locale locale : contentLocales) {
            galleryNameChooser = new GalleryNameChooser(cms, xmlContent, locale);
            localeItems = new LinkedHashMap<>();
            StringBuilder textContent = new StringBuilder();
            // loop over the available element paths of the current content locale
            List<String> paths = xmlContent.getNames(locale);
            for (String xpath : paths) {

                // try to get the value extraction for the current element path
                String extracted = null;
                I_CmsXmlContentValue value = xmlContent.getValue(xpath, locale);
                try {
                    // the new DatePointField.createField does not support milliseconds
                    if (value instanceof CmsXmlDateTimeValue) {
                        extracted = CmsSearchUtil.getDateAsIso8601(((CmsXmlDateTimeValue)value).getDateTimeValue());
                    } else {
                        extracted = value.getPlainText(cms);
                        if (CmsStringUtil.isEmptyOrWhitespaceOnly(extracted)
                            && value.isSimpleType()
                            && !(value instanceof CmsXmlHtmlValue)) {
                            // no text value for simple type, so take the string value as item
                            // prevent this for elements of type "OpenCmsHtml", since this causes problematic values
                            // being indexed, e.g., <iframe ...></iframe>
                            // TODO: Why is this special handling needed at all???
                            extracted = value.getStringValue(cms);
                        }
                    }
                } catch (Exception e) {
                    // it can happen that an exception is thrown while extracting a single value
                    LOG.warn(Messages.get().container(Messages.LOG_EXTRACT_VALUE_2, xpath, resource), e);
                }

                // put the extraction to the items and to the textual content
                if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(extracted)) {
                    localeItems.put(xpath, extracted);
                }
                CmsSearchContentConfig searchContentConfig = xmlContent.getHandler().getSearchContentConfig(value);
                switch (searchContentConfig.getSearchContentType()) {
                    case TRUE:
                        if (null != searchContentConfig.getAdjustmentClass()) {
                            try {
                                // asSubclass verifies that the configured class really is a value adjustment;
                                // any failure (class not found, wrong type, instantiation problem) is handled
                                // by the catch block below, which falls back to the unadjusted value.
                                Class<? extends I_CmsContentValueAdjustment> adjustmentClass = Class.forName(
                                    searchContentConfig.getAdjustmentClass()).asSubclass(
                                        I_CmsContentValueAdjustment.class);
                                I_CmsContentValueAdjustment adjustment = adjustmentClass.getConstructor().newInstance();
                                String adjustedValue = adjustment.getAdjustedValue(
                                    cms,
                                    xmlContent,
                                    locale,
                                    xpath,
                                    extracted);
                                if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(adjustedValue)) {
                                    textContent.append(adjustedValue);
                                    textContent.append('\n');
                                }
                                // adjustment succeeded, do not add the unadjusted value
                                break;
                            } catch (Throwable t) {
                                String logMessage = "Cannot adjust value via configured class in searchsetting for \""
                                    + value.getPath()
                                    + "\" in content \""
                                    + resource.getRootPath()
                                    + "\". Using the unadjusted value.";
                                if (LOG.isDebugEnabled()) {
                                    LOG.debug(logMessage, t);
                                } else {
                                    LOG.error(logMessage);
                                }
                            }
                        }
                        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(extracted)) {
                            textContent.append(extracted);
                            textContent.append('\n');
                        }
                        break;
                    case CONTENT:
                        // TODO: Potentially extend to allow for indexing of non-xml-contents as well.
                        String potentialLinkValue = value.getStringValue(cms);
                        try {
                            if ((null != potentialLinkValue)
                                && !potentialLinkValue.isEmpty()
                                && cms.existsResource(potentialLinkValue)) {
                                CmsResource linkedRes = cms.readResource(potentialLinkValue);
                                if (CmsResourceTypeXmlContent.isXmlContent(linkedRes)
                                    && !alreadyExtracted.contains(linkedRes.getStructureId())) {
                                    // remember the current resource to prevent endless recursion on link cycles
                                    Set<CmsUUID> newAlreadyExtracted = new HashSet<>(alreadyExtracted);
                                    newAlreadyExtracted.add(resource.getStructureId());
                                    I_CmsExtractionResult exRes = CmsSolrDocumentXmlContent.extractXmlContent(
                                        cms,
                                        linkedRes,
                                        index,
                                        locale,
                                        newAlreadyExtracted);
                                    String exContent = exRes.getContent(locale);
                                    if ((exContent != null) && !exContent.trim().isEmpty()) {
                                        textContent.append(exContent.trim());
                                        textContent.append('\n');
                                        break; // Success - we break here to skip the info message below.
                                    }
                                }
                            }
                            if (LOG.isInfoEnabled()) {
                                LOG.info(
                                    "When indexing resource "
                                        + resource.getRootPath()
                                        + ", the elements value "
                                        + value.getPath()
                                        + " in locale "
                                        + locale
                                        + " does not contain a link to an XML content. Hence, the linked element's content is not added to the content indexed for the resource itself.");
                            }
                        } catch (Throwable t) {
                            LOG.error(
                                "Failed to add content of resource (site path) "
                                    + potentialLinkValue
                                    + " to content of resource (root path) "
                                    + resource.getRootPath()
                                    + " when indexing the resource for locale "
                                    + locale
                                    + ". Skipping this content part.",
                                t);
                        }
                        break;
                    default:
                        // we do not index the content element for the content field.
                        break;
                }

                // check if mappings are defined that interest us
                List<String> mappings = xmlContent.getHandler().getMappings(value.getPath());
                for (String mapping : mappings) {
                    if (mapping.startsWith(I_CmsXmlContentHandler.MAPTO_PROPERTY)) {
                        // this is a property mapping, only non-empty title and description values are of interest
                        String propertyName = mapping.substring(mapping.lastIndexOf(':') + 1);
                        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(extracted)) {
                            if (CmsPropertyDefinition.PROPERTY_TITLE.equals(propertyName)) {
                                galleryNameChooser.setMappedTitleValue(extracted);
                            } else if (CmsPropertyDefinition.PROPERTY_DESCRIPTION.equals(propertyName)) {
                                galleryNameChooser.setMappedDescriptionValue(extracted);
                            }
                        }
                    } else if (mapping.equals(MAPPING_GALLERY_NAME)) {
                        galleryNameChooser.setMappedGalleryNameValue(value.getPlainText(cms));
                    } else if (mapping.equals(MAPPING_GALLERY_DESCRIPTION)) {
                        galleryNameChooser.setMappedGalleryDescriptionValue(value.getPlainText(cms));
                    }
                }
                if (value instanceof CmsXmlSerialDateValue) {
                    addSerialDateMappings(cms, resource, value, extracted, fieldMappings);
                }
            }

            Set<String> xpaths = Sets.newHashSet();
            collectSchemaXpathsForSimpleValues(cms, xmlContent.getContentDefinition(), "", xpaths);
            for (String xpath : xpaths) {
                // mappings always are stored with indexes, so we add them to the xpath
                List<String> mappings = xmlContent.getHandler().getMappings(CmsXmlUtils.createXpath(xpath, 1));
                for (String mapping : mappings) {

                    if (mapping.equals(MAPPING_GALLERY_NAME)
                        || mapping.equals(
                            I_CmsXmlContentHandler.MAPTO_PROPERTY + CmsPropertyDefinition.PROPERTY_TITLE)) {
                        String defaultValue = xmlContent.getHandler().getDefault(
                            cms,
                            xmlContent.getFile(),
                            null,
                            xpath,
                            locale);
                        if (mapping.equals(MAPPING_GALLERY_NAME)) {
                            galleryNameChooser.setDefaultGalleryNameValue(defaultValue);
                        } else {
                            galleryNameChooser.setDefaultTitleValue(defaultValue);
                        }
                    }
                }
            }

            final String galleryTitleFieldKey = CmsSearchFieldConfiguration.getLocaleExtendedName(
                CmsSearchField.FIELD_TITLE_UNSTORED,
                locale) + "_s";
            final String galleryNameValue = galleryNameChooser.getGalleryName();
            fieldMappings.put(galleryTitleFieldKey, galleryNameValue);
            fieldMappings.put(
                CmsSearchFieldConfiguration.getLocaleExtendedName(CmsSearchField.FIELD_DESCRIPTION, locale) + "_s",
                galleryNameChooser.getDescription());

            // handle the textual content
            if (textContent.length() > 0) {
                // add the textual content of this locale under the general content item key
                localeItems.put(I_CmsExtractionResult.ITEM_CONTENT, textContent.toString());
            }
            items.put(locale, localeItems);
        }
        // if the content is locale independent, it should have only one content locale, but that should be indexed for all available locales.
        // TODO: One could think of different indexing behavior, i.e., index only for getDefaultLocales(cms,resource)
        //       But using getAvailableLocales(cms,resource) does not work, because locale-available is set to "en" for all that content.
        if ((xmlContent instanceof CmsXmlContent) && ((CmsXmlContent)xmlContent).isLocaleIndependent()) {
            if (forceLocale != null) {
                items.put(forceLocale, localeItems);
            } else {
                for (Locale l : OpenCms.getLocaleManager().getAvailableLocales()) {
                    items.put(l, localeItems);
                    if (null != galleryNameChooser) {
                        final String galleryTitleFieldKey = CmsSearchFieldConfiguration.getLocaleExtendedName(
                            CmsSearchField.FIELD_TITLE_UNSTORED,
                            l) + "_s";
                        fieldMappings.put(galleryTitleFieldKey, galleryNameChooser.getGalleryName(l));
                        fieldMappings.put(
                            CmsSearchFieldConfiguration.getLocaleExtendedName(CmsSearchField.FIELD_DESCRIPTION, l)
                                + "_s",
                            galleryNameChooser.getDescription(l));
                    }
                }
            }
        }
        return new CmsExtractionResult(resourceLocale, items, fieldMappings);

    }

    /**
     * Adds the field mappings for the dates of a serial date value.<p>
     *
     * Nothing is added if the extracted value is empty, if no serial date bean can be created for it,
     * or if the series does not contain any dates.<p>
     *
     * @param cms the cms context
     * @param resource the resource that is indexed (used for log messages only)
     * @param value the serial date content value
     * @param extracted the text extracted from the value
     * @param fieldMappings the field mappings to add the series date fields to
     *
     * @throws CmsException if something goes wrong
     */
    private static void addSerialDateMappings(
        CmsObject cms,
        CmsResource resource,
        I_CmsXmlContentValue value,
        String extracted,
        Map<String, String> fieldMappings)
    throws CmsException {

        if ((null == extracted) || extracted.isEmpty()) {
            return;
        }
        I_CmsSerialDateValue serialDateValue = new CmsSerialDateValue(extracted);
        I_CmsSerialDateBean serialDateBean = CmsSerialDateBeanFactory.createSerialDateBean(serialDateValue);
        if (null == serialDateBean) {
            LOG.warn(
                "Serial date value \""
                    + value.getStringValue(cms)
                    + "\" at element \""
                    + value.getPath()
                    + "\" is invalid. No dates are indexed for resource \""
                    + resource.getRootPath()
                    + "\".");
            return;
        }
        // each buffer collects its values with a leading line break, which is cut off at the end
        StringBuilder values = new StringBuilder();
        StringBuilder endValues = new StringBuilder();
        StringBuilder currentTillValues = new StringBuilder();
        for (Long eventDate : serialDateBean.getDatesAsLong()) {
            values.append("\n").append(eventDate.toString());
            long endDate = null != serialDateBean.getEventDuration()
            ? eventDate.longValue() + serialDateBean.getEventDuration().longValue()
            : eventDate.longValue();
            endValues.append("\n").append(Long.toString(endDate));
            // Special treatment for events that end at 00:00:
            // To not show them at the day after they ended, one millisecond is removed from the end time
            // for the "currenttill"-time
            long currentTill;
            if (serialDateValue.isCurrentTillEnd()) {
                currentTill = serialDateValue.endsAtMidNight() && (endDate > eventDate.longValue())
                ? endDate - 1L
                : endDate;
            } else {
                currentTill = eventDate.longValue();
            }
            currentTillValues.append("\n").append(currentTill);
        }
        if (values.length() == 0) {
            // a series without any dates: cutting off the leading line break would fail on the empty
            // buffers and abort the indexing of the whole resource, so skip the date fields instead
            LOG.warn(
                "Serial date value at element \""
                    + value.getPath()
                    + "\" does not yield any dates. No dates are indexed for resource \""
                    + resource.getRootPath()
                    + "\".");
            return;
        }
        fieldMappings.put(CmsSearchField.FIELD_SERIESDATES, values.substring(1));
        fieldMappings.put(CmsSearchField.FIELD_SERIESDATES_END, endValues.substring(1));
        fieldMappings.put(CmsSearchField.FIELD_SERIESDATES_CURRENT_TILL, currentTillValues.substring(1));
        fieldMappings.put(CmsSearchField.FIELD_SERIESDATES_TYPE, serialDateValue.getDateType().toString());
    }

    /**
     * Extracts the content of the resource and merges it with the content extracted from the
     * detail only container pages of the resource.<p>
     *
     * @see org.opencms.search.documents.CmsDocumentXmlContent#extractContent(org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.I_CmsSearchIndex)
     */
    @Override
    public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, I_CmsSearchIndex index)
    throws CmsException {

        logContentExtraction(resource, index);

        try {
            I_CmsExtractionResult result = null;
            List<I_CmsExtractionResult> ex = new ArrayList<>();
            for (CmsResource detailContainers : CmsDetailOnlyContainerUtil.getDetailOnlyResources(cms, resource)) {
                CmsSolrDocumentContainerPage containerpageExtractor = new CmsSolrDocumentContainerPage("");
                // the locale is taken from the name of the parent folder of the detail containers page
                String localeTemp = detailContainers.getRootPath();
                localeTemp = CmsResource.getParentFolder(localeTemp);
                localeTemp = CmsResource.getName(localeTemp);
                // cut off the last character of the folder name
                localeTemp = localeTemp.substring(0, localeTemp.length() - 1);
                Locale locale = CmsLocaleManager.getLocale(localeTemp);
                if (CmsDetailOnlyContainerUtil.useSingleLocaleDetailContainers(
                    OpenCms.getSiteManager().getSiteRoot(resource.getRootPath()))
                    && locale.equals(CmsLocaleManager.getDefaultLocale())) {
                    // in case of single locale detail containers do not force the locale
                    locale = null;
                }
                I_CmsExtractionResult containersExtractionResult = containerpageExtractor.extractContent(
                    cms,
                    detailContainers,
                    index,
                    locale);
                // only use the locales of the resource itself, not the ones of the detail containers page
                containersExtractionResult.getContentItems().remove(CmsSearchField.FIELD_RESOURCE_LOCALES);

                ex.add(containersExtractionResult);
            }
            result = extractXmlContent(cms, resource, index);
            result = result.merge(ex);
            return result;

        } catch (Throwable t) {
            throw new CmsIndexException(Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource), t);
        }
    }

    /**
     * Solr index content is stored in multiple languages, so the result is NOT locale dependent.<p>
     *
     * @see org.opencms.search.documents.I_CmsDocumentFactory#isLocaleDependend()
     */
    public boolean isLocaleDependend() {

        return false;
    }

    /**
     * Always returns <code>false</code>.<p>
     *
     * @see org.opencms.search.documents.A_CmsVfsDocument#isOnlyDependentOnContent()
     */
    @Override
    public boolean isOnlyDependentOnContent() {

        return false;
    }

    /**
     * Always returns <code>false</code>.<p>
     *
     * @see org.opencms.search.documents.I_CmsDocumentFactory#isUsingCache()
     */
    public boolean isUsingCache() {

        return false;
    }
}