001/* 002 * File : $Source$ 003 * Date : $Date$ 004 * Version: $Revision$ 005 * 006 * This library is part of OpenCms - 007 * the Open Source Content Management System 008 * 009 * Copyright (C) 2002 - 2009 Alkacon Software (https://www.alkacon.com) 010 * 011 * This library is free software; you can redistribute it and/or 012 * modify it under the terms of the GNU Lesser General Public 013 * License as published by the Free Software Foundation; either 014 * version 2.1 of the License, or (at your option) any later version. 015 * 016 * This library is distributed in the hope that it will be useful, 017 * but WITHOUT ANY WARRANTY; without even the implied warranty of 018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 019 * Lesser General Public License for more details. 020 * 021 * For further information about Alkacon Software, please see the 022 * company website: https://www.alkacon.com 023 * 024 * For further information about OpenCms, please see the 025 * project website: https://www.opencms.org 026 * 027 * You should have received a copy of the GNU Lesser General Public 028 * License along with this library; if not, write to the Free Software 029 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 030 */ 031 032package org.opencms.search.solr; 033 034import org.opencms.ade.containerpage.CmsDetailOnlyContainerUtil; 035import org.opencms.configuration.I_CmsXmlConfiguration; 036import org.opencms.file.CmsFile; 037import org.opencms.file.CmsObject; 038import org.opencms.file.CmsProperty; 039import org.opencms.file.CmsPropertyDefinition; 040import org.opencms.file.CmsResource; 041import org.opencms.file.types.CmsResourceTypeJsp; 042import org.opencms.file.types.CmsResourceTypeXmlContainerPage; 043import org.opencms.file.types.CmsResourceTypeXmlContent; 044import org.opencms.file.types.CmsResourceTypeXmlPage; 045import org.opencms.i18n.CmsLocaleManager; 046import org.opencms.loader.CmsResourceManager; 047import org.opencms.main.CmsException; 048import org.opencms.main.CmsLog; 049import org.opencms.main.OpenCms; 050import org.opencms.search.CmsSearchIndexSource; 051import org.opencms.search.CmsSearchUtil; 052import org.opencms.search.I_CmsSearchDocument; 053import org.opencms.search.documents.CmsDocumentDependency; 054import org.opencms.search.extractors.I_CmsExtractionResult; 055import org.opencms.search.fields.CmsLuceneField; 056import org.opencms.search.fields.CmsSearchField; 057import org.opencms.search.fields.CmsSearchFieldConfiguration; 058import org.opencms.search.fields.CmsSearchFieldMapping; 059import org.opencms.search.fields.CmsSearchFieldMappingType; 060import org.opencms.search.fields.I_CmsSearchFieldMapping; 061import org.opencms.util.CmsStringUtil; 062import org.opencms.util.CmsVfsUtil; 063import org.opencms.xml.CmsXmlContentDefinition; 064import org.opencms.xml.containerpage.CmsContainerElementBean; 065import org.opencms.xml.containerpage.CmsContainerPageBean; 066import org.opencms.xml.containerpage.CmsXmlContainerPage; 067import org.opencms.xml.containerpage.CmsXmlContainerPageFactory; 068import org.opencms.xml.content.I_CmsXmlContentHandler; 069 070import java.util.ArrayList; 071import java.util.Arrays; 072import java.util.Collection; 073import java.util.Collections; 074import java.util.Date; 075import java.util.HashMap; 076import java.util.List; 077import java.util.Locale; 078import java.util.Map; 079import java.util.Set; 080 081import org.apache.commons.logging.Log; 082import org.apache.solr.common.SolrInputDocument; 083 084/** 085 * The search field implementation for Solr.<p> 086 * 087 * @since 8.5.0 088 */ 089public class CmsSolrFieldConfiguration extends CmsSearchFieldConfiguration { 090 091 /** The log object for this class. */ 092 private static final Log LOG = CmsLog.getLog(CmsSolrFieldConfiguration.class); 093 094 /** The content locale for the indexed document is stored in order to save performance. */ 095 private Collection<Locale> m_contentLocales; 096 097 /** A list of Solr fields. */ 098 private Map<String, CmsSolrField> m_solrFields = new HashMap<String, CmsSolrField>(); 099 100 /** 101 * Default constructor.<p> 102 */ 103 public CmsSolrFieldConfiguration() { 104 105 super(); 106 } 107 108 /** 109 * Adds the additional fields to the configuration, if they are not null.<p> 110 * 111 * @param additionalFields the additional fields to add 112 */ 113 public void addAdditionalFields(List<CmsSolrField> additionalFields) { 114 115 if (additionalFields != null) { 116 for (CmsSolrField solrField : additionalFields) { 117 m_solrFields.put(solrField.getName(), solrField); 118 } 119 } 120 } 121 122 /** 123 * Returns all configured Solr fields.<p> 124 * 125 * @return all configured Solr fields 126 */ 127 public Map<String, CmsSolrField> getSolrFields() { 128 129 return Collections.unmodifiableMap(m_solrFields); 130 } 131 132 /** 133 * @see org.opencms.search.fields.CmsSearchFieldConfiguration#init() 134 */ 135 @Override 136 public void init() { 137 138 super.init(); 139 addAdditionalFields(); 140 } 141 142 /** 143 * @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendAdditionalValuesToDcoument(org.opencms.search.I_CmsSearchDocument, org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.extractors.I_CmsExtractionResult, java.util.List, java.util.List) 144 */ 145 @Override 146 protected I_CmsSearchDocument appendAdditionalValuesToDcoument( 147 I_CmsSearchDocument document, 148 CmsObject cms, 149 CmsResource resource, 150 I_CmsExtractionResult extractionResult, 151 List<CmsProperty> properties, 152 List<CmsProperty> propertiesSearched) { 153 154 String mimeType = OpenCms.getResourceManager().getMimeType(resource.getName(), null); 155 if (mimeType != null) { 156 document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_MIMETYPE), mimeType); 157 } 158 159 document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_FILENAME), resource.getName()); 160 161 document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_VERSION), "" + resource.getVersion()); 162 163 try { 164 if (CmsResourceTypeXmlContent.isXmlContent(resource)) { 165 I_CmsXmlContentHandler handler = CmsXmlContentDefinition.getContentHandlerForResource(cms, resource); 166 if ((handler != null) && handler.isContainerPageOnly()) { 167 if (document.getDocument() instanceof SolrInputDocument) { 168 SolrInputDocument doc = (SolrInputDocument)document.getDocument(); 169 doc.removeField(CmsSearchField.FIELD_SEARCH_EXCLUDE); 170 } else { 171 //TODO: Warning - but should not happen. 172 } 173 document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_SEARCH_EXCLUDE), "true"); 174 } 175 } 176 } catch (CmsException e) { 177 LOG.error(e.getMessage(), e); 178 } 179 180 List<String> searchExcludeOptions = document.getMultivaluedFieldAsStringList( 181 CmsSearchField.FIELD_SEARCH_EXCLUDE); 182 if ((searchExcludeOptions == null) || searchExcludeOptions.isEmpty()) { 183 document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_SEARCH_EXCLUDE), "false"); 184 } 185 if (resource.getRootPath().startsWith("/system") 186 || (CmsResourceTypeJsp.getJSPTypeId() == resource.getTypeId())) { 187 document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_SEARCH_CHANNEL), "gallery"); 188 } else { 189 document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_SEARCH_CHANNEL), "content"); 190 } 191 192 document = appendFieldsForListSortOptions(document); 193 194 document = appendFieldsForListSearch(document, cms, resource); 195 196 if (resource.getRootPath().startsWith(OpenCms.getSiteManager().getSharedFolder()) 197 || (null != OpenCms.getSiteManager().getSiteRoot(resource.getRootPath()))) { 198 appendSpellFields(document); 199 } 200 201 document = getIndex().applyDocumentTransformation( 202 document, 203 cms, 204 resource, 205 extractionResult, 206 properties, 207 propertiesSearched); 208 209 return document; 210 } 211 212 /** 213 * @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendDates(org.opencms.search.I_CmsSearchDocument, org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.extractors.I_CmsExtractionResult, java.util.List, java.util.List) 214 */ 215 @Override 216 protected I_CmsSearchDocument appendDates( 217 I_CmsSearchDocument document, 218 CmsObject cms, 219 CmsResource resource, 220 I_CmsExtractionResult extractionResult, 221 List<CmsProperty> properties, 222 List<CmsProperty> propertiesSearched) { 223 224 document.addDateField(CmsSearchField.FIELD_DATE_CREATED, resource.getDateCreated(), false); 225 document.addDateField(CmsSearchField.FIELD_DATE_LASTMODIFIED, resource.getDateLastModified(), false); 226 document.addDateField(CmsSearchField.FIELD_DATE_CONTENT, resource.getDateContent(), false); 227 document.addDateField(CmsSearchField.FIELD_DATE_RELEASED, resource.getDateReleased(), false); 228 document.addDateField(CmsSearchField.FIELD_DATE_EXPIRED, resource.getDateExpired(), false); 229 230 return document; 231 } 232 233 /** 234 * @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendFieldMapping(org.opencms.search.I_CmsSearchDocument, org.opencms.search.fields.CmsSearchField, org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.extractors.I_CmsExtractionResult, java.util.List, java.util.List) 235 */ 236 @Override 237 protected I_CmsSearchDocument appendFieldMapping( 238 I_CmsSearchDocument document, 239 CmsSearchField sfield, 240 CmsObject cms, 241 CmsResource resource, 242 I_CmsExtractionResult extractionResult, 243 List<CmsProperty> properties, 244 List<CmsProperty> propertiesSearched) { 245 246 CmsSolrField field = (CmsSolrField)sfield; 247 try { 248 StringBuffer text = new StringBuffer(); 249 for (I_CmsSearchFieldMapping mapping : field.getMappings()) { 250 // loop over the mappings of the given field 251 if (extractionResult != null) { 252 String mapResult = null; 253 if ((field.getLocale() != null) && mapping.getType().equals(CmsSearchFieldMappingType.CONTENT)) { 254 // this is a localized content field, try to retrieve the localized content extraction 255 mapResult = extractionResult.getContent(field.getLocale()); 256 if (mapResult == null) { 257 // no localized content extracted 258 if (!(CmsResourceTypeXmlContent.isXmlContent(resource) 259 || CmsResourceTypeXmlPage.isXmlPage(resource))) { 260 // the resource is no XML content nor an XML page 261 if ((m_contentLocales != null) && m_contentLocales.contains(field.getLocale())) { 262 // the resource to get the extracted content for has the locale of this field, 263 // so store the extraction content into this field 264 mapResult = extractionResult.getContent(); 265 } 266 } 267 } 268 } else { 269 // this is not a localized content field, just perform the regular mapping 270 mapResult = mapping.getStringValue( 271 cms, 272 resource, 273 extractionResult, 274 properties, 275 propertiesSearched); 276 } 277 if (text.length() > 0) { 278 text.append('\n'); 279 } 280 if (mapResult != null) { 281 text.append(mapResult); 282 } else if (mapping.getDefaultValue() != null) { 283 // no mapping result found, but a default is configured 284 text.append(mapping.getDefaultValue()); 285 } 286 } else if (mapping.getStringValue( 287 cms, 288 resource, 289 extractionResult, 290 properties, 291 propertiesSearched) != null) { 292 String value = mapping.getStringValue( 293 cms, 294 resource, 295 extractionResult, 296 properties, 297 propertiesSearched); 298 if (value != null) { 299 document.addSearchField(field, value); 300 } 301 } 302 } 303 if ((text.length() <= 0) && (field.getDefaultValue() != null)) { 304 text.append(field.getDefaultValue()); 305 } 306 if (text.length() > 0) { 307 document.addSearchField(field, text.toString()); 308 } 309 } catch (Exception e) { 310 // nothing to do just log 311 LOG.error(e.getLocalizedMessage(), e); 312 } 313 return document; 314 } 315 316 /** 317 * @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendFieldMappings(org.opencms.search.I_CmsSearchDocument, org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.extractors.I_CmsExtractionResult, java.util.List, java.util.List) 318 */ 319 @Override 320 protected I_CmsSearchDocument appendFieldMappings( 321 I_CmsSearchDocument document, 322 CmsObject cms, 323 CmsResource resource, 324 I_CmsExtractionResult extractionResult, 325 List<CmsProperty> properties, 326 List<CmsProperty> propertiesSearched) { 327 328 List<String> systemFields = new ArrayList<String>(); 329 // append field mappings directly stored in the extraction result 330 if (null != extractionResult) { 331 Map<String, String> fieldMappings = extractionResult.getFieldMappings(); 332 for (String fieldName : fieldMappings.keySet()) { 333 String value = fieldMappings.get(fieldName); 334 CmsSolrField f = new CmsSolrField(fieldName, null, null, null); 335 document.addSearchField(f, value); 336 systemFields.add(fieldName); 337 } 338 } 339 340 Set<CmsSearchField> mappedFields = getXSDMappings(cms, resource); 341 if (mappedFields != null) { 342 for (CmsSearchField field : mappedFields) { 343 if (!systemFields.contains(field.getName())) { 344 document = appendFieldMapping( 345 document, 346 field, 347 cms, 348 resource, 349 extractionResult, 350 properties, 351 propertiesSearched); 352 } else { 353 LOG.error( 354 Messages.get().getBundle().key( 355 Messages.LOG_SOLR_ERR_MAPPING_TO_INTERNALLY_USED_FIELD_2, 356 resource.getRootPath(), 357 field.getName())); 358 } 359 } 360 } 361 362 // add field mappings from elements of a container page 363 if (CmsResourceTypeXmlContainerPage.isContainerPage(resource)) { 364 document = appendFieldMappingsFromElementsOnThePage(document, cms, resource, systemFields); 365 } else { 366 try { 367 for (CmsResource detailOnlyPage : CmsDetailOnlyContainerUtil.getDetailOnlyResources(cms, resource)) { 368 try { 369 document = appendFieldMappingsFromElementsOnThePage( 370 document, 371 cms, 372 detailOnlyPage, 373 systemFields); 374 } catch (Throwable t) { 375 LOG.warn( 376 Messages.get().getBundle().key( 377 Messages.LOG_SOLR_WARN_DETAIL_ONLY_PAGE_MAPPINGS_FOR_PAGE_2, 378 null == resource ? "null" : resource.getRootPath(), 379 null == detailOnlyPage ? "null" : detailOnlyPage.getRootPath()), 380 t); 381 } 382 } 383 } catch (Throwable t) { 384 LOG.warn( 385 Messages.get().getBundle().key( 386 Messages.LOG_SOLR_WARN_DETAIL_ONLY_PAGE_MAPPINGS_1, 387 null == resource ? "null" : resource.getRootPath()), 388 t); 389 } 390 } 391 392 for (CmsSolrField field : m_solrFields.values()) { 393 document = appendFieldMapping( 394 document, 395 field, 396 cms, 397 resource, 398 extractionResult, 399 properties, 400 propertiesSearched); 401 } 402 403 return document; 404 } 405 406 /** 407 * Adds search fields from elements on a container page to a container page's document. 408 * @param document The document for the container page 409 * @param cms The current CmsObject 410 * @param resource The resource of the container page 411 * @param systemFields The list of field names for fields where mappings to should be discarded, since these fields are used system internally. 412 * @return the manipulated document 413 */ 414 protected I_CmsSearchDocument appendFieldMappingsFromElementsOnThePage( 415 I_CmsSearchDocument document, 416 CmsObject cms, 417 CmsResource resource, 418 List<String> systemFields) { 419 420 try { 421 CmsFile file = cms.readFile(resource); 422 CmsXmlContainerPage containerPage = CmsXmlContainerPageFactory.unmarshal(cms, file); 423 CmsContainerPageBean containerBean = containerPage.getContainerPage(cms); 424 if (containerBean != null) { 425 for (CmsContainerElementBean element : containerBean.getElements()) { 426 element.initResource(cms); 427 CmsResource elemResource = element.getResource(); 428 Set<CmsSearchField> mappedFields = getXSDMappingsForPage(cms, elemResource); 429 if (mappedFields != null) { 430 431 for (CmsSearchField field : mappedFields) { 432 if (!systemFields.contains(field.getName())) { 433 try { 434 I_CmsExtractionResult extractionResult = CmsSolrDocumentXmlContent.extractXmlContent( 435 cms, 436 elemResource, 437 getIndex()); 438 document = appendFieldMapping( 439 document, 440 field, 441 cms, 442 elemResource, 443 extractionResult, 444 cms.readPropertyObjects(resource, false), 445 cms.readPropertyObjects(resource, true)); 446 } catch (Exception e) { 447 LOG.error( 448 Messages.get().getBundle().key( 449 Messages.LOG_SOLR_ERR_MAPPING_UNREADABLE_CONTENT_3, 450 elemResource.getRootPath(), 451 field.getName(), 452 resource.getRootPath()), 453 e); 454 } 455 } else { 456 LOG.error( 457 Messages.get().getBundle().key( 458 Messages.LOG_SOLR_ERR_MAPPING_TO_INTERNALLY_USED_FIELD_3, 459 elemResource.getRootPath(), 460 field.getName(), 461 resource.getRootPath())); 462 } 463 } 464 } 465 } 466 } 467 } catch (CmsException e) { 468 // Should be thrown if element on the page does not exist anymore - this is possible, but not necessarily an error. 469 // Hence, just notice it in the debug log. 470 if (LOG.isDebugEnabled()) { 471 LOG.debug(e.getLocalizedMessage(), e); 472 } 473 } 474 return document; 475 } 476 477 /** 478 * @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendLocales(org.opencms.search.I_CmsSearchDocument, org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.extractors.I_CmsExtractionResult, java.util.List, java.util.List) 479 */ 480 @Override 481 protected I_CmsSearchDocument appendLocales( 482 I_CmsSearchDocument document, 483 CmsObject cms, 484 CmsResource resource, 485 I_CmsExtractionResult extraction, 486 List<CmsProperty> properties, 487 List<CmsProperty> propertiesSearched) { 488 489 // append the resource locales 490 Collection<Locale> resourceLocales = new ArrayList<Locale>(); 491 if ((extraction != null) && (!extraction.getLocales().isEmpty())) { 492 493 CmsResourceManager resMan = OpenCms.getResourceManager(); 494 resourceLocales = extraction.getLocales(); 495 boolean isGroup = false; 496 for (String groupType : Arrays.asList( 497 CmsResourceTypeXmlContainerPage.GROUP_CONTAINER_TYPE_NAME, 498 CmsResourceTypeXmlContainerPage.INHERIT_CONTAINER_TYPE_NAME)) { 499 if (resMan.matchResourceType(groupType, resource.getTypeId())) { 500 isGroup = true; 501 break; 502 } 503 } 504 if (isGroup) { 505 // groups are locale independent, so they have to have *all* locales so they are found for each one 506 m_contentLocales = OpenCms.getLocaleManager().getAvailableLocales(); 507 } else { 508 m_contentLocales = resourceLocales; 509 } 510 } else { 511 // For all other resources add all default locales 512 resourceLocales = OpenCms.getLocaleManager().getDefaultLocales(cms, resource); 513 514 /* 515 * A problem is likely to arise when dealing with multilingual fields: 516 * Only values extracted from XML resources are written into the Solr locale-aware fields (e.g. 517 * "title_<locale>_s"), therefore sorting by them will not work as non-XML (unilingual) resources extract 518 * the information by the resource property facility and will not write to an Solr locale-aware field. 519 * 520 * The following code is used to fix this behavior, at least for "Title". 521 */ 522 523 // Check all passed properties for "Title"... 524 for (final CmsProperty prop : propertiesSearched) { 525 if (prop.getName().equals(CmsPropertyDefinition.PROPERTY_TITLE)) { 526 final String value = prop.getValue(); 527 528 // Write a Solr locale-aware field for every locale the system supports... 529 final List<Locale> availableLocales = OpenCms.getLocaleManager().getAvailableLocales(); 530 for (final Locale locale : availableLocales) { 531 final String lang = locale.getLanguage(); 532 // Don't proceed if a field has already written for this locale. 533 if (!resourceLocales.contains(lang)) { 534 final String effFieldName = CmsSearchFieldConfiguration.getLocaleExtendedName( 535 CmsSearchField.FIELD_TITLE_UNSTORED, 536 locale) + "_s"; 537 538 final CmsSolrField f = new CmsSolrField(effFieldName, null, null, null); 539 document.addSearchField(f, value); 540 } 541 } 542 } 543 } 544 m_contentLocales = getContentLocales(cms, resource, extraction); 545 } 546 547 document.addResourceLocales(resourceLocales); 548 document.addContentLocales(m_contentLocales); 549 550 // append document dependencies if configured 551 if (hasLocaleDependencies()) { 552 CmsDocumentDependency dep = CmsDocumentDependency.load(cms, resource); 553 ((CmsSolrDocument)document).addDocumentDependency(cms, dep); 554 } 555 return document; 556 } 557 558 /** 559 * @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendProperties(org.opencms.search.I_CmsSearchDocument, org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.extractors.I_CmsExtractionResult, java.util.List, java.util.List) 560 */ 561 @Override 562 protected I_CmsSearchDocument appendProperties( 563 I_CmsSearchDocument document, 564 CmsObject cms, 565 CmsResource resource, 566 I_CmsExtractionResult extraction, 567 List<CmsProperty> properties, 568 List<CmsProperty> propertiesSearched) { 569 570 for (CmsProperty prop : propertiesSearched) { 571 if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(prop.getValue())) { 572 String value = CmsSearchUtil.stripHtmlFromPropertyIfNecessary(prop.getName(), prop.getValue()); 573 document.addSearchField( 574 new CmsSolrField(prop.getName() + CmsSearchField.FIELD_DYNAMIC_PROPERTIES, null, null, null), 575 value); 576 577 // Also write the property using the dynamic field '_s' in order to prevent tokenization 578 // of the property. The resulting field is named '<property>_prop_s'. 579 document.addSearchField( 580 new CmsSolrField(prop.getName() + CmsSearchField.FIELD_DYNAMIC_PROPERTIES + "_s", null, null, null), 581 value); 582 } 583 } 584 585 for (CmsProperty prop : properties) { 586 if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(prop.getValue())) { 587 String value = CmsSearchUtil.stripHtmlFromPropertyIfNecessary(prop.getName(), prop.getValue()); 588 document.addSearchField( 589 new CmsSolrField(prop.getName() + CmsSearchField.FIELD_DYNAMIC_PROPERTIES_DIRECT, null, null, null), 590 value); 591 592 // Also write the property using the dynamic field '_s' in order to prevent tokenization 593 // of the property. The resulting field is named '<property>_prop_nosearch_s'. 594 document.addSearchField( 595 new CmsSolrField( 596 prop.getName() + CmsSearchField.FIELD_DYNAMIC_PROPERTIES_DIRECT + "_s", 597 null, 598 null, 599 null), 600 value); 601 } 602 } 603 return document; 604 } 605 606 /** 607 * Retrieves the locales for an content, that is whether an XML content nor an XML page.<p> 608 * 609 * Uses following strategy: 610 * <ul> 611 * <li>first by file name</li> 612 * <li>then by detection and</li> 613 * <li>otherwise take the first configured default locale for this resource</li> 614 * </ul> 615 * 616 * @param cms the current CmsObject 617 * @param resource the resource to get the content locales for 618 * @param extraction the extraction result 619 * 620 * @return the determined locales for the given resource 621 */ 622 protected List<Locale> getContentLocales(CmsObject cms, CmsResource resource, I_CmsExtractionResult extraction) { 623 624 // try to detect locale by filename 625 Locale detectedLocale = CmsStringUtil.getLocaleForName(resource.getRootPath()); 626 if (!OpenCms.getLocaleManager().getAvailableLocales(cms, resource).contains(detectedLocale)) { 627 detectedLocale = null; 628 } 629 // try to detect locale by language detector 630 if (getIndex().isLanguageDetection() 631 && (detectedLocale == null) 632 && (extraction != null) 633 && (extraction.getContent() != null)) { 634 detectedLocale = CmsStringUtil.getLocaleForText(extraction.getContent()); 635 } 636 // take the detected locale or use the first configured default locale for this resource 637 List<Locale> result = new ArrayList<Locale>(); 638 if (detectedLocale != null) { 639 // take the found locale 640 result.add(detectedLocale); 641 } else { 642 643 // take all locales set via locale-available or the configured default locales as fall-back for this resource 644 result.addAll(OpenCms.getLocaleManager().getAvailableLocales(cms, resource)); 645 LOG.debug(Messages.get().getBundle().key(Messages.LOG_LANGUAGE_DETECTION_FAILED_1, resource)); 646 } 647 return result; 648 } 649 650 /** 651 * Returns the search field mappings declared within the XSD.<p> 652 * 653 * @param cms the CmsObject 654 * @param resource the resource 655 * 656 * @return the fields to map 657 */ 658 protected Set<CmsSearchField> getXSDMappings(CmsObject cms, CmsResource resource) { 659 660 try { 661 if (CmsResourceTypeXmlContent.isXmlContent(resource)) { 662 I_CmsXmlContentHandler handler = CmsXmlContentDefinition.getContentHandlerForResource(cms, resource); 663 if ((handler != null) && !handler.getSearchFields().isEmpty()) { 664 return handler.getSearchFields(); 665 } 666 } 667 } catch (CmsException e) { 668 LOG.error(e.getMessage(), e); 669 } 670 return null; 671 } 672 673 /** 674 * Returns the search field mappings declared within the XSD that should be applied to the container page.<p> 675 * 676 * @param cms the CmsObject 677 * @param resource the resource 678 * 679 * @return the fields to map 680 */ 681 protected Set<CmsSearchField> getXSDMappingsForPage(CmsObject cms, CmsResource resource) { 682 683 try { 684 if (CmsResourceTypeXmlContent.isXmlContent(resource)) { 685 I_CmsXmlContentHandler handler = CmsXmlContentDefinition.getContentHandlerForResource(cms, resource); 686 if ((handler != null) && !handler.getSearchFieldsForPage().isEmpty()) { 687 return handler.getSearchFieldsForPage(); 688 } 689 } 690 } catch (CmsException e) { 691 LOG.error(e.getMessage(), e); 692 } 693 return null; 694 } 695 696 /** 697 * Adds additional fields to this field configuration.<p> 698 */ 699 private void addAdditionalFields() { 700 701 /* 702 * Add fields from opencms-search.xml (Lucene fields) 703 */ 704 for (CmsSearchField field : getFields()) { 705 if (field instanceof CmsLuceneField) { 706 CmsSolrField newSolrField = new CmsSolrField((CmsLuceneField)field); 707 m_solrFields.put(newSolrField.getName(), newSolrField); 708 } 709 } 710 711 /* 712 * Add the content fields (multiple for contents with more than one locale) 713 */ 714 // add the content_<locale> fields to this configuration 715 CmsSolrField solrField = new CmsSolrField(CmsSearchField.FIELD_CONTENT, null, null, null); 716 solrField.addMapping( 717 new CmsSearchFieldMapping(CmsSearchFieldMappingType.CONTENT, CmsSearchField.FIELD_CONTENT)); 718 m_solrFields.put(solrField.getName(), solrField); 719 for (Locale locale : OpenCms.getLocaleManager().getAvailableLocales()) { 720 solrField = new CmsSolrField( 721 CmsSearchFieldConfiguration.getLocaleExtendedName(CmsSearchField.FIELD_CONTENT, locale), 722 Collections.singletonList(locale.toString() + CmsSearchField.FIELD_EXCERPT), 723 locale, 724 null); 725 solrField.addMapping( 726 new CmsSearchFieldMapping(CmsSearchFieldMappingType.CONTENT, CmsSearchField.FIELD_CONTENT)); 727 m_solrFields.put(solrField.getName(), solrField); 728 } 729 730 /* 731 * Fields filled within appendFields 732 */ 733 CmsSolrField sfield = new CmsSolrField(CmsSearchField.FIELD_MIMETYPE, null, null, null); 734 m_solrFields.put(sfield.getName(), sfield); 735 736 sfield = new CmsSolrField(CmsSearchField.FIELD_FILENAME, null, null, null); 737 m_solrFields.put(sfield.getName(), sfield); 738 739 sfield = new CmsSolrField(CmsSearchField.FIELD_VERSION, null, null, null); 740 m_solrFields.put(sfield.getName(), sfield); 741 742 sfield = new CmsSolrField(CmsSearchField.FIELD_SEARCH_CHANNEL, null, null, null); 743 m_solrFields.put(sfield.getName(), sfield); 744 745 /* 746 * Fields with mapping 747 */ 748 sfield = new CmsSolrField(CmsSearchField.FIELD_STATE, null, null, null); 749 CmsSearchFieldMapping map = new CmsSearchFieldMapping( 750 CmsSearchFieldMappingType.ATTRIBUTE, 751 CmsSearchField.FIELD_STATE); 752 sfield.addMapping(map); 753 m_solrFields.put(sfield.getName(), sfield); 754 755 sfield = new CmsSolrField(CmsSearchField.FIELD_USER_LAST_MODIFIED, null, null, null); 756 map = new CmsSearchFieldMapping(CmsSearchFieldMappingType.ATTRIBUTE, CmsSearchField.FIELD_USER_LAST_MODIFIED); 757 sfield.addMapping(map); 758 m_solrFields.put(sfield.getName(), sfield); 759 760 sfield = new CmsSolrField(CmsSearchField.FIELD_USER_CREATED, null, null, null); 761 map = new CmsSearchFieldMapping(CmsSearchFieldMappingType.ATTRIBUTE, CmsSearchField.FIELD_USER_CREATED); 762 sfield.addMapping(map); 763 m_solrFields.put(sfield.getName(), sfield); 764 765 sfield = new CmsSolrField(CmsSearchField.FIELD_META, null, null, null); 766 map = new CmsSearchFieldMapping(CmsSearchFieldMappingType.PROPERTY, CmsPropertyDefinition.PROPERTY_TITLE); 767 sfield.addMapping(map); 768 map = new CmsSearchFieldMapping(CmsSearchFieldMappingType.PROPERTY, CmsPropertyDefinition.PROPERTY_DESCRIPTION); 769 sfield.addMapping(map); 770 map = new CmsSearchFieldMapping(CmsSearchFieldMappingType.ATTRIBUTE, I_CmsXmlConfiguration.A_NAME); 771 sfield.addMapping(map); 772 m_solrFields.put(sfield.getName(), sfield); 773 774 sfield = new CmsSolrField(CmsSearchField.FIELD_SEARCH_EXCLUDE, null, null, null); 775 map = new CmsSearchFieldMapping( 776 CmsSearchFieldMappingType.PROPERTY_SEARCH, 777 CmsPropertyDefinition.PROPERTY_SEARCH_EXCLUDE); 778 sfield.addMapping(map); 779 m_solrFields.put(sfield.getName(), sfield); 780 781 } 782 783 /** 784 * Adds multiple fields to the document that are used to search in by the list app. 785 * 786 * <p>The fields are: 787 * <ul> 788 * <li>description_{locale}</li> 789 * <li>keywords_{locale}</li> 790 * </ul> 791 * for each of the locales the document is available in.</p> 792 * 793 * @param document the document to index with all other fields already added. 794 * @param cms the current context 795 * @param resource the resource that is indexed 796 * @param properties the direct properties of the resource 797 * @return the document extended by the fields used by the list. 798 */ 799 800 private I_CmsSearchDocument appendFieldsForListSearch( 801 I_CmsSearchDocument document, 802 CmsObject cms, 803 CmsResource resource) { 804 805 List<String> locales = document.getMultivaluedFieldAsStringList(CmsSearchField.FIELD_CONTENT_LOCALES); 806 for (String locale : locales) { 807 fillLocalizedFieldWithPropertyFallbacks( 808 cms, 809 document, 810 resource, 811 locale, 812 CmsSearchField.FIELD_DESCRIPTION, 813 CmsPropertyDefinition.PROPERTY_DESCRIPTION); 814 fillLocalizedFieldWithPropertyFallbacks( 815 cms, 816 document, 817 resource, 818 locale, 819 CmsSearchField.FIELD_KEYWORDS, 820 CmsPropertyDefinition.PROPERTY_KEYWORDS); 821 } 822 823 return document; 824 825 } 826 827 /** 828 * Adds multiple fields to the document that are used for the sort options in the list app. 829 * 830 * <p>The fields are: 831 * <ul> 832 * <li>instancedate_dt</li> 833 * <li>instancedatecurrenttill_dt</li> 834 * <li>instancedaterange_dr</li> 835 * <li>disptitle_s</li> 836 * <li>disporder_i</li> 837 * </ul> 838 * and localized versions for each content locale.</p> 839 * 840 * @param document the document to index with all other fields already added. 841 * @return the document extended by the fields used by the list. 842 */ 843 private I_CmsSearchDocument appendFieldsForListSortOptions(I_CmsSearchDocument document) { 844 845 // add non-localized fields 846 // add instance date 847 String fieldName = CmsSearchField.FIELD_INSTANCEDATE + CmsSearchField.FIELD_POSTFIX_DATE; 848 Date instanceDate = document.getFieldValueAsDate(fieldName); 849 if ((null == instanceDate) || (instanceDate.getTime() == 0)) { 850 String instanceDateCopyField = document.getFieldValueAsString( 851 CmsPropertyDefinition.PROPERTY_INSTANCEDATE_COPYFIELD + CmsSearchField.FIELD_DYNAMIC_PROPERTIES); 852 if (null != instanceDateCopyField) { 853 instanceDate = document.getFieldValueAsDate(instanceDateCopyField); 854 } 855 if ((null == instanceDate) || (instanceDate.getTime() == 0)) { 856 instanceDate = document.getFieldValueAsDate(CmsSearchField.FIELD_DATE_RELEASED); 857 } 858 if ((null == instanceDate) || (instanceDate.getTime() == 0)) { 859 instanceDate = document.getFieldValueAsDate(CmsSearchField.FIELD_DATE_LASTMODIFIED); 860 } 861 document.addDateField(fieldName, instanceDate.getTime(), false); 862 } 863 // Set instancedaterange_dr 864 fieldName = CmsSearchField.FIELD_INSTANCEDATE_RANGE + CmsSearchField.FIELD_POSTFIX_DATE_RANGE; 865 String instanceDateString = document.getFieldValueAsString( 866 CmsSearchField.FIELD_INSTANCEDATE + CmsSearchField.FIELD_POSTFIX_DATE); 867 String instanceDateRangeString = "[" + instanceDateString + " TO " + instanceDateString + "]"; 868 ((SolrInputDocument)document.getDocument()).setField(fieldName, instanceDateRangeString); 869 // Set instancedatecurrenttill_dt to instancedate_dt if not set yet 870 fieldName = CmsSearchField.FIELD_INSTANCEDATE_CURRENT_TILL + CmsSearchField.FIELD_POSTFIX_DATE; 871 Date instanceDateCurrentTill = document.getFieldValueAsDate(fieldName); 872 if ((null == instanceDateCurrentTill) || (instanceDateCurrentTill.getTime() == 0)) { 873 document.addDateField(fieldName, instanceDate.getTime(), false); 874 } 875 // add disp-title field 876 fieldName = CmsSearchField.FIELD_DISPTITLE + CmsSearchField.FIELD_POSTFIX_SORT; 877 String dispTitle = document.getFieldValueAsString(fieldName); 878 if (null == dispTitle) { 879 dispTitle = document.getFieldValueAsString( 880 CmsPropertyDefinition.PROPERTY_TITLE + CmsSearchField.FIELD_DYNAMIC_PROPERTIES_DIRECT); 881 if (null == dispTitle) { 882 dispTitle = document.getFieldValueAsString(CmsSearchField.FIELD_FILENAME); 883 } 884 document.addSearchField(new CmsSolrField(fieldName, null, null, null), dispTitle); 885 } 886 887 // add disp-order field 888 fieldName = CmsSearchField.FIELD_DISPORDER + CmsSearchField.FIELD_POSTFIX_INT; 889 String dispOrder = document.getFieldValueAsString(fieldName); 890 if (null == dispOrder) { 891 dispOrder = document.getFieldValueAsString( 892 CmsPropertyDefinition.PROPERTY_DISPLAY_ORDER + CmsSearchField.FIELD_DYNAMIC_PROPERTIES); 893 if (null != dispOrder) { 894 try { 895 int o = Integer.parseInt(dispOrder); 896 dispOrder = String.valueOf(o); 897 } catch (NullPointerException | NumberFormatException e) { 898 LOG.warn( 899 "Property " 900 + CmsPropertyDefinition.PROPERTY_DISPLAY_ORDER 901 + " contains not a valid integer number."); 902 dispOrder = "0"; 903 } 904 } else { 905 dispOrder = "0"; 906 } 907 document.addSearchField(new CmsSolrField(fieldName, null, null, null), dispOrder); 908 } 909 910 // add localized fields 911 for (String locale : document.getMultivaluedFieldAsStringList(CmsSearchField.FIELD_CONTENT_LOCALES)) { 912 // instance date 913 fieldName = CmsSearchField.FIELD_INSTANCEDATE + "_" + locale + CmsSearchField.FIELD_POSTFIX_DATE; 914 Date localeInstanceDate = document.getFieldValueAsDate(fieldName); 915 if ((null == localeInstanceDate) || (localeInstanceDate.getTime() == 0)) { 916 localeInstanceDate = instanceDate; 917 document.addDateField(fieldName, localeInstanceDate.getTime(), false); 918 } 919 // instance date range 920 fieldName = CmsSearchField.FIELD_INSTANCEDATE_RANGE 921 + "_" 922 + locale 923 + CmsSearchField.FIELD_POSTFIX_DATE_RANGE; 924 String localeInstanceDateString = document.getFieldValueAsString( 925 CmsSearchField.FIELD_INSTANCEDATE + "_" + locale + CmsSearchField.FIELD_POSTFIX_DATE); 926 String localeInstanceDateRangeString = "[" 927 + localeInstanceDateString 928 + " TO " 929 + localeInstanceDateString 930 + "]"; 931 ((SolrInputDocument)document.getDocument()).setField(fieldName, localeInstanceDateRangeString); 932 // Set instancedatecurrenttill_dt to instancedate_dt if not set yet 933 fieldName = CmsSearchField.FIELD_INSTANCEDATE_CURRENT_TILL 934 + "_" 935 + locale 936 + CmsSearchField.FIELD_POSTFIX_DATE; 937 Date localeInstanceDateCurrentTill = document.getFieldValueAsDate(fieldName); 938 if ((null == localeInstanceDateCurrentTill) || (localeInstanceDateCurrentTill.getTime() == 0)) { 939 document.addDateField(fieldName, localeInstanceDate.getTime(), false); 940 } 941 // disp-title field for title display and sorting 942 fieldName = CmsSearchField.FIELD_DISPTITLE + "_" + locale + CmsSearchField.FIELD_POSTFIX_SORT; 943 if (null == document.getFieldValueAsString(fieldName)) { 944 String localizedTitle = document.getFieldValueAsString( 945 CmsPropertyDefinition.PROPERTY_TITLE 946 + "_" 947 + locale 948 + CmsSearchField.FIELD_DYNAMIC_PROPERTIES_DIRECT); 949 document.addSearchField( 950 new CmsSolrField(fieldName, null, null, null), 951 null == localizedTitle ? dispTitle : localizedTitle); 952 } 953 // disp-order field 954 fieldName = CmsSearchField.FIELD_DISPORDER + "_" + locale + CmsSearchField.FIELD_POSTFIX_INT; 955 if (null == document.getFieldValueAsString(fieldName)) { 956 String localizedOrder = document.getFieldValueAsString( 957 CmsPropertyDefinition.PROPERTY_DISPLAY_ORDER 958 + "_" 959 + locale 960 + CmsSearchField.FIELD_DYNAMIC_PROPERTIES); 961 if (null != localizedOrder) { 962 try { 963 int o = Integer.parseInt(localizedOrder); 964 localizedOrder = String.valueOf(o); 965 } catch (NullPointerException | NumberFormatException e) { 966 LOG.warn( 967 "Property " 968 + CmsPropertyDefinition.PROPERTY_DISPLAY_ORDER 969 + "_" 970 + locale 971 + " contains not a valid integer number."); 972 } 973 } 974 document.addSearchField( 975 new CmsSolrField(fieldName, null, null, null), 976 null == localizedOrder ? dispOrder : localizedOrder); 977 } 978 } 979 980 return document; 981 } 982 983 /** 984 * Copy the content and the title property of the document to a spell field / a language specific spell field. 985 * @param document the document that gets extended by the spell fields. 986 */ 987 private void appendSpellFields(I_CmsSearchDocument document) { 988 989 /* 990 * Add the content fields (multiple for contents with more than one locale) 991 */ 992 // add the content_<locale> fields to this configuration 993 String title = document.getFieldValueAsString( 994 CmsPropertyDefinition.PROPERTY_TITLE + CmsSearchField.FIELD_DYNAMIC_PROPERTIES_DIRECT); 995 document.addSearchField( 996 new CmsSolrField(CmsSearchField.FIELD_SPELL, null, null, null), 997 document.getFieldValueAsString(CmsSearchField.FIELD_CONTENT) + "\n" + title); 998 for (Locale locale : OpenCms.getLocaleManager().getAvailableLocales()) { 999 document.addSearchField( 1000 new CmsSolrField(locale + "_" + CmsSearchField.FIELD_SPELL, null, locale, null), 1001 document.getFieldValueAsString( 1002 CmsSearchFieldConfiguration.getLocaleExtendedName(CmsSearchField.FIELD_CONTENT, locale)) 1003 + "\n" 1004 + title); 1005 } 1006 } 1007 1008 /** 1009 * Fills the field with the name extended by "_{locale}" with the property value, if the field is not already present in the document. 1010 * 1011 * If the localized property is not present, the default property value is used to get the value. 1012 * Properties set on the indexed resource itself are always preferred. If no suitable property is present, the parent properties are used when given. 1013 * 1014 * @param cms the current context 1015 * @param document the document to add the field to. 1016 * @param resource the currently indexed resource. 1017 * @param locale the locale to add the field for. 1018 * @param fieldName the name of the field to add (without locale postfix) 1019 * @param propertyName the property name of the property to get the value from (without locale postfix) 1020 */ 1021 private void fillLocalizedFieldWithPropertyFallbacks( 1022 CmsObject cms, 1023 I_CmsSearchDocument document, 1024 CmsResource resource, 1025 String locale, 1026 String fieldName, 1027 String propertyName) { 1028 1029 Locale l = CmsLocaleManager.getLocale(locale); 1030 String localeExtendedFieldName = getLocaleExtendedName(fieldName, locale); 1031 1032 if (!document.getFieldNames().contains(localeExtendedFieldName)) { 1033 String value = CmsVfsUtil.readPropertyValueWithFolderFallbackForDefaultFiles( 1034 cms, 1035 resource, 1036 propertyName, 1037 l); 1038 if (value != null) { 1039 document.addSearchField(new CmsSolrField(localeExtendedFieldName, null, null, null), value); 1040 } 1041 } 1042 1043 } 1044 1045 /** 1046 * Returns <code>true</code> if at least one of the index sources uses a VFS indexer that is able 1047 * to index locale dependent resources.<p> 1048 * 1049 * TODO This should be improved somehow 1050 * 1051 * @return <code>true</code> if this field configuration should resolve locale dependencies 1052 */ 1053 private boolean hasLocaleDependencies() { 1054 1055 for (CmsSearchIndexSource source : getIndex().getSources()) { 1056 if (source.getIndexer().isLocaleDependenciesEnable()) { 1057 return true; 1058 } 1059 } 1060 return false; 1061 } 1062}