001/* 002 * File : $Source$ 003 * Date : $Date$ 004 * Version: $Revision$ 005 * 006 * This library is part of OpenCms - 007 * the Open Source Content Management System 008 * 009 * Copyright (C) 2002 - 2009 Alkacon Software (http://www.alkacon.com) 010 * 011 * This library is free software; you can redistribute it and/or 012 * modify it under the terms of the GNU Lesser General Public 013 * License as published by the Free Software Foundation; either 014 * version 2.1 of the License, or (at your option) any later version. 015 * 016 * This library is distributed in the hope that it will be useful, 017 * but WITHOUT ANY WARRANTY; without even the implied warranty of 018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 019 * Lesser General Public License for more details. 020 * 021 * For further information about Alkacon Software, please see the 022 * company website: http://www.alkacon.com 023 * 024 * For further information about OpenCms, please see the 025 * project website: http://www.opencms.org 026 * 027 * You should have received a copy of the GNU Lesser General Public 028 * License along with this library; if not, write to the Free Software 029 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 030 */ 031 032package org.opencms.search.solr; 033 034import org.opencms.ade.containerpage.CmsDetailOnlyContainerUtil; 035import org.opencms.configuration.I_CmsXmlConfiguration; 036import org.opencms.file.CmsFile; 037import org.opencms.file.CmsObject; 038import org.opencms.file.CmsProperty; 039import org.opencms.file.CmsPropertyDefinition; 040import org.opencms.file.CmsResource; 041import org.opencms.file.types.CmsResourceTypeJsp; 042import org.opencms.file.types.CmsResourceTypeXmlContainerPage; 043import org.opencms.file.types.CmsResourceTypeXmlContent; 044import org.opencms.file.types.CmsResourceTypeXmlPage; 045import org.opencms.i18n.CmsLocaleManager; 046import org.opencms.loader.CmsResourceManager; 047import org.opencms.main.CmsException; 048import org.opencms.main.CmsLog; 049import org.opencms.main.OpenCms; 050import org.opencms.search.CmsSearchIndexSource; 051import org.opencms.search.CmsSearchUtil; 052import org.opencms.search.I_CmsSearchDocument; 053import org.opencms.search.documents.CmsDocumentDependency; 054import org.opencms.search.extractors.I_CmsExtractionResult; 055import org.opencms.search.fields.CmsLuceneField; 056import org.opencms.search.fields.CmsSearchField; 057import org.opencms.search.fields.CmsSearchFieldConfiguration; 058import org.opencms.search.fields.CmsSearchFieldMapping; 059import org.opencms.search.fields.CmsSearchFieldMappingType; 060import org.opencms.search.fields.I_CmsSearchFieldMapping; 061import org.opencms.util.CmsStringUtil; 062import org.opencms.util.CmsVfsUtil; 063import org.opencms.xml.CmsXmlContentDefinition; 064import org.opencms.xml.containerpage.CmsContainerElementBean; 065import org.opencms.xml.containerpage.CmsContainerPageBean; 066import org.opencms.xml.containerpage.CmsXmlContainerPage; 067import org.opencms.xml.containerpage.CmsXmlContainerPageFactory; 068import org.opencms.xml.content.I_CmsXmlContentHandler; 069 070import java.util.ArrayList; 071import java.util.Arrays; 072import java.util.Collection; 073import java.util.Collections; 074import java.util.Date; 075import java.util.HashMap; 076import java.util.List; 077import java.util.Locale; 078import java.util.Map; 079import java.util.Set; 080 081import org.apache.commons.logging.Log; 082import org.apache.solr.common.SolrInputDocument; 083 084/** 085 * The search field implementation for Solr.<p> 086 * 087 * @since 8.5.0 088 */ 089public class CmsSolrFieldConfiguration extends CmsSearchFieldConfiguration { 090 091 /** The log object for this class. */ 092 private static final Log LOG = CmsLog.getLog(CmsSolrFieldConfiguration.class); 093 094 /** The content locale for the indexed document is stored in order to save performance. */ 095 private Collection<Locale> m_contentLocales; 096 097 /** A list of Solr fields. */ 098 private Map<String, CmsSolrField> m_solrFields = new HashMap<String, CmsSolrField>(); 099 100 /** 101 * Default constructor.<p> 102 */ 103 public CmsSolrFieldConfiguration() { 104 105 super(); 106 } 107 108 /** 109 * Adds the additional fields to the configuration, if they are not null.<p> 110 * 111 * @param additionalFields the additional fields to add 112 */ 113 public void addAdditionalFields(List<CmsSolrField> additionalFields) { 114 115 if (additionalFields != null) { 116 for (CmsSolrField solrField : additionalFields) { 117 m_solrFields.put(solrField.getName(), solrField); 118 } 119 } 120 } 121 122 /** 123 * Returns all configured Solr fields.<p> 124 * 125 * @return all configured Solr fields 126 */ 127 public Map<String, CmsSolrField> getSolrFields() { 128 129 return Collections.unmodifiableMap(m_solrFields); 130 } 131 132 /** 133 * @see org.opencms.search.fields.CmsSearchFieldConfiguration#init() 134 */ 135 @Override 136 public void init() { 137 138 super.init(); 139 addAdditionalFields(); 140 } 141 142 /** 143 * @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendAdditionalValuesToDcoument(org.opencms.search.I_CmsSearchDocument, org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.extractors.I_CmsExtractionResult, java.util.List, java.util.List) 144 */ 145 @Override 146 protected I_CmsSearchDocument appendAdditionalValuesToDcoument( 147 I_CmsSearchDocument document, 148 CmsObject cms, 149 CmsResource resource, 150 I_CmsExtractionResult extractionResult, 151 List<CmsProperty> properties, 152 List<CmsProperty> propertiesSearched) { 153 154 String mimeType = OpenCms.getResourceManager().getMimeType(resource.getName(), null); 155 if (mimeType != null) { 156 document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_MIMETYPE), mimeType); 157 } 158 159 document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_FILENAME), resource.getName()); 160 161 document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_VERSION), "" + resource.getVersion()); 162 163 try { 164 if (CmsResourceTypeXmlContent.isXmlContent(resource)) { 165 I_CmsXmlContentHandler handler = CmsXmlContentDefinition.getContentHandlerForResource(cms, resource); 166 if ((handler != null) && handler.isContainerPageOnly()) { 167 if (document.getDocument() instanceof SolrInputDocument) { 168 SolrInputDocument doc = (SolrInputDocument)document.getDocument(); 169 doc.removeField(CmsSearchField.FIELD_SEARCH_EXCLUDE); 170 } else { 171 //TODO: Warning - but should not happen. 172 } 173 document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_SEARCH_EXCLUDE), "true"); 174 } 175 } 176 } catch (CmsException e) { 177 LOG.error(e.getMessage(), e); 178 } 179 180 List<String> searchExcludeOptions = document.getMultivaluedFieldAsStringList( 181 CmsSearchField.FIELD_SEARCH_EXCLUDE); 182 if ((searchExcludeOptions == null) || searchExcludeOptions.isEmpty()) { 183 document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_SEARCH_EXCLUDE), "false"); 184 } 185 if (resource.getRootPath().startsWith("/system") 186 || (CmsResourceTypeJsp.getJSPTypeId() == resource.getTypeId())) { 187 document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_SEARCH_CHANNEL), "gallery"); 188 } else { 189 document.addSearchField(m_solrFields.get(CmsSearchField.FIELD_SEARCH_CHANNEL), "content"); 190 } 191 192 document = appendFieldsForListSortOptions(document); 193 194 document = appendFieldsForListSearch(document, cms, resource); 195 196 if (resource.getRootPath().startsWith(OpenCms.getSiteManager().getSharedFolder()) 197 || (null != OpenCms.getSiteManager().getSiteRoot(resource.getRootPath()))) { 198 appendSpellFields(document); 199 } 200 201 return document; 202 } 203 204 /** 205 * @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendDates(org.opencms.search.I_CmsSearchDocument, org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.extractors.I_CmsExtractionResult, java.util.List, java.util.List) 206 */ 207 @Override 208 protected I_CmsSearchDocument appendDates( 209 I_CmsSearchDocument document, 210 CmsObject cms, 211 CmsResource resource, 212 I_CmsExtractionResult extractionResult, 213 List<CmsProperty> properties, 214 List<CmsProperty> propertiesSearched) { 215 216 document.addDateField(CmsSearchField.FIELD_DATE_CREATED, resource.getDateCreated(), false); 217 document.addDateField(CmsSearchField.FIELD_DATE_LASTMODIFIED, resource.getDateLastModified(), false); 218 document.addDateField(CmsSearchField.FIELD_DATE_CONTENT, resource.getDateContent(), false); 219 document.addDateField(CmsSearchField.FIELD_DATE_RELEASED, resource.getDateReleased(), false); 220 document.addDateField(CmsSearchField.FIELD_DATE_EXPIRED, resource.getDateExpired(), false); 221 222 return document; 223 } 224 225 /** 226 * @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendFieldMapping(org.opencms.search.I_CmsSearchDocument, org.opencms.search.fields.CmsSearchField, org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.extractors.I_CmsExtractionResult, java.util.List, java.util.List) 227 */ 228 @Override 229 protected I_CmsSearchDocument appendFieldMapping( 230 I_CmsSearchDocument document, 231 CmsSearchField sfield, 232 CmsObject cms, 233 CmsResource resource, 234 I_CmsExtractionResult extractionResult, 235 List<CmsProperty> properties, 236 List<CmsProperty> propertiesSearched) { 237 238 CmsSolrField field = (CmsSolrField)sfield; 239 try { 240 StringBuffer text = new StringBuffer(); 241 for (I_CmsSearchFieldMapping mapping : field.getMappings()) { 242 // loop over the mappings of the given field 243 if (extractionResult != null) { 244 String mapResult = null; 245 if ((field.getLocale() != null) && mapping.getType().equals(CmsSearchFieldMappingType.CONTENT)) { 246 // this is a localized content field, try to retrieve the localized content extraction 247 mapResult = extractionResult.getContent(field.getLocale()); 248 if (mapResult == null) { 249 // no localized content extracted 250 if (!(CmsResourceTypeXmlContent.isXmlContent(resource) 251 || CmsResourceTypeXmlPage.isXmlPage(resource))) { 252 // the resource is no XML content nor an XML page 253 if ((m_contentLocales != null) && m_contentLocales.contains(field.getLocale())) { 254 // the resource to get the extracted content for has the locale of this field, 255 // so store the extraction content into this field 256 mapResult = extractionResult.getContent(); 257 } 258 } 259 } 260 } else { 261 // this is not a localized content field, just perform the regular mapping 262 mapResult = mapping.getStringValue( 263 cms, 264 resource, 265 extractionResult, 266 properties, 267 propertiesSearched); 268 } 269 if (text.length() > 0) { 270 text.append('\n'); 271 } 272 if (mapResult != null) { 273 text.append(mapResult); 274 } else if (mapping.getDefaultValue() != null) { 275 // no mapping result found, but a default is configured 276 text.append(mapping.getDefaultValue()); 277 } 278 } else if (mapping.getStringValue( 279 cms, 280 resource, 281 extractionResult, 282 properties, 283 propertiesSearched) != null) { 284 String value = mapping.getStringValue( 285 cms, 286 resource, 287 extractionResult, 288 properties, 289 propertiesSearched); 290 if (value != null) { 291 document.addSearchField(field, value); 292 } 293 } 294 } 295 if ((text.length() <= 0) && (field.getDefaultValue() != null)) { 296 text.append(field.getDefaultValue()); 297 } 298 if (text.length() > 0) { 299 document.addSearchField(field, text.toString()); 300 } 301 } catch (Exception e) { 302 // nothing to do just log 303 LOG.error(e.getLocalizedMessage(), e); 304 } 305 return document; 306 } 307 308 /** 309 * @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendFieldMappings(org.opencms.search.I_CmsSearchDocument, org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.extractors.I_CmsExtractionResult, java.util.List, java.util.List) 310 */ 311 @Override 312 protected I_CmsSearchDocument appendFieldMappings( 313 I_CmsSearchDocument document, 314 CmsObject cms, 315 CmsResource resource, 316 I_CmsExtractionResult extractionResult, 317 List<CmsProperty> properties, 318 List<CmsProperty> propertiesSearched) { 319 320 List<String> systemFields = new ArrayList<String>(); 321 // append field mappings directly stored in the extraction result 322 if (null != extractionResult) { 323 Map<String, String> fieldMappings = extractionResult.getFieldMappings(); 324 for (String fieldName : fieldMappings.keySet()) { 325 String value = fieldMappings.get(fieldName); 326 CmsSolrField f = new CmsSolrField(fieldName, null, null, null); 327 document.addSearchField(f, value); 328 systemFields.add(fieldName); 329 } 330 } 331 332 Set<CmsSearchField> mappedFields = getXSDMappings(cms, resource); 333 if (mappedFields != null) { 334 for (CmsSearchField field : mappedFields) { 335 if (!systemFields.contains(field.getName())) { 336 document = appendFieldMapping( 337 document, 338 field, 339 cms, 340 resource, 341 extractionResult, 342 properties, 343 propertiesSearched); 344 } else { 345 LOG.error( 346 Messages.get().getBundle().key( 347 Messages.LOG_SOLR_ERR_MAPPING_TO_INTERNALLY_USED_FIELD_2, 348 resource.getRootPath(), 349 field.getName())); 350 } 351 } 352 } 353 354 // add field mappings from elements of a container page 355 if (CmsResourceTypeXmlContainerPage.isContainerPage(resource)) { 356 document = appendFieldMappingsFromElementsOnThePage(document, cms, resource, systemFields); 357 } else { 358 try { 359 for (CmsResource detailOnlyPage : CmsDetailOnlyContainerUtil.getDetailOnlyResources(cms, resource)) { 360 try { 361 document = appendFieldMappingsFromElementsOnThePage( 362 document, 363 cms, 364 detailOnlyPage, 365 systemFields); 366 } catch (Throwable t) { 367 LOG.warn( 368 Messages.get().getBundle().key( 369 Messages.LOG_SOLR_WARN_DETAIL_ONLY_PAGE_MAPPINGS_FOR_PAGE_2, 370 null == resource ? "null" : resource.getRootPath(), 371 null == detailOnlyPage ? "null" : detailOnlyPage.getRootPath()), 372 t); 373 } 374 } 375 } catch (Throwable t) { 376 LOG.warn( 377 Messages.get().getBundle().key( 378 Messages.LOG_SOLR_WARN_DETAIL_ONLY_PAGE_MAPPINGS_1, 379 null == resource ? "null" : resource.getRootPath()), 380 t); 381 } 382 } 383 384 for (CmsSolrField field : m_solrFields.values()) { 385 document = appendFieldMapping( 386 document, 387 field, 388 cms, 389 resource, 390 extractionResult, 391 properties, 392 propertiesSearched); 393 } 394 395 return document; 396 } 397 398 /** 399 * Adds search fields from elements on a container page to a container page's document. 400 * @param document The document for the container page 401 * @param cms The current CmsObject 402 * @param resource The resource of the container page 403 * @param systemFields The list of field names for fields where mappings to should be discarded, since these fields are used system internally. 404 * @return the manipulated document 405 */ 406 protected I_CmsSearchDocument appendFieldMappingsFromElementsOnThePage( 407 I_CmsSearchDocument document, 408 CmsObject cms, 409 CmsResource resource, 410 List<String> systemFields) { 411 412 try { 413 CmsFile file = cms.readFile(resource); 414 CmsXmlContainerPage containerPage = CmsXmlContainerPageFactory.unmarshal(cms, file); 415 CmsContainerPageBean containerBean = containerPage.getContainerPage(cms); 416 if (containerBean != null) { 417 for (CmsContainerElementBean element : containerBean.getElements()) { 418 element.initResource(cms); 419 CmsResource elemResource = element.getResource(); 420 Set<CmsSearchField> mappedFields = getXSDMappingsForPage(cms, elemResource); 421 if (mappedFields != null) { 422 423 for (CmsSearchField field : mappedFields) { 424 if (!systemFields.contains(field.getName())) { 425 try { 426 I_CmsExtractionResult extractionResult = CmsSolrDocumentXmlContent.extractXmlContent( 427 cms, 428 elemResource, 429 getIndex()); 430 document = appendFieldMapping( 431 document, 432 field, 433 cms, 434 elemResource, 435 extractionResult, 436 cms.readPropertyObjects(resource, false), 437 cms.readPropertyObjects(resource, true)); 438 } catch (Exception e) { 439 LOG.error( 440 Messages.get().getBundle().key( 441 Messages.LOG_SOLR_ERR_MAPPING_UNREADABLE_CONTENT_3, 442 elemResource.getRootPath(), 443 field.getName(), 444 resource.getRootPath()), 445 e); 446 } 447 } else { 448 LOG.error( 449 Messages.get().getBundle().key( 450 Messages.LOG_SOLR_ERR_MAPPING_TO_INTERNALLY_USED_FIELD_3, 451 elemResource.getRootPath(), 452 field.getName(), 453 resource.getRootPath())); 454 } 455 } 456 } 457 } 458 } 459 } catch (CmsException e) { 460 // Should be thrown if element on the page does not exist anymore - this is possible, but not necessarily an error. 461 // Hence, just notice it in the debug log. 462 if (LOG.isDebugEnabled()) { 463 LOG.debug(e.getLocalizedMessage(), e); 464 } 465 } 466 return document; 467 } 468 469 /** 470 * @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendLocales(org.opencms.search.I_CmsSearchDocument, org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.extractors.I_CmsExtractionResult, java.util.List, java.util.List) 471 */ 472 @Override 473 protected I_CmsSearchDocument appendLocales( 474 I_CmsSearchDocument document, 475 CmsObject cms, 476 CmsResource resource, 477 I_CmsExtractionResult extraction, 478 List<CmsProperty> properties, 479 List<CmsProperty> propertiesSearched) { 480 481 // append the resource locales 482 Collection<Locale> resourceLocales = new ArrayList<Locale>(); 483 if ((extraction != null) && (!extraction.getLocales().isEmpty())) { 484 485 CmsResourceManager resMan = OpenCms.getResourceManager(); 486 resourceLocales = extraction.getLocales(); 487 boolean isGroup = false; 488 for (String groupType : Arrays.asList( 489 CmsResourceTypeXmlContainerPage.GROUP_CONTAINER_TYPE_NAME, 490 CmsResourceTypeXmlContainerPage.INHERIT_CONTAINER_TYPE_NAME)) { 491 if (resMan.matchResourceType(groupType, resource.getTypeId())) { 492 isGroup = true; 493 break; 494 } 495 } 496 if (isGroup) { 497 // groups are locale independent, so they have to have *all* locales so they are found for each one 498 m_contentLocales = OpenCms.getLocaleManager().getAvailableLocales(); 499 } else { 500 m_contentLocales = resourceLocales; 501 } 502 } else { 503 // For all other resources add all default locales 504 resourceLocales = OpenCms.getLocaleManager().getDefaultLocales(cms, resource); 505 506 /* 507 * A problem is likely to arise when dealing with multilingual fields: 508 * Only values extracted from XML resources are written into the Solr locale-aware fields (e.g. 509 * "title_<locale>_s"), therefore sorting by them will not work as non-XML (unilingual) resources extract 510 * the information by the resource property facility and will not write to an Solr locale-aware field. 511 * 512 * The following code is used to fix this behavior, at least for "Title". 513 */ 514 515 // Check all passed properties for "Title"... 516 for (final CmsProperty prop : propertiesSearched) { 517 if (prop.getName().equals(CmsPropertyDefinition.PROPERTY_TITLE)) { 518 final String value = prop.getValue(); 519 520 // Write a Solr locale-aware field for every locale the system supports... 521 final List<Locale> availableLocales = OpenCms.getLocaleManager().getAvailableLocales(); 522 for (final Locale locale : availableLocales) { 523 final String lang = locale.getLanguage(); 524 // Don't proceed if a field has already written for this locale. 525 if (!resourceLocales.contains(lang)) { 526 final String effFieldName = CmsSearchFieldConfiguration.getLocaleExtendedName( 527 CmsSearchField.FIELD_TITLE_UNSTORED, 528 locale) + "_s"; 529 530 final CmsSolrField f = new CmsSolrField(effFieldName, null, null, null); 531 document.addSearchField(f, value); 532 } 533 } 534 } 535 } 536 m_contentLocales = getContentLocales(cms, resource, extraction); 537 } 538 539 document.addResourceLocales(resourceLocales); 540 document.addContentLocales(m_contentLocales); 541 542 // append document dependencies if configured 543 if (hasLocaleDependencies()) { 544 CmsDocumentDependency dep = CmsDocumentDependency.load(cms, resource); 545 ((CmsSolrDocument)document).addDocumentDependency(cms, dep); 546 } 547 return document; 548 } 549 550 /** 551 * @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendProperties(org.opencms.search.I_CmsSearchDocument, org.opencms.file.CmsObject, org.opencms.file.CmsResource, org.opencms.search.extractors.I_CmsExtractionResult, java.util.List, java.util.List) 552 */ 553 @Override 554 protected I_CmsSearchDocument appendProperties( 555 I_CmsSearchDocument document, 556 CmsObject cms, 557 CmsResource resource, 558 I_CmsExtractionResult extraction, 559 List<CmsProperty> properties, 560 List<CmsProperty> propertiesSearched) { 561 562 for (CmsProperty prop : propertiesSearched) { 563 if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(prop.getValue())) { 564 String value = CmsSearchUtil.stripHtmlFromPropertyIfNecessary(prop.getName(), prop.getValue()); 565 document.addSearchField( 566 new CmsSolrField(prop.getName() + CmsSearchField.FIELD_DYNAMIC_PROPERTIES, null, null, null), 567 value); 568 569 // Also write the property using the dynamic field '_s' in order to prevent tokenization 570 // of the property. The resulting field is named '<property>_prop_s'. 571 document.addSearchField( 572 new CmsSolrField(prop.getName() + CmsSearchField.FIELD_DYNAMIC_PROPERTIES + "_s", null, null, null), 573 value); 574 } 575 } 576 577 for (CmsProperty prop : properties) { 578 if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(prop.getValue())) { 579 String value = CmsSearchUtil.stripHtmlFromPropertyIfNecessary(prop.getName(), prop.getValue()); 580 document.addSearchField( 581 new CmsSolrField(prop.getName() + CmsSearchField.FIELD_DYNAMIC_PROPERTIES_DIRECT, null, null, null), 582 value); 583 584 // Also write the property using the dynamic field '_s' in order to prevent tokenization 585 // of the property. The resulting field is named '<property>_prop_nosearch_s'. 586 document.addSearchField( 587 new CmsSolrField( 588 prop.getName() + CmsSearchField.FIELD_DYNAMIC_PROPERTIES_DIRECT + "_s", 589 null, 590 null, 591 null), 592 value); 593 } 594 } 595 return document; 596 } 597 598 /** 599 * Retrieves the locales for an content, that is whether an XML content nor an XML page.<p> 600 * 601 * Uses following strategy: 602 * <ul> 603 * <li>first by file name</li> 604 * <li>then by detection and</li> 605 * <li>otherwise take the first configured default locale for this resource</li> 606 * </ul> 607 * 608 * @param cms the current CmsObject 609 * @param resource the resource to get the content locales for 610 * @param extraction the extraction result 611 * 612 * @return the determined locales for the given resource 613 */ 614 protected List<Locale> getContentLocales(CmsObject cms, CmsResource resource, I_CmsExtractionResult extraction) { 615 616 // try to detect locale by filename 617 Locale detectedLocale = CmsStringUtil.getLocaleForName(resource.getRootPath()); 618 if (!OpenCms.getLocaleManager().getAvailableLocales(cms, resource).contains(detectedLocale)) { 619 detectedLocale = null; 620 } 621 // try to detect locale by language detector 622 if (getIndex().isLanguageDetection() 623 && (detectedLocale == null) 624 && (extraction != null) 625 && (extraction.getContent() != null)) { 626 detectedLocale = CmsStringUtil.getLocaleForText(extraction.getContent()); 627 } 628 // take the detected locale or use the first configured default locale for this resource 629 List<Locale> result = new ArrayList<Locale>(); 630 if (detectedLocale != null) { 631 // take the found locale 632 result.add(detectedLocale); 633 } else { 634 635 // take all locales set via locale-available or the configured default locales as fall-back for this resource 636 result.addAll(OpenCms.getLocaleManager().getAvailableLocales(cms, resource)); 637 LOG.debug(Messages.get().getBundle().key(Messages.LOG_LANGUAGE_DETECTION_FAILED_1, resource)); 638 } 639 return result; 640 } 641 642 /** 643 * Returns the search field mappings declared within the XSD.<p> 644 * 645 * @param cms the CmsObject 646 * @param resource the resource 647 * 648 * @return the fields to map 649 */ 650 protected Set<CmsSearchField> getXSDMappings(CmsObject cms, CmsResource resource) { 651 652 try { 653 if (CmsResourceTypeXmlContent.isXmlContent(resource)) { 654 I_CmsXmlContentHandler handler = CmsXmlContentDefinition.getContentHandlerForResource(cms, resource); 655 if ((handler != null) && !handler.getSearchFields().isEmpty()) { 656 return handler.getSearchFields(); 657 } 658 } 659 } catch (CmsException e) { 660 LOG.error(e.getMessage(), e); 661 } 662 return null; 663 } 664 665 /** 666 * Returns the search field mappings declared within the XSD that should be applied to the container page.<p> 667 * 668 * @param cms the CmsObject 669 * @param resource the resource 670 * 671 * @return the fields to map 672 */ 673 protected Set<CmsSearchField> getXSDMappingsForPage(CmsObject cms, CmsResource resource) { 674 675 try { 676 if (CmsResourceTypeXmlContent.isXmlContent(resource)) { 677 I_CmsXmlContentHandler handler = CmsXmlContentDefinition.getContentHandlerForResource(cms, resource); 678 if ((handler != null) && !handler.getSearchFieldsForPage().isEmpty()) { 679 return handler.getSearchFieldsForPage(); 680 } 681 } 682 } catch (CmsException e) { 683 LOG.error(e.getMessage(), e); 684 } 685 return null; 686 } 687 688 /** 689 * Adds additional fields to this field configuration.<p> 690 */ 691 private void addAdditionalFields() { 692 693 /* 694 * Add fields from opencms-search.xml (Lucene fields) 695 */ 696 for (CmsSearchField field : getFields()) { 697 if (field instanceof CmsLuceneField) { 698 CmsSolrField newSolrField = new CmsSolrField((CmsLuceneField)field); 699 m_solrFields.put(newSolrField.getName(), newSolrField); 700 } 701 } 702 703 /* 704 * Add the content fields (multiple for contents with more than one locale) 705 */ 706 // add the content_<locale> fields to this configuration 707 CmsSolrField solrField = new CmsSolrField(CmsSearchField.FIELD_CONTENT, null, null, null); 708 solrField.addMapping( 709 new CmsSearchFieldMapping(CmsSearchFieldMappingType.CONTENT, CmsSearchField.FIELD_CONTENT)); 710 m_solrFields.put(solrField.getName(), solrField); 711 for (Locale locale : OpenCms.getLocaleManager().getAvailableLocales()) { 712 solrField = new CmsSolrField( 713 CmsSearchFieldConfiguration.getLocaleExtendedName(CmsSearchField.FIELD_CONTENT, locale), 714 Collections.singletonList(locale.toString() + CmsSearchField.FIELD_EXCERPT), 715 locale, 716 null); 717 solrField.addMapping( 718 new CmsSearchFieldMapping(CmsSearchFieldMappingType.CONTENT, CmsSearchField.FIELD_CONTENT)); 719 m_solrFields.put(solrField.getName(), solrField); 720 } 721 722 /* 723 * Fields filled within appendFields 724 */ 725 CmsSolrField sfield = new CmsSolrField(CmsSearchField.FIELD_MIMETYPE, null, null, null); 726 m_solrFields.put(sfield.getName(), sfield); 727 728 sfield = new CmsSolrField(CmsSearchField.FIELD_FILENAME, null, null, null); 729 m_solrFields.put(sfield.getName(), sfield); 730 731 sfield = new CmsSolrField(CmsSearchField.FIELD_VERSION, null, null, null); 732 m_solrFields.put(sfield.getName(), sfield); 733 734 sfield = new CmsSolrField(CmsSearchField.FIELD_SEARCH_CHANNEL, null, null, null); 735 m_solrFields.put(sfield.getName(), sfield); 736 737 /* 738 * Fields with mapping 739 */ 740 sfield = new CmsSolrField(CmsSearchField.FIELD_STATE, null, null, null); 741 CmsSearchFieldMapping map = new CmsSearchFieldMapping( 742 CmsSearchFieldMappingType.ATTRIBUTE, 743 CmsSearchField.FIELD_STATE); 744 sfield.addMapping(map); 745 m_solrFields.put(sfield.getName(), sfield); 746 747 sfield = new CmsSolrField(CmsSearchField.FIELD_USER_LAST_MODIFIED, null, null, null); 748 map = new CmsSearchFieldMapping(CmsSearchFieldMappingType.ATTRIBUTE, CmsSearchField.FIELD_USER_LAST_MODIFIED); 749 sfield.addMapping(map); 750 m_solrFields.put(sfield.getName(), sfield); 751 752 sfield = new CmsSolrField(CmsSearchField.FIELD_USER_CREATED, null, null, null); 753 map = new CmsSearchFieldMapping(CmsSearchFieldMappingType.ATTRIBUTE, CmsSearchField.FIELD_USER_CREATED); 754 sfield.addMapping(map); 755 m_solrFields.put(sfield.getName(), sfield); 756 757 sfield = new CmsSolrField(CmsSearchField.FIELD_META, null, null, null); 758 map = new CmsSearchFieldMapping(CmsSearchFieldMappingType.PROPERTY, CmsPropertyDefinition.PROPERTY_TITLE); 759 sfield.addMapping(map); 760 map = new CmsSearchFieldMapping(CmsSearchFieldMappingType.PROPERTY, CmsPropertyDefinition.PROPERTY_DESCRIPTION); 761 sfield.addMapping(map); 762 map = new CmsSearchFieldMapping(CmsSearchFieldMappingType.ATTRIBUTE, I_CmsXmlConfiguration.A_NAME); 763 sfield.addMapping(map); 764 m_solrFields.put(sfield.getName(), sfield); 765 766 sfield = new CmsSolrField(CmsSearchField.FIELD_SEARCH_EXCLUDE, null, null, null); 767 map = new CmsSearchFieldMapping( 768 CmsSearchFieldMappingType.PROPERTY_SEARCH, 769 CmsPropertyDefinition.PROPERTY_SEARCH_EXCLUDE); 770 sfield.addMapping(map); 771 m_solrFields.put(sfield.getName(), sfield); 772 773 } 774 775 /** 776 * Adds multiple fields to the document that are used to search in by the list app. 777 * 778 * <p>The fields are: 779 * <ul> 780 * <li>description_{locale}</li> 781 * <li>keywords_{locale}</li> 782 * </ul> 783 * for each of the locales the document is available in.</p> 784 * 785 * @param document the document to index with all other fields already added. 786 * @param cms the current context 787 * @param resource the resource that is indexed 788 * @param properties the direct properties of the resource 789 * @return the document extended by the fields used by the list. 790 */ 791 792 private I_CmsSearchDocument appendFieldsForListSearch( 793 I_CmsSearchDocument document, 794 CmsObject cms, 795 CmsResource resource) { 796 797 List<String> locales = document.getMultivaluedFieldAsStringList(CmsSearchField.FIELD_CONTENT_LOCALES); 798 for (String locale : locales) { 799 fillLocalizedFieldWithPropertyFallbacks( 800 cms, 801 document, 802 resource, 803 locale, 804 CmsSearchField.FIELD_DESCRIPTION, 805 CmsPropertyDefinition.PROPERTY_DESCRIPTION); 806 fillLocalizedFieldWithPropertyFallbacks( 807 cms, 808 document, 809 resource, 810 locale, 811 CmsSearchField.FIELD_KEYWORDS, 812 CmsPropertyDefinition.PROPERTY_KEYWORDS); 813 } 814 815 return document; 816 817 } 818 819 /** 820 * Adds multiple fields to the document that are used for the sort options in the list app. 821 * 822 * <p>The fields are: 823 * <ul> 824 * <li>instancedate_dt</li> 825 * <li>instancedatecurrenttill_dt</li> 826 * <li>instancedaterange_dr</li> 827 * <li>disptitle_s</li> 828 * <li>disporder_i</li> 829 * </ul> 830 * and localized versions for each content locale.</p> 831 * 832 * @param document the document to index with all other fields already added. 833 * @return the document extended by the fields used by the list. 834 */ 835 private I_CmsSearchDocument appendFieldsForListSortOptions(I_CmsSearchDocument document) { 836 837 // add non-localized fields 838 // add instance date 839 String fieldName = CmsSearchField.FIELD_INSTANCEDATE + CmsSearchField.FIELD_POSTFIX_DATE; 840 Date instanceDate = document.getFieldValueAsDate(fieldName); 841 if ((null == instanceDate) || (instanceDate.getTime() == 0)) { 842 String instanceDateCopyField = document.getFieldValueAsString( 843 CmsPropertyDefinition.PROPERTY_INSTANCEDATE_COPYFIELD + CmsSearchField.FIELD_DYNAMIC_PROPERTIES); 844 if (null != instanceDateCopyField) { 845 instanceDate = document.getFieldValueAsDate(instanceDateCopyField); 846 } 847 if ((null == instanceDate) || (instanceDate.getTime() == 0)) { 848 instanceDate = document.getFieldValueAsDate(CmsSearchField.FIELD_DATE_RELEASED); 849 } 850 if ((null == instanceDate) || (instanceDate.getTime() == 0)) { 851 instanceDate = document.getFieldValueAsDate(CmsSearchField.FIELD_DATE_LASTMODIFIED); 852 } 853 document.addDateField(fieldName, instanceDate.getTime(), false); 854 } 855 // Set instancedaterange_dr 856 fieldName = CmsSearchField.FIELD_INSTANCEDATE_RANGE + CmsSearchField.FIELD_POSTFIX_DATE_RANGE; 857 String instanceDateString = document.getFieldValueAsString( 858 CmsSearchField.FIELD_INSTANCEDATE + CmsSearchField.FIELD_POSTFIX_DATE); 859 String instanceDateRangeString = "[" + instanceDateString + " TO " + instanceDateString + "]"; 860 ((SolrInputDocument)document.getDocument()).setField(fieldName, instanceDateRangeString); 861 // Set instancedatecurrenttill_dt to instancedate_dt if not set yet 862 fieldName = CmsSearchField.FIELD_INSTANCEDATE_CURRENT_TILL + CmsSearchField.FIELD_POSTFIX_DATE; 863 Date instanceDateCurrentTill = document.getFieldValueAsDate(fieldName); 864 if ((null == instanceDateCurrentTill) || (instanceDateCurrentTill.getTime() == 0)) { 865 document.addDateField(fieldName, instanceDate.getTime(), false); 866 } 867 // add disp-title field 868 fieldName = CmsSearchField.FIELD_DISPTITLE + CmsSearchField.FIELD_POSTFIX_SORT; 869 String dispTitle = document.getFieldValueAsString(fieldName); 870 if (null == dispTitle) { 871 dispTitle = document.getFieldValueAsString( 872 CmsPropertyDefinition.PROPERTY_TITLE + CmsSearchField.FIELD_DYNAMIC_PROPERTIES_DIRECT); 873 if (null == dispTitle) { 874 dispTitle = document.getFieldValueAsString(CmsSearchField.FIELD_FILENAME); 875 } 876 document.addSearchField(new CmsSolrField(fieldName, null, null, null), dispTitle); 877 } 878 879 // add disp-order field 880 fieldName = CmsSearchField.FIELD_DISPORDER + CmsSearchField.FIELD_POSTFIX_INT; 881 String dispOrder = document.getFieldValueAsString(fieldName); 882 if (null == dispOrder) { 883 dispOrder = document.getFieldValueAsString( 884 CmsPropertyDefinition.PROPERTY_DISPLAY_ORDER + CmsSearchField.FIELD_DYNAMIC_PROPERTIES); 885 if (null != dispOrder) { 886 try { 887 int o = Integer.parseInt(dispOrder); 888 dispOrder = String.valueOf(o); 889 } catch (NullPointerException | NumberFormatException e) { 890 LOG.warn( 891 "Property " 892 + CmsPropertyDefinition.PROPERTY_DISPLAY_ORDER 893 + " contains not a valid integer number."); 894 dispOrder = "0"; 895 } 896 } else { 897 dispOrder = "0"; 898 } 899 document.addSearchField(new CmsSolrField(fieldName, null, null, null), dispOrder); 900 } 901 902 // add localized fields 903 for (String locale : document.getMultivaluedFieldAsStringList(CmsSearchField.FIELD_CONTENT_LOCALES)) { 904 // instance date 905 fieldName = CmsSearchField.FIELD_INSTANCEDATE + "_" + locale + CmsSearchField.FIELD_POSTFIX_DATE; 906 Date localeInstanceDate = document.getFieldValueAsDate(fieldName); 907 if ((null == localeInstanceDate) || (localeInstanceDate.getTime() == 0)) { 908 localeInstanceDate = instanceDate; 909 document.addDateField(fieldName, localeInstanceDate.getTime(), false); 910 } 911 // instance date range 912 fieldName = CmsSearchField.FIELD_INSTANCEDATE_RANGE 913 + "_" 914 + locale 915 + CmsSearchField.FIELD_POSTFIX_DATE_RANGE; 916 String localeInstanceDateString = document.getFieldValueAsString( 917 CmsSearchField.FIELD_INSTANCEDATE + "_" + locale + CmsSearchField.FIELD_POSTFIX_DATE); 918 String localeInstanceDateRangeString = "[" 919 + localeInstanceDateString 920 + " TO " 921 + localeInstanceDateString 922 + "]"; 923 ((SolrInputDocument)document.getDocument()).setField(fieldName, localeInstanceDateRangeString); 924 // Set instancedatecurrenttill_dt to instancedate_dt if not set yet 925 fieldName = CmsSearchField.FIELD_INSTANCEDATE_CURRENT_TILL 926 + "_" 927 + locale 928 + CmsSearchField.FIELD_POSTFIX_DATE; 929 Date localeInstanceDateCurrentTill = document.getFieldValueAsDate(fieldName); 930 if ((null == localeInstanceDateCurrentTill) || (localeInstanceDateCurrentTill.getTime() == 0)) { 931 document.addDateField(fieldName, localeInstanceDate.getTime(), false); 932 } 933 // disp-title field for title display and sorting 934 fieldName = CmsSearchField.FIELD_DISPTITLE + "_" + locale + CmsSearchField.FIELD_POSTFIX_SORT; 935 if (null == document.getFieldValueAsString(fieldName)) { 936 String localizedTitle = document.getFieldValueAsString( 937 CmsPropertyDefinition.PROPERTY_TITLE 938 + "_" 939 + locale 940 + CmsSearchField.FIELD_DYNAMIC_PROPERTIES_DIRECT); 941 document.addSearchField( 942 new CmsSolrField(fieldName, null, null, null), 943 null == localizedTitle ? dispTitle : localizedTitle); 944 } 945 // disp-order field 946 fieldName = CmsSearchField.FIELD_DISPORDER + "_" + locale + CmsSearchField.FIELD_POSTFIX_INT; 947 if (null == document.getFieldValueAsString(fieldName)) { 948 String localizedOrder = document.getFieldValueAsString( 949 CmsPropertyDefinition.PROPERTY_DISPLAY_ORDER 950 + "_" 951 + locale 952 + CmsSearchField.FIELD_DYNAMIC_PROPERTIES); 953 if (null != localizedOrder) { 954 try { 955 int o = Integer.parseInt(localizedOrder); 956 localizedOrder = String.valueOf(o); 957 } catch (NullPointerException | NumberFormatException e) { 958 LOG.warn( 959 "Property " 960 + CmsPropertyDefinition.PROPERTY_DISPLAY_ORDER 961 + "_" 962 + locale 963 + " contains not a valid integer number."); 964 } 965 } 966 document.addSearchField( 967 new CmsSolrField(fieldName, null, null, null), 968 null == localizedOrder ? dispOrder : localizedOrder); 969 } 970 } 971 972 return document; 973 } 974 975 /** 976 * Copy the content and the title property of the document to a spell field / a language specific spell field. 977 * @param document the document that gets extended by the spell fields. 978 */ 979 private void appendSpellFields(I_CmsSearchDocument document) { 980 981 /* 982 * Add the content fields (multiple for contents with more than one locale) 983 */ 984 // add the content_<locale> fields to this configuration 985 String title = document.getFieldValueAsString( 986 CmsPropertyDefinition.PROPERTY_TITLE + CmsSearchField.FIELD_DYNAMIC_PROPERTIES_DIRECT); 987 document.addSearchField( 988 new CmsSolrField(CmsSearchField.FIELD_SPELL, null, null, null), 989 document.getFieldValueAsString(CmsSearchField.FIELD_CONTENT) + "\n" + title); 990 for (Locale locale : OpenCms.getLocaleManager().getAvailableLocales()) { 991 document.addSearchField( 992 new CmsSolrField(locale + "_" + CmsSearchField.FIELD_SPELL, null, locale, null), 993 document.getFieldValueAsString( 994 CmsSearchFieldConfiguration.getLocaleExtendedName(CmsSearchField.FIELD_CONTENT, locale)) 995 + "\n" 996 + title); 997 } 998 } 999 1000 /** 1001 * Fills the field with the name extended by "_{locale}" with the property value, if the field is not already present in the document. 1002 * 1003 * If the localized property is not present, the default property value is used to get the value. 1004 * Properties set on the indexed resource itself are always preferred. If no suitable property is present, the parent properties are used when given. 1005 * 1006 * @param cms the current context 1007 * @param document the document to add the field to. 1008 * @param resource the currently indexed resource. 1009 * @param locale the locale to add the field for. 1010 * @param fieldName the name of the field to add (without locale postfix) 1011 * @param propertyName the property name of the property to get the value from (without locale postfix) 1012 */ 1013 private void fillLocalizedFieldWithPropertyFallbacks( 1014 CmsObject cms, 1015 I_CmsSearchDocument document, 1016 CmsResource resource, 1017 String locale, 1018 String fieldName, 1019 String propertyName) { 1020 1021 Locale l = CmsLocaleManager.getLocale(locale); 1022 String localeExtendedFieldName = getLocaleExtendedName(fieldName, locale); 1023 1024 if (!document.getFieldNames().contains(localeExtendedFieldName)) { 1025 String value = CmsVfsUtil.readPropertyValueWithFolderFallbackForDefaultFiles( 1026 cms, 1027 resource, 1028 propertyName, 1029 l); 1030 if (value != null) { 1031 document.addSearchField(new CmsSolrField(localeExtendedFieldName, null, null, null), value); 1032 } 1033 } 1034 1035 } 1036 1037 /** 1038 * Returns <code>true</code> if at least one of the index sources uses a VFS indexer that is able 1039 * to index locale dependent resources.<p> 1040 * 1041 * TODO This should be improved somehow 1042 * 1043 * @return <code>true</code> if this field configuration should resolve locale dependencies 1044 */ 1045 private boolean hasLocaleDependencies() { 1046 1047 for (CmsSearchIndexSource source : getIndex().getSources()) { 1048 if (source.getIndexer().isLocaleDependenciesEnable()) { 1049 return true; 1050 } 1051 } 1052 return false; 1053 } 1054}