/*
 * File   : $Source$
 * Date   : $Date$
 * Version: $Revision$
 *
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (C) 2002 - 2009 Alkacon Software (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.opencms.search.fields;

import org.opencms.file.CmsObject;
import org.opencms.file.CmsProperty;
import org.opencms.file.CmsResource;
import org.opencms.file.types.I_CmsResourceType;
import org.opencms.loader.CmsLoaderException;
import org.opencms.main.CmsException;
import org.opencms.main.OpenCms;
import org.opencms.relations.CmsCategoryService;
import org.opencms.search.CmsSearchIndex;
import org.opencms.search.I_CmsSearchDocument;
import org.opencms.search.I_CmsSearchIndex;
import org.opencms.search.extractors.I_CmsExtractionResult;
import org.opencms.util.CmsStringUtil;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.apache.solr.uninverting.UninvertingReader.Type;

/**
 * Standard implementation for OpenCms search field configurations.<p>
 *
 * The document for a resource is assembled in {@link #createDocument} by a fixed sequence of
 * protected <code>append...</code> methods. Each of them receives the same set of arguments and
 * returns the (possibly extended) document, so subclasses can override individual steps.<p>
 *
 * @since 8.5.0
 */
public class CmsSearchFieldConfiguration extends A_CmsSearchFieldConfiguration {

    /** A list of fields that should be lazy-loaded. */
    public static final List<String> LAZY_FIELDS = new ArrayList<String>();

    /** The name for the standard field configuration. */
    public static final String STR_STANDARD = "standard";

    /** The serial version id. */
    private static final long serialVersionUID = -7581572963583498549L;

    static {
        LAZY_FIELDS.add(CmsSearchField.FIELD_CONTENT);
        LAZY_FIELDS.add(CmsSearchField.FIELD_CONTENT_BLOB);
    }

    /** The current index, set by {@link #setIndex(CmsSearchIndex)} or as a side effect of {@link #createDocument}. */
    private transient CmsSearchIndex m_index;

    /**
     * Creates a new, empty field configuration.<p>
     */
    public CmsSearchFieldConfiguration() {

        super();
    }

    /**
     * Returns the locale extended name for the given lookup String.<p>
     *
     * If the locale is <code>null</code>, the lookup String is returned unchanged. Otherwise the
     * result is the same as for {@link #getLocaleExtendedName(String, String)} called with
     * <code>locale.toString()</code>.<p>
     *
     * @param lookup the lookup String
     * @param locale the locale, may be <code>null</code>
     *
     * @return the locale extended name for the given lookup String
     */
    public static final String getLocaleExtendedName(String lookup, Locale locale) {

        if (locale == null) {
            return lookup;
        }
        return getLocaleExtendedName(lookup, locale.toString());
    }

    /**
     * Returns the locale extended name for the given lookup String.<p>
     *
     * The result is the lookup String, followed by an underscore, followed by the locale String,
     * for example <code>"title_en"</code> for the arguments <code>"title"</code> and <code>"en"</code>.<p>
     *
     * @param lookup the lookup String
     * @param locale the locale
     *
     * @return the locale extended name for the given lookup String
     */
    public static final String getLocaleExtendedName(String lookup, String locale) {

        // the builder never leaves this method, so the unsynchronized StringBuilder is sufficient
        StringBuilder result = new StringBuilder(32);
        result.append(lookup);
        result.append('_');
        result.append(locale);
        return result.toString();
    }

    /**
     * Creates a space separated list of all parent folders of the given root path.<p>
     *
     * For every <code>'/'</code> in the folder path of the given root path, the prefix up to and
     * including that slash is added to the result. A folder path of <code>/a/b/</code> therefore
     * results in <code>"/ /a/ /a/b/"</code>.<p>
     *
     * @param rootPath the root path to get the parent folder list for
     *
     * @return a space separated list of all parent folders of the given root path,
     *      or <code>"/"</code> if the given root path is empty
     */
    public static String getParentFolderTokens(String rootPath) {

        if (CmsStringUtil.isEmpty(rootPath)) {
            return "/";
        }
        String folderName = CmsResource.getFolderPath(rootPath);
        StringBuilder result = new StringBuilder(128);
        // jump from slash to slash, every slash terminates one parent folder token
        for (int pos = folderName.indexOf('/'); pos >= 0; pos = folderName.indexOf('/', pos + 1)) {
            if (result.length() > 0) {
                result.append(' ');
            }
            // append the prefix [0, pos] directly, no temporary substring is required
            result.append(folderName, 0, pos + 1);
        }
        return result.toString();
    }

    /**
     * Adds the configured fields to the given uninverting map.<p>
     *
     * To allow sorting on a field, the field must be added to the map given to
     * {@link org.apache.solr.uninverting.UninvertingReader#wrap(org.apache.lucene.index.DirectoryReader, Map)}.
     * This method registers every field name returned by <code>getFieldNames()</code>
     * with the type {@link Type#SORTED}.<p>
     *
     * @param uninvertingMap the map to which the fields are added
     */
    @Override
    public void addUninvertingMappings(Map<String, Type> uninvertingMap) {

        for (String fieldName : getFieldNames()) {
            uninvertingMap.put(fieldName, Type.SORTED);
        }
    }

    /**
     * Creates the Lucene Document with this field configuration for the provided VFS resource, search index and content.<p>
     *
     * This triggers the indexing process for the given VFS resource according to the configuration
     * of the provided index.<p>
     *
     * The provided index resource contains the basic contents to index.
     * The provided search index contains the configuration what to index, such as the locale and
     * possible special field mappings.<p>
     *
     * The properties of the resource are read once (searched and not searched) and handed to all
     * <code>append...</code> methods, which are called in this order: content blob, path, type, file size,
     * dates, locales, properties, categories, field mappings and finally
     * {@link #appendAdditionalValuesToDcoument}.<p>
     *
     * Please note: the given index is cast to {@link CmsSearchIndex} and stored in this configuration,
     * so it is returned by {@link #getIndex()} afterwards. An index of a different type results in
     * a {@link ClassCastException}.<p>
     *
     * @param cms the OpenCms user context used to access the OpenCms VFS
     * @param resource the resource to create the Lucene document from
     * @param index the search index to create the Document for
     * @param extraction the plain text content extracted from the document
     *
     * @return the Search Document for the given VFS resource and the given search index
     *
     * @throws CmsException if something goes wrong
     */
    public I_CmsSearchDocument createDocument(
        CmsObject cms,
        CmsResource resource,
        I_CmsSearchIndex index,
        I_CmsExtractionResult extraction)
    throws CmsException {

        m_index = (CmsSearchIndex)index;

        I_CmsSearchDocument document = m_index.createEmptyDocument(resource);

        // read the properties only once here, all appender methods share the two lists
        List<CmsProperty> propertiesSearched = cms.readPropertyObjects(resource, true);
        List<CmsProperty> properties = cms.readPropertyObjects(resource, false);

        document = appendContentBlob(document, cms, resource, extraction, properties, propertiesSearched);
        document = appendPath(document, cms, resource, extraction, properties, propertiesSearched);
        document = appendType(document, cms, resource, extraction, properties, propertiesSearched);
        document = appendFileSize(document, cms, resource, extraction, properties, propertiesSearched);
        document = appendDates(document, cms, resource, extraction, properties, propertiesSearched);
        document = appendLocales(document, cms, resource, extraction, properties, propertiesSearched);
        document = appendProperties(document, cms, resource, extraction, properties, propertiesSearched);
        document = appendCategories(document, cms, resource, extraction, properties, propertiesSearched);
        document = appendFieldMappings(document, cms, resource, extraction, properties, propertiesSearched);
        document = appendAdditionalValuesToDcoument(
            document,
            cms,
            resource,
            extraction,
            properties,
            propertiesSearched);

        return document;
    }

    /**
     * Returns the index.<p>
     *
     * @return the index, or <code>null</code> if no index has been set so far
     */
    public I_CmsSearchIndex getIndex() {

        return m_index;
    }

    /**
     * Sets the index.<p>
     *
     * @param index the index to set
     */
    public void setIndex(CmsSearchIndex index) {

        m_index = index;
    }

    /**
     * Overriding this method allows to append some 'extra' values/fields to a document
     * without overriding the {@link #createDocument} method itself.<p>
     *
     * The method {@link #createDocument} reads all properties of the current resource which is
     * an expensive operation. In order to avoid reading those properties twice, this method has been introduced.<p>
     *
     * Compared with all the other appender methods the name of this method is generic.
     * The spelling of the method name is part of the API that subclasses override and is therefore left as is.<p>
     *
     * In this default implementation the document is returned unchanged.<p>
     *
     * @param document the document to extend
     * @param cms the OpenCms context used for building the search index
     * @param resource the resource that is indexed
     * @param extraction the plain text extraction result from the resource
     * @param properties the list of all properties directly attached to the resource (not searched)
     * @param propertiesSearched the list of all searched properties of the resource
     *
     * @return the given document, unchanged in this default implementation
     */
    protected I_CmsSearchDocument appendAdditionalValuesToDcoument(
        I_CmsSearchDocument document,
        CmsObject cms,
        CmsResource resource,
        I_CmsExtractionResult extraction,
        List<CmsProperty> properties,
        List<CmsProperty> propertiesSearched) {

        return document;
    }

    /**
     * Extends the given document by resource category information.<p>
     *
     * The categories are read for the resource with
     * <code>CmsCategoryService#readResourceCategories</code> and added as category field.<p>
     *
     * @param document the document to extend
     * @param cms the OpenCms context used for building the search index
     * @param resource the resource that is indexed
     * @param extractionResult the plain text extraction result from the resource
     * @param properties the list of all properties directly attached to the resource (not searched)
     * @param propertiesSearched the list of all searched properties of the resource
     *
     * @return the document extended by resource category information
     *
     * @throws CmsException if something goes wrong
     */
    protected I_CmsSearchDocument appendCategories(
        I_CmsSearchDocument document,
        CmsObject cms,
        CmsResource resource,
        I_CmsExtractionResult extractionResult,
        List<CmsProperty> properties,
        List<CmsProperty> propertiesSearched)
    throws CmsException {

        CmsCategoryService categoryService = CmsCategoryService.getInstance();
        document.addCategoryField(categoryService.readResourceCategories(cms, resource));

        return document;
    }

    /**
     * Extends the given document by a field that contains the extracted content blob.<p>
     *
     * Nothing is added if the extraction result is <code>null</code> or does not provide any bytes.<p>
     *
     * @param document the document to extend
     * @param cms the OpenCms context used for building the search index
     * @param resource the resource that is indexed
     * @param extractionResult the plain text extraction result from the resource
     * @param properties the list of all properties directly attached to the resource (not searched)
     * @param propertiesSearched the list of all searched properties of the resource
     *
     * @return the document extended by a field that contains the extracted content blob
     */
    protected I_CmsSearchDocument appendContentBlob(
        I_CmsSearchDocument document,
        CmsObject cms,
        CmsResource resource,
        I_CmsExtractionResult extractionResult,
        List<CmsProperty> properties,
        List<CmsProperty> propertiesSearched) {

        if (extractionResult != null) {
            byte[] data = extractionResult.getBytes();
            if (data != null) {
                document.addContentField(data);
            }
        }

        return document;
    }

    /**
     * Extends the given document by fields for date of creation, content and last modification.<p>
     *
     * @param document the document to extend
     * @param cms the OpenCms context used for building the search index
     * @param resource the resource that is indexed
     * @param extractionResult the plain text extraction result from the resource
     * @param properties the list of all properties directly attached to the resource (not searched)
     * @param propertiesSearched the list of all searched properties of the resource
     *
     * @return the document extended by fields for date of creation, content and last modification
     */
    protected I_CmsSearchDocument appendDates(
        I_CmsSearchDocument document,
        CmsObject cms,
        CmsResource resource,
        I_CmsExtractionResult extractionResult,
        List<CmsProperty> properties,
        List<CmsProperty> propertiesSearched) {

        // NOTE(review): the meaning of the boolean flag is defined by I_CmsSearchDocument#addDateField,
        // only the content date is added with 'false'
        document.addDateField(CmsSearchField.FIELD_DATE_CREATED, resource.getDateCreated(), true);
        document.addDateField(CmsSearchField.FIELD_DATE_LASTMODIFIED, resource.getDateLastModified(), true);
        document.addDateField(CmsSearchField.FIELD_DATE_CONTENT, resource.getDateContent(), false);

        return document;
    }

    /**
     * Extends the given document by the mappings for the given field.<p>
     *
     * The String values of all mappings of the field that are not <code>null</code> are joined
     * with a line break. The field is only added to the document if the joined text is not empty.<p>
     *
     * @param document the document to extend
     * @param field the field to create the mappings for
     * @param cms the OpenCms context used for building the search index
     * @param resource the resource that is indexed
     * @param extractionResult the plain text extraction result from the resource
     * @param properties the list of all properties directly attached to the resource (not searched)
     * @param propertiesSearched the list of all searched properties of the resource
     *
     * @return the document extended by the mappings for the given field
     */
    protected I_CmsSearchDocument appendFieldMapping(
        I_CmsSearchDocument document,
        CmsSearchField field,
        CmsObject cms,
        CmsResource resource,
        I_CmsExtractionResult extractionResult,
        List<CmsProperty> properties,
        List<CmsProperty> propertiesSearched) {

        StringBuilder text = new StringBuilder();
        for (I_CmsSearchFieldMapping mapping : field.getMappings()) {
            String mapResult = mapping.getStringValue(cms, resource, extractionResult, properties, propertiesSearched);
            if (mapResult != null) {
                if (text.length() > 0) {
                    // separate the results of the individual mappings
                    text.append('\n');
                }
                text.append(mapResult);
            }
        }
        if (text.length() > 0) {
            document.addSearchField(field, text.toString());
        }

        return document;
    }

    /**
     * Extends the given document by the configured field mappings.<p>
     *
     * Calls {@link #appendFieldMapping} for every field returned by <code>getFields()</code>.<p>
     *
     * @param document the document to extend
     * @param cms the OpenCms context used for building the search index
     * @param resource the resource that is indexed
     * @param extractionResult the plain text extraction result from the resource
     * @param properties the list of all properties directly attached to the resource (not searched)
     * @param propertiesSearched the list of all searched properties of the resource
     *
     * @return the document extended by the configured field mappings
     */
    protected I_CmsSearchDocument appendFieldMappings(
        I_CmsSearchDocument document,
        CmsObject cms,
        CmsResource resource,
        I_CmsExtractionResult extractionResult,
        List<CmsProperty> properties,
        List<CmsProperty> propertiesSearched) {

        for (CmsSearchField field : getFields()) {
            document = appendFieldMapping(
                document,
                field,
                cms,
                resource,
                extractionResult,
                properties,
                propertiesSearched);
        }

        return document;
    }

    /**
     * Extends the given document by the "size" field.<p>
     *
     * The value of the field is the length of the resource.<p>
     *
     * @param document the document to extend
     * @param cms the OpenCms context used for building the search index
     * @param resource the resource that is indexed
     * @param extractionResult the plain text extraction result from the resource
     * @param properties the list of all properties directly attached to the resource (not searched)
     * @param propertiesSearched the list of all searched properties of the resource
     *
     * @return the document extended by the file size of the resource
     */
    protected I_CmsSearchDocument appendFileSize(
        I_CmsSearchDocument document,
        CmsObject cms,
        CmsResource resource,
        I_CmsExtractionResult extractionResult,
        List<CmsProperty> properties,
        List<CmsProperty> propertiesSearched) {

        document.addFileSizeField(resource.getLength());

        return document;
    }

    /**
     * Hook for extending the given document by the "res_locales" field.<p>
     *
     * In this default implementation the document is returned unchanged.<p>
     *
     * @param document the document to extend
     * @param cms the OpenCms context used for building the search index
     * @param resource the resource that is indexed
     * @param extraction the plain text extraction result from the resource
     * @param properties the list of all properties directly attached to the resource (not searched)
     * @param propertiesSearched the list of all searched properties of the resource
     *
     * @return the given document, unchanged in this default implementation
     */
    protected I_CmsSearchDocument appendLocales(
        I_CmsSearchDocument document,
        CmsObject cms,
        CmsResource resource,
        I_CmsExtractionResult extraction,
        List<CmsProperty> properties,
        List<CmsProperty> propertiesSearched) {

        return document;
    }

    /**
     * Extends the given document by fields for VFS path lookup.<p>
     *
     * Both the path field and the root path field are filled from the root path of the resource.<p>
     *
     * @param document the document to extend
     * @param cms the OpenCms context used for building the search index
     * @param resource the resource that is indexed
     * @param extractionResult the plain text extraction result from the resource
     * @param properties the list of all properties directly attached to the resource (not searched)
     * @param propertiesSearched the list of all searched properties of the resource
     *
     * @return the document extended by fields for VFS path lookup
     */
    protected I_CmsSearchDocument appendPath(
        I_CmsSearchDocument document,
        CmsObject cms,
        CmsResource resource,
        I_CmsExtractionResult extractionResult,
        List<CmsProperty> properties,
        List<CmsProperty> propertiesSearched) {

        String rootPath = resource.getRootPath();
        document.addPathField(rootPath);
        document.addRootPathField(rootPath);

        return document;
    }

    /**
     * Hook for appending all direct properties, that are not empty or white space only, to the document.<p>
     *
     * In this default implementation the document is returned unchanged.<p>
     *
     * @param document the document to extend
     * @param cms the OpenCms context used for building the search index
     * @param resource the resource that is indexed
     * @param extraction the plain text extraction result from the resource
     * @param properties the list of all properties directly attached to the resource (not searched)
     * @param propertiesSearched the list of all searched properties of the resource
     *
     * @return the given document, unchanged in this default implementation
     */
    protected I_CmsSearchDocument appendProperties(
        I_CmsSearchDocument document,
        CmsObject cms,
        CmsResource resource,
        I_CmsExtractionResult extraction,
        List<CmsProperty> properties,
        List<CmsProperty> propertiesSearched) {

        return document;
    }

    /**
     * Extends the given document by a field that contains the resource type name
     * and a field that contains the file name suffix.<p>
     *
     * The type name is <code>"VFS"</code> in case the resource manager does not return a type for
     * the type id of the resource. The suffix is the part of the resource name after the last
     * <code>'.'</code>; no suffix field is added if the name contains no dot or ends with a dot.<p>
     *
     * @param document the document to extend
     * @param cms the OpenCms context used for building the search index
     * @param resource the resource that is indexed
     * @param extractionResult the plain text extraction result from the resource
     * @param properties the list of all properties directly attached to the resource (not searched)
     * @param propertiesSearched the list of all searched properties of the resource
     *
     * @return the document extended by a field that contains the resource type name
     *
     * @throws CmsLoaderException in case of errors identifying the resource type name
     */
    protected I_CmsSearchDocument appendType(
        I_CmsSearchDocument document,
        CmsObject cms,
        CmsResource resource,
        I_CmsExtractionResult extractionResult,
        List<CmsProperty> properties,
        List<CmsProperty> propertiesSearched)
    throws CmsLoaderException {

        // add the resource type to the document
        I_CmsResourceType type = OpenCms.getResourceManager().getResourceType(resource.getTypeId());
        String typeName = "VFS";
        if (type != null) {
            typeName = type.getTypeName();
        }
        document.addTypeField(typeName);

        // add the file name suffix to the document
        String resName = CmsResource.getName(resource.getRootPath());
        int index = resName.lastIndexOf('.');
        // the former check "length > index" was always true for a found dot, so a name ending
        // with '.' produced an empty suffix field - require at least one character after the dot
        if ((index != -1) && (resName.length() > (index + 1))) {
            document.addSuffixField(resName.substring(index + 1));
        }
        return document;
    }

}