001/*
002 * File   : $Source$
003 * Date   : $Date$
004 * Version: $Revision$
005 *
006 * This library is part of OpenCms -
007 * the Open Source Content Management System
008 *
009 * Copyright (C) 2002 - 2009 Alkacon Software (http://www.alkacon.com)
010 *
011 * This library is free software; you can redistribute it and/or
012 * modify it under the terms of the GNU Lesser General Public
013 * License as published by the Free Software Foundation; either
014 * version 2.1 of the License, or (at your option) any later version.
015 *
016 * This library is distributed in the hope that it will be useful,
017 * but WITHOUT ANY WARRANTY; without even the implied warranty of
018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
019 * Lesser General Public License for more details.
020 *
021 * For further information about Alkacon Software, please see the
022 * company website: http://www.alkacon.com
023 *
024 * For further information about OpenCms, please see the
025 * project website: http://www.opencms.org
026 *
027 * You should have received a copy of the GNU Lesser General Public
028 * License along with this library; if not, write to the Free Software
029 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
030 */
031
032package org.opencms.search.fields;
033
034import org.opencms.file.CmsObject;
035import org.opencms.file.CmsProperty;
036import org.opencms.file.CmsResource;
037import org.opencms.file.types.I_CmsResourceType;
038import org.opencms.loader.CmsLoaderException;
039import org.opencms.main.CmsException;
040import org.opencms.main.OpenCms;
041import org.opencms.relations.CmsCategoryService;
042import org.opencms.search.CmsSearchIndex;
043import org.opencms.search.I_CmsSearchDocument;
044import org.opencms.search.I_CmsSearchIndex;
045import org.opencms.search.extractors.I_CmsExtractionResult;
046import org.opencms.util.CmsStringUtil;
047
048import java.util.ArrayList;
049import java.util.List;
050import java.util.Locale;
051import java.util.Map;
052
053import org.apache.solr.uninverting.UninvertingReader.Type;
054
055/**
 * Concrete base implementation of an OpenCms search field configuration.<p>
057 *
058 * @since 8.5.0
059 */
060public class CmsSearchFieldConfiguration extends A_CmsSearchFieldConfiguration {
061
062    /** A list of fields that should be lazy-loaded. */
063    public static final List<String> LAZY_FIELDS = new ArrayList<String>();
064
065    /** The name for the standard field configuration. */
066    public static final String STR_STANDARD = "standard";
067
068    /** The serial version id. */
069    private static final long serialVersionUID = -7581572963583498549L;
070
071    static {
072        LAZY_FIELDS.add(CmsSearchField.FIELD_CONTENT);
073        LAZY_FIELDS.add(CmsSearchField.FIELD_CONTENT_BLOB);
074    }
075
076    /** The current index. */
077    private transient CmsSearchIndex m_index;
078
079    /**
080     * Creates a new, empty field configuration.<p>
081     */
082    public CmsSearchFieldConfiguration() {
083
084        super();
085    }
086
087    /**
088     * Returns the locale extended name for the given lookup String.<p>
089     *
090     * @param lookup the lookup String
091     * @param locale the locale
092     *
093     * @return the locale extended name for the given lookup String
094     */
095    public static final String getLocaleExtendedName(String lookup, Locale locale) {
096
097        if (locale == null) {
098            return lookup;
099        }
100        return getLocaleExtendedName(lookup, locale.toString());
101    }
102
103    /**
104     * Returns the locale extended name for the given lookup String.<p>
105     *
106     * @param lookup the lookup String
107     * @param locale the locale
108     *
109     * @return the locale extended name for the given lookup String
110     */
111    public static final String getLocaleExtendedName(String lookup, String locale) {
112
113        StringBuffer result = new StringBuffer(32);
114        result.append(lookup);
115        result.append('_');
116        result.append(locale);
117        return result.toString();
118    }
119
120    /**
121     * Creates a space separated list of all parent folders of the given root path.<p>
122     *
123     * @param rootPath the root path to get the parent folder list for
124     *
125     * @return a space separated list of all parent folders of the given root path
126     */
127    public static String getParentFolderTokens(String rootPath) {
128
129        if (CmsStringUtil.isEmpty(rootPath)) {
130            return "/";
131        }
132        StringBuffer result = new StringBuffer(128);
133        String folderName = CmsResource.getFolderPath(rootPath);
134        for (int i = 0; i < folderName.length(); i++) {
135            char c = folderName.charAt(i);
136            if (c == '/') {
137                if (result.length() > 0) {
138                    result.append(' ');
139                }
140                result.append(folderName.substring(0, i + 1));
141            }
142        }
143        return result.toString();
144    }
145
146    /** To allow sorting on a field the field must be added to the map given to {@link org.apache.solr.uninverting.UninvertingReader#wrap(org.apache.lucene.index.DirectoryReader, Map)}.
147     *  The method adds the configured fields.
148     * @param uninvertingMap the map to which the fields are added.
149     */
150    @Override
151    public void addUninvertingMappings(Map<String, Type> uninvertingMap) {
152
153        for (String fieldName : getFieldNames()) {
154            uninvertingMap.put(fieldName, Type.SORTED);
155        }
156
157    }
158
159    /**
160     * Creates the Lucene Document with this field configuration for the provided VFS resource, search index and content.<p>
161     *
162     * This triggers the indexing process for the given VFS resource according to the configuration
163     * of the provided index.<p>
164     *
165     * The provided index resource contains the basic contents to index.
166     * The provided search index contains the configuration what to index, such as the locale and
167     * possible special field mappings.<p>
168     *
169     * @param cms the OpenCms user context used to access the OpenCms VFS
170     * @param resource the resource to create the Lucene document from
171     * @param index the search index to create the Document for
172     * @param extraction the plain text content extracted from the document
173     *
174     * @return the Search Document for the given VFS resource and the given search index
175     *
176     * @throws CmsException if something goes wrong
177     */
178    public I_CmsSearchDocument createDocument(
179        CmsObject cms,
180        CmsResource resource,
181        I_CmsSearchIndex index,
182        I_CmsExtractionResult extraction)
183    throws CmsException {
184
185        m_index = (CmsSearchIndex)index;
186
187        I_CmsSearchDocument document = m_index.createEmptyDocument(resource);
188
189        List<CmsProperty> propertiesSearched = cms.readPropertyObjects(resource, true);
190        List<CmsProperty> properties = cms.readPropertyObjects(resource, false);
191
192        document = appendContentBlob(document, cms, resource, extraction, properties, propertiesSearched);
193        document = appendPath(document, cms, resource, extraction, properties, propertiesSearched);
194        document = appendType(document, cms, resource, extraction, properties, propertiesSearched);
195        document = appendFileSize(document, cms, resource, extraction, properties, propertiesSearched);
196        document = appendDates(document, cms, resource, extraction, properties, propertiesSearched);
197        document = appendLocales(document, cms, resource, extraction, properties, propertiesSearched);
198        document = appendProperties(document, cms, resource, extraction, properties, propertiesSearched);
199        document = appendCategories(document, cms, resource, extraction, properties, propertiesSearched);
200        document = appendFieldMappings(document, cms, resource, extraction, properties, propertiesSearched);
201        document = appendAdditionalValuesToDcoument(
202            document,
203            cms,
204            resource,
205            extraction,
206            properties,
207            propertiesSearched);
208
209        return document;
210    }
211
212    /**
213     * Returns the index.<p>
214     *
215     * @return the index
216     */
217    public I_CmsSearchIndex getIndex() {
218
219        return m_index;
220    }
221
222    /**
223     * Sets the index.<p>
224     *
225     * @param index the index to set
226     */
227    public void setIndex(CmsSearchIndex index) {
228
229        m_index = index;
230    }
231
232    /**
233     * Overriding this method allows to append some 'extra' values/fields to a document
234     * without overriding the {@link #createDocument} method itself.<p>
235     *
236     * The method {@link #createDocument} reads all properties of the current resource which is
237     * an expensive operation. In order to avoid reading those properties twice, this method has been introduced.<p>
238     *
239     * Compared with all the other appender methods the name of this method is generic.<p>
240     *
241     * In this default implementation the document is returned unchanged.<p>
242     *
243     * @param document the document to extend
244     * @param cms the OpenCms context used for building the search index
245     * @param resource the resource that is indexed
246     * @param extraction the plain text extraction result from the resource
247     * @param properties the list of all properties directly attached to the resource (not searched)
248     * @param propertiesSearched the list of all searched properties of the resource
249     *
250     * @return the document extended by resource category information
251     */
252    protected I_CmsSearchDocument appendAdditionalValuesToDcoument(
253        I_CmsSearchDocument document,
254        CmsObject cms,
255        CmsResource resource,
256        I_CmsExtractionResult extraction,
257        List<CmsProperty> properties,
258        List<CmsProperty> propertiesSearched) {
259
260        return document;
261    }
262
263    /**
264     * Extends the given document by resource category information based on properties.<p>
265     *
266     * @param document the document to extend
267     * @param cms the OpenCms context used for building the search index
268     * @param resource the resource that is indexed
269     * @param extractionResult the plain text extraction result from the resource
270     * @param properties the list of all properties directly attached to the resource (not searched)
271     * @param propertiesSearched the list of all searched properties of the resource
272     *
273     * @return the document extended by resource category information
274     *
275     * @throws CmsException if something goes wrong
276     */
277    protected I_CmsSearchDocument appendCategories(
278        I_CmsSearchDocument document,
279        CmsObject cms,
280        CmsResource resource,
281        I_CmsExtractionResult extractionResult,
282        List<CmsProperty> properties,
283        List<CmsProperty> propertiesSearched)
284    throws CmsException {
285
286        CmsCategoryService categoryService = CmsCategoryService.getInstance();
287        document.addCategoryField(categoryService.readResourceCategories(cms, resource));
288
289        return document;
290    }
291
292    /**
293     * Extends the given document by a field that contains the extracted content blob.<p>
294     *
295     * @param document the document to extend
296     * @param cms the OpenCms context used for building the search index
297     * @param resource the resource that is indexed
298     * @param extractionResult the plain text extraction result from the resource
299     * @param properties the list of all properties directly attached to the resource (not searched)
300     * @param propertiesSearched the list of all searched properties of the resource
301     *
302     * @return the document extended by a field that contains the extracted content blob
303     */
304    protected I_CmsSearchDocument appendContentBlob(
305        I_CmsSearchDocument document,
306        CmsObject cms,
307        CmsResource resource,
308        I_CmsExtractionResult extractionResult,
309        List<CmsProperty> properties,
310        List<CmsProperty> propertiesSearched) {
311
312        if (extractionResult != null) {
313            byte[] data = extractionResult.getBytes();
314            if (data != null) {
315                document.addContentField(data);
316            }
317        }
318
319        return document;
320    }
321
322    /**
323     * Extends the given document by fields for date of creation, content and last modification.<p>
324     *
325     * @param document the document to extend
326     * @param cms the OpenCms context used for building the search index
327     * @param resource the resource that is indexed
328     * @param extractionResult the plain text extraction result from the resource
329     * @param properties the list of all properties directly attached to the resource (not searched)
330     * @param propertiesSearched the list of all searched properties of the resource
331     *
332     * @return the document extended by fields for date of creation, content and last modification
333     */
334    protected I_CmsSearchDocument appendDates(
335        I_CmsSearchDocument document,
336        CmsObject cms,
337        CmsResource resource,
338        I_CmsExtractionResult extractionResult,
339        List<CmsProperty> properties,
340        List<CmsProperty> propertiesSearched) {
341
342        document.addDateField(CmsSearchField.FIELD_DATE_CREATED, resource.getDateCreated(), true);
343        document.addDateField(CmsSearchField.FIELD_DATE_LASTMODIFIED, resource.getDateLastModified(), true);
344        document.addDateField(CmsSearchField.FIELD_DATE_CONTENT, resource.getDateContent(), false);
345
346        return document;
347    }
348
349    /**
350     * Extends the given document by the mappings for the given field.<p>
351     *
352     * @param document the document to extend
353     * @param field the field to create the mappings for
354     * @param cms the OpenCms context used for building the search index
355     * @param resource the resource that is indexed
356     * @param extractionResult the plain text extraction result from the resource
357     * @param properties the list of all properties directly attached to the resource (not searched)
358     * @param propertiesSearched the list of all searched properties of the resource
359     *
360     * @return the document extended by the mappings for the given field
361     */
362    protected I_CmsSearchDocument appendFieldMapping(
363        I_CmsSearchDocument document,
364        CmsSearchField field,
365        CmsObject cms,
366        CmsResource resource,
367        I_CmsExtractionResult extractionResult,
368        List<CmsProperty> properties,
369        List<CmsProperty> propertiesSearched) {
370
371        StringBuffer text = new StringBuffer();
372        for (I_CmsSearchFieldMapping mapping : field.getMappings()) {
373            String mapResult = mapping.getStringValue(cms, resource, extractionResult, properties, propertiesSearched);
374            if (mapResult != null) {
375                if (text.length() > 0) {
376                    text.append('\n');
377                }
378                text.append(mapResult);
379            }
380        }
381        if (text.length() > 0) {
382            document.addSearchField(field, text.toString());
383        }
384
385        return document;
386    }
387
388    /**
389     * Extends the given document by the configured field mappings.<p>
390     *
391     * @param document the document to extend
392     * @param cms the OpenCms context used for building the search index
393     * @param resource the resource that is indexed
394     * @param extractionResult the plain text extraction result from the resource
395     * @param properties the list of all properties directly attached to the resource (not searched)
396     * @param propertiesSearched the list of all searched properties of the resource
397     *
398     * @return the document extended by the configured field mappings
399     */
400    protected I_CmsSearchDocument appendFieldMappings(
401        I_CmsSearchDocument document,
402        CmsObject cms,
403        CmsResource resource,
404        I_CmsExtractionResult extractionResult,
405        List<CmsProperty> properties,
406        List<CmsProperty> propertiesSearched) {
407
408        for (CmsSearchField field : getFields()) {
409            document = appendFieldMapping(
410                document,
411                field,
412                cms,
413                resource,
414                extractionResult,
415                properties,
416                propertiesSearched);
417        }
418
419        return document;
420    }
421
422    /**
423     * Extends the given document by the "size" field.<p>
424     *
425     * @param document the document to extend
426     * @param cms the OpenCms context used for building the search index
427     * @param resource the resource that is indexed
428     * @param extractionResult the plain text extraction result from the resource
429     * @param properties the list of all properties directly attached to the resource (not searched)
430     * @param propertiesSearched the list of all searched properties of the resource
431     *
432     * @return the document extended by the resource locales
433     */
434    protected I_CmsSearchDocument appendFileSize(
435        I_CmsSearchDocument document,
436        CmsObject cms,
437        CmsResource resource,
438        I_CmsExtractionResult extractionResult,
439        List<CmsProperty> properties,
440        List<CmsProperty> propertiesSearched) {
441
442        document.addFileSizeField(resource.getLength());
443
444        return document;
445    }
446
447    /**
448     * Extends the given document by the "res_locales" field.<p>
449     *
450     * @param document the document to extend
451     * @param cms the OpenCms context used for building the search index
452     * @param resource the resource that is indexed
453     * @param extraction the plain text extraction result from the resource
454     * @param properties the list of all properties directly attached to the resource (not searched)
455     * @param propertiesSearched the list of all searched properties of the resource
456     *
457     * @return the document extended by the resource locales
458     */
459    protected I_CmsSearchDocument appendLocales(
460        I_CmsSearchDocument document,
461        CmsObject cms,
462        CmsResource resource,
463        I_CmsExtractionResult extraction,
464        List<CmsProperty> properties,
465        List<CmsProperty> propertiesSearched) {
466
467        return document;
468    }
469
470    /**
471     * Extends the given document by fields for VFS path lookup.<p>
472     *
473     * @param document the document to extend
474     * @param cms the OpenCms context used for building the search index
475     * @param resource the resource that is indexed
476     * @param extractionResult the plain text extraction result from the resource
477     * @param properties the list of all properties directly attached to the resource (not searched)
478     * @param propertiesSearched the list of all searched properties of the resource
479     *
480     * @return the document extended by fields for VFS path lookup
481     */
482    protected I_CmsSearchDocument appendPath(
483        I_CmsSearchDocument document,
484        CmsObject cms,
485        CmsResource resource,
486        I_CmsExtractionResult extractionResult,
487        List<CmsProperty> properties,
488        List<CmsProperty> propertiesSearched) {
489
490        document.addPathField(resource.getRootPath());
491
492        document.addRootPathField(resource.getRootPath());
493
494        return document;
495    }
496
497    /**
498     * Appends all direct properties, that are not empty or white space only to the document.<p>
499     *
500     * @param document the document to extend
501     * @param cms the OpenCms context used for building the search index
502     * @param resource the resource that is indexed
503     * @param extraction the plain text extraction result from the resource
504     * @param properties the list of all properties directly attached to the resource (not searched)
505     * @param propertiesSearched the list of all searched properties of the resource
506     *
507     * @return the document extended by resource category information
508     */
509    protected I_CmsSearchDocument appendProperties(
510        I_CmsSearchDocument document,
511        CmsObject cms,
512        CmsResource resource,
513        I_CmsExtractionResult extraction,
514        List<CmsProperty> properties,
515        List<CmsProperty> propertiesSearched) {
516
517        return document;
518    }
519
520    /**
521     * Extends the given document by a field that contains the resource type name.<p>
522     *
523     * @param document the document to extend
524     * @param cms the OpenCms context used for building the search index
525     * @param resource the resource that is indexed
526     * @param extractionResult the plain text extraction result from the resource
527     * @param properties the list of all properties directly attached to the resource (not searched)
528     * @param propertiesSearched the list of all searched properties of the resource
529     *
530     * @return the document extended by a field that contains the resource type name
531     *
532     * @throws CmsLoaderException in case of errors identifying the resource type name
533     */
534    protected I_CmsSearchDocument appendType(
535        I_CmsSearchDocument document,
536        CmsObject cms,
537        CmsResource resource,
538        I_CmsExtractionResult extractionResult,
539        List<CmsProperty> properties,
540        List<CmsProperty> propertiesSearched)
541    throws CmsLoaderException {
542
543        // add the resource type to the document
544        I_CmsResourceType type = OpenCms.getResourceManager().getResourceType(resource.getTypeId());
545        String typeName = "VFS";
546        if (type != null) {
547            typeName = type.getTypeName();
548        }
549        document.addTypeField(typeName);
550
551        // add the file name suffix to the document
552        String resName = CmsResource.getName(resource.getRootPath());
553        int index = resName.lastIndexOf('.');
554        if ((index != -1) && (resName.length() > index)) {
555            document.addSuffixField(resName.substring(index + 1));
556        }
557        return document;
558    }
559
560}