001/*
002 * File   : $Source$
003 * Date   : $Date$
004 * Version: $Revision$
005 *
006 * This library is part of OpenCms -
007 * the Open Source Content Management System
008 *
009 * Copyright (C) 2002 - 2009 Alkacon Software (https://www.alkacon.com)
010 *
011 * This library is free software; you can redistribute it and/or
012 * modify it under the terms of the GNU Lesser General Public
013 * License as published by the Free Software Foundation; either
014 * version 2.1 of the License, or (at your option) any later version.
015 *
016 * This library is distributed in the hope that it will be useful,
017 * but WITHOUT ANY WARRANTY; without even the implied warranty of
018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
019 * Lesser General Public License for more details.
020 *
021 * For further information about Alkacon Software, please see the
022 * company website: https://www.alkacon.com
023 *
024 * For further information about OpenCms, please see the
025 * project website: https://www.opencms.org
026 *
027 * You should have received a copy of the GNU Lesser General Public
028 * License along with this library; if not, write to the Free Software
029 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
030 */
031
032package org.opencms.search;
033
034import org.opencms.configuration.CmsParameterConfiguration;
035import org.opencms.file.CmsObject;
036import org.opencms.file.CmsResource;
037import org.opencms.file.CmsResourceFilter;
038import org.opencms.main.CmsException;
039import org.opencms.main.CmsIllegalArgumentException;
040import org.opencms.main.CmsLog;
041import org.opencms.main.OpenCms;
042import org.opencms.report.I_CmsReport;
043import org.opencms.search.documents.I_CmsTermHighlighter;
044import org.opencms.search.extractors.CmsExtractionResult;
045import org.opencms.search.extractors.I_CmsExtractionResult;
046import org.opencms.search.fields.CmsLuceneFieldConfiguration;
047import org.opencms.search.fields.CmsSearchField;
048import org.opencms.search.fields.CmsSearchFieldConfiguration;
049import org.opencms.util.CmsFileUtil;
050import org.opencms.util.CmsStringUtil;
051
052import java.io.File;
053import java.io.IOException;
054import java.nio.file.Paths;
055import java.text.ParseException;
056import java.util.ArrayList;
057import java.util.Calendar;
058import java.util.Collections;
059import java.util.Date;
060import java.util.HashMap;
061import java.util.List;
062import java.util.Locale;
063import java.util.Map;
064import java.util.Set;
065
066import org.apache.commons.logging.Log;
067import org.apache.lucene.analysis.Analyzer;
068import org.apache.lucene.document.DateTools;
069import org.apache.lucene.document.Document;
070import org.apache.lucene.index.DirectoryReader;
071import org.apache.lucene.index.FieldInfo;
072import org.apache.lucene.index.IndexReader;
073import org.apache.lucene.index.IndexWriter;
074import org.apache.lucene.index.IndexWriterConfig;
075import org.apache.lucene.index.StoredFieldVisitor;
076import org.apache.lucene.index.Term;
077import org.apache.lucene.queryparser.classic.QueryParser;
078import org.apache.lucene.search.BooleanClause;
079import org.apache.lucene.search.BooleanClause.Occur;
080import org.apache.lucene.search.BooleanQuery;
081import org.apache.lucene.search.IndexSearcher;
082import org.apache.lucene.search.MatchAllDocsQuery;
083import org.apache.lucene.search.MultiTermQuery;
084import org.apache.lucene.search.Query;
085import org.apache.lucene.search.ScoreMode;
086import org.apache.lucene.search.Sort;
087import org.apache.lucene.search.SortField;
088import org.apache.lucene.search.TermQuery;
089import org.apache.lucene.search.TopDocs;
090import org.apache.lucene.search.similarities.Similarity;
091import org.apache.lucene.store.Directory;
092import org.apache.lucene.store.FSDirectory;
093import org.apache.lucene.store.IOContext;
094import org.apache.solr.uninverting.UninvertingReader;
095import org.apache.solr.uninverting.UninvertingReader.Type;
096
/**
 * Lucene based search index implementation.<p>
 */
100public class CmsSearchIndex extends A_CmsSearchIndex {
101
102    /** A constant for the full qualified name of the CmsSearchIndex class. */
103    public static final String A_PARAM_PREFIX = "org.opencms.search.CmsSearchIndex";
104
105    /** Constant for additional parameter to enable optimized full index regeneration (default: false). */
106    public static final String BACKUP_REINDEXING = A_PARAM_PREFIX + ".useBackupReindexing";
107
108    /** Look table to quickly zero-pad days / months in date Strings. */
109    public static final String[] DATES = new String[] {
110        "00",
111        "01",
112        "02",
113        "03",
114        "04",
115        "05",
116        "06",
117        "07",
118        "08",
119        "09",
120        "10",
121        "11",
122        "12",
123        "13",
124        "14",
125        "15",
126        "16",
127        "17",
128        "18",
129        "19",
130        "20",
131        "21",
132        "22",
133        "23",
134        "24",
135        "25",
136        "26",
137        "27",
138        "28",
139        "29",
140        "30",
141        "31"};
142
143    /** Constant for a field list that contains the "meta" field as well as the "content" field. */
144    public static final String[] DOC_META_FIELDS = new String[] {
145        CmsSearchField.FIELD_META,
146        CmsSearchField.FIELD_CONTENT};
147
148    /** Constant for additional parameter to enable excerpt creation (default: true). */
149    public static final String EXCERPT = A_PARAM_PREFIX + ".createExcerpt";
150
151    /** Constant for additional parameter for index content extraction. */
152    public static final String EXTRACT_CONTENT = A_PARAM_PREFIX + ".extractContent";
153
154    /** Constant for additional parameter to enable/disable language detection (default: false). */
155    public static final String IGNORE_EXPIRATION = A_PARAM_PREFIX + ".ignoreExpiration";
156
157    /** Constant for additional parameter to enable/disable language detection (default: false). */
158    public static final String LANGUAGEDETECTION = "search.solr.useLanguageDetection";
159
160    /** Constant for additional parameter for the Lucene index setting. */
161    public static final String LUCENE_AUTO_COMMIT = "lucene.AutoCommit";
162
163    /** Constant for additional parameter for the Lucene index setting. */
164    public static final String LUCENE_RAM_BUFFER_SIZE_MB = "lucene.RAMBufferSizeMB";
165
166    /** Constant for additional parameter for controlling how many hits are loaded at maximum (default: 1000). */
167    public static final String MAX_HITS = A_PARAM_PREFIX + ".maxHits";
168
169    /** Indicates how many hits are loaded at maximum by default. */
170    public static final int MAX_HITS_DEFAULT = 5000;
171
172    /** Constant for years max range span in document search. */
173    public static final int MAX_YEAR_RANGE = 25;
174
175    /** Constant for additional parameter to enable permission checks (default: true). */
176    public static final String PERMISSIONS = A_PARAM_PREFIX + ".checkPermissions";
177
178    /** Constant for additional parameter to set the thread priority during search. */
179    public static final String PRIORITY = A_PARAM_PREFIX + ".priority";
180
181    /** Constant for additional parameter to enable time range checks (default: true). */
182    public static final String TIME_RANGE = A_PARAM_PREFIX + ".checkTimeRange";
183
184    /**
185     * A stored field visitor, that does not return the large fields: "content" and "contentblob".<p>
186     */
187    protected static final StoredFieldVisitor VISITOR = new StoredFieldVisitor() {
188
189        /**
190         * @see org.apache.lucene.index.StoredFieldVisitor#needsField(org.apache.lucene.index.FieldInfo)
191         */
192        @Override
193        public Status needsField(FieldInfo fieldInfo) {
194
195            return !CmsSearchFieldConfiguration.LAZY_FIELDS.contains(fieldInfo.name) ? Status.YES : Status.NO;
196        }
197    };
198
    /** The log object for this class. */
    private static final Log LOG = CmsLog.getLog(CmsSearchIndex.class);

    /** The serial version id. */
    private static final long serialVersionUID = 8461682478204452718L;

    /** The configured Lucene analyzer used for this index, see {@link #getAnalyzer()} (not serialized). */
    private transient Analyzer m_analyzer;

    /** Indicates if backup re-indexing is used by this index, see {@link #BACKUP_REINDEXING}. */
    private boolean m_backupReindexing;

    /** The permission check mode for this index, see {@link #PERMISSIONS} (enabled by the default constructor). */
    private boolean m_checkPermissions;

    /** The time range check mode for this index, see {@link #TIME_RANGE} (disabled by the default constructor). */
    private boolean m_checkTimeRange;

    /** The excerpt mode for this index, see {@link #EXCERPT} (enabled by the default constructor). */
    private boolean m_createExcerpt;

    /** Map of display query filters to use (not serialized). */
    private transient Map<String, Query> m_displayFilters;

    /**
     * Signals whether expiration dates should be ignored when checking permissions or not.<p>
     * @see #IGNORE_EXPIRATION
     */
    private boolean m_ignoreExpiration;

    /** The Lucene index searcher to use, see {@link #getSearcher()} (not serialized). */
    private transient IndexSearcher m_indexSearcher;

    /**
     * The Lucene index RAM buffer size, see {@link IndexWriterConfig#setRAMBufferSizeMB(double)}.<p>
     * Stays <code>null</code> unless {@link #LUCENE_RAM_BUFFER_SIZE_MB} is configured with a valid number.
     */
    private Double m_luceneRAMBufferSizeMB;

    /** Indicates how many hits are loaded at maximum, initialized with {@link #MAX_HITS_DEFAULT}. */
    private int m_maxHits;

    /** The thread priority for a search, <code>-1</code> unless configured with {@link #PRIORITY}. */
    private int m_priority;

    /** Controls if a resource requires view permission to be displayed in the result list. */
    private boolean m_requireViewPermission;

    /** The cms specific Similarity implementation (not serialized). */
    private final transient Similarity m_sim = new CmsSearchSimilarity();
246
247    /**
248     * Default constructor only intended to be used by the XML configuration. <p>
249     *
250     * It is recommended to use the constructor <code>{@link #CmsSearchIndex(String)}</code>
251     * as it enforces the mandatory name argument. <p>
252     */
253    public CmsSearchIndex() {
254
255        super();
256        m_checkPermissions = true;
257        m_priority = -1;
258        m_createExcerpt = true;
259        m_maxHits = MAX_HITS_DEFAULT;
260        m_checkTimeRange = false;
261    }
262
263    /**
264     * Creates a new CmsSearchIndex with the given name.<p>
265     *
266     * @param name the system-wide unique name for the search index
267     *
268     * @throws CmsIllegalArgumentException if the given name is null, empty or already taken by another search index
269     */
270    public CmsSearchIndex(String name)
271    throws CmsIllegalArgumentException {
272
273        this();
274        setName(name);
275    }
276
277    /**
278     * Generates a list of date terms for the optimized date range search with "daily" granularity level.<p>
279     *
280     * How this works:<ul>
281     * <li>For each document, terms are added for the year, the month and the day the document
282     * was modified or created) in. So for example if a document is modified at February 02, 2009,
283     * then the following terms are stored for this document:
284     * "20090202", "200902" and "2009".</li>
285     * <li>In case a date range search is done, then all possible matches for the
286     * provided rage are created as search terms and matched with the document terms.</li>
287     * <li>Consider the following use case: You want to find out if a resource has been changed
288     * in the time between November 29, 2007 and March 01, 2009.
289     * One term to match is simply "2008" because if a document
290     * was modified in 2008, then it is clearly in the date range.
291     * Other terms are "200712", "200901" and "200902", because all documents
292     * modified in these months are also a certain matches.
293     * Finally we need to add terms for "20071129", "20071130" and "20090301" to match the days in the
294     * starting and final month.</li>
295     * </ul>
296     *
297     * @param startDate start date of the range to search in
298     * @param endDate end date of the range to search in
299     *
300     * @return a list of date terms for the optimized date range search
301     */
302    public static List<String> getDateRangeSpan(long startDate, long endDate) {
303
304        if (startDate > endDate) {
305            // switch so that the end is always before the start
306            long temp = endDate;
307            endDate = startDate;
308            startDate = temp;
309        }
310
311        List<String> result = new ArrayList<String>(100);
312
313        // initialize calendars from the time value
314        Calendar calStart = Calendar.getInstance(OpenCms.getLocaleManager().getTimeZone());
315        Calendar calEnd = Calendar.getInstance(calStart.getTimeZone());
316        calStart.setTimeInMillis(startDate);
317        calEnd.setTimeInMillis(endDate);
318
319        // get the required info to build the date range from the calendars
320        int startDay = calStart.get(Calendar.DAY_OF_MONTH);
321        int endDay = calEnd.get(Calendar.DAY_OF_MONTH);
322        int maxDayInStartMonth = calStart.getActualMaximum(Calendar.DAY_OF_MONTH);
323        int startMonth = calStart.get(Calendar.MONTH) + 1;
324        int endMonth = calEnd.get(Calendar.MONTH) + 1;
325        int startYear = calStart.get(Calendar.YEAR);
326        int endYear = calEnd.get(Calendar.YEAR);
327
328        // first add all full years in the date range
329        result.addAll(getYearSpan(startYear + 1, endYear - 1));
330
331        if (startYear != endYear) {
332            // different year, different month
333            result.addAll(getMonthSpan(startMonth + 1, 12, startYear));
334            result.addAll(getMonthSpan(1, endMonth - 1, endYear));
335            result.addAll(getDaySpan(startDay, maxDayInStartMonth, startMonth, startYear));
336            result.addAll(getDaySpan(1, endDay, endMonth, endYear));
337        } else {
338            if (startMonth != endMonth) {
339                // same year, different month
340                result.addAll(getMonthSpan(startMonth + 1, endMonth - 1, startYear));
341                result.addAll(getDaySpan(startDay, maxDayInStartMonth, startMonth, startYear));
342                result.addAll(getDaySpan(1, endDay, endMonth, endYear));
343            } else {
344                // same year, same month
345                result.addAll(getDaySpan(startDay, endDay, endMonth, endYear));
346            }
347        }
348
349        // sort the result, makes the range better readable in the debugger
350        Collections.sort(result);
351        return result;
352    }
353
354    /**
355     * Calculate a span of days in the given year and month for the optimized date range search.<p>
356     *
357     * The result will contain dates formatted like "yyyyMMDD", for example "20080131".<p>
358     *
359     * @param startDay the start day
360     * @param endDay the end day
361     * @param month the month
362     * @param year the year
363     *
364     * @return a span of days in the given year and month for the optimized date range search
365     */
366    private static List<String> getDaySpan(int startDay, int endDay, int month, int year) {
367
368        List<String> result = new ArrayList<String>();
369        String yearMonthStr = String.valueOf(year) + DATES[month];
370        for (int i = startDay; i <= endDay; i++) {
371            String dateStr = yearMonthStr + DATES[i];
372            result.add(dateStr);
373        }
374        return result;
375    }
376
377    /**
378     * Calculate a span of months in the given year for the optimized date range search.<p>
379     *
380     * The result will contain dates formatted like "yyyyMM", for example "200801".<p>
381     *
382     * @param startMonth the start month
383     * @param endMonth the end month
384     * @param year the year
385     *
386     * @return a span of months in the given year for the optimized date range search
387     */
388    private static List<String> getMonthSpan(int startMonth, int endMonth, int year) {
389
390        List<String> result = new ArrayList<String>();
391        String yearStr = String.valueOf(year);
392        for (int i = startMonth; i <= endMonth; i++) {
393            String dateStr = yearStr + DATES[i];
394            result.add(dateStr);
395        }
396        return result;
397    }
398
399    /**
400     * Calculate a span of years for the optimized date range search.<p>
401     *
402     * The result will contain dates formatted like "yyyy", for example "2008".<p>
403     *
404     * @param startYear the start year
405     * @param endYear the end year
406     *
407     * @return a span of years for the optimized date range search
408     */
409    private static List<String> getYearSpan(int startYear, int endYear) {
410
411        List<String> result = new ArrayList<String>();
412        for (int i = startYear; i <= endYear; i++) {
413            String dateStr = String.valueOf(i);
414            result.add(dateStr);
415        }
416        return result;
417    }
418
419    /**
420     * Adds a parameter.<p>
421     *
422     * @param key the key/name of the parameter
423     * @param value the value of the parameter
424     *
425     */
426    @Override
427    public void addConfigurationParameter(String key, String value) {
428
429        if (PERMISSIONS.equals(key)) {
430            m_checkPermissions = Boolean.valueOf(value).booleanValue();
431        } else if (EXTRACT_CONTENT.equals(key)) {
432            setExtractContent(Boolean.valueOf(value).booleanValue());
433        } else if (BACKUP_REINDEXING.equals(key)) {
434            m_backupReindexing = Boolean.valueOf(value).booleanValue();
435        } else if (LANGUAGEDETECTION.equals(key)) {
436            setLanguageDetection(Boolean.valueOf(value).booleanValue());
437        } else if (IGNORE_EXPIRATION.equals(key)) {
438            m_ignoreExpiration = Boolean.valueOf(value).booleanValue();
439        } else if (PRIORITY.equals(key)) {
440            m_priority = Integer.parseInt(value);
441            if (m_priority < Thread.MIN_PRIORITY) {
442                m_priority = Thread.MIN_PRIORITY;
443                LOG.error(
444                    Messages.get().getBundle().key(
445                        Messages.LOG_SEARCH_PRIORITY_TOO_LOW_2,
446                        value,
447                        Integer.valueOf(Thread.MIN_PRIORITY)));
448
449            } else if (m_priority > Thread.MAX_PRIORITY) {
450                m_priority = Thread.MAX_PRIORITY;
451                LOG.debug(
452                    Messages.get().getBundle().key(
453                        Messages.LOG_SEARCH_PRIORITY_TOO_HIGH_2,
454                        value,
455                        Integer.valueOf(Thread.MAX_PRIORITY)));
456            }
457        }
458
459        if (MAX_HITS.equals(key)) {
460            try {
461                m_maxHits = Integer.parseInt(value);
462            } catch (NumberFormatException e) {
463                LOG.error(Messages.get().getBundle().key(Messages.LOG_INVALID_PARAM_3, value, key, getName()));
464            }
465            if (m_maxHits < (MAX_HITS_DEFAULT / 100)) {
466                m_maxHits = MAX_HITS_DEFAULT;
467                LOG.error(Messages.get().getBundle().key(Messages.LOG_INVALID_PARAM_3, value, key, getName()));
468            }
469        } else if (TIME_RANGE.equals(key)) {
470            m_checkTimeRange = Boolean.valueOf(value).booleanValue();
471        } else if (CmsSearchIndex.EXCERPT.equals(key)) {
472            m_createExcerpt = Boolean.valueOf(value).booleanValue();
473
474        } else if (LUCENE_RAM_BUFFER_SIZE_MB.equals(key)) {
475            try {
476                m_luceneRAMBufferSizeMB = Double.valueOf(value);
477            } catch (NumberFormatException e) {
478                LOG.error(Messages.get().getBundle().key(Messages.LOG_INVALID_PARAM_3, value, key, getName()));
479            }
480        }
481    }
482
483    /**
484     * Creates an empty document that can be used by this search field configuration.<p>
485     *
486     * @param resource the resource to create the document for
487     *
488     * @return a new and empty document
489     */
490    public I_CmsSearchDocument createEmptyDocument(CmsResource resource) {
491
492        return new CmsLuceneDocument(new Document());
493    }
494
495    /**
496     * Returns the Lucene analyzer used for this index.<p>
497     *
498     * @return the Lucene analyzer used for this index
499     */
500    public Analyzer getAnalyzer() {
501
502        return m_analyzer;
503    }
504
505    /**
506     * @see org.opencms.configuration.I_CmsConfigurationParameterHandler#getConfiguration()
507     */
508    @Override
509    public CmsParameterConfiguration getConfiguration() {
510
511        CmsParameterConfiguration result = new CmsParameterConfiguration();
512        if (getPriority() > 0) {
513            result.put(PRIORITY, String.valueOf(m_priority));
514        }
515        if (!isExtractingContent()) {
516            result.put(EXTRACT_CONTENT, String.valueOf(isExtractingContent()));
517        }
518        if (!isCheckingPermissions()) {
519            result.put(PERMISSIONS, String.valueOf(m_checkPermissions));
520        }
521        if (isBackupReindexing()) {
522            result.put(BACKUP_REINDEXING, String.valueOf(m_backupReindexing));
523        }
524        if (isLanguageDetection()) {
525            result.put(LANGUAGEDETECTION, String.valueOf(isLanguageDetection()));
526        }
527        if (getMaxHits() != MAX_HITS_DEFAULT) {
528            result.put(MAX_HITS, String.valueOf(getMaxHits()));
529        }
530        if (!isCreatingExcerpt()) {
531            result.put(EXCERPT, String.valueOf(m_createExcerpt));
532        }
533        if (m_luceneRAMBufferSizeMB != null) {
534            result.put(LUCENE_RAM_BUFFER_SIZE_MB, String.valueOf(m_luceneRAMBufferSizeMB));
535        }
536        // always write time range check parameter because of logic change in OpenCms 8.0
537        result.put(TIME_RANGE, String.valueOf(m_checkTimeRange));
538        return result;
539    }
540
541    /**
542     * @see org.opencms.search.I_CmsSearchIndex#getContentIfUnchanged(org.opencms.file.CmsResource)
543     */
544    @Override
545    public I_CmsExtractionResult getContentIfUnchanged(CmsResource resource) {
546
547        // compare "date of last modification of content" from Lucene index and OpenCms VFS
548        // if this is identical, then the data from the Lucene index can be re-used
549        I_CmsSearchDocument oldDoc = getDocument(CmsSearchField.FIELD_PATH, resource.getRootPath());
550        // first check if the document is already in the index
551        if ((oldDoc != null) && (oldDoc.getFieldValueAsDate(CmsSearchField.FIELD_DATE_CONTENT) != null)) {
552            long contentDateIndex = oldDoc.getFieldValueAsDate(CmsSearchField.FIELD_DATE_CONTENT).getTime();
553            // now compare the date with the date stored in the resource
554            // we truncate to seconds, since the index stores no milliseconds
555            // and it seems practically irrelevant that a content is updated twice in a second.
556            if ((contentDateIndex / 1000L) == (resource.getDateContent() / 1000L)) {
557                // extract stored content blob from index
558                return CmsExtractionResult.fromBytes(oldDoc.getContentBlob());
559            }
560        }
561        return null;
562    }
563
564    /**
565     * Returns a document by document ID.<p>
566     *
567     * @param docId the id to get the document for
568     *
569     * @return the CMS specific document
570     */
571    public I_CmsSearchDocument getDocument(int docId) {
572
573        try {
574            IndexSearcher searcher = getSearcher();
575            return new CmsLuceneDocument(searcher.storedFields().document(docId));
576        } catch (IOException e) {
577            // ignore, return null and assume document was not found
578        }
579        return null;
580    }
581
582    /**
583     * Returns the Lucene document with the given root path from the index.<p>
584     *
585     * @param rootPath the root path of the document to get
586     *
587     * @return the Lucene document with the given root path from the index
588     *
589     * @deprecated Use {@link #getDocument(String, String)} instead and provide {@link org.opencms.search.fields.CmsLuceneField#FIELD_PATH} as field to search in
590     */
591    @Deprecated
592    public Document getDocument(String rootPath) {
593
594        if (getDocument(CmsSearchField.FIELD_PATH, rootPath) != null) {
595            return (Document)getDocument(CmsSearchField.FIELD_PATH, rootPath).getDocument();
596        }
597        return null;
598    }
599
600    /**
601     * Returns the first document where the given term matches the selected index field.<p>
602     *
603     * Use this method to search for documents which have unique field values, like a unique id.<p>
604     *
605     * @param field the field to search in
606     * @param term the term to search for
607     *
608     * @return the first document where the given term matches the selected index field
609     */
610    public I_CmsSearchDocument getDocument(String field, String term) {
611
612        Document result = null;
613        IndexSearcher searcher = getSearcher();
614        if (searcher != null) {
615            // search for an exact match on the selected field
616            Term resultTerm = new Term(field, term);
617            try {
618                TopDocs hits = searcher.search(new TermQuery(resultTerm), 1);
619                if (hits.scoreDocs.length > 0) {
620                    result = searcher.storedFields().document(hits.scoreDocs[0].doc);
621                }
622            } catch (IOException e) {
623                // ignore, return null and assume document was not found
624            }
625        }
626        if (result != null) {
627            return new CmsLuceneDocument(result);
628        }
629        return null;
630    }
631
632    /**
633     * Returns the language locale for the given resource in this index.<p>
634     *
635     * @param cms the current OpenCms user context
636     * @param resource the resource to check
637     * @param availableLocales a list of locales supported by the resource
638     *
639     * @return the language locale for the given resource in this index
640     */
641    @Override
642    public Locale getLocaleForResource(CmsObject cms, CmsResource resource, List<Locale> availableLocales) {
643
644        Locale result;
645        List<Locale> defaultLocales = OpenCms.getLocaleManager().getDefaultLocales(cms, resource);
646        List<Locale> locales = availableLocales;
647        if ((locales == null) || (locales.size() == 0)) {
648            locales = defaultLocales;
649        }
650        result = OpenCms.getLocaleManager().getBestMatchingLocale(getLocale(), defaultLocales, locales);
651        return result;
652    }
653
654    /**
655    * Returns the language locale of the index as a String.<p>
656    *
657    * @return the language locale of the index as a String
658    *
659    * @see #getLocale()
660    */
661    public String getLocaleString() {
662
663        return getLocale().toString();
664    }
665
666    /**
667     * Indicates the number of how many hits are loaded at maximum.<p>
668     *
669     * The number of maximum documents to load from the index
670     * must be specified. The default of this setting is {@link CmsSearchIndex#MAX_HITS_DEFAULT} (5000).
671     * This means that at maximum 5000 results are returned from the index.
672     * Please note that this number may be reduced further because of OpenCms read permissions
673     * or per-user file visibility settings not controlled in the index.<p>
674     *
675     * @return the number of how many hits are loaded at maximum
676     *
677     * @since 7.5.1
678     */
679    public int getMaxHits() {
680
681        return m_maxHits;
682    }
683
684    /**
685     * Returns the path where this index stores it's data in the "real" file system.<p>
686     *
687     * @return the path where this index stores it's data in the "real" file system
688     */
689    @Override
690    public String getPath() {
691
692        if (super.getPath() == null) {
693            setPath(generateIndexDirectory());
694        }
695        return super.getPath();
696    }
697
698    /**
699     * Returns the Thread priority for this search index.<p>
700     *
701     * @return the Thread priority for this search index
702     */
703    public int getPriority() {
704
705        return m_priority;
706    }
707
708    /**
709     * Returns the Lucene index searcher used for this search index.<p>
710     *
711     * @return the Lucene index searcher used for this search index
712     */
713    public IndexSearcher getSearcher() {
714
715        return m_indexSearcher;
716    }
717
718    /**
719     * @see org.opencms.search.A_CmsSearchIndex#initialize()
720     */
721    @Override
722    public void initialize() throws CmsSearchException {
723
724        super.initialize();
725
726        // get the configured analyzer and apply the the field configuration analyzer wrapper
727        @SuppressWarnings("resource")
728        Analyzer baseAnalyzer = OpenCms.getSearchManager().getAnalyzer(getLocale());
729
730        if (getFieldConfiguration() instanceof CmsLuceneFieldConfiguration) {
731            CmsLuceneFieldConfiguration fc = (CmsLuceneFieldConfiguration)getFieldConfiguration();
732            setAnalyzer(fc.getAnalyzer(baseAnalyzer));
733        }
734    }
735
736    /**
737     * Returns <code>true</code> if backup re-indexing is done by this index.<p>
738     *
739     * This is an optimization method by which the old extracted content is
740     * reused in order to save performance when re-indexing.<p>
741     *
742     * @return  <code>true</code> if backup re-indexing is done by this index
743     *
744     * @since 7.5.1
745     */
746    public boolean isBackupReindexing() {
747
748        return m_backupReindexing;
749    }
750
751    /**
752     * Returns <code>true</code> if permissions are checked for search results by this index.<p>
753     *
754     * If permission checks are not required, they can be turned off in the index search configuration parameters
755     * in <code>opencms-search.xml</code>. Not checking permissions will improve performance.<p>
756     *
757     * This is can be of use in scenarios when you know that all search results are always readable,
758     * which is usually true for public websites that do not have personalized accounts.<p>
759     *
760     * Please note that even if a result is returned where the current user has no read permissions,
761     * the user can not actually access this document. It will only appear in the search result list,
762     * but if the user clicks the link to open the document he will get an error.<p>
763     *
764     *
765     * @return <code>true</code> if permissions are checked for search results by this index
766     */
767    public boolean isCheckingPermissions() {
768
769        return m_checkPermissions;
770    }
771
772    /**
773     * Returns <code>true</code> if the document time range is checked with a granularity level of seconds
774     * for search results by this index.<p>
775     *
776     * Since OpenCms 8.0, time range checks are always done if {@link CmsSearchParameters#setMinDateLastModified(long)}
777     * or any of the corresponding methods are used.
778     * This is done very efficiently using optimized Lucene filers.
779     * However, the granularity of these checks are done only on a daily
780     * basis, which means that you can only find "changes made yesterday" but not "changes made last hour".
781     * For normal limitation of search results, a daily granularity should be enough.<p>
782     *
783     * If time range checks with a granularity level of seconds are required,
784     * they can be turned on in the index search configuration parameters
785     * in <code>opencms-search.xml</code>.
786     * Not checking the time range  with a granularity level of seconds will improve performance.<p>
787     *
788     * By default the granularity level of seconds is turned off since OpenCms 8.0<p>
789     *
790     * @return <code>true</code> if the document time range is checked  with a granularity level of seconds for search results by this index
791     */
792    public boolean isCheckingTimeRange() {
793
794        return m_checkTimeRange;
795    }
796
797    /**
798     * Returns the checkPermissions.<p>
799     *
800     * @return the checkPermissions
801     */
802    public boolean isCheckPermissions() {
803
804        return m_checkPermissions;
805    }
806
807    /**
808     * Returns <code>true</code> if an excerpt is generated by this index.<p>
809     *
810     * If no except is required, generation can be turned off in the index search configuration parameters
811     * in <code>opencms-search.xml</code>. Not generating an excerpt will improve performance.<p>
812     *
813     * @return <code>true</code> if an excerpt is generated by this index
814     */
815    public boolean isCreatingExcerpt() {
816
817        return m_createExcerpt;
818    }
819
820    /**
821     * Returns the ignoreExpiration.<p>
822     *
823     * @return the ignoreExpiration
824     */
825    public boolean isIgnoreExpiration() {
826
827        return m_ignoreExpiration;
828    }
829
830    /**
831     * @see org.opencms.search.A_CmsSearchIndex#isInitialized()
832     */
833    @Override
834    public boolean isInitialized() {
835
836        return super.isInitialized() && (null != getPath());
837    }
838
839    /**
840     * Returns <code>true</code> if a resource requires read permission to be included in the result list.<p>
841     *
842     * @return <code>true</code> if a resource requires read permission to be included in the result list
843     */
844    public boolean isRequireViewPermission() {
845
846        return m_requireViewPermission;
847    }
848
849    /**
850     * @see org.opencms.search.A_CmsSearchIndex#onIndexChanged(boolean)
851     */
852    @Override
853    public void onIndexChanged(boolean force) {
854
855        if (force) {
856            indexSearcherOpen(getPath());
857        } else {
858            indexSearcherUpdate();
859        }
860    }
861
862    /**
863     * Performs a search on the index within the given fields.<p>
864     *
865     * The result is returned as List with entries of type I_CmsSearchResult.<p>
866     *
867     * @param cms the current user's Cms object
868     * @param params the parameters to use for the search
869     *
870     * @return the List of results found or an empty list
871     *
872     * @throws CmsSearchException if something goes wrong
873     */
874    public CmsSearchResultList search(CmsObject cms, CmsSearchParameters params) throws CmsSearchException {
875
876        long timeTotal = -System.currentTimeMillis();
877        long timeLucene;
878        long timeResultProcessing;
879
880        if (LOG.isDebugEnabled()) {
881            LOG.debug(Messages.get().getBundle().key(Messages.LOG_SEARCH_PARAMS_2, params, getName()));
882        }
883
884        // the hits found during the search
885        TopDocs hits;
886
887        // storage for the results found
888        CmsSearchResultList searchResults = new CmsSearchResultList();
889
890        int previousPriority = Thread.currentThread().getPriority();
891
892        try {
893            // copy the user OpenCms context
894            CmsObject searchCms = OpenCms.initCmsObject(cms);
895
896            if (getPriority() > 0) {
897                // change thread priority in order to reduce search impact on overall system performance
898                Thread.currentThread().setPriority(getPriority());
899            }
900
901            // change the project
902            searchCms.getRequestContext().setCurrentProject(searchCms.readProject(getProject()));
903
904            timeLucene = -System.currentTimeMillis();
905
906            // several search options are searched using filters
907            BooleanQuery.Builder builder = new BooleanQuery.Builder();
908            // append root path filter
909            builder = appendPathFilter(searchCms, builder, params.getRoots());
910            // append category filter
911            builder = appendCategoryFilter(searchCms, builder, params.getCategories());
912            // append resource type filter
913            builder = appendResourceTypeFilter(searchCms, builder, params.getResourceTypes());
914
915            // append date last modified filter
916            builder = appendDateLastModifiedFilter(
917                builder,
918                params.getMinDateLastModified(),
919                params.getMaxDateLastModified());
920            // append date created filter
921            builder = appendDateCreatedFilter(builder, params.getMinDateCreated(), params.getMaxDateCreated());
922
923            // the search query to use, will be constructed in the next lines
924            Query query = null;
925            // store separate fields query for excerpt highlighting
926            Query fieldsQuery = null;
927
928            // get an index searcher that is certainly up to date
929            indexSearcherUpdate();
930            IndexSearcher searcher = getSearcher();
931
932            if (!params.isIgnoreQuery()) {
933                // since OpenCms 8 the query can be empty in which case only filters are used for the result
934                if (params.getFieldQueries() != null) {
935                    // each field has an individual query
936                    BooleanQuery.Builder mustOccur = null;
937                    BooleanQuery.Builder shouldOccur = null;
938                    for (CmsSearchParameters.CmsSearchFieldQuery fq : params.getFieldQueries()) {
939                        // add one sub-query for each defined field
940                        QueryParser p = new QueryParser(fq.getFieldName(), getAnalyzer());
941                        // first generate the combined keyword query
942                        Query keywordQuery = null;
943                        if (fq.getSearchTerms().size() == 1) {
944                            // this is just a single size keyword list
945                            keywordQuery = p.parse(fq.getSearchTerms().get(0));
946                        } else {
947                            // multiple size keyword list
948                            BooleanQuery.Builder keywordListQuery = new BooleanQuery.Builder();
949                            for (String keyword : fq.getSearchTerms()) {
950                                keywordListQuery.add(p.parse(keyword), fq.getTermOccur());
951                            }
952                            keywordQuery = keywordListQuery.build();
953                        }
954                        if (BooleanClause.Occur.SHOULD.equals(fq.getOccur())) {
955                            if (shouldOccur == null) {
956                                shouldOccur = new BooleanQuery.Builder();
957                            }
958                            shouldOccur.add(keywordQuery, fq.getOccur());
959                        } else {
960                            if (mustOccur == null) {
961                                mustOccur = new BooleanQuery.Builder();
962                            }
963                            mustOccur.add(keywordQuery, fq.getOccur());
964                        }
965                    }
966                    BooleanQuery.Builder booleanFieldsQuery = new BooleanQuery.Builder();
967                    if (mustOccur != null) {
968                        booleanFieldsQuery.add(mustOccur.build(), BooleanClause.Occur.MUST);
969                    }
970                    if (shouldOccur != null) {
971                        booleanFieldsQuery.add(shouldOccur.build(), BooleanClause.Occur.MUST);
972                    }
973                    fieldsQuery = searcher.rewrite(booleanFieldsQuery.build());
974                } else if ((params.getFields() != null) && (params.getFields().size() > 0)) {
975                    // no individual field queries have been defined, so use one query for all fields
976                    BooleanQuery.Builder booleanFieldsQuery = new BooleanQuery.Builder();
977                    // this is a "regular" query over one or more fields
978                    // add one sub-query for each of the selected fields, e.g. "content", "title" etc.
979                    for (int i = 0; i < params.getFields().size(); i++) {
980                        QueryParser p = new QueryParser(params.getFields().get(i), getAnalyzer());
981                        p.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
982                        booleanFieldsQuery.add(p.parse(params.getQuery()), BooleanClause.Occur.SHOULD);
983                    }
984                    fieldsQuery = searcher.rewrite(booleanFieldsQuery.build());
985                } else {
986                    // if no fields are provided, just use the "content" field by default
987                    QueryParser p = new QueryParser(CmsSearchField.FIELD_CONTENT, getAnalyzer());
988                    fieldsQuery = searcher.rewrite(p.parse(params.getQuery()));
989                }
990
991                // finally set the main query to the fields query
992                // please note that we still need both variables in case the query is a MatchAllDocsQuery - see below
993                query = fieldsQuery;
994            }
995
996            if (LOG.isDebugEnabled()) {
997                LOG.debug(Messages.get().getBundle().key(Messages.LOG_BASE_QUERY_1, query));
998            }
999
1000            if (query == null) {
1001                // if no text query is set, then we match all documents
1002                query = new MatchAllDocsQuery();
1003            }
1004
1005            // build the final query
1006            final BooleanQuery.Builder finalQueryBuilder = new BooleanQuery.Builder();
1007            finalQueryBuilder.add(query, BooleanClause.Occur.MUST);
1008            finalQueryBuilder.add(builder.build(), BooleanClause.Occur.FILTER);
1009            final BooleanQuery finalQuery = finalQueryBuilder.build();
1010
1011            // collect the categories
1012            CmsSearchCategoryCollector categoryCollector;
1013            if (params.isCalculateCategories()) {
1014                // USE THIS OPTION WITH CAUTION
1015                // this may slow down searched by an order of magnitude
1016                categoryCollector = new CmsSearchCategoryCollector(searcher);
1017                // perform a first search to collect the categories
1018                searcher.search(finalQuery, categoryCollector);
1019                // store the result
1020                searchResults.setCategories(categoryCollector.getCategoryCountResult());
1021            }
1022
1023            // get maxScore first, since Lucene 8, it's not computed automatically anymore
1024            TopDocs scoreHits = searcher.search(query, 1);
1025            float maxScore = scoreHits.scoreDocs.length == 0 ? Float.NaN : scoreHits.scoreDocs[0].score;
1026            // perform the search operation
1027            if ((params.getSort() == null) || (params.getSort() == CmsSearchParameters.SORT_DEFAULT)) {
1028                // apparently scoring is always enabled by Lucene if no sort order is provided
1029                hits = searcher.search(finalQuery, getMaxHits());
1030            } else {
1031                // if  a sort order is provided, we must check if scoring must be calculated by the searcher
1032                boolean isSortScore = isSortScoring(searcher, params.getSort());
1033                hits = searcher.search(finalQuery, getMaxHits(), params.getSort(), isSortScore);
1034            }
1035
1036            timeLucene += System.currentTimeMillis();
1037            timeResultProcessing = -System.currentTimeMillis();
1038
1039            if (hits != null) {
1040                long hitCount = hits.totalHits.value() > hits.scoreDocs.length
1041                ? hits.scoreDocs.length
1042                : hits.totalHits.value();
1043                int page = params.getSearchPage();
1044                long start = -1, end = -1;
1045                if ((params.getMatchesPerPage() > 0) && (page > 0) && (hitCount > 0)) {
1046                    // calculate the final size of the search result
1047                    start = params.getMatchesPerPage() * (page - 1);
1048                    end = start + params.getMatchesPerPage();
1049                    // ensure that both i and n are inside the range of foundDocuments.size()
1050                    start = (start > hitCount) ? hitCount : start;
1051                    end = (end > hitCount) ? hitCount : end;
1052                } else {
1053                    // return all found documents in the search result
1054                    start = 0;
1055                    end = hitCount;
1056                }
1057
1058                Set<String> returnFields = ((CmsLuceneFieldConfiguration)getFieldConfiguration()).getReturnFields();
1059                Set<String> excerptFields = ((CmsLuceneFieldConfiguration)getFieldConfiguration()).getExcerptFields();
1060
1061                long visibleHitCount = hitCount;
1062                for (int i = 0, cnt = 0; (i < hitCount) && (cnt < end); i++) {
1063                    try {
1064                        Document doc = searcher.storedFields().document(hits.scoreDocs[i].doc, returnFields);
1065                        I_CmsSearchDocument searchDoc = new CmsLuceneDocument(doc);
1066                        searchDoc.setScore(hits.scoreDocs[i].score);
1067                        if ((isInTimeRange(doc, params)) && (hasReadPermission(searchCms, searchDoc))) {
1068                            // user has read permission
1069                            if (cnt >= start) {
1070                                // do not use the resource to obtain the raw content, read it from the lucene document!
1071                                String excerpt = null;
1072                                if (isCreatingExcerpt() && (fieldsQuery != null)) {
1073                                    Document exDoc = searcher.storedFields().document(
1074                                        hits.scoreDocs[i].doc,
1075                                        excerptFields);
1076                                    I_CmsTermHighlighter highlighter = OpenCms.getSearchManager().getHighlighter();
1077                                    excerpt = highlighter.getExcerpt(exDoc, this, params, fieldsQuery, getAnalyzer());
1078                                }
1079                                int score = Math.round(
1080                                    (maxScore != Float.NaN ? (hits.scoreDocs[i].score / maxScore) * 100f : 0));
1081                                searchResults.add(new CmsSearchResult(score, doc, excerpt));
1082                            }
1083                            cnt++;
1084                        } else {
1085                            visibleHitCount--;
1086                        }
1087                    } catch (Exception e) {
1088                        // should not happen, but if it does we want to go on with the next result nevertheless
1089                        if (LOG.isWarnEnabled()) {
1090                            LOG.warn(Messages.get().getBundle().key(Messages.LOG_RESULT_ITERATION_FAILED_0), e);
1091                        }
1092                    }
1093                }
1094
1095                // save the total count of search results
1096                searchResults.setHitCount((int)visibleHitCount);
1097            } else {
1098                searchResults.setHitCount(0);
1099            }
1100
1101            timeResultProcessing += System.currentTimeMillis();
1102        } catch (RuntimeException e) {
1103            throw new CmsSearchException(Messages.get().container(Messages.ERR_SEARCH_PARAMS_1, params), e);
1104        } catch (Exception e) {
1105            throw new CmsSearchException(Messages.get().container(Messages.ERR_SEARCH_PARAMS_1, params), e);
1106        } finally {
1107
1108            // re-set thread to previous priority
1109            Thread.currentThread().setPriority(previousPriority);
1110        }
1111
1112        if (LOG.isDebugEnabled()) {
1113            timeTotal += System.currentTimeMillis();
1114            Object[] logParams = new Object[] {
1115                Long.valueOf(hits == null ? 0 : hits.totalHits.value()),
1116                Long.valueOf(timeTotal),
1117                Long.valueOf(timeLucene),
1118                Long.valueOf(timeResultProcessing)};
1119            LOG.debug(Messages.get().getBundle().key(Messages.LOG_STAT_RESULTS_TIME_4, logParams));
1120        }
1121
1122        return searchResults;
1123    }
1124
1125    /**
1126     * Sets the Lucene analyzer used for this index.<p>
1127     *
1128     * @param analyzer the Lucene analyzer to set
1129     */
1130    public void setAnalyzer(Analyzer analyzer) {
1131
1132        m_analyzer = analyzer;
1133    }
1134
1135    /**
1136     * Sets the checkPermissions.<p>
1137     *
1138     * @param checkPermissions the checkPermissions to set
1139     */
1140    public void setCheckPermissions(boolean checkPermissions) {
1141
1142        m_checkPermissions = checkPermissions;
1143    }
1144
1145    /**
1146     * Sets the ignoreExpiration.<p>
1147     *
1148     * @param ignoreExpiration the ignoreExpiration to set
1149     */
1150    public void setIgnoreExpiration(boolean ignoreExpiration) {
1151
1152        m_ignoreExpiration = ignoreExpiration;
1153    }
1154
1155    /**
1156     * Sets the number of how many hits are loaded at maximum.<p>
1157     *
1158     * This must be set at least to 50, or this setting is ignored.<p>
1159     *
1160     * @param maxHits the number of how many hits are loaded at maximum to set
1161     *
1162     * @see #getMaxHits()
1163     *
1164     * @since 7.5.1
1165     */
1166    public void setMaxHits(int maxHits) {
1167
1168        if (m_maxHits >= (MAX_HITS_DEFAULT / 100)) {
1169            m_maxHits = maxHits;
1170        }
1171    }
1172
1173    /**
1174     * Controls if a resource requires view permission to be displayed in the result list.<p>
1175     *
1176     * By default this is <code>false</code>.<p>
1177     *
1178     * @param requireViewPermission controls if a resource requires view permission to be displayed in the result list
1179     */
1180    public void setRequireViewPermission(boolean requireViewPermission) {
1181
1182        m_requireViewPermission = requireViewPermission;
1183    }
1184
1185    /**
1186     * Shuts down the search index.<p>
1187     *
1188     * This will close the local Lucene index searcher instance.<p>
1189     */
1190    @Override
1191    public void shutDown() {
1192
1193        super.shutDown();
1194        indexSearcherClose();
1195        if (m_analyzer != null) {
1196            m_analyzer.close();
1197        }
1198        if (CmsLog.INIT.isInfoEnabled()) {
1199            CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SHUTDOWN_INDEX_1, getName()));
1200        }
1201    }
1202
1203    /**
1204     * Returns the name (<code>{@link #getName()}</code>) of this search index.<p>
1205     *
1206     * @return the name (<code>{@link #getName()}</code>) of this search index
1207     *
1208     * @see java.lang.Object#toString()
1209     */
1210    @Override
1211    public String toString() {
1212
1213        return getName();
1214    }
1215
1216    /**
1217     * Appends the a category filter to the given filter clause that matches all given categories.<p>
1218     *
1219     * In case the provided List is null or empty, the original filter is left unchanged.<p>
1220     *
1221     * The original filter parameter is extended and also provided as return value.<p>
1222     *
1223     * @param cms the current OpenCms search context
1224     * @param filter the filter to extend
1225     * @param categories the categories that will compose the filter
1226     *
1227     * @return the extended filter clause
1228     */
1229    protected BooleanQuery.Builder appendCategoryFilter(
1230        CmsObject cms,
1231        BooleanQuery.Builder filter,
1232        List<String> categories) {
1233
1234        if ((categories != null) && (categories.size() > 0)) {
1235            // add query categories (if required)
1236
1237            // categories are indexed as lower-case strings
1238            // @see org.opencms.search.fields.CmsSearchFieldConfiguration#appendCategories
1239            List<String> lowerCaseCategories = new ArrayList<String>();
1240            for (String category : categories) {
1241                lowerCaseCategories.add(category.toLowerCase());
1242            }
1243            filter.add(
1244                new BooleanClause(
1245                    getMultiTermQueryFilter(CmsSearchField.FIELD_CATEGORY, lowerCaseCategories),
1246                    BooleanClause.Occur.MUST));
1247        }
1248
1249        return filter;
1250    }
1251
1252    /**
1253     * Appends a date of creation filter to the given filter clause that matches the
1254     * given time range.<p>
1255     *
1256     * If the start time is equal to {@link Long#MIN_VALUE} and the end time is equal to {@link Long#MAX_VALUE}
1257     * than the original filter is left unchanged.<p>
1258     *
1259     * The original filter parameter is extended and also provided as return value.<p>
1260     *
1261     * @param filter the filter to extend
1262     * @param startTime start time of the range to search in
1263     * @param endTime end time of the range to search in
1264     *
1265     * @return the extended filter clause
1266     */
1267    protected BooleanQuery.Builder appendDateCreatedFilter(BooleanQuery.Builder filter, long startTime, long endTime) {
1268
1269        // create special optimized sub-filter for the date last modified search
1270        Query dateFilter = createDateRangeFilter(CmsSearchField.FIELD_DATE_CREATED_LOOKUP, startTime, endTime);
1271        if (dateFilter != null) {
1272            // extend main filter with the created date filter
1273            filter.add(new BooleanClause(dateFilter, BooleanClause.Occur.MUST));
1274        }
1275
1276        return filter;
1277    }
1278
1279    /**
1280     * Appends a date of last modification filter to the given filter clause that matches the
1281     * given time range.<p>
1282     *
1283     * If the start time is equal to {@link Long#MIN_VALUE} and the end time is equal to {@link Long#MAX_VALUE}
1284     * than the original filter is left unchanged.<p>
1285     *
1286     * The original filter parameter is extended and also provided as return value.<p>
1287     *
1288     * @param filter the filter to extend
1289     * @param startTime start time of the range to search in
1290     * @param endTime end time of the range to search in
1291     *
1292     * @return the extended filter clause
1293     */
1294    protected BooleanQuery.Builder appendDateLastModifiedFilter(
1295        BooleanQuery.Builder filter,
1296        long startTime,
1297        long endTime) {
1298
1299        // create special optimized sub-filter for the date last modified search
1300        Query dateFilter = createDateRangeFilter(CmsSearchField.FIELD_DATE_LASTMODIFIED_LOOKUP, startTime, endTime);
1301        if (dateFilter != null) {
1302            // extend main filter with the created date filter
1303            filter.add(new BooleanClause(dateFilter, BooleanClause.Occur.MUST));
1304        }
1305
1306        return filter;
1307    }
1308
1309    /**
1310     * Appends the a VFS path filter to the given filter clause that matches all given root paths.<p>
1311     *
1312     * In case the provided List is null or empty, the current request context site root is appended.<p>
1313     *
1314     * The original filter parameter is extended and also provided as return value.<p>
1315     *
1316     * @param cms the current OpenCms search context
1317     * @param filter the filter to extend
1318     * @param roots the VFS root paths that will compose the filter
1319     *
1320     * @return the extended filter clause
1321     */
1322    protected BooleanQuery.Builder appendPathFilter(CmsObject cms, BooleanQuery.Builder filter, List<String> roots) {
1323
1324        // complete the search root
1325        List<Term> terms = new ArrayList<Term>();
1326        if ((roots != null) && (roots.size() > 0)) {
1327            // add the all configured search roots with will request context
1328            for (int i = 0; i < roots.size(); i++) {
1329                String searchRoot = cms.getRequestContext().addSiteRoot(roots.get(i));
1330                extendPathFilter(terms, searchRoot);
1331            }
1332        } else {
1333            // use the current site root as the search root
1334            extendPathFilter(terms, cms.getRequestContext().getSiteRoot());
1335            // also add the shared folder (v 8.0)
1336            if (OpenCms.getSiteManager().getSharedFolder() != null) {
1337                extendPathFilter(terms, OpenCms.getSiteManager().getSharedFolder());
1338            }
1339        }
1340
1341        // add the calculated path filter for the root path
1342        BooleanQuery.Builder build = new BooleanQuery.Builder();
1343        terms.forEach(term -> build.add(new TermQuery(term), Occur.SHOULD));
1344        filter.add(new BooleanClause(build.build(), BooleanClause.Occur.MUST));
1345        return filter;
1346    }
1347
1348    /**
1349     * Appends the a resource type filter to the given filter clause that matches all given resource types.<p>
1350     *
1351     * In case the provided List is null or empty, the original filter is left unchanged.<p>
1352     *
1353     * The original filter parameter is extended and also provided as return value.<p>
1354     *
1355     * @param cms the current OpenCms search context
1356     * @param filter the filter to extend
1357     * @param resourceTypes the resource types that will compose the filter
1358     *
1359     * @return the extended filter clause
1360     */
1361    protected BooleanQuery.Builder appendResourceTypeFilter(
1362        CmsObject cms,
1363        BooleanQuery.Builder filter,
1364        List<String> resourceTypes) {
1365
1366        if ((resourceTypes != null) && (resourceTypes.size() > 0)) {
1367            // add query resource types (if required)
1368            filter.add(
1369                new BooleanClause(
1370                    getMultiTermQueryFilter(CmsSearchField.FIELD_TYPE, resourceTypes),
1371                    BooleanClause.Occur.MUST));
1372        }
1373
1374        return filter;
1375    }
1376
1377    /**
1378     * Creates an optimized date range filter for the date of last modification or creation.<p>
1379     *
1380     * If the start date is equal to {@link Long#MIN_VALUE} and the end date is equal to {@link Long#MAX_VALUE}
1381     * than <code>null</code> is returned.<p>
1382     *
1383     * @param fieldName the name of the field to search
1384     * @param startTime start time of the range to search in
1385     * @param endTime end time of the range to search in
1386     *
1387     * @return an optimized date range filter for the date of last modification or creation
1388     */
1389    protected Query createDateRangeFilter(String fieldName, long startTime, long endTime) {
1390
1391        Query filter = null;
1392        if ((startTime != Long.MIN_VALUE) || (endTime != Long.MAX_VALUE)) {
1393            // a date range has been set for this document search
1394            if (startTime == Long.MIN_VALUE) {
1395                // default start will always be "yyyy1231" in order to reduce term size
1396                Calendar cal = Calendar.getInstance(OpenCms.getLocaleManager().getTimeZone());
1397                cal.setTimeInMillis(endTime);
1398                cal.set(cal.get(Calendar.YEAR) - MAX_YEAR_RANGE, 11, 31, 0, 0, 0);
1399                startTime = cal.getTimeInMillis();
1400            } else if (endTime == Long.MAX_VALUE) {
1401                // default end will always be "yyyy0101" in order to reduce term size
1402                Calendar cal = Calendar.getInstance(OpenCms.getLocaleManager().getTimeZone());
1403                cal.setTimeInMillis(startTime);
1404                cal.set(cal.get(Calendar.YEAR) + MAX_YEAR_RANGE, 0, 1, 0, 0, 0);
1405                endTime = cal.getTimeInMillis();
1406            }
1407
1408            // get the list of all possible date range options
1409            List<String> dateRange = getDateRangeSpan(startTime, endTime);
1410            List<Term> terms = new ArrayList<Term>();
1411            for (String range : dateRange) {
1412                terms.add(new Term(fieldName, range));
1413            }
1414            // create the filter for the date
1415            BooleanQuery.Builder build = new BooleanQuery.Builder();
1416            terms.forEach(term -> build.add(new TermQuery(term), Occur.SHOULD));
1417            filter = build.build();
1418        }
1419        return filter;
1420    }
1421
1422    /**
1423     * Creates a backup of this index for optimized re-indexing of the whole content.<p>
1424     *
1425     * @return the path to the backup folder, or <code>null</code> in case no backup was created
1426     */
1427    protected String createIndexBackup() {
1428
1429        if (!isBackupReindexing()) {
1430            // if no backup is generated we don't need to do anything
1431            return null;
1432        }
1433
1434        // check if the target directory already exists
1435        File file = new File(getPath());
1436        if (!file.exists()) {
1437            // index does not exist yet, so we can't backup it
1438            return null;
1439        }
1440        String backupPath = getPath() + "_backup";
1441        FSDirectory oldDir = null;
1442        FSDirectory newDir = null;
1443        try {
1444            // open file directory for Lucene
1445            oldDir = FSDirectory.open(file.toPath());
1446            newDir = FSDirectory.open(Paths.get(backupPath));
1447            for (String fileName : oldDir.listAll()) {
1448                newDir.copyFrom(oldDir, fileName, fileName, IOContext.DEFAULT);
1449            }
1450        } catch (Exception e) {
1451            LOG.error(
1452                Messages.get().getBundle().key(Messages.LOG_IO_INDEX_BACKUP_CREATE_3, getName(), getPath(), backupPath),
1453                e);
1454            backupPath = null;
1455        } finally {
1456            if (oldDir != null) {
1457                try {
1458                    oldDir.close();
1459                } catch (IOException e) {
1460                    e.printStackTrace();
1461                }
1462            }
1463            if (newDir != null) {
1464                try {
1465                    newDir.close();
1466                } catch (IOException e) {
1467                    e.printStackTrace();
1468                }
1469            }
1470        }
1471        return backupPath;
1472    }
1473
1474    /**
1475     * Creates a new index writer.<p>
1476     *
1477     * @param create if <code>true</code> a whole new index is created, if <code>false</code> an existing index is updated
1478     * @param report the report
1479     *
1480     * @return the created new index writer
1481     *
1482     * @throws CmsIndexException in case the writer could not be created
1483     *
1484     * @see #getIndexWriter(I_CmsReport, boolean)
1485     */
1486    @Override
1487    protected I_CmsIndexWriter createIndexWriter(boolean create, I_CmsReport report) throws CmsIndexException {
1488
1489        IndexWriter indexWriter = null;
1490        FSDirectory dir = null;
1491        try {
1492            File f = new File(getPath());
1493            if (!f.exists()) {
1494                f = f.getParentFile();
1495                if ((f != null) && (!f.exists())) {
1496                    f.mkdirs();
1497                }
1498
1499                create = true;
1500            }
1501
1502            dir = FSDirectory.open(Paths.get(getPath()));
1503            IndexWriterConfig indexConfig = new IndexWriterConfig(getAnalyzer());
1504            //indexConfig.setMergePolicy(mergePolicy);
1505
1506            if (m_luceneRAMBufferSizeMB != null) {
1507                indexConfig.setRAMBufferSizeMB(m_luceneRAMBufferSizeMB.doubleValue());
1508            }
1509            if (create) {
1510                indexConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
1511            } else {
1512                indexConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
1513            }
1514            // register the modified default similarity implementation
1515            indexConfig.setSimilarity(m_sim);
1516
1517            indexWriter = new IndexWriter(dir, indexConfig);
1518        } catch (Exception e) {
1519            if (dir != null) {
1520                try {
1521                    dir.close();
1522                } catch (IOException e1) {
1523                    // TODO Auto-generated catch block
1524                    e1.printStackTrace();
1525                }
1526            }
1527            if (indexWriter != null) {
1528                try {
1529                    indexWriter.close();
1530                } catch (IOException closeExeception) {
1531                    throw new CmsIndexException(
1532                        Messages.get().container(Messages.ERR_IO_INDEX_WRITER_OPEN_2, getPath(), getName()),
1533                        e);
1534                }
1535            }
1536            throw new CmsIndexException(
1537                Messages.get().container(Messages.ERR_IO_INDEX_WRITER_OPEN_2, getPath(), getName()),
1538                e);
1539        }
1540
1541        return new CmsLuceneIndexWriter(indexWriter, this);
1542    }
1543
1544    /**
1545     * Extends the given path query with another term for the given search root element.<p>
1546     *
1547     * @param terms the path filter to extend
1548     * @param searchRoot the search root to add to the path query
1549     */
1550    protected void extendPathFilter(List<Term> terms, String searchRoot) {
1551
1552        if (!CmsResource.isFolder(searchRoot)) {
1553            searchRoot += "/";
1554        }
1555        terms.add(new Term(CmsSearchField.FIELD_PARENT_FOLDERS, searchRoot));
1556    }
1557
1558    /**
1559     * Generates the directory on the RFS for this index.<p>
1560     *
1561     * @return the directory on the RFS for this index
1562     */
1563    protected String generateIndexDirectory() {
1564
1565        return OpenCms.getSystemInfo().getAbsoluteRfsPathRelativeToWebInf(
1566            OpenCms.getSearchManager().getDirectory() + "/" + getName());
1567    }
1568
1569    /**
1570     * Returns a cached Lucene term query filter for the given field and terms.<p>
1571     *
1572     * @param field the field to use
1573     * @param terms the term to use
1574     *
1575     * @return a cached Lucene term query filter for the given field and terms
1576     */
1577    protected Query getMultiTermQueryFilter(String field, List<String> terms) {
1578
1579        return getMultiTermQueryFilter(field, null, terms);
1580    }
1581
1582    /**
1583     * Returns a cached Lucene term query filter for the given field and terms.<p>
1584     *
1585     * @param field the field to use
1586     * @param terms the term to use
1587     *
1588     * @return a cached Lucene term query filter for the given field and terms
1589     */
1590    protected Query getMultiTermQueryFilter(String field, String terms) {
1591
1592        return getMultiTermQueryFilter(field, terms, null);
1593    }
1594
1595    /**
1596     * Returns a cached Lucene term query filter for the given field and terms.<p>
1597     *
1598     * @param field the field to use
1599     * @param termsStr the terms to use as a String separated by a space ' ' char
1600     * @param termsList the list of terms to use
1601     *
1602     * @return a cached Lucene term query filter for the given field and terms
1603     */
1604    protected Query getMultiTermQueryFilter(String field, String termsStr, List<String> termsList) {
1605
1606        if (termsStr == null) {
1607            StringBuffer buf = new StringBuffer(64);
1608            for (int i = 0; i < termsList.size(); i++) {
1609                if (i > 0) {
1610                    buf.append(' ');
1611                }
1612                buf.append(termsList.get(i));
1613            }
1614            termsStr = buf.toString();
1615        }
1616        Query result = m_displayFilters.get(
1617            (new StringBuffer(64)).append(field).append('|').append(termsStr).toString());
1618        if (result == null) {
1619            List<Term> terms = new ArrayList<Term>();
1620            if (termsList == null) {
1621                termsList = CmsStringUtil.splitAsList(termsStr, ' ');
1622            }
1623            for (int i = 0; i < termsList.size(); i++) {
1624                terms.add(new Term(field, termsList.get(i)));
1625            }
1626
1627            BooleanQuery.Builder build = new BooleanQuery.Builder();
1628            terms.forEach(term -> build.add(new TermQuery(term), Occur.SHOULD));
1629            Query termsQuery = build.build(); //termsFilter
1630
1631            try {
1632                result = termsQuery.createWeight(m_indexSearcher, ScoreMode.COMPLETE_NO_SCORES, 1).getQuery();
1633                m_displayFilters.put(field + termsStr, result);
1634            } catch (IOException e) {
1635                // TODO don't know what happend
1636                e.printStackTrace();
1637            }
1638        }
1639        return result;
1640    }
1641
1642    /**
1643     * Checks if the OpenCms resource referenced by the result document can be read
1644     * by the user of the given OpenCms context.
1645     *
1646     * Returns the referenced <code>CmsResource</code> or <code>null</code> if
1647     * the user is not permitted to read the resource.<p>
1648     *
1649     * @param cms the OpenCms user context to use for permission testing
1650     * @param doc the search result document to check
1651     *
1652     * @return the referenced <code>CmsResource</code> or <code>null</code> if the user is not permitted
1653     */
1654    protected CmsResource getResource(CmsObject cms, I_CmsSearchDocument doc) {
1655
1656        // check if the resource exits in the VFS,
1657        // this will implicitly check read permission and if the resource was deleted
1658        CmsResourceFilter filter = CmsResourceFilter.DEFAULT;
1659        if (isRequireViewPermission()) {
1660            filter = CmsResourceFilter.DEFAULT_ONLY_VISIBLE;
1661        } else if (isIgnoreExpiration()) {
1662            filter = CmsResourceFilter.IGNORE_EXPIRATION;
1663        }
1664
1665        return getResource(cms, doc, filter);
1666    }
1667
1668    /**
1669     * Checks if the OpenCms resource referenced by the result document can be read
1670     * by the user of the given OpenCms context.
1671     *
1672     * Returns the referenced <code>CmsResource</code> or <code>null</code> if
1673     * the user is not permitted to read the resource.<p>
1674     *
1675     * @param cms the OpenCms user context to use for permission testing
1676     * @param doc the search result document to check
1677     * @param filter the resource filter to apply
1678     *
1679     * @return the referenced <code>CmsResource</code> or <code>null</code> if the user is not permitted
1680     */
1681    protected CmsResource getResource(CmsObject cms, I_CmsSearchDocument doc, CmsResourceFilter filter) {
1682
1683        try {
1684            CmsObject clone = OpenCms.initCmsObject(cms);
1685            clone.getRequestContext().setSiteRoot("");
1686            return clone.readResource(doc.getPath(), filter);
1687        } catch (CmsException e) {
1688            // Do nothing
1689        }
1690
1691        return null;
1692    }
1693
1694    /**
1695     * Returns a cached Lucene term query filter for the given field and term.<p>
1696     *
1697     * @param field the field to use
1698     * @param term the term to use
1699     *
1700     * @return a cached Lucene term query filter for the given field and term
1701     */
1702    protected Query getTermQueryFilter(String field, String term) {
1703
1704        return getMultiTermQueryFilter(field, term, Collections.singletonList(term));
1705    }
1706
1707    /**
1708     * Checks if the OpenCms resource referenced by the result document can be read
1709     * be the user of the given OpenCms context.<p>
1710     *
1711     * @param cms the OpenCms user context to use for permission testing
1712     * @param doc the search result document to check
1713     * @return <code>true</code> if the user has read permissions to the resource
1714     */
1715    protected boolean hasReadPermission(CmsObject cms, I_CmsSearchDocument doc) {
1716
1717        // If no permission check is needed: the document can be read
1718        // Else try to read the resource if this is not possible the user does not have enough permissions
1719        return !needsPermissionCheck(doc) ? true : (null != getResource(cms, doc));
1720    }
1721
1722    /**
1723     * Closes the index searcher for this index.<p>
1724     *
1725     * @see #indexSearcherOpen(String)
1726     */
1727    protected synchronized void indexSearcherClose() {
1728
1729        indexSearcherClose(m_indexSearcher);
1730    }
1731
1732    /**
1733     * Closes the given Lucene index searcher.<p>
1734     *
1735     * @param searcher the searcher to close
1736     */
1737    protected synchronized void indexSearcherClose(IndexSearcher searcher) {
1738
1739        // in case there is an index searcher available close it
1740        if ((searcher != null) && (searcher.getIndexReader() != null)) {
1741            try {
1742                searcher.getIndexReader().close();
1743            } catch (Exception e) {
1744                LOG.error(Messages.get().getBundle().key(Messages.ERR_INDEX_SEARCHER_CLOSE_1, getName()), e);
1745            }
1746        }
1747    }
1748
1749    /**
1750     * Initializes the index searcher for this index.<p>
1751     *
1752     * In case there is an index searcher still open, it is closed first.<p>
1753     *
1754     * For performance reasons, one instance of the index searcher should be kept
1755     * for all searches. However, if the index is updated or changed
1756     * this searcher instance needs to be re-initialized.<p>
1757     *
1758     * @param path the path to the index directory
1759     */
1760    protected synchronized void indexSearcherOpen(String path) {
1761
1762        IndexSearcher oldSearcher = null;
1763        Directory indexDirectory = null;
1764        try {
1765            indexDirectory = FSDirectory.open(Paths.get(path));
1766            if (DirectoryReader.indexExists(indexDirectory)) {
1767                IndexReader reader = UninvertingReader.wrap(
1768                    DirectoryReader.open(indexDirectory),
1769                    createUninvertingMap());
1770                if (m_indexSearcher != null) {
1771                    // store old searcher instance to close it later
1772                    oldSearcher = m_indexSearcher;
1773                }
1774                m_indexSearcher = new IndexSearcher(reader);
1775                m_indexSearcher.setSimilarity(m_sim);
1776                m_displayFilters = new HashMap<>();
1777            }
1778        } catch (IOException e) {
1779            LOG.error(Messages.get().getBundle().key(Messages.ERR_INDEX_SEARCHER_1, getName()), e);
1780            if (indexDirectory != null) {
1781                try {
1782                    indexDirectory.close();
1783                } catch (IOException closeException) {
1784                    // do nothing
1785                }
1786            }
1787        }
1788        if (oldSearcher != null) {
1789            // close the old searcher if required
1790            indexSearcherClose(oldSearcher);
1791        }
1792    }
1793
1794    /**
1795     * Reopens the index search reader for this index, required after the index has been changed.<p>
1796     *
1797     * @see #indexSearcherOpen(String)
1798     */
1799    protected synchronized void indexSearcherUpdate() {
1800
1801        IndexSearcher oldSearcher = m_indexSearcher;
1802        if ((oldSearcher != null) && (oldSearcher.getIndexReader() != null)) {
1803            // in case there is an index searcher available close it
1804            try {
1805                if (oldSearcher.getIndexReader() instanceof DirectoryReader) {
1806                    IndexReader newReader = DirectoryReader.openIfChanged(
1807                        (DirectoryReader)oldSearcher.getIndexReader());
1808                    if (newReader != null) {
1809                        m_indexSearcher = new IndexSearcher(newReader);
1810                        m_indexSearcher.setSimilarity(m_sim);
1811                        indexSearcherClose(oldSearcher);
1812                    }
1813                }
1814            } catch (Exception e) {
1815                LOG.error(Messages.get().getBundle().key(Messages.ERR_INDEX_SEARCHER_REOPEN_1, getName()), e);
1816            }
1817        } else {
1818            // make sure we end up with an open index searcher / reader
1819            indexSearcherOpen(getPath());
1820        }
1821    }
1822
1823    /**
1824     * Checks if the document is in the time range specified in the search parameters.<p>
1825     *
1826     * The creation date and/or the last modification date are checked.<p>
1827     *
1828     * @param doc the document to check the dates against the given time range
1829     * @param params the search parameters where the time ranges are specified
1830     *
1831     * @return true if document is in time range or not time range set otherwise false
1832     */
1833    protected boolean isInTimeRange(Document doc, CmsSearchParameters params) {
1834
1835        if (!isCheckingTimeRange()) {
1836            // time range check disabled
1837            return true;
1838        }
1839
1840        try {
1841            // check the creation date of the document against the given time range
1842            Date dateCreated = DateTools.stringToDate(doc.getField(CmsSearchField.FIELD_DATE_CREATED).stringValue());
1843            if (dateCreated.getTime() < params.getMinDateCreated()) {
1844                return false;
1845            }
1846            if (dateCreated.getTime() > params.getMaxDateCreated()) {
1847                return false;
1848            }
1849
1850            // check the last modification date of the document against the given time range
1851            Date dateLastModified = DateTools.stringToDate(
1852                doc.getField(CmsSearchField.FIELD_DATE_LASTMODIFIED).stringValue());
1853            if (dateLastModified.getTime() < params.getMinDateLastModified()) {
1854                return false;
1855            }
1856            if (dateLastModified.getTime() > params.getMaxDateLastModified()) {
1857                return false;
1858            }
1859
1860        } catch (ParseException ex) {
1861            // date could not be parsed -> doc is in time range
1862        }
1863
1864        return true;
1865    }
1866
1867    /**
1868     * Checks if the score for the results must be calculated based on the provided sort option.<p>
1869     *
1870     * Since Lucene 3 apparently the score is no longer calculated by default, but only if the
1871     * searcher is explicitly told so. This methods checks if, based on the given sort,
1872     * the score must be calculated.<p>
1873     *
1874     * @param searcher the index searcher to prepare
1875     * @param sort the sort option to use
1876     *
1877     * @return true if the sort option should be used
1878     */
1879    protected boolean isSortScoring(IndexSearcher searcher, Sort sort) {
1880
1881        boolean doScoring = false;
1882        if (sort != null) {
1883            if ((sort == CmsSearchParameters.SORT_DEFAULT) || (sort == CmsSearchParameters.SORT_TITLE)) {
1884                // these default sorts do need score calculation
1885                doScoring = true;
1886            } else if ((sort == CmsSearchParameters.SORT_DATE_CREATED)
1887                || (sort == CmsSearchParameters.SORT_DATE_LASTMODIFIED)) {
1888                    // these default sorts don't need score calculation
1889                    doScoring = false;
1890                } else {
1891                    // for all non-defaults: check if the score field is present, in that case we must calculate the score
1892                    SortField[] fields = sort.getSort();
1893                    for (SortField field : fields) {
1894                        if (field == SortField.FIELD_SCORE) {
1895                            doScoring = true;
1896                            break;
1897                        }
1898                    }
1899                }
1900        }
1901        return doScoring;
1902    }
1903
1904    /**
1905     * Checks if the OpenCms resource referenced by the result document needs to be checked.<p>
1906     *
1907     * @param doc the search result document to check
1908     *
1909     * @return <code>true</code> if the document needs to be checked <code>false</code> otherwise
1910     */
1911    protected boolean needsPermissionCheck(I_CmsSearchDocument doc) {
1912
1913        if (!isCheckingPermissions()) {
1914            // no permission check is performed at all
1915            return false;
1916        }
1917
1918        if ((doc.getType() == null) || (doc.getPath() == null)) {
1919            // permission check needs only to be performed for VFS documents that contain both fields
1920            return false;
1921        }
1922
1923        if (!I_CmsSearchDocument.VFS_DOCUMENT_KEY_PREFIX.equals(doc.getType())
1924            && !OpenCms.getResourceManager().hasResourceType(doc.getType())) {
1925            // this is an unknown VFS resource type (also not the generic "VFS" type of OpenCms before 7.0)
1926            return false;
1927        }
1928        return true;
1929    }
1930
1931    /**
1932     * Removes the given backup folder of this index.<p>
1933     *
1934     * @param path the backup folder to remove
1935     */
1936    protected void removeIndexBackup(String path) {
1937
1938        if (!isBackupReindexing()) {
1939            // if no backup is generated we don't need to do anything
1940            return;
1941        }
1942
1943        // check if the target directory already exists
1944        File file = new File(path);
1945        if (!file.exists()) {
1946            // index does not exist yet
1947            return;
1948        }
1949        try {
1950            FSDirectory dir = FSDirectory.open(file.toPath());
1951            dir.close();
1952            CmsFileUtil.purgeDirectory(file);
1953        } catch (Exception e) {
1954            LOG.error(Messages.get().getBundle().key(Messages.LOG_IO_INDEX_BACKUP_REMOVE_2, getName(), path), e);
1955        }
1956    }
1957
1958    /**
1959     * Generates the uninverting map and adds it to the field configuration.
1960     * @return the generated uninverting map
1961     *
1962     * @see CmsSearchField#addUninvertingMappings(Map)
1963     */
1964    private Map<String, Type> createUninvertingMap() {
1965
1966        Map<String, UninvertingReader.Type> uninvertingMap = new HashMap<String, UninvertingReader.Type>();
1967        CmsSearchField.addUninvertingMappings(uninvertingMap);
1968        getFieldConfiguration().addUninvertingMappings(uninvertingMap);
1969        return uninvertingMap;
1970    }
1971
1972}