001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.search;
029
030import org.opencms.configuration.CmsParameterConfiguration;
031import org.opencms.file.CmsObject;
032import org.opencms.file.CmsPropertyDefinition;
033import org.opencms.file.CmsResource;
034import org.opencms.i18n.CmsLocaleManager;
035import org.opencms.main.CmsException;
036import org.opencms.main.CmsIllegalArgumentException;
037import org.opencms.main.CmsLog;
038import org.opencms.main.OpenCms;
039import org.opencms.report.I_CmsReport;
040import org.opencms.search.documents.I_CmsDocumentFactory;
041import org.opencms.search.extractors.I_CmsExtractionResult;
042import org.opencms.search.fields.CmsSearchFieldConfiguration;
043import org.opencms.search.fields.I_CmsSearchFieldConfiguration;
044import org.opencms.util.CmsStringUtil;
045
046import java.io.IOException;
047import java.util.ArrayList;
048import java.util.Collections;
049import java.util.HashMap;
050import java.util.Iterator;
051import java.util.List;
052import java.util.Locale;
053import java.util.Map;
054import java.util.Objects;
055
056import org.apache.commons.logging.Log;
057
058/**
059 * Abstract base class for search indexes. It provides default implementations that should fit most use
060 * cases when adding own index implementations.
061 */
062public abstract class A_CmsSearchIndex implements I_CmsSearchIndex {
063
064    /** Special value for the search.exclude property. */
065    public static final String PROPERTY_SEARCH_EXCLUDE_VALUE_ALL = "all";
066
067    /** Special value for the search.exclude property. */
068    public static final String PROPERTY_SEARCH_EXCLUDE_VALUE_GALLERY = "gallery";
069
070    /** The use all locale. */
071    public static final String USE_ALL_LOCALE = "all";
072
073    /** The log object for this class. */
074    private static final Log LOG = CmsLog.getLog(A_CmsSearchIndex.class);
075
076    /** The serial version id. */
077    private static final long serialVersionUID = 5831386499514765251L;
078
079    /** Document types of folders/channels. */
080    private Map<String, List<String>> m_documenttypes;
081
082    /** An internal enabled flag, used to disable the index if for instance the configured project does not exist. */
083    private boolean m_enabled;
084
085    /** The content extraction mode for this index. */
086    private boolean m_extractContent;
087
088    /** The search field configuration of this index. */
089    private I_CmsSearchFieldConfiguration m_fieldConfiguration;
090
091    /** The name of the search field configuration used by this index. */
092    private String m_fieldConfigurationName;
093
094    /** The index writer to use. */
095    private transient I_CmsIndexWriter m_indexWriter;
096
097    /** Signals whether the language detection. */
098    private boolean m_languageDetection;
099
100    /** The locale of this index. */
101    private Locale m_locale;
102
103    /** The name of this index. */
104    private String m_name;
105
106    /** The path where this index stores it's data in the "real" file system. */
107    private String m_path;
108
109    /** The project of this index. */
110    private String m_project;
111
112    /** The rebuild mode for this index. */
113    private String m_rebuild;
114
115    /** The list of configured index source names. */
116    private List<String> m_sourceNames;
117
118    /** The list of configured index sources. */
119    private List<CmsSearchIndexSource> m_sources;
120
121    /**
122     * Default constructor only intended to be used by the XML configuration. <p>
123     *
124     * It is recommended to use the constructor <code>{@link #A_CmsSearchIndex(String)}</code>
125     * as it enforces the mandatory name argument. <p>
126     */
127    public A_CmsSearchIndex() {
128
129        m_sourceNames = new ArrayList<String>();
130        m_documenttypes = new HashMap<String, List<String>>();
131        m_enabled = true;
132        m_extractContent = true;
133    }
134
135    /**
136     * Creates a new CmsSearchIndex with the given name.<p>
137     *
138     * @param name the system-wide unique name for the search index
139     *
140     * @throws CmsIllegalArgumentException if the given name is null, empty or already taken by another search index
141     */
142    public A_CmsSearchIndex(String name)
143    throws CmsIllegalArgumentException {
144
145        this();
146        setName(name);
147    }
148
149    /**
150     * @see org.opencms.search.I_CmsSearchIndex#addConfigurationParameter(java.lang.String, java.lang.String)
151     */
152    public void addConfigurationParameter(String key, String value) {
153
154        // by default no parameters are excepted
155
156    }
157
158    /**
159     * Adds am index source to this search index.<p>
160     *
161     * @param sourceName the index source name to add
162     */
163    public void addSourceName(String sourceName) {
164
165        m_sourceNames.add(sourceName);
166    }
167
168    /**
169     * @see org.opencms.search.I_CmsSearchIndex#checkConfiguration(org.opencms.file.CmsObject)
170     */
171    public boolean checkConfiguration(CmsObject cms) {
172
173        if (isEnabled()) {
174            // check if the project for the index exists
175            try {
176                cms.readProject(getProject());
177                setEnabled(true);
178            } catch (CmsException e) {
179                // the project does not exist, disable the index
180                setEnabled(false);
181                if (LOG.isErrorEnabled()) {
182                    LOG.error(
183                        Messages.get().getBundle().key(
184                            Messages.LOG_SEARCHINDEX_CREATE_BAD_PROJECT_2,
185                            getProject(),
186                            getName()));
187                }
188            }
189        } else {
190            if (LOG.isInfoEnabled()) {
191                LOG.info(Messages.get().getBundle().key(Messages.LOG_SEARCHINDEX_DISABLED_1, getName()));
192            }
193        }
194
195        return isEnabled();
196    }
197
198    /**
199     * @see java.lang.Object#equals(java.lang.Object)
200     */
201    @Override
202    public boolean equals(Object obj) {
203
204        if (obj == this) {
205            return true;
206        }
207        if ((null != obj) && this.getClass().getName().equals(obj.getClass().getName())) {
208            return ((I_CmsSearchIndex)obj).getName().equals(m_name);
209        }
210        return false;
211    }
212
213    /**
214     * Checks if the provided resource should be excluded from this search index.<p>
215     *
216     * @param cms the OpenCms context used for building the search index
217     * @param resource the resource to index
218     *
219     * @return true if the resource should be excluded, false if it should be included in this index
220     */
221    public boolean excludeFromIndex(CmsObject cms, CmsResource resource) {
222
223        // check if this resource should be excluded from the index, if so skip it
224        boolean excludeFromIndex = false;
225
226        if (resource.isInternal()
227            || resource.isFolder()
228            || resource.isTemporaryFile()
229            || (resource.getDateExpired() <= System.currentTimeMillis())) {
230            // don't index internal resources, folders or temporary files or resources with expire date in the past
231            return true;
232        }
233
234        try {
235            // do property lookup with folder search
236            String propValue = cms.readPropertyObject(
237                resource,
238                CmsPropertyDefinition.PROPERTY_SEARCH_EXCLUDE,
239                true).getValue();
240            excludeFromIndex = Boolean.valueOf(propValue).booleanValue();
241            if (!excludeFromIndex && (propValue != null)) {
242                // property value was neither "true" nor null, must check for "all"
243                excludeFromIndex = PROPERTY_SEARCH_EXCLUDE_VALUE_ALL.equalsIgnoreCase(propValue.trim());
244            }
245        } catch (CmsException e) {
246            if (LOG.isDebugEnabled()) {
247                LOG.debug(
248                    Messages.get().getBundle().key(Messages.LOG_UNABLE_TO_READ_PROPERTY_1, resource.getRootPath()));
249            }
250        }
251        if (!excludeFromIndex && !USE_ALL_LOCALE.equalsIgnoreCase(getLocale().getLanguage())) {
252            // check if any resource default locale has a match with the index locale, if not skip resource
253            List<Locale> locales = OpenCms.getLocaleManager().getDefaultLocales(cms, resource);
254            Locale match = OpenCms.getLocaleManager().getFirstMatchingLocale(
255                Collections.singletonList(getLocale()),
256                locales);
257            excludeFromIndex = (match == null);
258        }
259
260        return excludeFromIndex;
261    }
262
263    /**
264     * Returns the empty configuration.
265     * Override the method if your index is configurable.
266     *
267     * @see org.opencms.search.I_CmsSearchIndex#getConfiguration()
268     */
269    public CmsParameterConfiguration getConfiguration() {
270
271        return new CmsParameterConfiguration();
272    }
273
274    /**
275     * We always assume we have no unchanged copy of the content, since it depends on the concrete index.
276     * Override the method to enhance indexing performance if you know where to grap the content from your index.
277     * See the implementation {@link org.opencms.search.CmsSearchIndex#getContentIfUnchanged(CmsResource)} for an example.
278     * @see org.opencms.search.I_CmsSearchIndex#getContentIfUnchanged(org.opencms.file.CmsResource)
279     */
280    public I_CmsExtractionResult getContentIfUnchanged(CmsResource resource) {
281
282        return null;
283    }
284
285    /**
286     * Returns the document type factory used for the given resource in this index, or <code>null</code>
287     * in case the resource is not indexed by this index.<p>
288     *
289     * A resource is indexed if the following is all true: <ol>
290     * <li>The index contains at last one index source matching the root path of the given resource.
291     * <li>For this matching index source, the document type factory needed by the resource is also configured.
292     * </ol>
293     *
294     * @param res the resource to check
295     *
296     * @return the document type factory used for the given resource in this index, or <code>null</code>
297     * in case the resource is not indexed by this index
298     */
299    @Override
300    public I_CmsDocumentFactory getDocumentFactory(CmsResource res) {
301
302        if ((res != null) && (getSources() != null)) {
303            // the result can only be null or the type configured for the resource
304            List<String> documentTypeKeys = OpenCms.getSearchManager().getDocumentTypeKeys(res);
305            for (String documentTypeKey : documentTypeKeys) {
306                for (CmsSearchIndexSource source : getSources()) {
307                    if (source.isIndexing(res.getRootPath(), documentTypeKey)) {
308                        // we found an index source that indexes the resource
309                        return source.getDocumentFactory(documentTypeKey);
310                    }
311                }
312            }
313        }
314        return null;
315    }
316
317    /**
318     * @see org.opencms.search.I_CmsSearchIndex#getFieldConfiguration()
319     */
320    public I_CmsSearchFieldConfiguration getFieldConfiguration() {
321
322        return m_fieldConfiguration;
323    }
324
325    /**
326     * @see org.opencms.search.I_CmsSearchIndex#getFieldConfigurationName()
327     */
328    public String getFieldConfigurationName() {
329
330        return m_fieldConfigurationName;
331    }
332
333    /**
334     * Returns a new index writer for this index.<p>
335     *
336     * @param report the report to write error messages on
337     * @param create if <code>true</code> a whole new index is created, if <code>false</code> an existing index is updated
338     *
339     * @return a new instance of IndexWriter
340     *
341     * @throws CmsIndexException if the index can not be opened
342     */
343    public I_CmsIndexWriter getIndexWriter(I_CmsReport report, boolean create) throws CmsIndexException {
344
345        // note - create will be:
346        //   true if the index is to be fully rebuild,
347        //   false if the index is to be incrementally updated
348        if (m_indexWriter != null) {
349            if (!create) {
350                // re-use existing index writer
351                return m_indexWriter;
352            }
353            // need to close the index writer if create is "true"
354            try {
355                m_indexWriter.close();
356                m_indexWriter = null;
357            } catch (IOException e) {
358                // if we can't close the index we are busted!
359                throw new CmsIndexException(
360                    Messages.get().container(Messages.LOG_IO_INDEX_WRITER_CLOSE_2, getPath(), getName()),
361                    e);
362            }
363
364        }
365
366        // now create is true of false, but the index writer is definitely null / closed
367        I_CmsIndexWriter indexWriter = createIndexWriter(create, report);
368
369        if (!create) {
370            m_indexWriter = indexWriter;
371        }
372
373        return indexWriter;
374    }
375
376    /**
377     * Returns the language locale of this index.<p>
378     *
379     * @return the language locale of this index, for example "en"
380     */
381    public Locale getLocale() {
382
383        return m_locale;
384    }
385
386    /**
387     * Returns the language locale for the given resource in this index.<p>
388     *
389     * @param cms the current OpenCms user context
390     * @param resource the resource to check
391     * @param availableLocales a list of locales supported by the resource
392     *
393     * @return the language locale for the given resource in this index
394     */
395    public Locale getLocaleForResource(CmsObject cms, CmsResource resource, List<Locale> availableLocales) {
396
397        Locale result = null;
398        List<Locale> defaultLocales = OpenCms.getLocaleManager().getDefaultLocales(cms, resource);
399        if ((availableLocales != null) && (availableLocales.size() > 0)) {
400            result = OpenCms.getLocaleManager().getBestMatchingLocale(
401                defaultLocales.get(0),
402                defaultLocales,
403                availableLocales);
404        }
405        if (result == null) {
406            result = ((availableLocales != null) && availableLocales.isEmpty())
407            ? availableLocales.get(0)
408            : defaultLocales.get(0);
409        }
410        return result;
411    }
412
413    /**
414     * @see org.opencms.search.I_CmsSearchIndex#getName()
415     */
416    public String getName() {
417
418        return m_name;
419    }
420
421    /**
422     * @see org.opencms.search.I_CmsSearchIndex#getPath()
423     */
424    public String getPath() {
425
426        return m_path;
427    }
428
429    /**
430     * @see org.opencms.search.I_CmsSearchIndex#getProject()
431     */
432    public String getProject() {
433
434        return m_project;
435    }
436
437    /**
438     * @see org.opencms.search.I_CmsSearchIndex#getRebuildMode()
439     */
440    public String getRebuildMode() {
441
442        return m_rebuild;
443    }
444
445    /**
446     * @see org.opencms.search.I_CmsSearchIndex#getSourceNames()
447     */
448    public List<String> getSourceNames() {
449
450        return m_sourceNames;
451    }
452
453    /**
454    * @see org.opencms.search.I_CmsSearchIndex#getSources()
455    */
456    public List<CmsSearchIndexSource> getSources() {
457
458        return m_sources;
459    }
460
461    /**
462     * @see java.lang.Object#hashCode()
463     */
464    @Override
465    public int hashCode() {
466
467        return m_name != null ? m_name.hashCode() : 0;
468    }
469
470    /**
471     * @see org.opencms.configuration.I_CmsConfigurationParameterHandler#initConfiguration()
472     */
473    public void initConfiguration() {
474
475        // Do nothing by default
476
477    }
478
479    /**
480     * Initializes the search index.<p>
481     *
482     * @throws CmsSearchException if the index source association failed or a configuration error occurred
483     */
484
485    public void initialize() throws CmsSearchException {
486
487        if (!isEnabled()) {
488            // index is disabled, no initialization is required
489            return;
490        }
491
492        String sourceName = null;
493        CmsSearchIndexSource indexSource = null;
494        List<String> searchIndexSourceDocumentTypes = null;
495        List<String> resourceNames = null;
496        String resourceName = null;
497        m_sources = new ArrayList<CmsSearchIndexSource>();
498
499        m_path = getPath();
500
501        for (int i = 0, n = m_sourceNames.size(); i < n; i++) {
502
503            try {
504                sourceName = m_sourceNames.get(i);
505                indexSource = OpenCms.getSearchManager().getIndexSource(sourceName);
506                m_sources.add(indexSource);
507
508                resourceNames = indexSource.getResourcesNames();
509                searchIndexSourceDocumentTypes = indexSource.getDocumentTypes();
510                for (int j = 0, m = resourceNames.size(); j < m; j++) {
511
512                    resourceName = resourceNames.get(j);
513                    m_documenttypes.put(resourceName, searchIndexSourceDocumentTypes);
514                }
515            } catch (Exception e) {
516                // mark this index as disabled
517                setEnabled(false);
518                throw new CmsSearchException(
519                    Messages.get().container(Messages.ERR_INDEX_SOURCE_ASSOCIATION_1, sourceName),
520                    e);
521            }
522        }
523
524        // initialize the search field configuration
525        if (m_fieldConfigurationName == null) {
526            // if not set, use standard field configuration
527            m_fieldConfigurationName = CmsSearchFieldConfiguration.STR_STANDARD;
528        }
529        m_fieldConfiguration = OpenCms.getSearchManager().getFieldConfiguration(m_fieldConfigurationName);
530        if (m_fieldConfiguration == null) {
531            // we must have a valid field configuration to continue
532            throw new CmsSearchException(
533                Messages.get().container(Messages.ERR_FIELD_CONFIGURATION_UNKNOWN_2, m_name, m_fieldConfigurationName));
534        }
535
536        // initialize the index searcher instance
537        onIndexChanged(true);
538    }
539
540    /**
541     * @see org.opencms.search.I_CmsSearchIndex#isEnabled()
542     */
543    public boolean isEnabled() {
544
545        return m_enabled;
546    }
547
548    /**
549    * @see org.opencms.search.I_CmsSearchIndex#isExtractingContent()
550    */
551    public boolean isExtractingContent() {
552
553        return m_extractContent;
554    }
555
556    /**
557     * @see org.opencms.search.I_CmsSearchIndex#isInitialized()
558     */
559    public boolean isInitialized() {
560
561        return m_sources != null;
562    }
563
564    /**
565     * @see org.opencms.search.I_CmsSearchIndex#isLanguageDetection()
566     */
567    public boolean isLanguageDetection() {
568
569        return m_languageDetection;
570    }
571
572    /**
573     * Returns <code>true</code> in case this index is updated incremental.<p>
574     *
575     * An index is updated incremental if the index rebuild mode as defined by
576     * {@link #getRebuildMode()} is either set to {@value I_CmsSearchIndex#REBUILD_MODE_AUTO} or
577     * {@value I_CmsSearchIndex#REBUILD_MODE_OFFLINE}. Moreover, at least one update must have
578     * been written to the index already.
579     *
580     * @return <code>true</code> in case this index is updated incremental
581     */
582    public boolean isUpdatedIncremental() {
583
584        return m_indexWriter != null;
585    }
586
587    /**
588     * @see org.opencms.search.I_CmsSearchIndex#onIndexChanged(boolean)
589     */
590    public void onIndexChanged(boolean force) {
591
592        // Do nothing by default.
593
594    }
595
596    /**
597     * @see org.opencms.search.I_CmsSearchIndex#removeSourceName(String)
598     */
599    public void removeSourceName(String sourceName) {
600
601        Iterator<CmsSearchIndexSource> it = m_sources.iterator();
602        while (it.hasNext()) {
603            if (Objects.equals(it.next().getName(), sourceName)) {
604                it.remove();
605            }
606        }
607        m_sourceNames.remove(sourceName);
608    }
609
610    /**
611     * @see org.opencms.search.I_CmsSearchIndex#setEnabled(boolean)
612     */
613    public void setEnabled(boolean enabled) {
614
615        m_enabled = enabled;
616
617    }
618
619    /**
620     * Sets the field configuration used for this index.<p>
621     *
622     * @param fieldConfiguration the field configuration to set
623     */
624    public void setFieldConfiguration(I_CmsSearchFieldConfiguration fieldConfiguration) {
625
626        m_fieldConfiguration = fieldConfiguration;
627    }
628
629    /**
630     * Sets the name of the field configuration used for this index.<p>
631     *
632     * @param fieldConfigurationName the name of the field configuration to set
633     */
634    public void setFieldConfigurationName(String fieldConfigurationName) {
635
636        m_fieldConfigurationName = fieldConfigurationName;
637    }
638
639    /**
640     * Sets the languageDetection.<p>
641     *
642     * @param languageDetection the languageDetection to set
643     */
644    public void setLanguageDetection(boolean languageDetection) {
645
646        m_languageDetection = languageDetection;
647    }
648
649    /**
650     * @see org.opencms.search.I_CmsSearchIndex#setLocale(java.util.Locale)
651     */
652    public void setLocale(Locale locale) {
653
654        m_locale = locale;
655    }
656
657    /**
658     * @see org.opencms.search.I_CmsSearchIndex#setLocaleString(java.lang.String)
659     */
660    public void setLocaleString(String locale) {
661
662        setLocale(CmsLocaleManager.getLocale(locale));
663    }
664
665    /**
666    * @see org.opencms.search.I_CmsSearchIndex#setName(java.lang.String)
667    */
668    public void setName(String name) throws CmsIllegalArgumentException {
669
670        if (CmsStringUtil.isEmptyOrWhitespaceOnly(name)) {
671            throw new CmsIllegalArgumentException(
672                Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_MISSING_NAME_0));
673        } else {
674            // check if already used, but only if the name was modified:
675            // this is important as unmodifiable DisplayWidgets will also invoke this...
676            if (!name.equals(m_name)) {
677                // don't mess with XML configuration
678                if (OpenCms.getRunLevel() > OpenCms.RUNLEVEL_2_INITIALIZING) {
679                    // not needed at startup and additionally getSearchManager may return null
680                    Iterator<String> itIdxNames = OpenCms.getSearchManager().getIndexNames().iterator();
681                    while (itIdxNames.hasNext()) {
682                        if (itIdxNames.next().equals(name)) {
683                            throw new CmsIllegalArgumentException(
684                                Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_INVALID_NAME_1, name));
685                        }
686                    }
687                }
688            }
689        }
690        m_name = name;
691    }
692
693    /**
694     * Set the path to the index/core. This can either be the path to the directory where the
695     * index is stored or the URL where the index/core is reached.
696     * @param path to the index/core.
697     */
698    public void setPath(String path) {
699
700        m_path = path;
701    }
702
703    /**
704     * @see org.opencms.search.I_CmsSearchIndex#setProject(java.lang.String)
705     */
706    public void setProject(String project) {
707
708        m_project = project;
709    }
710
711    /**
712     * @see org.opencms.search.I_CmsSearchIndex#setRebuildMode(java.lang.String)
713     */
714    public void setRebuildMode(String rebuildMode) {
715
716        m_rebuild = rebuildMode;
717
718    }
719
720    /**
721     * @see org.opencms.search.I_CmsSearchIndex#shutDown()
722     */
723    public void shutDown() {
724
725        // close the index writer
726        if (m_indexWriter != null) {
727            try {
728                m_indexWriter.commit();
729                m_indexWriter.close();
730            } catch (IOException e) {
731                LOG.error(
732                    Messages.get().getBundle().key(Messages.LOG_IO_INDEX_WRITER_CLOSE_2, getPath(), getName()),
733                    e);
734            }
735        }
736    }
737
738    /**
739     * Creates a new index writer.<p>
740     *
741     * @param create if <code>true</code> a whole new index is created, if <code>false</code> an existing index is updated
742     * @param report the report
743     *
744     * @return the created new index writer
745     *
746     * @throws CmsIndexException in case the writer could not be created
747     *
748     * @see #getIndexWriter(I_CmsReport, boolean)
749     */
750    protected abstract I_CmsIndexWriter createIndexWriter(boolean create, I_CmsReport report) throws CmsIndexException;
751
752    /**
753     * Checks if the given resource should be indexed by this index or not.<p>
754     *
755     * @param res the resource candidate
756     *
757     * @return <code>true</code> if the given resource should be indexed or <code>false</code> if not
758     */
759    protected boolean isIndexing(CmsResource res) {
760
761        // NOTE: This method checks also if the resource is on a path that should be indexed.
762        return getDocumentFactory(res) != null;
763    }
764
765    /**
766     * Sets a flag, indicating if the index should extract content.
767     * @param extract a flag, indicating if the index should extract content.
768     */
769    protected void setExtractContent(boolean extract) {
770
771        m_extractContent = extract;
772    }
773
774    /**
775     * Sets the index writer.<p>
776     *
777     * @param writer the index writer to set
778     */
779    protected void setIndexWriter(I_CmsIndexWriter writer) {
780
781        m_indexWriter = writer;
782    }
783
784}