001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.search;
029
030import org.opencms.ade.containerpage.CmsDetailOnlyContainerUtil;
031import org.opencms.configuration.CmsConfigurationException;
032import org.opencms.db.CmsDriverManager;
033import org.opencms.db.CmsPublishedResource;
034import org.opencms.db.CmsResourceState;
035import org.opencms.file.CmsObject;
036import org.opencms.file.CmsProject;
037import org.opencms.file.CmsResource;
038import org.opencms.file.CmsResourceFilter;
039import org.opencms.file.CmsUser;
040import org.opencms.file.types.CmsResourceTypeXmlContainerPage;
041import org.opencms.file.types.CmsResourceTypeXmlContent;
042import org.opencms.file.types.I_CmsResourceType;
043import org.opencms.i18n.CmsLocaleManager;
044import org.opencms.i18n.CmsMessageContainer;
045import org.opencms.loader.CmsLoaderException;
046import org.opencms.loader.CmsResourceManager;
047import org.opencms.main.CmsBroadcast.ContentMode;
048import org.opencms.main.CmsEvent;
049import org.opencms.main.CmsException;
050import org.opencms.main.CmsIllegalArgumentException;
051import org.opencms.main.CmsIllegalStateException;
052import org.opencms.main.CmsLog;
053import org.opencms.main.I_CmsEventListener;
054import org.opencms.main.OpenCms;
055import org.opencms.main.OpenCmsSolrHandler;
056import org.opencms.relations.CmsRelation;
057import org.opencms.relations.CmsRelationFilter;
058import org.opencms.relations.CmsRelationType;
059import org.opencms.report.CmsLogReport;
060import org.opencms.report.CmsShellLogReport;
061import org.opencms.report.I_CmsReport;
062import org.opencms.scheduler.I_CmsScheduledJob;
063import org.opencms.search.documents.A_CmsVfsDocument;
064import org.opencms.search.documents.CmsExtractionResultCache;
065import org.opencms.search.documents.I_CmsDocumentFactory;
066import org.opencms.search.documents.I_CmsTermHighlighter;
067import org.opencms.search.fields.CmsLuceneField;
068import org.opencms.search.fields.CmsLuceneFieldConfiguration;
069import org.opencms.search.fields.CmsSearchField;
070import org.opencms.search.fields.CmsSearchFieldConfiguration;
071import org.opencms.search.fields.CmsSearchFieldMapping;
072import org.opencms.search.fields.I_CmsSearchFieldConfiguration;
073import org.opencms.search.solr.CmsSolrConfiguration;
074import org.opencms.search.solr.CmsSolrFieldConfiguration;
075import org.opencms.search.solr.CmsSolrIndex;
076import org.opencms.search.solr.I_CmsSolrIndexWriter;
077import org.opencms.search.solr.spellchecking.CmsSolrSpellchecker;
078import org.opencms.search.solr.spellchecking.CmsSpellcheckDictionaryIndexer;
079import org.opencms.security.CmsRole;
080import org.opencms.security.CmsRoleViolationException;
081import org.opencms.util.A_CmsModeStringEnumeration;
082import org.opencms.util.CmsFileUtil;
083import org.opencms.util.CmsStringUtil;
084import org.opencms.util.CmsUUID;
085import org.opencms.util.CmsWaitHandle;
086
087import java.io.File;
088import java.io.IOException;
089import java.nio.file.FileSystems;
090import java.nio.file.Paths;
091import java.util.ArrayList;
092import java.util.Collection;
093import java.util.Collections;
094import java.util.HashMap;
095import java.util.HashSet;
096import java.util.Iterator;
097import java.util.LinkedHashMap;
098import java.util.List;
099import java.util.ListIterator;
100import java.util.Locale;
101import java.util.Map;
102import java.util.Set;
103import java.util.TreeMap;
104import java.util.concurrent.locks.ReentrantLock;
105import java.util.stream.Collectors;
106
107import org.apache.commons.logging.Log;
108import org.apache.lucene.analysis.Analyzer;
109import org.apache.lucene.analysis.CharArraySet;
110import org.apache.lucene.analysis.standard.StandardAnalyzer;
111import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
112import org.apache.solr.client.solrj.impl.HttpSolrClient.Builder;
113import org.apache.solr.core.CoreContainer;
114import org.apache.solr.core.CoreDescriptor;
115import org.apache.solr.core.SolrCore;
116
117/**
118 * Implements the general management and configuration of the search and
119 * indexing facilities in OpenCms.<p>
120 *
121 * @since 6.0.0
122 */
123public class CmsSearchManager implements I_CmsScheduledJob, I_CmsEventListener {
124
125    /**
126     *  Enumeration class for force unlock types.<p>
127     */
128    public static final class CmsSearchForceUnlockMode extends A_CmsModeStringEnumeration {
129
130        /** Force unlock type "always". */
131        public static final CmsSearchForceUnlockMode ALWAYS = new CmsSearchForceUnlockMode("always");
132
133        /** Force unlock type "never". */
134        public static final CmsSearchForceUnlockMode NEVER = new CmsSearchForceUnlockMode("never");
135
136        /** Force unlock type "only full". */
137        public static final CmsSearchForceUnlockMode ONLYFULL = new CmsSearchForceUnlockMode("onlyfull");
138
139        /** Serializable version id. */
140        private static final long serialVersionUID = 74746076708908673L;
141
142        /**
143         * Creates a new force unlock type with the given name.<p>
144         *
145         * @param mode the mode id to use
146         */
147        protected CmsSearchForceUnlockMode(String mode) {
148
149            super(mode);
150        }
151
152        /**
153         * Returns the lock type for the given type value.<p>
154         *
155         * @param type the type value to get the lock type for
156         *
157         * @return the lock type for the given type value
158         */
159        public static CmsSearchForceUnlockMode valueOf(String type) {
160
161            if (type.equals(ALWAYS.toString())) {
162                return ALWAYS;
163            } else if (type.equals(NEVER.toString())) {
164                return NEVER;
165            } else {
166                return ONLYFULL;
167            }
168        }
169    }
170
171    /**
172     * Handles offline index generation.<p>
173     */
174    protected class CmsSearchOfflineHandler implements I_CmsEventListener {
175
176        /** Indicates if the event handlers for the offline search have been already registered. */
177        private boolean m_isEventRegistered;
178
179        /** The list of resources to index. */
180        private List<CmsPublishedResource> m_resourcesToIndex;
181
182        /**
183         * Initializes the offline index handler.<p>
184         */
185        protected CmsSearchOfflineHandler() {
186
187            m_resourcesToIndex = new ArrayList<CmsPublishedResource>();
188        }
189
190        /**
191         * Implements the event listener of this class.<p>
192         *
193         * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent)
194         */
195        @SuppressWarnings("unchecked")
196        public void cmsEvent(CmsEvent event) {
197
198            Object change = event.getData().get(I_CmsEventListener.KEY_CHANGE);
199            switch (event.getType()) {
200                case I_CmsEventListener.EVENT_PROPERTY_MODIFIED:
201                case I_CmsEventListener.EVENT_RESOURCE_CREATED:
202                case I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED:
203                case I_CmsEventListener.EVENT_RESOURCE_MODIFIED:
204                    if ((change != null) && change.equals(new Integer(CmsDriverManager.NOTHING_CHANGED))) {
205                        // skip lock & unlock
206                        return;
207                    }
208                    // skip indexing if flag is set in event
209                    Object skip = event.getData().get(I_CmsEventListener.KEY_SKIPINDEX);
210                    if (skip != null) {
211                        return;
212                    }
213
214                    // a resource has been modified - offline indexes require (re)indexing
215                    List<CmsResource> resources = Collections.singletonList(
216                        (CmsResource)event.getData().get(I_CmsEventListener.KEY_RESOURCE));
217                    reIndexResources(resources);
218                    break;
219                case I_CmsEventListener.EVENT_RESOURCE_DELETED:
220                    List<CmsResource> eventResources = (List<CmsResource>)event.getData().get(
221                        I_CmsEventListener.KEY_RESOURCES);
222                    List<CmsResource> resourcesToDelete = new ArrayList<CmsResource>(eventResources);
223                    for (CmsResource res : resourcesToDelete) {
224                        if (res.getState().isNew()) {
225                            // if the resource is new and a delete action was performed
226                            // --> set the state of the resource to deleted
227                            res.setState(CmsResourceState.STATE_DELETED);
228                        }
229                    }
230                    reIndexResources(resourcesToDelete);
231                    break;
232                case I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED:
233                    if (I_CmsEventListener.VALUE_CREATE_SIBLING.equals(change)) {
234                        List<CmsResource> resList = (List<CmsResource>)event.getData().get(
235                            I_CmsEventListener.KEY_RESOURCES);
236                        if ((resList != null) && (resList.size() >= 3)) {
237                            System.out.println("Sibling creation case, resource = " + resList.get(1).getRootPath());
238                            reIndexResources(Collections.singletonList(resList.get(1)));
239
240                        }
241                    } else {
242                        reIndexResources((List<CmsResource>)event.getData().get(I_CmsEventListener.KEY_RESOURCES));
243                    }
244                    break;
245                case I_CmsEventListener.EVENT_RESOURCE_MOVED:
246                case I_CmsEventListener.EVENT_RESOURCE_COPIED:
247                case I_CmsEventListener.EVENT_RESOURCES_MODIFIED:
248
249                    // a list of resources has been modified - offline indexes require (re)indexing
250                    reIndexResources((List<CmsResource>)event.getData().get(I_CmsEventListener.KEY_RESOURCES));
251                    break;
252                default:
253                    // no operation
254            }
255        }
256
257        /**
258         * Adds a list of {@link CmsPublishedResource} objects to be indexed.<p>
259         *
260         * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to be indexed
261         */
262        protected synchronized void addResourcesToIndex(List<CmsPublishedResource> resourcesToIndex) {
263
264            m_resourcesToIndex.addAll(resourcesToIndex);
265        }
266
267        /**
268         * Returns the list of {@link CmsPublishedResource} objects to index.<p>
269         *
270         * @return the resources to index
271         */
272        protected List<CmsPublishedResource> getResourcesToIndex() {
273
274            List<CmsPublishedResource> result;
275            synchronized (this) {
276                result = m_resourcesToIndex;
277                m_resourcesToIndex = new ArrayList<CmsPublishedResource>();
278            }
279            try {
280                CmsObject cms = m_adminCms;
281                CmsProject offline = getOfflineIndexProject();
282                if (offline != null) {
283                    // switch to the offline project if available
284                    cms = OpenCms.initCmsObject(m_adminCms);
285                    cms.getRequestContext().setCurrentProject(offline);
286                }
287                addAdditionallyAffectedResources(cms, result);
288            } catch (CmsException e) {
289                LOG.error(e.getLocalizedMessage(), e);
290            }
291            return result;
292        }
293
294        /**
295         * Initializes this offline search handler, registering the event handlers if required.<p>
296         */
297        protected void initialize() {
298
299            if (m_offlineIndexes.size() > 0) {
300                // there is at least one offline index configured
301                if ((m_offlineIndexThread == null) || !m_offlineIndexThread.isAlive()) {
302                    // create the offline indexing thread
303                    m_offlineIndexThread = new CmsSearchOfflineIndexThread(this);
304                    // start the offline index thread
305                    m_offlineIndexThread.start();
306                }
307            } else {
308                if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) {
309                    // no offline indexes but thread still running, stop the thread
310                    m_offlineIndexThread.shutDown();
311                    m_offlineIndexThread = null;
312                }
313            }
314            // do this only in case there are offline indexes configured
315            if (!m_isEventRegistered && (m_offlineIndexes.size() > 0)) {
316                m_isEventRegistered = true;
317                // register this object as event listener
318                OpenCms.addCmsEventListener(
319                    this,
320                    new int[] {
321                        I_CmsEventListener.EVENT_PROPERTY_MODIFIED,
322                        I_CmsEventListener.EVENT_RESOURCE_CREATED,
323                        I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED,
324                        I_CmsEventListener.EVENT_RESOURCE_MODIFIED,
325                        I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED,
326                        I_CmsEventListener.EVENT_RESOURCE_MOVED,
327                        I_CmsEventListener.EVENT_RESOURCE_DELETED,
328                        I_CmsEventListener.EVENT_RESOURCE_COPIED,
329                        I_CmsEventListener.EVENT_RESOURCES_MODIFIED});
330            }
331        }
332
333        /**
334         * Updates all offline indexes for the given list of {@link CmsResource} objects.<p>
335         *
336         * @param resources a list of {@link CmsResource} objects to update in the offline indexes
337         */
338        protected synchronized void reIndexResources(List<CmsResource> resources) {
339
340            List<CmsPublishedResource> resourcesToIndex = new ArrayList<CmsPublishedResource>(resources.size());
341            for (CmsResource res : resources) {
342                CmsPublishedResource pubRes = new CmsPublishedResource(res);
343                resourcesToIndex.add(pubRes);
344            }
345            if (resourcesToIndex.size() > 0) {
346                // add the resources found to the offline index thread
347                addResourcesToIndex(resourcesToIndex);
348            }
349        }
350    }
351
352    /**
353     * The offline indexer thread runs periodically and indexes all resources added by the event handler.<p>
354     */
355    protected class CmsSearchOfflineIndexThread extends Thread {
356
357        /** The event handler that triggers this thread. */
358        CmsSearchOfflineHandler m_handler;
359
360        /** Indicates if this thread is still alive. */
361        boolean m_isAlive;
362
363        /** Indicates that an index update thread is currently running. */
364        private boolean m_isUpdating;
365
366        /** If true a manual update (after file upload) was triggered. */
367        private boolean m_updateTriggered;
368
369        /** The wait handle used for signalling when the worker thread has finished. */
370        private CmsWaitHandle m_waitHandle = new CmsWaitHandle();
371
372        /**
373         * Constructor.<p>
374         *
375         * @param handler the offline index event handler
376         */
377        protected CmsSearchOfflineIndexThread(CmsSearchOfflineHandler handler) {
378
379            super("OpenCms: Offline Search Indexer");
380            m_handler = handler;
381        }
382
383        /**
384         * Gets the wait handle used for signalling when the worker thread has finished.
385         *
386         * @return the wait handle
387         **/
388        public CmsWaitHandle getWaitHandle() {
389
390            return m_waitHandle;
391        }
392
393        /**
            * Interrupts this thread and additionally records that an update has been triggered.<p>
            *
            * With the flag set, the loop in {@link #run()} skips its sleep phase and
            * proceeds with an update cycle.<p>
            *
394         * @see java.lang.Thread#interrupt()
395         */
396        @Override
397        public void interrupt() {
398
399            super.interrupt();
400            m_updateTriggered = true;
401        }
402
403        /**
            * The main loop of the offline indexer.<p>
            *
            * As long as the thread is marked alive it sleeps for the offline update frequency
            * (unless an update has been triggered), then fetches the resources to index and
            * either starts an offline update for them or, if there are none, releases the wait handle.
            * The alive flag is reset when the loop is left, also in case of an exception.<p>
            *
404         * @see java.lang.Thread#run()
405         */
406        @Override
407        public void run() {
408
409            // create a log report for the output
410            I_CmsReport report = new CmsLogReport(m_adminCms.getRequestContext().getLocale(), CmsSearchManager.class);
411            long offlineUpdateFrequency = getOfflineUpdateFrequency();
412            m_updateTriggered = false;
413            try {
414                while (m_isAlive) {
               // sleep only if no update was requested in the meantime (see interrupt())
415                    if (!m_updateTriggered) {
416                        try {
417                            sleep(offlineUpdateFrequency);
418                        } catch (InterruptedException e) {
419                            // continue the thread after interruption
420                            if (!m_isAlive) {
421                                // the thread has been shut down while sleeping
422                                continue;
423                            }
424                            if (offlineUpdateFrequency != getOfflineUpdateFrequency()) {
425                                // the offline update frequency has changed - use the new value from now on
426                                offlineUpdateFrequency = getOfflineUpdateFrequency();
427                            }
428                            LOG.info(e.getLocalizedMessage(), e);
429                        }
430                    }
431                    if (m_isAlive) {
432                        // set update trigger to false since we do the update now
433                        m_updateTriggered = false;
434                        // get list of resource to update
435                        List<CmsPublishedResource> resourcesToIndex = getResourcesToIndex();
436                        if (resourcesToIndex.size() > 0) {
437                            // only start indexing if there is at least one resource
438                            startOfflineUpdateThread(report, resourcesToIndex);
439                        } else {
                           // nothing to index: release the wait handle directly, no worker will do it
440                            getWaitHandle().release();
441                        }
442                        // this is just called to clear the interrupt status of the thread
443                        interrupted();
444                    }
445                }
446            } finally {
447                // make sure that live status is reset in case of Exceptions
448                m_isAlive = false;
449            }
450
451        }
452
453        /**
            * Marks this thread as alive and then starts it.<p>
            *
            * The alive flag is set before <code>super.start()</code> is called, so it is
            * already set when {@link #run()} begins to check it.<p>
            *
454         * @see java.lang.Thread#start()
455         */
456        @Override
457        public synchronized void start() {
458
459            m_isAlive = true;
460            super.start();
461        }
462
463        /**
464         * Obtains the list of resource to update in the offline index,
465         * then optimizes the list by removing duplicate entries.<p>
466         *
467         * @return the list of resource to update in the offline index
468         */
469        protected List<CmsPublishedResource> getResourcesToIndex() {
470
471            List<CmsPublishedResource> resourcesToIndex = m_handler.getResourcesToIndex();
472            List<CmsPublishedResource> result = new ArrayList<CmsPublishedResource>(resourcesToIndex.size());
473
474            // Reverse to always keep the last list entries
475            Collections.reverse(resourcesToIndex);
476            for (CmsPublishedResource pubRes : resourcesToIndex) {
477                boolean addResource = true;
478                for (CmsPublishedResource resRes : result) {
479                    if (pubRes.equals(resRes)
480                        && (pubRes.getState() == resRes.getState())
481                        && (pubRes.getMovedState() == resRes.getMovedState())
482                        && pubRes.getRootPath().equals(resRes.getRootPath())) {
483                        // resource already in the update list
484                        addResource = false;
485                        break;
486                    }
487                }
488                if (addResource) {
489                    result.add(pubRes);
490                }
491
492            }
493            Collections.reverse(result);
494            return changeStateOfMoveOriginsToDeleted(result);
495        }
496
497        /**
498         * Shuts down this offline index thread.<p>
499         */
500        protected void shutDown() {
501
502            m_isAlive = false;
503            interrupt();
504            if (m_isUpdating) {
505                long waitTime = getOfflineUpdateFrequency() / 2;
506                int waitSteps = 0;
507                do {
508                    try {
509                        // wait half the time of the offline index frequency for the thread to finish
510                        Thread.sleep(waitTime);
511                    } catch (InterruptedException e) {
512                        // continue
513                        LOG.info(e.getLocalizedMessage(), e);
514                    }
515                    waitSteps++;
516                    // wait 5 times then stop waiting
517                } while ((waitSteps < 5) && m_isUpdating);
518            }
519        }
520
521        /**
522         * Updates the offline search indexes for the given list of resources.<p>
523         *
            * The update itself runs in a new {@link CmsSearchOfflineIndexWorkThread}. This method
            * blocks until that worker has terminated; each time a join of half the offline update
            * frequency ends with the worker still alive, a warning is logged.<p>
            *
524         * @param report the report to write the index information to
525         * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index
526         */
527        protected void startOfflineUpdateThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) {
528
529            CmsSearchOfflineIndexWorkThread thread = new CmsSearchOfflineIndexWorkThread(report, resourcesToIndex);
530            long startTime = System.currentTimeMillis();
531            long waitTime = getOfflineUpdateFrequency() / 2;
532            if (LOG.isDebugEnabled()) {
533                LOG.debug(
534                    Messages.get().getBundle().key(
535                        Messages.LOG_OI_UPDATE_START_1,
536                        Integer.valueOf(resourcesToIndex.size())));
537            }
538
           // the flag is polled by shutDown() to find out whether an update is still in progress
539            m_isUpdating = true;
540            thread.start();
541
542            do {
543                try {
544                    // wait half the time of the offline index frequency for the thread to finish
545                    thread.join(waitTime);
546                } catch (InterruptedException e) {
547                    // continue
548                    LOG.info(e.getLocalizedMessage(), e);
549                }
550                if (thread.isAlive()) {
                   // the worker needs longer than one wait interval - report size and elapsed time
551                    LOG.warn(
552                        Messages.get().getBundle().key(
553                            Messages.LOG_OI_UPDATE_LONG_2,
554                            Integer.valueOf(resourcesToIndex.size()),
555                            Long.valueOf(System.currentTimeMillis() - startTime)));
556                }
557            } while (thread.isAlive());
558            m_isUpdating = false;
559
560            if (LOG.isDebugEnabled()) {
561                LOG.debug(
562                    Messages.get().getBundle().key(
563                        Messages.LOG_OI_UPDATE_FINISH_2,
564                        Integer.valueOf(resourcesToIndex.size()),
565                        Long.valueOf(System.currentTimeMillis() - startTime)));
566            }
567        }
568
569        /**
570         * Helper method which changes the states of resources which are to be indexed but have the wrong path to 'deleted'.
571         * This is needed to deal with moved resources, since the documents with the old paths must be removed from the index,
572         *
573         * @param resourcesToIndex the resources to index
574         *
575         * @return the resources to index, but resource states are set to 'deleted' for resources with outdated paths
576         */
577        private List<CmsPublishedResource> changeStateOfMoveOriginsToDeleted(
578            List<CmsPublishedResource> resourcesToIndex) {
579
580            Map<CmsUUID, String> lastValidPaths = new HashMap<CmsUUID, String>();
581            for (CmsPublishedResource resource : resourcesToIndex) {
582                if (resource.getState().isDeleted()) {
583                    // we don't want the last path to be from a deleted resource
584                    continue;
585                }
586                lastValidPaths.put(resource.getStructureId(), resource.getRootPath());
587            }
588            List<CmsPublishedResource> result = new ArrayList<CmsPublishedResource>();
589            for (CmsPublishedResource resource : resourcesToIndex) {
590                if (resource.getState().isDeleted()) {
591                    result.add(resource);
592                    continue;
593                }
594                String lastValidPath = lastValidPaths.get(resource.getStructureId());
595                if (resource.getRootPath().equals(lastValidPath) || resource.getStructureId().isNullUUID()) {
596                    result.add(resource);
597                } else {
598                    result.add(
599                        new CmsPublishedResource(
600                            resource.getStructureId(),
601                            resource.getResourceId(),
602                            resource.getPublishTag(),
603                            resource.getRootPath(),
604                            resource.getType(),
605                            resource.isFolder(),
606                            CmsResource.STATE_DELETED, // make sure index entry with outdated path is deleted
607                            resource.getSiblingCount()));
608                }
609            }
610            return result;
611        }
612    }
613
614    /**
615     * An offline index worker Thread runs each time for every offline index update action.<p>
616     *
617     * This was decoupled from the main {@link CmsSearchOfflineIndexThread} in order to avoid
618     * problems if a single operation "hangs" the Tread.<p>
619     */
620    protected class CmsSearchOfflineIndexWorkThread extends Thread {
621
622        /** The report to write the index information to. */
623        I_CmsReport m_report;
624
625        /** The list of {@link CmsPublishedResource} objects to index. */
626        List<CmsPublishedResource> m_resourcesToIndex;
627
628        /**
629         * Updates the offline search indexes for the given list of resources.<p>
630         *
631         * @param report the report to write the index information to
632         * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index
633         */
634        protected CmsSearchOfflineIndexWorkThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) {
635
636            super("OpenCms: Offline Search Index Worker");
637            m_report = report;
638            m_resourcesToIndex = resourcesToIndex;
639        }
640
641        /**
642         * @see java.lang.Thread#run()
643         */
644        @Override
645        public void run() {
646
647            updateIndexOffline(m_report, m_resourcesToIndex);
648            if (m_offlineIndexThread != null) {
649                m_offlineIndexThread.getWaitHandle().release();
650            }
651        }
652    }
653
654    /** This needs to be a fair lock to preserve order of threads accessing the search manager. */
655    private static final ReentrantLock SEARCH_MANAGER_LOCK = new ReentrantLock(true);
656
657    /** The default value used for generating search result excerpts (1024 chars). */
658    public static final int DEFAULT_EXCERPT_LENGTH = 1024;
659
660    /** The default value used for keeping the extraction results in the cache (672 hours = 4 weeks). */
661    public static final float DEFAULT_EXTRACTION_CACHE_MAX_AGE = 672.0f;
662
663    /** Default for the maximum number of modifications before a commit in the search index is triggered (500). */
664    public static final int DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT = 500;
665
       // NOTE(review): the constant name below is misspelled ("FREQNENCY"); it is public, so renaming it would break existing callers
666    /** The default update frequency for offline indexes (15000 msec = 15 sec). */
667    public static final int DEFAULT_OFFLINE_UPDATE_FREQNENCY = 15000;
668
669    /** The default maximal wait time for re-indexing after editing a content (30000, presumably msec like the other defaults - TODO confirm). */
670    public static final int DEFAULT_MAX_INDEX_WAITTIME = 30000;
671
672    /** The default timeout value used for generating a document for the search index (60000 msec = 1 min). */
673    public static final int DEFAULT_TIMEOUT = 60000;
674
675    /** Scheduler parameter: Update only a specified list of indexes. */
676    public static final String JOB_PARAM_INDEXLIST = "indexList";
677
678    /** Scheduler parameter: Write the output of the update to the logfile. */
679    public static final String JOB_PARAM_WRITELOG = "writeLog";
680
681    /** Prefix for Lucene default analyzers package (<code>org.apache.lucene.analysis.core.</code>). */
682    public static final String LUCENE_ANALYZER = "org.apache.lucene.analysis.core.";
683
684    /** The log object for this class. */
685    protected static final Log LOG = CmsLog.getLog(CmsSearchManager.class);
686
687    /** List of resource types which represent groups of elements. */
688    private static final String[] groupTypes = {
689        CmsResourceTypeXmlContainerPage.MODEL_GROUP_TYPE_NAME,
690        CmsResourceTypeXmlContainerPage.GROUP_CONTAINER_TYPE_NAME,
691        CmsResourceTypeXmlContainerPage.INHERIT_CONTAINER_TYPE_NAME};
692
693    /** The administrator OpenCms user context to access OpenCms VFS resources. */
694    protected CmsObject m_adminCms;
695
696    /** The list of indexes that are configured for offline index mode. */
697    protected List<I_CmsSearchIndex> m_offlineIndexes;
698
699    /** The thread used for offline indexing. */
700    protected CmsSearchOfflineIndexThread m_offlineIndexThread;
701
702    /** Configured analyzers for languages using &lt;analyzer&gt;. */
703    private HashMap<Locale, CmsSearchAnalyzer> m_analyzers;
704
705    /** Stores the offline update frequency while indexing is paused. */
706    private long m_configuredOfflineIndexingFrequency;
707
708    /** The Solr core container. */
709    private CoreContainer m_coreContainer;
710
711    /** A list of document factory configurations. */
712    private List<CmsSearchDocumentType> m_documentTypeConfigs;
713
714    /** A map of document factories keyed first by their name and then by their extraction keys. */
715    private Map<String, Map<String, I_CmsDocumentFactory>> m_documentTypes;
716
717    /** The set of all globally available extraction keys for document factories. */
718    private Set<String> m_extractionKeys;
719
720    /** The max age for extraction results to remain in the cache. */
721    private float m_extractionCacheMaxAge;
722
723    /** The cache for the extraction results. */
724    private CmsExtractionResultCache m_extractionResultCache;
725
726    /** Contains the available field configurations. */
727    private Map<String, I_CmsSearchFieldConfiguration> m_fieldConfigurations;
728
729    /** The force unlock type. */
730    private CmsSearchForceUnlockMode m_forceUnlockMode;
731
732    /** The class used to highlight the search terms in the excerpt of a search result. */
733    private I_CmsTermHighlighter m_highlighter;
734
735    /** A list of search indexes. */
736    private List<I_CmsSearchIndex> m_indexes;
737
738    /** Seconds to wait for an index lock. */
739    private int m_indexLockMaxWaitSeconds = 10;
740
741    /** Configured index sources. */
742    private Map<String, CmsSearchIndexSource> m_indexSources;
743
744    /** The max. char. length of the excerpt in the search result. */
745    private int m_maxExcerptLength;
746
747    /** The maximum number of modifications before a commit in the search index is triggered. */
748    private int m_maxModificationsBeforeCommit;
749
750    /** The offline index search handler. */
751    private CmsSearchOfflineHandler m_offlineHandler;
752
753    /** The update frequency of the offline indexer in milliseconds. */
754    private long m_offlineUpdateFrequency;
755
756    /** The maximal time to wait for re-indexing after a content is edited (in milliseconds). */
757    private long m_maxIndexWaitTime;
758
759    /** Path to index files below WEB-INF/. */
760    private String m_path;
761
762    /** The Solr configuration. */
763    private CmsSolrConfiguration m_solrConfig;
764
765    /** Timeout for abandoning indexing thread. */
766    private long m_timeout;
767
768    /**
769     * Default constructor when called as cron job.<p>
770     */
771    public CmsSearchManager() {
772
773        m_documentTypes = new HashMap<String, Map<String, I_CmsDocumentFactory>>();
774        m_extractionKeys = new HashSet<String>();
775        m_documentTypeConfigs = new ArrayList<CmsSearchDocumentType>();
776        m_analyzers = new HashMap<Locale, CmsSearchAnalyzer>();
777        m_indexes = new ArrayList<I_CmsSearchIndex>();
778        m_indexSources = new TreeMap<String, CmsSearchIndexSource>();
779        m_offlineHandler = new CmsSearchOfflineHandler();
780        m_extractionCacheMaxAge = DEFAULT_EXTRACTION_CACHE_MAX_AGE;
781        m_maxExcerptLength = DEFAULT_EXCERPT_LENGTH;
782        m_offlineUpdateFrequency = DEFAULT_OFFLINE_UPDATE_FREQNENCY;
783        m_maxIndexWaitTime = DEFAULT_MAX_INDEX_WAITTIME;
784        m_maxModificationsBeforeCommit = DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT;
785
786        m_fieldConfigurations = new HashMap<String, I_CmsSearchFieldConfiguration>();
787        // make sure we have a "standard" field configuration
788        addFieldConfiguration(CmsLuceneFieldConfiguration.DEFAULT_STANDARD);
789
790        if (CmsLog.INIT.isInfoEnabled()) {
791            CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_START_SEARCH_CONFIG_0));
792        }
793    }
794
795    /**
796     * Returns an analyzer for the given class name.<p>
797     *
798     * @param className the class name of the analyzer
799     *
800     * @return the appropriate lucene analyzer
801     *
802     * @throws Exception if something goes wrong
803     */
804    public static Analyzer getAnalyzer(String className) throws Exception {
805
806        Analyzer analyzer = null;
807        Class<?> analyzerClass;
808        try {
809            analyzerClass = Class.forName(className);
810        } catch (ClassNotFoundException e) {
811            // allow Lucene standard classes to be written in a short form
812            analyzerClass = Class.forName(LUCENE_ANALYZER + className);
813        }
814
815        // since Lucene 3.0 most analyzers need a "version" parameter and don't support an empty constructor
816        if (StandardAnalyzer.class.equals(analyzerClass)) {
817            // the Lucene standard analyzer is used - but without any stopwords.
818            analyzer = new StandardAnalyzer(new CharArraySet(0, false));
819        } else {
820            analyzer = (Analyzer)analyzerClass.newInstance();
821        }
822        return analyzer;
823    }
824
825    /**
826     * Returns the Solr index configured with the parameters name.
827     * The parameters must contain a key/value pair with an existing
828     * Solr index, otherwise <code>null</code> is returned.<p>
829     *
830     * @param cms the current context
831     * @param params the parameter map
832     *
833     * @return the best matching Solr index
834     */
835    public static final CmsSolrIndex getIndexSolr(CmsObject cms, Map<String, String[]> params) {
836
837        String indexName = null;
838        CmsSolrIndex index = null;
839        // try to get the index name from the parameters: 'core' or 'index'
840        if (params != null) {
841            indexName = params.get(OpenCmsSolrHandler.PARAM_CORE) != null
842            ? params.get(OpenCmsSolrHandler.PARAM_CORE)[0]
843            : (params.get(OpenCmsSolrHandler.PARAM_INDEX) != null
844            ? params.get(OpenCmsSolrHandler.PARAM_INDEX)[0]
845            : null);
846        }
847        if (indexName == null) {
848            // if no parameter is specified try to use the default online/offline indexes by context
849            indexName = cms.getRequestContext().getCurrentProject().isOnlineProject()
850            ? CmsSolrIndex.DEFAULT_INDEX_NAME_ONLINE
851            : CmsSolrIndex.DEFAULT_INDEX_NAME_OFFLINE;
852        }
853        // try to get the index
854        index = OpenCms.getSearchManager().getIndexSolr(indexName);
855        if (index == null) {
856            // if there is exactly one index, a missing core / index parameter doesn't matter, since there is no choice.
857            List<CmsSolrIndex> solrs = OpenCms.getSearchManager().getAllSolrIndexes();
858            if ((solrs != null) && !solrs.isEmpty() && (solrs.size() == 1)) {
859                index = solrs.get(0);
860            }
861        }
862        return index;
863    }
864
865    /**
866     * Returns <code>true</code> if the index for the given name is a Lucene index, <code>false</code> otherwise.<p>
867     *
868     * @param indexName the name of the index to check
869     *
870     * @return <code>true</code> if the index for the given name is a Lucene index
871     */
872    public static boolean isLuceneIndex(String indexName) {
873
874        I_CmsSearchIndex i = OpenCms.getSearchManager().getIndex(indexName);
875        return (i instanceof CmsSearchIndex) && (!(i instanceof CmsSolrIndex));
876    }
877
878    /**
879     * Adds an analyzer.<p>
880     *
881     * @param analyzer an analyzer
882     */
883    public void addAnalyzer(CmsSearchAnalyzer analyzer) {
884
885        m_analyzers.put(analyzer.getLocale(), analyzer);
886
887        if (CmsLog.INIT.isInfoEnabled()) {
888            CmsLog.INIT.info(
889                Messages.get().getBundle().key(
890                    Messages.INIT_ADD_ANALYZER_2,
891                    analyzer.getLocale(),
892                    analyzer.getClassName()));
893        }
894    }
895
896    /**
897     * Adds a document type.<p>
898     *
899     * @param documentType a document type
900     */
901    public void addDocumentTypeConfig(CmsSearchDocumentType documentType) {
902
903        m_documentTypeConfigs.add(documentType);
904
905        if (CmsLog.INIT.isInfoEnabled()) {
906            CmsLog.INIT.info(
907                Messages.get().getBundle().key(
908                    Messages.INIT_SEARCH_DOC_TYPES_2,
909                    documentType.getName(),
910                    documentType.getClassName()));
911        }
912    }
913
914    /**
915     * Adds a search field configuration to the search manager.<p>
916     *
917     * @param fieldConfiguration the search field configuration to add
918     */
919    public void addFieldConfiguration(I_CmsSearchFieldConfiguration fieldConfiguration) {
920
921        m_fieldConfigurations.put(fieldConfiguration.getName(), fieldConfiguration);
922    }
923
924    /**
925     * Adds a search index to the configuration.<p>
926     *
927     * @param searchIndex the search index to add
928     */
929    public void addSearchIndex(I_CmsSearchIndex searchIndex) {
930
931        if (!searchIndex.isInitialized()) {
932            if (OpenCms.getRunLevel() > OpenCms.RUNLEVEL_2_INITIALIZING) {
933                try {
934                    searchIndex.initialize();
935                } catch (CmsException e) {
936                    // should never happen
937                    LOG.error(e.getMessage(), e);
938                }
939            }
940        }
941
942        // name: not null or emtpy and unique
943        String name = searchIndex.getName();
944        if (CmsStringUtil.isEmptyOrWhitespaceOnly(name)) {
945            throw new CmsIllegalArgumentException(
946                Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_MISSING_NAME_0));
947        }
948        if (m_indexSources.keySet().contains(name)) {
949            throw new CmsIllegalArgumentException(
950                Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_INVALID_NAME_1, name));
951        }
952
953        m_indexes.add(searchIndex);
954        if (m_adminCms != null) {
955            initOfflineIndexes();
956        }
957
958        if (CmsLog.INIT.isInfoEnabled()) {
959            CmsLog.INIT.info(
960                Messages.get().getBundle().key(
961                    Messages.INIT_ADD_SEARCH_INDEX_2,
962                    searchIndex.getName(),
963                    searchIndex.getProject()));
964        }
965    }
966
967    /**
968     * Adds a search index source configuration.<p>
969     *
970     * @param searchIndexSource a search index source configuration
971     */
972    public void addSearchIndexSource(CmsSearchIndexSource searchIndexSource) {
973
974        m_indexSources.put(searchIndexSource.getName(), searchIndexSource);
975
976        if (CmsLog.INIT.isInfoEnabled()) {
977            CmsLog.INIT.info(
978                Messages.get().getBundle().key(
979                    Messages.INIT_SEARCH_INDEX_SOURCE_2,
980                    searchIndexSource.getName(),
981                    searchIndexSource.getIndexerClassName()));
982        }
983    }
984
985    /**
986     * Implements the event listener of this class.<p>
987     *
988     * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent)
989     */
990    public void cmsEvent(CmsEvent event) {
991
992        switch (event.getType()) {
993            case I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES:
994                List<String> indexNames = null;
995                if ((event.getData() != null)
996                    && CmsStringUtil.isNotEmptyOrWhitespaceOnly(
997                        (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES))) {
998                    indexNames = CmsStringUtil.splitAsList(
999                        (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES),
1000                        ",",
1001                        true);
1002                }
1003                try {
1004                    if (LOG.isDebugEnabled()) {
1005                        LOG.debug(
1006                            Messages.get().getBundle().key(
1007                                Messages.LOG_EVENT_REBUILD_SEARCHINDEX_1,
1008                                indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")),
1009                            new Exception());
1010                    }
1011                    if (indexNames == null) {
1012                        rebuildAllIndexes(getEventReport(event));
1013                    } else {
1014                        rebuildIndexes(indexNames, getEventReport(event));
1015                    }
1016                } catch (CmsException e) {
1017                    if (LOG.isErrorEnabled()) {
1018                        LOG.error(
1019                            Messages.get().getBundle().key(
1020                                Messages.ERR_EVENT_REBUILD_SEARCHINDEX_1,
1021                                indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")),
1022                            e);
1023                    }
1024                }
1025                break;
1026            case I_CmsEventListener.EVENT_CLEAR_CACHES:
1027                if (LOG.isDebugEnabled()) {
1028                    LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_CLEAR_CACHES_0), new Exception());
1029                }
1030                break;
1031            case I_CmsEventListener.EVENT_PUBLISH_PROJECT:
1032                // event data contains a list of the published resources
1033                CmsUUID publishHistoryId = new CmsUUID((String)event.getData().get(I_CmsEventListener.KEY_PUBLISHID));
1034                if (LOG.isDebugEnabled()) {
1035                    LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_PUBLISH_PROJECT_1, publishHistoryId));
1036                }
1037                updateAllIndexes(m_adminCms, publishHistoryId, getEventReport(event));
1038                if (LOG.isDebugEnabled()) {
1039                    LOG.debug(
1040                        Messages.get().getBundle().key(
1041                            Messages.LOG_EVENT_PUBLISH_PROJECT_FINISHED_1,
1042                            publishHistoryId));
1043                }
1044                break;
1045            case I_CmsEventListener.EVENT_REINDEX_OFFLINE:
1046            case I_CmsEventListener.EVENT_REINDEX_ONLINE:
1047                boolean isOnline = I_CmsEventListener.EVENT_REINDEX_ONLINE == event.getType();
1048                Map<String, Object> eventData = event.getData();
1049                CmsUUID userId = (CmsUUID)eventData.get(I_CmsEventListener.KEY_USER_ID);
1050                CmsUser user = null;
1051                if (userId != null) {
1052                    try {
1053                        user = m_adminCms.readUser(userId);
1054                    } catch (Throwable t) {
1055                        // should not normally happen
1056                        LOG.debug(t.getMessage(), t);
1057                    }
1058                }
1059                try {
1060                    SEARCH_MANAGER_LOCK.lock();
1061                    if (LOG.isDebugEnabled()) {
1062                        LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_REINDEX_STARTED_0));
1063                    }
1064                    CmsObject cms = m_adminCms;
1065                    if (!isOnline) {
1066                        OpenCms.initCmsObject(m_adminCms);
1067                        cms.getRequestContext().setCurrentProject(
1068                            cms.readProject((CmsUUID)eventData.get(I_CmsEventListener.KEY_PROJECTID)));
1069                    }
1070                    @SuppressWarnings("unchecked")
1071                    List<CmsResource> resources = (List<CmsResource>)eventData.get(I_CmsEventListener.KEY_RESOURCES);
1072                    I_CmsReport report = (I_CmsReport)eventData.get(I_CmsEventListener.KEY_REPORT);
1073                    List<CmsResource> resourcesToIndex = new ArrayList<>();
1074                    for (CmsResource res : resources) {
1075                        if (res.isFile()) {
1076                            resourcesToIndex.add(res);
1077                        } else {
1078                            try {
1079                                resourcesToIndex.addAll(
1080                                    cms.readResources(res, CmsResourceFilter.IGNORE_EXPIRATION, true));
1081                            } catch (CmsException e) {
1082                                LOG.error(e, e);
1083                            }
1084                        }
1085                    }
1086                    // we reindex and prevent using cached results
1087                    cleanExtractionCache();
1088                    List<CmsPublishedResource> publishedResourcesToIndex = resourcesToIndex.stream().map(
1089                        res -> new CmsPublishedResource(res)).collect(Collectors.toList());
1090                    if (Boolean.TRUE.equals(eventData.get(I_CmsEventListener.KEY_REINDEX_RELATED))) {
1091                        addAdditionallyAffectedResources(cms, publishedResourcesToIndex);
1092                    }
1093                    if (isOnline) {
1094                        updateAllIndexes(
1095                            m_adminCms,
1096                            publishedResourcesToIndex,
1097                            new CmsShellLogReport(CmsLocaleManager.MASTER_LOCALE));
1098                    } else {
1099                        updateIndexOffline(report, publishedResourcesToIndex);
1100                    }
1101                    cms = null;
1102                    SEARCH_MANAGER_LOCK.unlock();
1103                    if (null != user) {
1104                        Locale l = OpenCms.getWorkplaceManager().getWorkplaceLocale(user);
1105                        OpenCms.getSessionManager().sendBroadcast(
1106                            null,
1107                            Messages.get().getBundle(l).key(Messages.GUI_REINDEXING_SUCCESS_0),
1108                            user,
1109                            ContentMode.html);
1110                    }
1111                    if (LOG.isDebugEnabled()) {
1112                        LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_REINDEX_FINISHED_0));
1113                    }
1114
1115                } catch (Throwable e) {
1116                    if (SEARCH_MANAGER_LOCK.isHeldByCurrentThread()) {
1117                        SEARCH_MANAGER_LOCK.unlock();
1118                    }
1119                    if (null != user) {
1120                        Locale l = OpenCms.getWorkplaceManager().getWorkplaceLocale(user);
1121                        OpenCms.getSessionManager().sendBroadcast(
1122                            null,
1123                            Messages.get().getBundle(l).key(Messages.GUI_REINDEXING_FAILED_0),
1124                            user,
1125                            ContentMode.html);
1126                    }
1127                    if (LOG.isDebugEnabled()) {
1128                        LOG.error(
1129                            Messages.get().getBundle().key(Messages.ERR_EVENT_REINDEX_FAILED_1, event.getData()),
1130                            e);
1131                    } else if (LOG.isErrorEnabled()) {
1132                        LOG.error(Messages.get().getBundle().key(Messages.ERR_EVENT_REINDEX_FAILED_1, event.getData()));
1133                    }
1134                }
1135                break;
1136            default:
1137                // no operation
1138        }
1139    }
1140
1141    /**
1142     * Returns all Solr index.<p>
1143     *
1144     * @return all Solr indexes
1145     */
1146    public List<CmsSolrIndex> getAllSolrIndexes() {
1147
1148        List<CmsSolrIndex> result = new ArrayList<CmsSolrIndex>();
1149        for (String indexName : getIndexNames()) {
1150            CmsSolrIndex index = getIndexSolr(indexName);
1151            if (index != null) {
1152                result.add(index);
1153            }
1154        }
1155        return result;
1156    }
1157
1158    /**
1159     * Returns an analyzer for the given language.<p>
1160     *
1161     * The analyzer is selected according to the analyzer configuration.<p>
1162     *
1163     * @param locale the locale to get the analyzer for
1164     * @return the appropriate lucene analyzer
1165     *
1166     * @throws CmsSearchException if something goes wrong
1167     */
1168    public Analyzer getAnalyzer(Locale locale) throws CmsSearchException {
1169
1170        Analyzer analyzer = null;
1171        String className = null;
1172
1173        CmsSearchAnalyzer analyzerConf = m_analyzers.get(locale);
1174        if (analyzerConf == null) {
1175            throw new CmsSearchException(Messages.get().container(Messages.ERR_ANALYZER_NOT_FOUND_1, locale));
1176        }
1177
1178        try {
1179            analyzer = getAnalyzer(analyzerConf.getClassName());
1180        } catch (Exception e) {
1181            throw new CmsSearchException(Messages.get().container(Messages.ERR_LOAD_ANALYZER_1, className), e);
1182        }
1183
1184        return analyzer;
1185    }
1186
1187    /**
1188     * Returns an unmodifiable view of the map that contains the {@link CmsSearchAnalyzer} list.<p>
1189     *
1190     * The keys in the map are {@link Locale} objects, and the values are {@link CmsSearchAnalyzer} objects.
1191     *
1192     * @return an unmodifiable view of the Analyzers Map
1193     */
1194    public Map<Locale, CmsSearchAnalyzer> getAnalyzers() {
1195
1196        return Collections.unmodifiableMap(m_analyzers);
1197    }
1198
1199    /**
1200     * Returns the search analyzer for the given locale.<p>
1201     *
1202     * @param locale the locale to get the analyzer for
1203     *
1204     * @return the search analyzer for the given locale
1205     */
1206    public CmsSearchAnalyzer getCmsSearchAnalyzer(Locale locale) {
1207
1208        return m_analyzers.get(locale);
1209    }
1210
1211    /**
1212     * Returns the name of the directory below WEB-INF/ where the search indexes are stored.<p>
1213     *
1214     * @return the name of the directory below WEB-INF/ where the search indexes are stored
1215     */
1216    public String getDirectory() {
1217
1218        return m_path;
1219    }
1220
1221    /**
1222     * Returns the configured Solr home directory <code>null</code> if not set.<p>
1223     *
1224     * @return the Solr home directory
1225     */
1226    public String getDirectorySolr() {
1227
1228        return m_solrConfig != null ? m_solrConfig.getHome() : null;
1229    }
1230
1231    /**
1232     * Returns the document factory configured under the provided name.
1233     * @param docTypeName the name of the document type.
1234     * @return the factory for the provided name.
1235     */
1236    public I_CmsDocumentFactory getDocumentFactoryForName(String docTypeName) {
1237
1238        Map<String, I_CmsDocumentFactory> factoryMap = m_documentTypes.get(docTypeName);
1239        if (factoryMap != null) {
1240            Iterator<I_CmsDocumentFactory> factoryIt = factoryMap.values().iterator();
1241            if (factoryIt.hasNext()) {
1242                return factoryMap.values().iterator().next();
1243            }
1244        }
1245        return null;
1246    }
1247
1248    /**
1249     * Returns a document type config.<p>
1250     *
1251     * @param name the name of the document type config
1252     * @return the document type config.
1253     */
1254    public CmsSearchDocumentType getDocumentTypeConfig(String name) {
1255
1256        // this is really used only for the search manager GUI,
1257        // so performance is not an issue and no lookup map is generated
1258        for (int i = 0; i < m_documentTypeConfigs.size(); i++) {
1259            CmsSearchDocumentType type = m_documentTypeConfigs.get(i);
1260            if (type.getName().equals(name)) {
1261                return type;
1262            }
1263        }
1264        return null;
1265    }
1266
1267    /**
1268     * Returns an unmodifiable view (read-only) of the DocumentTypeConfigs Map.<p>
1269     *
1270     * @return an unmodifiable view (read-only) of the DocumentTypeConfigs Map
1271     */
1272    public List<CmsSearchDocumentType> getDocumentTypeConfigs() {
1273
1274        return Collections.unmodifiableList(m_documentTypeConfigs);
1275    }
1276
1277    /**
1278     * Returns the document type keys used to specify the correct document factory.
1279     *
1280     * @see #getDocumentTypeKeys(String, String) for detailed information on the returned keys.
1281     *
1282     * @param resource the resource to generate the list of document type keys for.
1283     * @return the document type keys.
1284     */
1285    public List<String> getDocumentTypeKeys(CmsResource resource) {
1286
1287        // first get the MIME type of the resource
1288        String mimeType = OpenCms.getResourceManager().getMimeType(resource.getRootPath(), null, "unknown");
1289        String resourceType = null;
1290        try {
1291            resourceType = OpenCms.getResourceManager().getResourceType(resource.getTypeId()).getTypeName();
1292        } catch (CmsLoaderException e) {
1293            // ignore, unknown resource type, resource can not be indexed
1294            LOG.info(e.getLocalizedMessage(), e);
1295        }
1296        return getDocumentTypeKeys(resourceType, mimeType);
1297    }
1298
    /**
     * Returns the document type keys used to specify the correct document factory.
     * One resource typically has more than one key. The document factories are matched
     * in the provided order and the first matching factory is used.
     *
     * The keys for type name "typename" and mimetype "mimetype" would be a subset of:
     * <ul>
     *  <li><code>typename_mimetype</code></li>
     *  <li><code>typename</code></li>
     *  <li>if <code>typename</code> is a sub-type of <code>containerpage</code>
     *      <ul>
     *          <li><code>containerpage_mimetype</code></li>
     *          <li><code>containerpage</code></li>
     *      </ul>
     *  </li>
     *  <li>if <code>typename</code> is a sub-type of <code>xmlcontent</code>
     *      <ul>
     *          <li><code>xmlcontent_mimetype</code></li>
     *          <li><code>xmlcontent</code></li>
     *      </ul>
     *  </li>
     *  <li><code>__unconfigured___mimetype</code></li>
     *  <li><code>__unconfigured__</code></li>
     *  <li><code>__all___mimetype</code></li>
     *  <li><code>__all__</code></li>
     * </ul>
     * Note that all keys except the "__all__"-keys are only added as long as globally
     * there is no matching factory for the key.
     * This in particular means that a factory matching "typename" will never be used
     * if you have a factory for "typename_mimetype" - even if this is not configured
     * for the used index source. Eventually, the content will not be indexed in such cases.
     *
     * If the resource type is <code>null</code>, an empty list is returned.
     *
     * @param resourceType the resource type to generate the list of document type keys for.
     * @param mimeType the mime type to generate the list of document type keys for.
     * @return the document type keys.
     */
    public List<String> getDocumentTypeKeys(String resourceType, String mimeType) {

        List<String> result = new ArrayList<>(8);
        if (null != resourceType) {
            // most specific key first: type name combined with the mime type
            String currentKey = A_CmsVfsDocument.getDocumentKey(resourceType, mimeType);
            result.add(currentKey);
            if (!m_extractionKeys.contains(currentKey)) {
                // no global factory for the combined key, continue with the type name only
                currentKey = A_CmsVfsDocument.getDocumentKey(resourceType, null);
                result.add(currentKey);
                if (!m_extractionKeys.contains(currentKey)) {
                    // no global factory for the type itself, try the keys of the parent types
                    boolean hasGlobalMatch = false;
                    try {
                        String containerpageTypeName = CmsResourceTypeXmlContainerPage.getStaticTypeName();
                        I_CmsResourceType type = OpenCms.getResourceManager().getResourceType(resourceType);
                        if (!resourceType.equals(containerpageTypeName)) {
                            if (type instanceof CmsResourceTypeXmlContainerPage) {
                                // NOTE(review): this repeats the check of the enclosing if statement
                                if (!resourceType.equals(CmsResourceTypeXmlContainerPage.getStaticTypeName())) {
                                    currentKey = A_CmsVfsDocument.getDocumentKey(containerpageTypeName, mimeType);
                                    result.add(currentKey);
                                    hasGlobalMatch = m_extractionKeys.contains(currentKey);
                                    if (!hasGlobalMatch) {
                                        currentKey = A_CmsVfsDocument.getDocumentKey(containerpageTypeName, null);
                                        result.add(currentKey);
                                        hasGlobalMatch = m_extractionKeys.contains(currentKey);
                                    }
                                }
                            }
                        }
                        String xmlcontentTypeName = CmsResourceTypeXmlContent.getStaticTypeName();
                        // NOTE(review): the guard compares with the container page type name, not with
                        // xmlcontentTypeName - so the xmlcontent keys are skipped for the container page type,
                        // and added a second time for the xmlcontent type itself; confirm this is intended
                        if (!resourceType.equals(containerpageTypeName)) {
                            if (!hasGlobalMatch && (type instanceof CmsResourceTypeXmlContent)) {
                                currentKey = A_CmsVfsDocument.getDocumentKey(xmlcontentTypeName, mimeType);
                                result.add(currentKey);
                                hasGlobalMatch = m_extractionKeys.contains(currentKey);
                                if (!hasGlobalMatch) {
                                    currentKey = A_CmsVfsDocument.getDocumentKey(xmlcontentTypeName, null);
                                    result.add(currentKey);
                                    hasGlobalMatch = m_extractionKeys.contains(currentKey);
                                }
                            }
                        }
                    } catch (Throwable t) {
                        // the keys collected so far are kept, the fallback keys below are still added
                        LOG.warn("Could not read type for name \"" + resourceType + "\".", t);
                    }
                    if (!hasGlobalMatch) {
                        // no global factory for the type or its parent types: add the keys for unconfigured types
                        result.add(
                            A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_UNCONFIGURED_TYPES, mimeType));
                        result.add(
                            A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_UNCONFIGURED_TYPES, null));
                    }
                }
            }
            // the keys for all types are always added last
            result.add(A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_TYPES, mimeType));
            result.add(A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_TYPES, null));
        }
        return result;

    }
1392
1393    /**
1394     * Returns the map from document type keys to document factories with all entries for the provided document type names.
1395     * @param documentTypeNames list of document type names to generate the map for.
1396     * @return the map from document type keys to document factories.
1397     */
1398    public Map<String, I_CmsDocumentFactory> getDocumentTypeMapForTypeNames(List<String> documentTypeNames) {
1399
1400        Map<String, I_CmsDocumentFactory> result = new LinkedHashMap<>();
1401        if (null != documentTypeNames) {
1402            // Iterate the list in reverse order to prefer factories that are added by document types listed earlier.
1403            ListIterator<String> typesIterator = documentTypeNames.listIterator(documentTypeNames.size());
1404            while (typesIterator.hasPrevious()) {
1405                Map<String, I_CmsDocumentFactory> factories = m_documentTypes.get(typesIterator.previous());
1406                if (null != factories) {
1407                    result.putAll(factories);
1408                }
1409            }
1410        }
1411        return result;
1412    }
1413
    /**
     * Returns the maximum age a text extraction result is kept in the cache (in hours).<p>
     *
     * @return the currently set maximum age of cached text extraction results (in hours)
     */
    public float getExtractionCacheMaxAge() {

        return m_extractionCacheMaxAge;
    }
1423
    /**
     * Returns the search field configuration registered under the given name.<p>
     *
     * In case no configuration is registered under the given name, <code>null</code> is returned.<p>
     *
     * @param name the name of the search field configuration to look up
     *
     * @return the search field configuration with the given name, or <code>null</code> if there is none
     */
    public I_CmsSearchFieldConfiguration getFieldConfiguration(String name) {

        return m_fieldConfigurations.get(name);
    }
1437
1438    /**
1439     * Returns the unmodifieable List of configured {@link I_CmsSearchFieldConfiguration} entries.<p>
1440     *
1441     * @return the unmodifieable List of configured {@link I_CmsSearchFieldConfiguration} entries
1442     */
1443    public List<I_CmsSearchFieldConfiguration> getFieldConfigurations() {
1444
1445        List<I_CmsSearchFieldConfiguration> result = new ArrayList<I_CmsSearchFieldConfiguration>(
1446            m_fieldConfigurations.values());
1447        Collections.sort(result);
1448        return Collections.unmodifiableList(result);
1449    }
1450
1451    /**
1452     * Returns the Lucene search field configurations only.<p>
1453     *
1454     * @return the Lucene search field configurations
1455     */
1456    public List<CmsLuceneFieldConfiguration> getFieldConfigurationsLucene() {
1457
1458        List<CmsLuceneFieldConfiguration> result = new ArrayList<CmsLuceneFieldConfiguration>();
1459        for (I_CmsSearchFieldConfiguration conf : m_fieldConfigurations.values()) {
1460            if (conf instanceof CmsLuceneFieldConfiguration) {
1461                result.add((CmsLuceneFieldConfiguration)conf);
1462            }
1463        }
1464        Collections.sort(result);
1465        return Collections.unmodifiableList(result);
1466    }
1467
1468    /**
1469     * Returns the Solr search field configurations only.<p>
1470     *
1471     * @return the Solr search field configurations
1472     */
1473    public List<CmsSolrFieldConfiguration> getFieldConfigurationsSolr() {
1474
1475        List<CmsSolrFieldConfiguration> result = new ArrayList<CmsSolrFieldConfiguration>();
1476        for (I_CmsSearchFieldConfiguration conf : m_fieldConfigurations.values()) {
1477            if (conf instanceof CmsSolrFieldConfiguration) {
1478                result.add((CmsSolrFieldConfiguration)conf);
1479            }
1480        }
1481        Collections.sort(result);
1482        return Collections.unmodifiableList(result);
1483    }
1484
    /**
     * Returns the force unlock mode used during indexing.<p>
     *
     * @return the force unlock mode currently set on this search manager
     */
    public CmsSearchForceUnlockMode getForceunlock() {

        return m_forceUnlockMode;
    }
1494
    /**
     * Returns the term highlighter.<p>
     *
     * @return the term highlighter currently set on this search manager
     */
    public I_CmsTermHighlighter getHighlighter() {

        return m_highlighter;
    }
1504
1505    /**
1506     * Returns the Lucene search index configured with the given name.<p>
1507     * The index must exist, otherwise <code>null</code> is returned.
1508     *
1509     * @param indexName then name of the requested search index
1510     *
1511     * @return the Lucene search index configured with the given name
1512     */
1513    public I_CmsSearchIndex getIndex(String indexName) {
1514
1515        for (I_CmsSearchIndex index : m_indexes) {
1516            if (indexName.equalsIgnoreCase(index.getName())) {
1517                return index;
1518            }
1519        }
1520        return null;
1521    }
1522
    /**
     * Returns the seconds to wait for an index lock during an update operation.<p>
     *
     * @return the currently set number of seconds to wait for an index lock during an update operation
     */
    public int getIndexLockMaxWaitSeconds() {

        return m_indexLockMaxWaitSeconds;
    }
1532
1533    /**
1534     * Returns the names of all configured indexes.<p>
1535     *
1536     * @return list of names
1537     */
1538    public List<String> getIndexNames() {
1539
1540        List<String> indexNames = new ArrayList<String>();
1541        for (int i = 0, n = m_indexes.size(); i < n; i++) {
1542            indexNames.add((m_indexes.get(i)).getName());
1543        }
1544
1545        return indexNames;
1546    }
1547
1548    /**
1549     * Returns the Solr index configured with the given name.<p>
1550     * The index must exist, otherwise <code>null</code> is returned.
1551     *
1552     * @param indexName then name of the requested Solr index
1553     * @return the Solr index configured with the given name
1554     */
1555    public CmsSolrIndex getIndexSolr(String indexName) {
1556
1557        I_CmsSearchIndex index = getIndex(indexName);
1558        if (index instanceof CmsSolrIndex) {
1559            return (CmsSolrIndex)index;
1560        }
1561        return null;
1562    }
1563
    /**
     * Returns the search index source registered under the specified source name.<p>
     *
     * @param sourceName the name of the index source to look up
     *
     * @return the search index source found for the given name in the index source map
     */
    public CmsSearchIndexSource getIndexSource(String sourceName) {

        return m_indexSources.get(sourceName);
    }
1574
    /**
     * Returns the maximum excerpt length.<p>
     *
     * @return the currently set maximum excerpt length
     */
    public int getMaxExcerptLength() {

        return m_maxExcerptLength;
    }
1584
    /**
     * Returns the maximal time to wait for re-indexing after a content is edited (in milliseconds).<p>
     *
     * @return the currently set maximal time to wait for re-indexing after a content is edited (in milliseconds)
     */
    public long getMaxIndexWaitTime() {

        return m_maxIndexWaitTime;
    }
1594
    /**
     * Returns the maximum number of modifications before a commit in the search index is triggered.<p>
     *
     * @return the currently set maximum number of modifications before a commit in the search index is triggered
     */
    public int getMaxModificationsBeforeCommit() {

        return m_maxModificationsBeforeCommit;
    }
1604
    /**
     * Returns the update frequency of the offline indexer in milliseconds.<p>
     *
     * While the offline indexing is paused (see {@link #isOfflineIndexingPaused()}),
     * this is <code>{@link Long#MAX_VALUE}</code>.<p>
     *
     * @return the update frequency of the offline indexer in milliseconds
     */
    public long getOfflineUpdateFrequency() {

        return m_offlineUpdateFrequency;
    }
1614
    /**
     * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p>
     *
     * The returned list is a read-only view on the internal index list, not a copy.<p>
     *
     * NOTE(review): this currently returns exactly the same list as {@link #getSearchIndexesAll()}.<p>
     *
     * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances
     */
    public List<I_CmsSearchIndex> getSearchIndexes() {

        return Collections.unmodifiableList(m_indexes);
    }
1624
    /**
     * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances,
     * regardless of their implementation type.<p>
     *
     * The returned list is a read-only view on the internal index list, not a copy.<p>
     *
     * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances
     */
    public List<I_CmsSearchIndex> getSearchIndexesAll() {

        return Collections.unmodifiableList(m_indexes);
    }
1634
1635    /**
1636     * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p>
1637     *
1638     * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances
1639     */
1640    public List<CmsSolrIndex> getSearchIndexesSolr() {
1641
1642        List<CmsSolrIndex> indexes = new ArrayList<CmsSolrIndex>();
1643        for (I_CmsSearchIndex index : m_indexes) {
1644            if (index instanceof CmsSolrIndex) {
1645                indexes.add((CmsSolrIndex)index);
1646            }
1647        }
1648        return Collections.unmodifiableList(indexes);
1649    }
1650
    /**
     * Returns an unmodifiable view (read-only) of the search index sources Map.<p>
     *
     * The keys of the Map are the names of the index sources.<p>
     *
     * @return an unmodifiable view (read-only) of the search index sources Map
     */
    public Map<String, CmsSearchIndexSource> getSearchIndexSources() {

        return Collections.unmodifiableMap(m_indexSources);
    }
1660
    /**
     * Returns the OpenCms spellchecker instance for the Solr core container of this search manager.<p>
     *
     * If the core container has not been created so far, it is created by this call.<p>
     *
     * @return the instance provided by {@link CmsSolrSpellchecker#getInstance} for the core container
     */
    public CmsSolrSpellchecker getSolrDictionary() {

        // get the core container that contains one core for each configured index,
        // it is created on demand the first time it is needed
        if (m_coreContainer == null) {
            m_coreContainer = createCoreContainer();
        }
        return CmsSolrSpellchecker.getInstance(m_coreContainer);
    }
1674
    /**
     * Returns the Solr configuration.<p>
     *
     * @return the Solr configuration currently set on this search manager
     */
    public CmsSolrConfiguration getSolrServerConfiguration() {

        return m_solrConfig;
    }
1684
    /**
     * Returns the timeout to abandon threads indexing a resource.<p>
     *
     * @return the currently set timeout to abandon threads indexing a resource
     */
    public long getTimeout() {

        return m_timeout;
    }
1694
1695    /**
1696     * Initializes the search manager.<p>
1697     *
1698     * @param cms the cms object
1699     *
1700     * @throws CmsRoleViolationException in case the given opencms object does not have <code>{@link CmsRole#WORKPLACE_MANAGER}</code> permissions
1701     */
1702    public void initialize(CmsObject cms) throws CmsRoleViolationException {
1703
1704        OpenCms.getRoleManager().checkRole(cms, CmsRole.WORKPLACE_MANAGER);
1705        try {
1706            // store the Admin cms to index Cms resources
1707            m_adminCms = OpenCms.initCmsObject(cms);
1708        } catch (CmsException e) {
1709            // this should never happen
1710            LOG.error(e.getLocalizedMessage(), e);
1711        }
1712        // make sure the site root is the root site
1713        m_adminCms.getRequestContext().setSiteRoot("/");
1714
1715        // create the extraction result cache
1716        m_extractionResultCache = new CmsExtractionResultCache(
1717            OpenCms.getSystemInfo().getAbsoluteRfsPathRelativeToWebInf(getDirectory()),
1718            "/extractCache");
1719        initializeFieldConfigurations();
1720        initializeIndexes();
1721        initOfflineIndexes();
1722
1723        // register this object as event listener
1724        OpenCms.addCmsEventListener(
1725            this,
1726            new int[] {
1727                I_CmsEventListener.EVENT_CLEAR_CACHES,
1728                I_CmsEventListener.EVENT_PUBLISH_PROJECT,
1729                I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES,
1730                I_CmsEventListener.EVENT_REINDEX_OFFLINE,
1731                I_CmsEventListener.EVENT_REINDEX_ONLINE});
1732    }
1733
1734    /**
1735     * Calls {@link I_CmsSearchFieldConfiguration#init()} for all registered field configurations.
1736     */
1737    public void initializeFieldConfigurations() {
1738
1739        for (I_CmsSearchFieldConfiguration config : m_fieldConfigurations.values()) {
1740            config.init();
1741        }
1742
1743    }
1744
    /**
     * Initializes all configured document types, index sources and search indexes.<p>
     *
     * This method needs to be called after a change in the index configuration has been made.
     */
    public void initializeIndexes() {

        // the three steps are executed in this fixed order:
        // document types first, then the index sources, then the indexes
        initAvailableDocumentTypes();
        initIndexSources();
        initSearchIndexes();
    }
1756
1757    /**
1758     * Initialize the offline index handler, require after an offline index has been added.<p>
1759     */
1760    public void initOfflineIndexes() {
1761
1762        // check which indexes are configured as offline indexes
1763        List<I_CmsSearchIndex> offlineIndexes = new ArrayList<I_CmsSearchIndex>();
1764        Iterator<I_CmsSearchIndex> i = m_indexes.iterator();
1765        while (i.hasNext()) {
1766            I_CmsSearchIndex index = i.next();
1767            if (I_CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) {
1768                // this is an offline index
1769                offlineIndexes.add(index);
1770            }
1771        }
1772        m_offlineIndexes = offlineIndexes;
1773        m_offlineHandler.initialize();
1774
1775    }
1776
1777    /**
1778     * Initializes the spell check index.<p>
1779     *
1780     * @param adminCms the ROOT_ADMIN cms context
1781     */
1782    public void initSpellcheckIndex(CmsObject adminCms) {
1783
1784        if (CmsSpellcheckDictionaryIndexer.updatingIndexNecessesary(adminCms)) {
1785            final CmsSolrSpellchecker spellchecker = OpenCms.getSearchManager().getSolrDictionary();
1786            if (spellchecker != null) {
1787
1788                Runnable initRunner = new Runnable() {
1789
1790                    public void run() {
1791
1792                        try {
1793                            spellchecker.parseAndAddDictionaries(adminCms);
1794                        } catch (CmsRoleViolationException e) {
1795                            LOG.error(e.getLocalizedMessage(), e);
1796                        }
1797                    }
1798                };
1799                new Thread(initRunner).start();
1800            }
1801        }
1802    }
1803
    /**
     * Returns if the offline indexing is paused.<p>
     *
     * The paused state is indicated by an offline update frequency of <code>{@link Long#MAX_VALUE}</code>,
     * which is what {@link #pauseOfflineIndexing()} sets.<p>
     *
     * @return <code>true</code> if the offline indexing is paused
     */
    public boolean isOfflineIndexingPaused() {

        return m_offlineUpdateFrequency == Long.MAX_VALUE;
    }
1813
1814    /**
1815     * Updates the indexes from as a scheduled job.<p>
1816     *
1817     * @param cms the OpenCms user context to use when reading resources from the VFS
1818     * @param parameters the parameters for the scheduled job
1819     *
1820     * @throws Exception if something goes wrong
1821     *
1822     * @return the String to write in the scheduler log
1823     *
1824     * @see org.opencms.scheduler.I_CmsScheduledJob#launch(CmsObject, Map)
1825     */
1826    public String launch(CmsObject cms, Map<String, String> parameters) throws Exception {
1827
1828        CmsSearchManager manager = OpenCms.getSearchManager();
1829
1830        I_CmsReport report = null;
1831        boolean writeLog = Boolean.valueOf(parameters.get(JOB_PARAM_WRITELOG)).booleanValue();
1832
1833        if (writeLog) {
1834            report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class);
1835        }
1836
1837        List<String> updateList = null;
1838        String indexList = parameters.get(JOB_PARAM_INDEXLIST);
1839        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(indexList)) {
1840            // index list has been provided as job parameter
1841            updateList = new ArrayList<String>();
1842            String[] indexNames = CmsStringUtil.splitAsArray(indexList, '|');
1843            for (int i = 0; i < indexNames.length; i++) {
1844                // check if the index actually exists
1845                if (manager.getIndex(indexNames[i]) != null) {
1846                    updateList.add(indexNames[i]);
1847                } else {
1848                    if (LOG.isWarnEnabled()) {
1849                        LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexNames[i]));
1850                    }
1851                }
1852            }
1853        }
1854
1855        long startTime = System.currentTimeMillis();
1856
1857        if (updateList == null) {
1858            // all indexes need to be updated
1859            manager.rebuildAllIndexes(report);
1860        } else {
1861            // rebuild only the selected indexes
1862            manager.rebuildIndexes(updateList, report);
1863        }
1864
1865        long runTime = System.currentTimeMillis() - startTime;
1866
1867        String finishMessage = Messages.get().getBundle().key(
1868            Messages.LOG_REBUILD_INDEXES_FINISHED_1,
1869            CmsStringUtil.formatRuntime(runTime));
1870
1871        if (LOG.isInfoEnabled()) {
1872            LOG.info(finishMessage);
1873        }
1874        return finishMessage;
1875    }
1876
1877    /**
1878     * Pauses the offline indexing.<p>
1879     * May take some time, because the indexes are updated first.<p>
1880     */
1881    public void pauseOfflineIndexing() {
1882
1883        if (m_offlineUpdateFrequency != Long.MAX_VALUE) {
1884            m_configuredOfflineIndexingFrequency = m_offlineUpdateFrequency;
1885            m_offlineUpdateFrequency = Long.MAX_VALUE;
1886            updateOfflineIndexes(0);
1887        }
1888    }
1889
1890    /**
1891     * Rebuilds (if required creates) all configured indexes.<p>
1892     *
1893     * @param report the report object to write messages (or <code>null</code>)
1894     *
1895     * @throws CmsException if something goes wrong
1896     */
1897    public void rebuildAllIndexes(I_CmsReport report) throws CmsException {
1898
1899        try {
1900            SEARCH_MANAGER_LOCK.lock();
1901
1902            CmsMessageContainer container = null;
1903            for (int i = 0, n = m_indexes.size(); i < n; i++) {
1904                // iterate all configured search indexes
1905                I_CmsSearchIndex searchIndex = m_indexes.get(i);
1906                try {
1907                    // update the index
1908                    updateIndex(searchIndex, report, null);
1909                } catch (CmsException e) {
1910                    container = new CmsMessageContainer(
1911                        Messages.get(),
1912                        Messages.ERR_INDEX_REBUILD_ALL_1,
1913                        new Object[] {searchIndex.getName()});
1914                    LOG.error(
1915                        Messages.get().getBundle().key(Messages.ERR_INDEX_REBUILD_ALL_1, searchIndex.getName()),
1916                        e);
1917                }
1918            }
1919            // clean up the extraction result cache
1920            cleanExtractionCache();
1921            if (container != null) {
1922                // throw stored exception
1923                throw new CmsSearchException(container);
1924            }
1925        } finally {
1926            SEARCH_MANAGER_LOCK.unlock();
1927        }
1928    }
1929
1930    /**
1931     * Rebuilds (if required creates) the index with the given name.<p>
1932     *
1933     * @param indexName the name of the index to rebuild
1934     * @param report the report object to write messages (or <code>null</code>)
1935     *
1936     * @throws CmsException if something goes wrong
1937     */
1938    public void rebuildIndex(String indexName, I_CmsReport report) throws CmsException {
1939
1940        try {
1941            SEARCH_MANAGER_LOCK.lock();
1942            // get the search index by name
1943            I_CmsSearchIndex index = getIndex(indexName);
1944            // update the index
1945            updateIndex(index, report, null);
1946            // clean up the extraction result cache
1947            cleanExtractionCache();
1948        } finally {
1949            SEARCH_MANAGER_LOCK.unlock();
1950        }
1951    }
1952
1953    /**
1954     * Rebuilds (if required creates) the List of indexes with the given name.<p>
1955     *
1956     * @param indexNames the names (String) of the index to rebuild
1957     * @param report the report object to write messages (or <code>null</code>)
1958     *
1959     * @throws CmsException if something goes wrong
1960     */
1961    public void rebuildIndexes(List<String> indexNames, I_CmsReport report) throws CmsException {
1962
1963        try {
1964            SEARCH_MANAGER_LOCK.lock();
1965            Iterator<String> i = indexNames.iterator();
1966            while (i.hasNext()) {
1967                String indexName = i.next();
1968                // get the search index by name
1969                I_CmsSearchIndex index = getIndex(indexName);
1970                if (index != null) {
1971                    // update the index
1972                    updateIndex(index, report, null);
1973                } else {
1974                    if (LOG.isWarnEnabled()) {
1975                        LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName));
1976                    }
1977                }
1978            }
1979            // clean up the extraction result cache
1980            cleanExtractionCache();
1981        } finally {
1982            SEARCH_MANAGER_LOCK.unlock();
1983        }
1984    }
1985
1986    /**
1987     * Registers a new Solr core for the given index.<p>
1988     *
1989     * @param index the index to register a new Solr core for
1990     *
1991     * @throws CmsConfigurationException if no Solr server is configured
1992     */
1993    @SuppressWarnings("resource")
1994    public void registerSolrIndex(CmsSolrIndex index) throws CmsConfigurationException {
1995
1996        if ((m_solrConfig == null) || !m_solrConfig.isEnabled()) {
1997            // No solr server configured
1998            throw new CmsConfigurationException(Messages.get().container(Messages.ERR_SOLR_NOT_ENABLED_0));
1999        }
2000
2001        if (index.getServerUrl() != null) { // Use the index-specific Solr-Server if present.
2002            index.setSolrServer(new Builder().withBaseSolrUrl(index.getServerUrl()).build());
2003        } else if (m_solrConfig.getServerUrl() != null) { // Use the globally configured external Solr-Server if present.
2004            // HTTP Server configured
2005            // TODO Implement multi core support for HTTP server
2006            // @see http://lucidworks.lucidimagination.com/display/solr/Configuring+solr.xml
2007            index.setSolrServer(new Builder().withBaseSolrUrl(m_solrConfig.getServerUrl()).build());
2008        } else { // Default to the embedded Solr Server
2009
2010            // get the core container that contains one core for each configured index
2011            if (m_coreContainer == null) {
2012                m_coreContainer = createCoreContainer();
2013            }
2014
2015            // unload the existing core if it exists to avoid problems with forced unlock.
2016            if (m_coreContainer.getAllCoreNames().contains(index.getCoreName())) {
2017                m_coreContainer.unload(index.getCoreName(), false, false, true);
2018            }
2019            // ensure that all locks on the index are gone
2020            ensureIndexIsUnlocked(index.getPath());
2021
2022            // load the core to the container
2023            File dataDir = new File(index.getPath());
2024            if (!dataDir.exists()) {
2025                dataDir.mkdirs();
2026                if (CmsLog.INIT.isInfoEnabled()) {
2027                    CmsLog.INIT.info(
2028                        Messages.get().getBundle().key(
2029                            Messages.INIT_SOLR_INDEX_DIR_CREATED_2,
2030                            index.getName(),
2031                            index.getPath()));
2032                }
2033            }
2034            File instanceDir = new File(
2035                m_solrConfig.getHome() + FileSystems.getDefault().getSeparator() + index.getName());
2036            if (!instanceDir.exists()) {
2037                instanceDir.mkdirs();
2038                if (CmsLog.INIT.isInfoEnabled()) {
2039                    CmsLog.INIT.info(
2040                        Messages.get().getBundle().key(
2041                            Messages.INIT_SOLR_INDEX_DIR_CREATED_2,
2042                            index.getName(),
2043                            index.getPath()));
2044                }
2045            }
2046
2047            // create the core
2048            // TODO: suboptimal - forces always the same schema
2049            SolrCore core = null;
2050            try {
2051                // creation includes registration.
2052                // TODO: this was the old code: core = m_coreContainer.create(descriptor, false);
2053                Map<String, String> properties = new HashMap<String, String>(3);
2054                properties.put(CoreDescriptor.CORE_DATADIR, dataDir.getAbsolutePath());
2055                properties.put(CoreDescriptor.CORE_CONFIGSET, "default");
2056                core = m_coreContainer.create(index.getCoreName(), instanceDir.toPath(), properties, false);
2057            } catch (NullPointerException e) {
2058                if (core != null) {
2059                    core.close();
2060                }
2061                throw new CmsConfigurationException(
2062                    Messages.get().container(
2063                        Messages.ERR_SOLR_SERVER_NOT_CREATED_3,
2064                        index.getName() + " (" + index.getCoreName() + ")",
2065                        index.getPath(),
2066                        m_solrConfig.getSolrConfigFile().getAbsolutePath()),
2067                    e);
2068            }
2069
2070            if (index.isNoSolrServerSet()) {
2071                index.setSolrServer(new EmbeddedSolrServer(m_coreContainer, index.getCoreName()));
2072            }
2073            if (CmsLog.INIT.isInfoEnabled()) {
2074                CmsLog.INIT.info(
2075                    Messages.get().getBundle().key(
2076                        Messages.INIT_SOLR_SERVER_CREATED_1,
2077                        index.getName() + " (" + index.getCoreName() + ")"));
2078            }
2079        }
2080    }
2081
2082    /**
2083     * Removes this field configuration from the OpenCms configuration (if it is not used any more).<p>
2084     *
2085     * @param fieldConfiguration the field configuration to remove from the configuration
2086     *
2087     * @return true if remove was successful, false if preconditions for removal are ok but the given
2088     *         field configuration was unknown to the manager.
2089     *
2090     * @throws CmsIllegalStateException if the given field configuration is still used by at least one
2091     *         <code>{@link I_CmsSearchIndex}</code>.
2092     *
2093     */
2094    public boolean removeSearchFieldConfiguration(I_CmsSearchFieldConfiguration fieldConfiguration)
2095    throws CmsIllegalStateException {
2096
2097        // never remove the standard field configuration
2098        if (fieldConfiguration.getName().equals(CmsSearchFieldConfiguration.STR_STANDARD)) {
2099            throw new CmsIllegalStateException(
2100                Messages.get().container(
2101                    Messages.ERR_INDEX_CONFIGURATION_DELETE_STANDARD_1,
2102                    fieldConfiguration.getName()));
2103        }
2104        // validation if removal will be granted
2105        Iterator<I_CmsSearchIndex> itIndexes = m_indexes.iterator();
2106        I_CmsSearchIndex idx;
2107        // the list for collecting indexes that use the given field configuration
2108        List<I_CmsSearchIndex> referrers = new ArrayList<I_CmsSearchIndex>();
2109        I_CmsSearchFieldConfiguration refFieldConfig;
2110        while (itIndexes.hasNext()) {
2111            idx = itIndexes.next();
2112            refFieldConfig = idx.getFieldConfiguration();
2113            if (refFieldConfig.equals(fieldConfiguration)) {
2114                referrers.add(idx);
2115            }
2116        }
2117        if (referrers.size() > 0) {
2118            throw new CmsIllegalStateException(
2119                Messages.get().container(
2120                    Messages.ERR_INDEX_CONFIGURATION_DELETE_2,
2121                    fieldConfiguration.getName(),
2122                    referrers.toString()));
2123        }
2124
2125        // remove operation (no exception)
2126        return m_fieldConfigurations.remove(fieldConfiguration.getName()) != null;
2127
2128    }
2129
2130    /**
2131     * Removes a search field from the field configuration.<p>
2132     *
2133     * @param fieldConfiguration the field configuration
2134     * @param field field to remove from the field configuration
2135     *
2136     * @return true if remove was successful, false if preconditions for removal are ok but the given
2137     *         field was unknown.
2138     */
2139    public boolean removeSearchFieldConfigurationField(
2140        I_CmsSearchFieldConfiguration fieldConfiguration,
2141        CmsSearchField field) {
2142
2143        if (LOG.isInfoEnabled()) {
2144            LOG.info(
2145                Messages.get().getBundle().key(
2146                    Messages.LOG_REMOVE_FIELDCONFIGURATION_FIELD_INDEX_2,
2147                    field.getName(),
2148                    fieldConfiguration.getName()));
2149        }
2150
2151        return fieldConfiguration.getFields().remove(field);
2152    }
2153
2154    /**
2155     * Removes a search field mapping from the given field.<p>
2156     *
2157     * @param field the field
2158     * @param mapping mapping to remove from the field
2159     *
2160     * @return true if remove was successful, false if preconditions for removal are ok but the given
2161     *         mapping was unknown.
2162     *
2163     * @throws CmsIllegalStateException if the given mapping is the last mapping inside the given field.
2164     */
2165    public boolean removeSearchFieldMapping(CmsLuceneField field, CmsSearchFieldMapping mapping)
2166    throws CmsIllegalStateException {
2167
2168        if (field.getMappings().size() < 2) {
2169            throw new CmsIllegalStateException(
2170                Messages.get().container(
2171                    Messages.ERR_FIELD_MAPPING_DELETE_2,
2172                    mapping.getType().toString(),
2173                    field.getName()));
2174        } else {
2175
2176            if (LOG.isInfoEnabled()) {
2177                LOG.info(
2178                    Messages.get().getBundle().key(
2179                        Messages.LOG_REMOVE_FIELD_MAPPING_INDEX_2,
2180                        mapping.toString(),
2181                        field.getName()));
2182            }
2183            return field.getMappings().remove(mapping);
2184        }
2185    }
2186
2187    /**
2188     * Removes a search index from the configuration.<p>
2189     *
2190     * @param searchIndex the search index to remove
2191     */
2192    public void removeSearchIndex(I_CmsSearchIndex searchIndex) {
2193
2194        // shut down index to remove potential config files of Solr indexes
2195        searchIndex.shutDown();
2196        if (searchIndex instanceof CmsSolrIndex) {
2197            CmsSolrIndex solrIndex = (CmsSolrIndex)searchIndex;
2198            m_coreContainer.unload(solrIndex.getCoreName(), true, true, true);
2199        }
2200        m_indexes.remove(searchIndex);
2201        initOfflineIndexes();
2202
2203        if (LOG.isInfoEnabled()) {
2204            LOG.info(
2205                Messages.get().getBundle().key(
2206                    Messages.LOG_REMOVE_SEARCH_INDEX_2,
2207                    searchIndex.getName(),
2208                    searchIndex.getProject()));
2209        }
2210    }
2211
2212    /**
2213     * Removes all indexes included in the given list (which must contain the name of an index to remove).<p>
2214     *
2215     * @param indexNames the names of the index to remove
2216     */
2217    public void removeSearchIndexes(List<String> indexNames) {
2218
2219        Iterator<String> i = indexNames.iterator();
2220        while (i.hasNext()) {
2221            String indexName = i.next();
2222            // get the search index by name
2223            I_CmsSearchIndex index = getIndex(indexName);
2224            if (index != null) {
2225                // remove the index
2226                removeSearchIndex(index);
2227            } else {
2228                if (LOG.isWarnEnabled()) {
2229                    LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName));
2230                }
2231            }
2232        }
2233    }
2234
2235    /**
2236     * Removes this indexsource from the OpenCms configuration (if it is not used any more).<p>
2237     *
2238     * @param indexsource the indexsource to remove from the configuration
2239     *
2240     * @return true if remove was successful, false if preconditions for removal are ok but the given
2241     *         searchindex was unknown to the manager.
2242     *
2243     * @throws CmsIllegalStateException if the given indexsource is still used by at least one
2244     *         <code>{@link I_CmsSearchIndex}</code>.
2245     *
2246     */
2247    public boolean removeSearchIndexSource(CmsSearchIndexSource indexsource) throws CmsIllegalStateException {
2248
2249        // validation if removal will be granted
2250        Iterator<I_CmsSearchIndex> itIndexes = m_indexes.iterator();
2251        I_CmsSearchIndex idx;
2252        // the list for collecting indexes that use the given index source
2253        List<I_CmsSearchIndex> referrers = new ArrayList<I_CmsSearchIndex>();
2254        // the current list of referred index sources of the iterated index
2255        List<CmsSearchIndexSource> refsources;
2256        while (itIndexes.hasNext()) {
2257            idx = itIndexes.next();
2258            refsources = idx.getSources();
2259            if (refsources != null) {
2260                if (refsources.contains(indexsource)) {
2261                    referrers.add(idx);
2262                }
2263            }
2264        }
2265        if (referrers.size() > 0) {
2266            throw new CmsIllegalStateException(
2267                Messages.get().container(
2268                    Messages.ERR_INDEX_SOURCE_DELETE_2,
2269                    indexsource.getName(),
2270                    referrers.toString()));
2271        }
2272
2273        // remove operation (no exception)
2274        return m_indexSources.remove(indexsource.getName()) != null;
2275
2276    }
2277
2278    /**
2279     * Resumes offline indexing if it was paused.<p>
2280     */
2281    public void resumeOfflineIndexing() {
2282
2283        if (m_offlineUpdateFrequency == Long.MAX_VALUE) {
2284            setOfflineUpdateFrequency(
2285                m_configuredOfflineIndexingFrequency > 0
2286                ? m_configuredOfflineIndexingFrequency
2287                : DEFAULT_OFFLINE_UPDATE_FREQNENCY);
2288        }
2289    }
2290
2291    /**
2292     * Sets the name of the directory below WEB-INF/ where the search indexes are stored.<p>
2293     *
2294     * @param value the name of the directory below WEB-INF/ where the search indexes are stored
2295     */
2296    public void setDirectory(String value) {
2297
2298        m_path = value;
2299    }
2300
2301    /**
2302     * Sets the maximum age a text extraction result is kept in the cache (in hours).<p>
2303     *
2304     * @param extractionCacheMaxAge the maximum age for a text extraction result to set
2305     */
2306    public void setExtractionCacheMaxAge(float extractionCacheMaxAge) {
2307
2308        m_extractionCacheMaxAge = extractionCacheMaxAge;
2309    }
2310
2311    /**
2312     * Sets the maximum age a text extraction result is kept in the cache (in hours) as a String.<p>
2313     *
2314     * @param extractionCacheMaxAge the maximum age for a text extraction result to set
2315     */
2316    public void setExtractionCacheMaxAge(String extractionCacheMaxAge) {
2317
2318        try {
2319            setExtractionCacheMaxAge(Float.parseFloat(extractionCacheMaxAge));
2320        } catch (NumberFormatException e) {
2321            LOG.error(
2322                Messages.get().getBundle().key(
2323                    Messages.LOG_PARSE_EXTRACTION_CACHE_AGE_FAILED_2,
2324                    extractionCacheMaxAge,
2325                    new Float(DEFAULT_EXTRACTION_CACHE_MAX_AGE)),
2326                e);
2327            setExtractionCacheMaxAge(DEFAULT_EXTRACTION_CACHE_MAX_AGE);
2328        }
2329    }
2330
2331    /**
2332     * Sets the unlock mode during indexing.<p>
2333     *
2334     * @param value the value
2335     */
2336    public void setForceunlock(String value) {
2337
2338        m_forceUnlockMode = CmsSearchForceUnlockMode.valueOf(value);
2339    }
2340
2341    /**
2342     * Sets the highlighter.<p>
2343     *
2344     * A highlighter is a class implementing org.opencms.search.documents.I_TermHighlighter.<p>
2345     *
2346     * @param highlighter the package/class name of the highlighter
2347     */
2348    public void setHighlighter(String highlighter) {
2349
2350        try {
2351            m_highlighter = (I_CmsTermHighlighter)Class.forName(highlighter).newInstance();
2352        } catch (Exception e) {
2353            m_highlighter = null;
2354            LOG.error(e.getLocalizedMessage(), e);
2355        }
2356    }
2357
2358    /**
2359     * Sets the seconds to wait for an index lock during an update operation.<p>
2360     *
2361     * @param value the seconds to wait for an index lock during an update operation
2362     */
2363    public void setIndexLockMaxWaitSeconds(int value) {
2364
2365        m_indexLockMaxWaitSeconds = value;
2366    }
2367
2368    /**
2369     * Sets the max. excerpt length.<p>
2370     *
2371     * @param maxExcerptLength the max. excerpt length to set
2372     */
2373    public void setMaxExcerptLength(int maxExcerptLength) {
2374
2375        m_maxExcerptLength = maxExcerptLength;
2376    }
2377
2378    /**
2379     * Sets the max. excerpt length as a String.<p>
2380     *
2381     * @param maxExcerptLength the max. excerpt length to set
2382     */
2383    public void setMaxExcerptLength(String maxExcerptLength) {
2384
2385        try {
2386            setMaxExcerptLength(Integer.parseInt(maxExcerptLength));
2387        } catch (Exception e) {
2388            LOG.error(
2389                Messages.get().getBundle().key(
2390                    Messages.LOG_PARSE_EXCERPT_LENGTH_FAILED_2,
2391                    maxExcerptLength,
2392                    new Integer(DEFAULT_EXCERPT_LENGTH)),
2393                e);
2394            setMaxExcerptLength(DEFAULT_EXCERPT_LENGTH);
2395        }
2396    }
2397
2398    /**
2399     * Sets the maximal wait time for offline index updates after edit operations.<p>
2400     *
2401     * @param maxIndexWaitTime  the maximal wait time to set in milliseconds
2402     */
2403    public void setMaxIndexWaitTime(long maxIndexWaitTime) {
2404
2405        m_maxIndexWaitTime = maxIndexWaitTime;
2406    }
2407
2408    /**
2409     * Sets the maximal wait time for offline index updates after edit operations.<p>
2410     *
2411     * @param maxIndexWaitTime the maximal wait time to set in milliseconds
2412     */
2413    public void setMaxIndexWaitTime(String maxIndexWaitTime) {
2414
2415        try {
2416            setMaxIndexWaitTime(Long.parseLong(maxIndexWaitTime));
2417        } catch (Exception e) {
2418            LOG.error(
2419                Messages.get().getBundle().key(
2420                    Messages.LOG_PARSE_MAX_INDEX_WAITTIME_FAILED_2,
2421                    maxIndexWaitTime,
2422                    new Long(DEFAULT_MAX_INDEX_WAITTIME)),
2423                e);
2424            setMaxIndexWaitTime(DEFAULT_MAX_INDEX_WAITTIME);
2425        }
2426    }
2427
2428    /**
2429     * Sets the maximum number of modifications before a commit in the search index is triggered.<p>
2430     *
2431     * @param maxModificationsBeforeCommit the maximum number of modifications to set
2432     */
2433    public void setMaxModificationsBeforeCommit(int maxModificationsBeforeCommit) {
2434
2435        m_maxModificationsBeforeCommit = maxModificationsBeforeCommit;
2436    }
2437
2438    /**
2439     * Sets the maximum number of modifications before a commit in the search index is triggered as a string.<p>
2440     *
2441     * @param value the maximum number of modifications to set
2442     */
2443    public void setMaxModificationsBeforeCommit(String value) {
2444
2445        try {
2446            setMaxModificationsBeforeCommit(Integer.parseInt(value));
2447        } catch (Exception e) {
2448            LOG.error(
2449                Messages.get().getBundle().key(
2450                    Messages.LOG_PARSE_MAXCOMMIT_FAILED_2,
2451                    value,
2452                    new Integer(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT)),
2453                e);
2454            setMaxModificationsBeforeCommit(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT);
2455        }
2456    }
2457
2458    /**
2459     * Sets the update frequency of the offline indexer in milliseconds.<p>
2460     *
2461     * @param offlineUpdateFrequency the update frequency in milliseconds to set
2462     */
2463    public void setOfflineUpdateFrequency(long offlineUpdateFrequency) {
2464
2465        m_offlineUpdateFrequency = offlineUpdateFrequency;
2466        updateOfflineIndexes(0);
2467    }
2468
2469    /**
2470     * Sets the update frequency of the offline indexer in milliseconds.<p>
2471     *
2472     * @param offlineUpdateFrequency the update frequency in milliseconds to set
2473     */
2474    public void setOfflineUpdateFrequency(String offlineUpdateFrequency) {
2475
2476        try {
2477            setOfflineUpdateFrequency(Long.parseLong(offlineUpdateFrequency));
2478        } catch (Exception e) {
2479            LOG.error(
2480                Messages.get().getBundle().key(
2481                    Messages.LOG_PARSE_OFFLINE_UPDATE_FAILED_2,
2482                    offlineUpdateFrequency,
2483                    new Long(DEFAULT_OFFLINE_UPDATE_FREQNENCY)),
2484                e);
2485            setOfflineUpdateFrequency(DEFAULT_OFFLINE_UPDATE_FREQNENCY);
2486        }
2487    }
2488
2489    /**
2490     * Sets the Solr configuration.<p>
2491     *
2492     * @param config the Solr configuration
2493     */
2494    public void setSolrServerConfiguration(CmsSolrConfiguration config) {
2495
2496        m_solrConfig = config;
2497    }
2498
2499    /**
2500     * Sets the timeout to abandon threads indexing a resource.<p>
2501     *
2502     * @param value the timeout in milliseconds
2503     */
2504    public void setTimeout(long value) {
2505
2506        m_timeout = value;
2507    }
2508
2509    /**
2510     * Sets the timeout to abandon threads indexing a resource as a String.<p>
2511     *
2512     * @param value the timeout in milliseconds
2513     */
2514    public void setTimeout(String value) {
2515
2516        try {
2517            setTimeout(Long.parseLong(value));
2518        } catch (Exception e) {
2519            LOG.error(
2520                Messages.get().getBundle().key(Messages.LOG_PARSE_TIMEOUT_FAILED_2, value, new Long(DEFAULT_TIMEOUT)),
2521                e);
2522            setTimeout(DEFAULT_TIMEOUT);
2523        }
2524    }
2525
2526    /**
2527     * Shuts down the search manager.<p>
2528     *
2529     * This will cause all search indices to be shut down.<p>
2530     */
2531    public void shutDown() {
2532
2533        if (m_offlineIndexThread != null) {
2534            m_offlineIndexThread.shutDown();
2535        }
2536
2537        if (m_offlineHandler != null) {
2538            OpenCms.removeCmsEventListener(m_offlineHandler);
2539        }
2540
2541        Iterator<I_CmsSearchIndex> i = m_indexes.iterator();
2542        while (i.hasNext()) {
2543            I_CmsSearchIndex index = i.next();
2544            index.shutDown();
2545            index = null;
2546        }
2547        m_indexes.clear();
2548
2549        shutDownSolrContainer();
2550
2551        if (CmsLog.INIT.isInfoEnabled()) {
2552            CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SHUTDOWN_MANAGER_0));
2553        }
2554    }
2555
2556    /**
2557     * Updates all offline indexes.<p>
2558     *
2559     * Can be used to force an index update when it's not convenient to wait until the
2560     * offline update interval has eclipsed.<p>
2561     *
2562     * Since the offline indexes still need some time to update the new resources,
2563     * the method waits for at most the configurable <code>maxIndexWaitTime</code>
2564     * to ensure that updating is finished.
2565     *
2566     * @see #updateOfflineIndexes(long)
2567     *
2568     */
2569    public void updateOfflineIndexes() {
2570
2571        updateOfflineIndexes(getMaxIndexWaitTime());
2572    }
2573
2574    /**
2575     * Updates all offline indexes.<p>
2576     *
2577     * Can be used to force an index update when it's not convenient to wait until the
2578     * offline update interval has eclipsed.<p>
2579     *
2580     * Since the offline index will still need some time to update the new resources even if it runs directly,
2581     * a wait time of 2500 or so should be given in order to make sure the index finished updating.
2582     *
2583     * @param waitTime milliseconds to wait after the offline update index was notified of the changes
2584     */
2585    public void updateOfflineIndexes(long waitTime) {
2586
2587        if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) {
2588            // notify existing thread of update frequency change
2589            if (LOG.isDebugEnabled()) {
2590                LOG.debug(Messages.get().getBundle().key(Messages.LOG_OI_UPDATE_INTERRUPT_0));
2591            }
2592            m_offlineIndexThread.interrupt();
2593            if (waitTime > 0) {
2594                m_offlineIndexThread.getWaitHandle().enter(waitTime);
2595            }
2596        }
2597    }
2598
2599    /**
2600     * Collects the resources whose indexed document depends on one of the updated resources.<p>
2601     * We take transitive dependencies into account and handle cyclic dependencies correctly as well.
2602     *
2603     * @param adminCms an OpenCms user context with Admin permissions
2604     * @param updateResources the resources to be re-indexed
2605     *
2606     * @return the updated list of resource to re-index
2607     */
2608    protected List<CmsPublishedResource> addAdditionallyAffectedResources(
2609        CmsObject adminCms,
2610        List<CmsPublishedResource> updateResources) {
2611
2612        if (updateResources.size() > 0) {
2613            Set<CmsPublishedResource> updateResourceSet = new HashSet<>(updateResources);
2614            Collection<CmsPublishedResource> resourcesToCheck = updateResourceSet;
2615            Collection<CmsPublishedResource> additionalResources = Collections.emptySet();
2616            do {
2617                additionalResources = findRelatedContainerPages(adminCms, updateResourceSet, resourcesToCheck);
2618                additionalResources.addAll(
2619                    addIndexContentRelatedResources(adminCms, updateResourceSet, resourcesToCheck));
2620                updateResources.addAll(additionalResources);
2621                updateResourceSet.addAll(additionalResources);
2622                resourcesToCheck = additionalResources;
2623            } while (resourcesToCheck.size() > 0);
2624        }
2625        return updateResources;
2626    }
2627
2628    /**
2629     * Collects the resources whose indexed document depends on one of the updated resources.<p>
2630     *
2631     * @param adminCms an OpenCms user context with Admin permissions
2632     * @param updateResources the resources to be re-indexed
2633     * @param updateResourcesToCheck the resources to check additionally affected resources for, subset of updateResources
2634     *
2635     * @return the list of resources that need to be additionally re-index
2636     */
2637    protected Collection<CmsPublishedResource> addIndexContentRelatedResources(
2638        CmsObject adminCms,
2639        Collection<CmsPublishedResource> updateResources,
2640        Collection<CmsPublishedResource> updateResourcesToCheck) {
2641
2642        Collection<CmsPublishedResource> additionalUpdateResources = new HashSet<>();
2643        for (CmsPublishedResource checkedRes : updateResourcesToCheck) {
2644            try {
2645                CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(checkedRes.getStructureId());
2646                filter = filter.filterType(CmsRelationType.INDEX_CONTENT);
2647                List<CmsRelation> relations = adminCms.readRelations(filter);
2648                for (CmsRelation relation : relations) {
2649                    CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL);
2650                    CmsPublishedResource additionalPubRes = new CmsPublishedResource(res);
2651                    if (!updateResources.contains(additionalPubRes)) {
2652                        additionalUpdateResources.add(additionalPubRes);
2653                    }
2654                }
2655            } catch (CmsException e) {
2656                LOG.error(e.getLocalizedMessage(), e);
2657            }
2658        }
2659        return additionalUpdateResources;
2660    }
2661
2662    /**
2663     * Cleans up the extraction result cache.<p>
2664     */
2665    protected void cleanExtractionCache() {
2666
2667        // clean up the extraction result cache
2668        m_extractionResultCache.cleanCache(m_extractionCacheMaxAge);
2669    }
2670
2671    /**
2672     * Collects the related containerpages to the resources that have been published.<p>
2673     *
2674     * @param adminCms an OpenCms user context with Admin permissions
2675     * @param updateResources the resources to be re-indexed
2676     * @param updateResourcesToCheck the resources to check additionally affected resources for, subset of updateResources
2677     *
2678     * @return the list of resources that need to be additionally re-index
2679     */
2680    protected Collection<CmsPublishedResource> findRelatedContainerPages(
2681        CmsObject adminCms,
2682        Collection<CmsPublishedResource> updateResources,
2683        Collection<CmsPublishedResource> updateResourcesToCheck) {
2684
2685        CmsResourceManager resMan = OpenCms.getResourceManager();
2686        Collection<CmsPublishedResource> additionalUpdateResources = new HashSet<>();
2687
2688        Set<CmsResource> containerPages = new HashSet<CmsResource>();
2689        int containerPageTypeId = -1;
2690        try {
2691            containerPageTypeId = CmsResourceTypeXmlContainerPage.getContainerPageTypeId();
2692        } catch (CmsLoaderException e) {
2693            // will happen during setup, when container page type is not available yet
2694            LOG.info(e.getLocalizedMessage(), e);
2695        }
2696        if (containerPageTypeId != -1) {
2697            for (CmsPublishedResource pubRes : updateResourcesToCheck) {
2698                try {
2699                    if (resMan.getResourceType(pubRes.getType()) instanceof CmsResourceTypeXmlContent) {
2700                        if (!isGroup(pubRes.getType())) {
2701                            CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(
2702                                pubRes.getStructureId()).filterStrong();
2703                            List<CmsRelation> relations = adminCms.readRelations(filter);
2704                            for (CmsRelation relation : relations) {
2705                                CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL);
2706                                if (CmsResourceTypeXmlContainerPage.isContainerPage(res)) {
2707                                    containerPages.add(res);
2708                                    if (CmsDetailOnlyContainerUtil.isDetailContainersPage(
2709                                        adminCms,
2710                                        adminCms.getSitePath(res))) {
2711                                        addDetailContent(adminCms, containerPages, adminCms.getSitePath(res));
2712                                    }
2713                                }
2714                            }
2715                        }
2716                    }
2717                    if (containerPageTypeId == pubRes.getType()) {
2718                        addDetailContent(
2719                            adminCms,
2720                            containerPages,
2721                            adminCms.getRequestContext().removeSiteRoot(pubRes.getRootPath()));
2722                    }
2723                } catch (CmsException e) {
2724                    LOG.error(e.getLocalizedMessage(), e);
2725                }
2726            }
2727            // add all found container pages as published resource objects to the list
2728            for (CmsResource page : containerPages) {
2729                CmsPublishedResource pubCont = new CmsPublishedResource(page);
2730                if (!updateResources.contains(pubCont)) {
2731                    // ensure container page is added only once
2732                    additionalUpdateResources.add(pubCont);
2733                }
2734            }
2735        }
2736        return additionalUpdateResources;
2737    }
2738
2739    /**
2740     * Returns the set of names of all configured document types.<p>
2741     *
2742     * @return the set of names of all configured document types
2743     */
2744    protected List<String> getDocumentTypes() {
2745
2746        return Collections.unmodifiableList(new ArrayList<String>(m_documentTypes.keySet()));
2747    }
2748
2749    /**
2750     * Returns the a offline project used for offline indexing.<p>
2751     *
2752     * @return the offline project if available
2753     */
2754    protected CmsProject getOfflineIndexProject() {
2755
2756        CmsProject result = null;
2757        for (I_CmsSearchIndex index : m_offlineIndexes) {
2758            try {
2759                result = m_adminCms.readProject(index.getProject());
2760
2761                if (!result.isOnlineProject()) {
2762                    break;
2763                }
2764            } catch (Exception e) {
2765                // may be a missconfigured index, ignore
2766                LOG.error(e.getLocalizedMessage(), e);
2767            }
2768        }
2769        return result;
2770    }
2771
2772    /**
2773     * Returns a new thread manager for the indexing threads.<p>
2774     *
2775     * @return a new thread manager for the indexing threads
2776     */
2777    protected CmsIndexingThreadManager getThreadManager() {
2778
2779        return new CmsIndexingThreadManager(m_timeout, m_maxModificationsBeforeCommit);
2780    }
2781
2782    /**
2783     * Initializes the available Cms resource types to be indexed.<p>
2784     *
2785     * A map stores document factories keyed by a string representing
2786     * a colon separated list of Cms resource types and/or mimetypes.<p>
2787     *
2788     * The keys of this map are used to trigger a document factory to convert
2789     * a Cms resource into a Lucene index document.<p>
2790     *
2791     * A document factory is a class implementing the interface
2792     * {@link org.opencms.search.documents.I_CmsDocumentFactory}.<p>
2793     */
2794    protected void initAvailableDocumentTypes() {
2795
2796        CmsSearchDocumentType documenttype = null;
2797        String className = null;
2798        String name = null;
2799        I_CmsDocumentFactory documentFactory = null;
2800        List<String> resourceTypes = null;
2801        List<String> mimeTypes = null;
2802        Class<?> c = null;
2803
2804        m_documentTypes = new LinkedHashMap<String, Map<String, I_CmsDocumentFactory>>();
2805
2806        for (int i = 0, n = m_documentTypeConfigs.size(); i < n; i++) {
2807
2808            documenttype = m_documentTypeConfigs.get(i);
2809            name = documenttype.getName();
2810
2811            try {
2812                className = documenttype.getClassName();
2813                resourceTypes = documenttype.getResourceTypes();
2814                mimeTypes = documenttype.getMimeTypes();
2815
2816                if (name == null) {
2817                    throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_NAME_0));
2818                }
2819                if (className == null) {
2820                    throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_CLASS_DEF_0));
2821                }
2822                if (resourceTypes.size() == 0) {
2823                    throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_RESOURCETYPE_DEF_0));
2824                }
2825
2826                try {
2827                    c = Class.forName(className);
2828                    documentFactory = (I_CmsDocumentFactory)c.getConstructor(new Class[] {String.class}).newInstance(
2829                        new Object[] {name});
2830                } catch (ClassNotFoundException exc) {
2831                    throw new CmsIndexException(
2832                        Messages.get().container(Messages.ERR_DOCCLASS_NOT_FOUND_1, className),
2833                        exc);
2834                } catch (Exception exc) {
2835                    throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCCLASS_INIT_1, className), exc);
2836                }
2837
2838                if (documentFactory.isUsingCache()) {
2839                    // init cache if used by the factory
2840                    documentFactory.setCache(m_extractionResultCache);
2841                }
2842
2843                Map<String, I_CmsDocumentFactory> matchingTypes = new HashMap<>();
2844                for (Iterator<String> keyIt = documentFactory.getDocumentKeys(
2845                    resourceTypes,
2846                    mimeTypes).iterator(); keyIt.hasNext();) {
2847                    String key = keyIt.next();
2848                    matchingTypes.put(key, documentFactory);
2849                    m_extractionKeys.add(key);
2850                }
2851                m_documentTypes.put(name, matchingTypes);
2852
2853            } catch (CmsException e) {
2854                if (LOG.isWarnEnabled()) {
2855                    LOG.warn(Messages.get().getBundle().key(Messages.LOG_DOCTYPE_CONFIG_FAILED_1, name), e);
2856                }
2857            }
2858        }
2859    }
2860
2861    /**
2862     * Initializes the index sources.
2863     */
2864    protected void initIndexSources() {
2865
2866        for (CmsSearchIndexSource source : m_indexSources.values()) {
2867            source.init();
2868        }
2869    }
2870
2871    /**
2872     * Initializes the configured search indexes.<p>
2873     *
2874     * This initializes also the list of Cms resources types
2875     * to be indexed by an index source.<p>
2876     */
2877    protected void initSearchIndexes() {
2878
2879        I_CmsSearchIndex index = null;
2880        for (int i = 0, n = m_indexes.size(); i < n; i++) {
2881            index = m_indexes.get(i);
2882            // reset disabled flag
2883            index.setEnabled(true);
2884            // check if the index has been configured correctly
2885            if (index.checkConfiguration(m_adminCms)) {
2886                // the index is configured correctly
2887                try {
2888                    index.initialize();
2889                } catch (Exception e) {
2890                    if (CmsLog.INIT.isWarnEnabled()) {
2891                        // in this case the index will be disabled
2892                        CmsLog.INIT.warn(Messages.get().getBundle().key(Messages.INIT_SEARCH_INIT_FAILED_1, index), e);
2893                    }
2894                }
2895            }
2896            // output a log message if the index was successfully configured or not
2897            if (CmsLog.INIT.isInfoEnabled()) {
2898                if (index.isEnabled()) {
2899                    CmsLog.INIT.info(
2900                        Messages.get().getBundle().key(Messages.INIT_INDEX_CONFIGURED_2, index, index.getProject()));
2901                } else {
2902                    CmsLog.INIT.warn(
2903                        Messages.get().getBundle().key(
2904                            Messages.INIT_INDEX_NOT_CONFIGURED_2,
2905                            index,
2906                            index.getProject()));
2907                }
2908            }
2909        }
2910    }
2911
2912    /**
2913     * Checks, if the index should be rebuilt/updated at all by the search manager.
2914     * @param index the index to check.
2915     * @return a flag, indicating if the index should be rebuilt/updated at all.
2916     */
2917    protected boolean shouldUpdateAtAll(I_CmsSearchIndex index) {
2918
2919        if (I_CmsSearchIndex.REBUILD_MODE_NEVER.equals(index.getRebuildMode())) {
2920            LOG.debug(Messages.get().getBundle().key(Messages.LOG_SKIP_REBUILD_FOR_MODE_NEVER_1, index.getName()));
2921            return false;
2922        } else {
2923            return true;
2924        }
2925
2926    }
2927
2928    /**
2929     * Incrementally updates all indexes that have their rebuild mode set to <code>"auto"</code>
2930     * after resources have been published.<p>
2931     *
2932     * @param adminCms an OpenCms user context with Admin permissions
2933     * @param publishHistoryId the history ID of the published project
2934     * @param report the report to write the output to
2935     */
2936    protected void updateAllIndexes(CmsObject adminCms, CmsUUID publishHistoryId, I_CmsReport report) {
2937
2938        int oldPriority = Thread.currentThread().getPriority();
2939        try {
2940            SEARCH_MANAGER_LOCK.lock();
2941            Thread.currentThread().setPriority(Thread.MIN_PRIORITY);
2942            List<CmsPublishedResource> publishedResources;
2943            try {
2944                // read the list of all published resources
2945                publishedResources = adminCms.readPublishedResources(publishHistoryId);
2946            } catch (CmsException e) {
2947                LOG.error(
2948                    Messages.get().getBundle().key(Messages.LOG_READING_CHANGED_RESOURCES_FAILED_1, publishHistoryId),
2949                    e);
2950                return;
2951            }
2952            Set<CmsUUID> bothNewAndDeleted = getIdsOfPublishResourcesWhichAreBothNewAndDeleted(publishedResources);
2953            // When published resources with both states 'new' and 'deleted' exist in the same publish job history, the resource has been moved
2954
2955            List<CmsPublishedResource> updateResources = new ArrayList<CmsPublishedResource>();
2956            for (CmsPublishedResource res : publishedResources) {
2957                if (res.isFolder() || res.getState().isUnchanged()) {
2958                    // folders and unchanged resources don't need to be indexed after publish
2959                    continue;
2960                }
2961                if (res.getState().isDeleted() || res.getState().isNew() || res.getState().isChanged()) {
2962                    if (updateResources.contains(res)) {
2963                        // resource may have been added as a sibling of another resource
2964                        // in this case we make sure to use the value from the publish list because of the "deleted" flag
2965                        boolean hasMoved = bothNewAndDeleted.contains(res.getStructureId())
2966                            || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_DESTINATION)
2967                            || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_SOURCE);
2968                        // check it this is a moved resource with source / target info, in this case we need both entries
2969                        if (!hasMoved) {
2970                            // if the resource was moved, we must contain both entries
2971                            updateResources.remove(res);
2972                        }
2973                        // "equals()" implementation of published resource checks for id,
2974                        // so the removed value may have a different "deleted" or "modified" status value
2975                        updateResources.add(res);
2976                    } else {
2977                        // resource not yet contained in the list
2978                        updateResources.add(res);
2979                        // check for the siblings (not for deleted resources, these are already gone)
2980                        if (!res.getState().isDeleted() && (res.getSiblingCount() > 1)) {
2981                            // this resource has siblings
2982                            try {
2983                                // read siblings from the online project
2984                                List<CmsResource> siblings = adminCms.readSiblings(
2985                                    res.getRootPath(),
2986                                    CmsResourceFilter.ALL);
2987                                Iterator<CmsResource> itSib = siblings.iterator();
2988                                while (itSib.hasNext()) {
2989                                    // check all siblings
2990                                    CmsResource sibling = itSib.next();
2991                                    CmsPublishedResource sib = new CmsPublishedResource(sibling);
2992                                    if (!updateResources.contains(sib)) {
2993                                        // ensure sibling is added only once
2994                                        updateResources.add(sib);
2995                                    }
2996                                }
2997                            } catch (CmsException e) {
2998                                // ignore, just use the original resource
2999                                if (LOG.isWarnEnabled()) {
3000                                    LOG.warn(
3001                                        Messages.get().getBundle().key(
3002                                            Messages.LOG_UNABLE_TO_READ_SIBLINGS_1,
3003                                            res.getRootPath()),
3004                                        e);
3005                                }
3006                            }
3007                        }
3008                    }
3009                }
3010            }
3011
3012            addAdditionallyAffectedResources(adminCms, updateResources);
3013            updateAllIndexes(adminCms, updateResources, report);
3014        } finally {
3015            SEARCH_MANAGER_LOCK.unlock();
3016            Thread.currentThread().setPriority(oldPriority);
3017        }
3018    }
3019
3020    /**
3021     * Incrementally updates all indexes that have their rebuild mode set to <code>"auto"</code>.<p>
3022     *
3023     * @param adminCms an OpenCms user context with Admin permissions
3024     * @param updateResources the resources to update
3025     * @param report the report to write the output to
3026     */
3027    protected void updateAllIndexes(
3028        CmsObject adminCms,
3029        List<CmsPublishedResource> updateResources,
3030        I_CmsReport report) {
3031
3032        try {
3033            SEARCH_MANAGER_LOCK.lock();
3034            if (!updateResources.isEmpty()) {
3035                // sort the resource to update
3036                Collections.sort(updateResources);
3037                // only update the indexes if the list of remaining published resources is not empty
3038                Iterator<I_CmsSearchIndex> i = m_indexes.iterator();
3039                while (i.hasNext()) {
3040                    I_CmsSearchIndex index = i.next();
3041                    if (I_CmsSearchIndex.REBUILD_MODE_AUTO.equals(index.getRebuildMode())) {
3042                        // only update indexes which have the rebuild mode set to "auto"
3043                        try {
3044                            updateIndex(index, report, updateResources);
3045                        } catch (CmsException e) {
3046                            LOG.error(
3047                                Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()),
3048                                e);
3049                        }
3050                    }
3051                }
3052            }
3053            // clean up the extraction result cache
3054            cleanExtractionCache();
3055        } finally {
3056            SEARCH_MANAGER_LOCK.unlock();
3057        }
3058
3059    }
3060
3061    /**
3062     * Updates (if required creates) the index with the given name.<p>
3063     *
3064     * If the optional List of <code>{@link CmsPublishedResource}</code> instances is provided, the index will be
3065     * incrementally updated for these resources only. If this List is <code>null</code> or empty,
3066     * the index will be fully rebuild.<p>
3067     *
3068     * @param index the index to update or rebuild
3069     * @param report the report to write output messages to
3070     * @param resourcesToIndex an (optional) list of <code>{@link CmsPublishedResource}</code> objects to update in the index
3071     *
3072     * @throws CmsException if something goes wrong
3073     */
3074    protected void updateIndex(I_CmsSearchIndex index, I_CmsReport report, List<CmsPublishedResource> resourcesToIndex)
3075    throws CmsException {
3076
3077        if (shouldUpdateAtAll(index)) {
3078            try {
3079                SEARCH_MANAGER_LOCK.lock();
3080
3081                // copy the stored admin context for the indexing
3082                CmsObject cms = OpenCms.initCmsObject(m_adminCms);
3083                // make sure a report is available
3084                if (report == null) {
3085                    report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class);
3086                }
3087
3088                // check if the index has been configured correctly
3089                if (!index.checkConfiguration(cms)) {
3090                    // the index is disabled
3091                    return;
3092                }
3093
3094                // set site root and project for this index
3095                cms.getRequestContext().setSiteRoot("/");
3096                // switch to the index project
3097                cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject()));
3098
3099                if ((resourcesToIndex == null) || resourcesToIndex.isEmpty()) {
3100                    // rebuild the complete index
3101
3102                    updateIndexCompletely(cms, index, report);
3103                } else {
3104                    updateIndexIncremental(cms, index, report, resourcesToIndex);
3105                }
3106            } finally {
3107                SEARCH_MANAGER_LOCK.unlock();
3108            }
3109        }
3110    }
3111
3112    /**
3113     * The method updates all OpenCms documents that are indexed.
3114     * @param cms the OpenCms user context to use for accessing the VFS
3115     * @param index the index to update
3116     * @param report the report to write output messages to
3117     * @throws CmsIndexException thrown if indexing fails for some reason
3118     */
3119    @SuppressWarnings("null")
3120    protected void updateIndexCompletely(CmsObject cms, I_CmsSearchIndex index, I_CmsReport report)
3121    throws CmsIndexException {
3122
3123        // create a new thread manager for the indexing threads
3124        CmsIndexingThreadManager threadManager = getThreadManager();
3125
3126        boolean isOfflineIndex = false;
3127        if (I_CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) {
3128            // disable offline indexing while the complete index is rebuild
3129            isOfflineIndex = true;
3130            index.setRebuildMode(I_CmsSearchIndex.REBUILD_MODE_MANUAL);
3131            // re-initialize the offline indexes, this will disable this offline index
3132            initOfflineIndexes();
3133        }
3134
3135        I_CmsIndexWriter writer = null;
3136        try {
3137            // create a backup of the existing index
3138            CmsSearchIndex indexInternal = null;
3139            String backup = null;
3140            if (index instanceof CmsSearchIndex) {
3141                indexInternal = (CmsSearchIndex)index;
3142                backup = indexInternal.createIndexBackup();
3143                if (backup != null) {
3144                    indexInternal.indexSearcherOpen(backup);
3145                }
3146            }
3147
3148            // create a new index writer
3149            writer = index.getIndexWriter(report, true);
3150            if (writer instanceof I_CmsSolrIndexWriter) {
3151                try {
3152                    ((I_CmsSolrIndexWriter)writer).deleteAllDocuments();
3153                } catch (IOException e) {
3154                    LOG.error(e.getMessage(), e);
3155                }
3156            }
3157
3158            // output start information on the report
3159            report.println(
3160                Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_BEGIN_1, index.getName()),
3161                I_CmsReport.FORMAT_HEADLINE);
3162
3163            // iterate all configured index sources of this index
3164            Iterator<CmsSearchIndexSource> sources = index.getSources().iterator();
3165            while (sources.hasNext()) {
3166                // get the next index source
3167                CmsSearchIndexSource source = sources.next();
3168                // create the indexer
3169                I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index);
3170                // new index creation, use all resources from the index source
3171                indexer.rebuildIndex(writer, threadManager, source);
3172
3173                // wait for indexing threads to finish
3174                while (threadManager.isRunning()) {
3175                    try {
3176                        Thread.sleep(500);
3177                    } catch (InterruptedException e) {
3178                        // just continue with the loop after interruption
3179                        LOG.info(e.getLocalizedMessage(), e);
3180                    }
3181                }
3182
3183                // commit and optimize the index after each index source has been finished
3184                try {
3185                    writer.commit();
3186                } catch (IOException e) {
3187                    if (LOG.isWarnEnabled()) {
3188                        LOG.warn(
3189                            Messages.get().getBundle().key(
3190                                Messages.LOG_IO_INDEX_WRITER_COMMIT_2,
3191                                index.getName(),
3192                                index.getPath()),
3193                            e);
3194                    }
3195                }
3196                try {
3197                    writer.optimize();
3198                } catch (IOException e) {
3199                    if (LOG.isWarnEnabled()) {
3200                        LOG.warn(
3201                            Messages.get().getBundle().key(
3202                                Messages.LOG_IO_INDEX_WRITER_OPTIMIZE_2,
3203                                index.getName(),
3204                                index.getPath()),
3205                            e);
3206                    }
3207                }
3208            }
3209
3210            // we are sure here that indexInternal is not null
3211            if (backup != null) {
3212                // remove the backup after the files have been re-indexed
3213                indexInternal.indexSearcherClose();
3214                indexInternal.removeIndexBackup(backup);
3215            }
3216
3217            // output finish information on the report
3218            report.println(
3219                Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_END_1, index.getName()),
3220                I_CmsReport.FORMAT_HEADLINE);
3221
3222        } finally {
3223            if (writer != null) {
3224                try {
3225                    writer.close();
3226                } catch (IOException e) {
3227                    if (LOG.isWarnEnabled()) {
3228                        LOG.warn(
3229                            Messages.get().getBundle().key(
3230                                Messages.LOG_IO_INDEX_WRITER_CLOSE_2,
3231                                index.getPath(),
3232                                index.getName()),
3233                            e);
3234                    }
3235                }
3236            }
3237            if (isOfflineIndex) {
3238                // reset the mode of the offline index
3239                index.setRebuildMode(I_CmsSearchIndex.REBUILD_MODE_OFFLINE);
3240                // re-initialize the offline indexes, this will re-enable this index
3241                initOfflineIndexes();
3242            }
3243            // index has changed - initialize the index searcher instance
3244            index.onIndexChanged(true);
3245        }
3246
3247        // show information about indexing runtime
3248        threadManager.reportStatistics(report);
3249    }
3250
3251    /**
3252     * Incrementally updates the given index.<p>
3253     *
3254     * @param cms the OpenCms user context to use for accessing the VFS
3255     * @param index the index to update
3256     * @param report the report to write output messages to
3257     * @param resourcesToIndex a list of <code>{@link CmsPublishedResource}</code> objects to update in the index
3258     *
3259     * @throws CmsException if something goes wrong
3260     */
3261    protected void updateIndexIncremental(
3262        CmsObject cms,
3263        I_CmsSearchIndex index,
3264        I_CmsReport report,
3265        List<CmsPublishedResource> resourcesToIndex)
3266    throws CmsException {
3267
3268        try {
3269            SEARCH_MANAGER_LOCK.lock();
3270
3271            // update the existing index
3272            List<CmsSearchIndexUpdateData> updateCollections = new ArrayList<CmsSearchIndexUpdateData>();
3273
3274            boolean hasResourcesToDelete = false;
3275            boolean hasResourcesToUpdate = false;
3276
3277            // iterate all configured index sources of this index
3278            Iterator<CmsSearchIndexSource> sources = index.getSources().iterator();
3279            while (sources.hasNext()) {
3280                // get the next index source
3281                CmsSearchIndexSource source = sources.next();
3282                // create the indexer
3283                I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index);
3284                // collect the resources to update
3285                CmsSearchIndexUpdateData updateData = indexer.getUpdateData(source, resourcesToIndex);
3286                if (!updateData.isEmpty()) {
3287                    // add the update collection to the internal pipeline
3288                    updateCollections.add(updateData);
3289                    hasResourcesToDelete = hasResourcesToDelete | updateData.hasResourcesToDelete();
3290                    hasResourcesToUpdate = hasResourcesToUpdate | updateData.hasResourceToUpdate();
3291                }
3292            }
3293
3294            // only start index modification if required
3295            if (hasResourcesToDelete || hasResourcesToUpdate) {
3296                // output start information on the report
3297                report.println(
3298                    Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_BEGIN_1, index.getName()),
3299                    I_CmsReport.FORMAT_HEADLINE);
3300
3301                I_CmsIndexWriter writer = null;
3302                try {
3303                    // obtain an index writer that updates the current index
3304                    writer = index.getIndexWriter(report, false);
3305
3306                    if (hasResourcesToDelete) {
3307                        // delete the resource from the index
3308                        Iterator<CmsSearchIndexUpdateData> i = updateCollections.iterator();
3309                        while (i.hasNext()) {
3310                            CmsSearchIndexUpdateData updateCollection = i.next();
3311                            if (updateCollection.hasResourcesToDelete()) {
3312                                updateCollection.getIndexer().deleteResources(
3313                                    writer,
3314                                    updateCollection.getResourcesToDelete());
3315                            }
3316                        }
3317                    }
3318
3319                    if (hasResourcesToUpdate) {
3320                        // create a new thread manager
3321                        CmsIndexingThreadManager threadManager = getThreadManager();
3322
3323                        Iterator<CmsSearchIndexUpdateData> i = updateCollections.iterator();
3324                        while (i.hasNext()) {
3325                            CmsSearchIndexUpdateData updateCollection = i.next();
3326                            if (updateCollection.hasResourceToUpdate()) {
3327                                updateCollection.getIndexer().updateResources(
3328                                    writer,
3329                                    threadManager,
3330                                    updateCollection.getResourcesToUpdate());
3331                            }
3332                        }
3333
3334                        // wait for indexing threads to finish
3335                        while (threadManager.isRunning()) {
3336                            try {
3337                                Thread.sleep(500);
3338                            } catch (InterruptedException e) {
3339                                // just continue with the loop after interruption
3340                                LOG.info(e.getLocalizedMessage(), e);
3341                            }
3342                        }
3343                    }
3344                } finally {
3345                    // close the index writer
3346                    if (writer != null) {
3347                        try {
3348                            writer.commit();
3349                        } catch (IOException e) {
3350                            LOG.error(
3351                                Messages.get().getBundle().key(
3352                                    Messages.LOG_IO_INDEX_WRITER_COMMIT_2,
3353                                    index.getName(),
3354                                    index.getPath()),
3355                                e);
3356                        }
3357                    }
3358                    // index has changed - initialize the index searcher instance
3359                    index.onIndexChanged(false);
3360                }
3361
3362                // output finish information on the report
3363                report.println(
3364                    Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_END_1, index.getName()),
3365                    I_CmsReport.FORMAT_HEADLINE);
3366            }
3367        } finally {
3368            SEARCH_MANAGER_LOCK.unlock();
3369        }
3370    }
3371
3372    /**
3373     * Updates the offline search indexes for the given list of resources.<p>
3374     *
3375     * @param report the report to write the index information to
3376     * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index
3377     */
3378    protected void updateIndexOffline(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) {
3379
3380        CmsObject cms = m_adminCms;
3381        try {
3382            // copy the administration context for the indexing
3383            cms = OpenCms.initCmsObject(m_adminCms);
3384            // set site root and project for this index
3385            cms.getRequestContext().setSiteRoot("/");
3386        } catch (CmsException e) {
3387            LOG.error(e.getLocalizedMessage(), e);
3388        }
3389
3390        Iterator<I_CmsSearchIndex> j = m_offlineIndexes.iterator();
3391        while (j.hasNext()) {
3392            I_CmsSearchIndex index = j.next();
3393            if (index.getSources() != null) {
3394                try {
3395                    // switch to the index project
3396                    cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject()));
3397                    updateIndexIncremental(cms, index, report, resourcesToIndex);
3398                } catch (CmsException e) {
3399                    LOG.error(Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()), e);
3400                }
3401            }
3402        }
3403    }
3404
3405    /**
3406     * Checks if the given containerpage is used as a detail containers and adds the related detail content to the resource set.<p>
3407     *
3408     * @param adminCms the cms context
3409     * @param containerPages the containerpages
3410     * @param containerPage the container page site path
3411     */
3412    private void addDetailContent(CmsObject adminCms, Set<CmsResource> containerPages, String containerPage) {
3413
3414        if (CmsDetailOnlyContainerUtil.isDetailContainersPage(adminCms, containerPage)) {
3415
3416            try {
3417                CmsResource detailRes = adminCms.readResource(
3418                    CmsDetailOnlyContainerUtil.getDetailContentPath(containerPage),
3419                    CmsResourceFilter.IGNORE_EXPIRATION);
3420                containerPages.add(detailRes);
3421            } catch (Throwable e) {
3422                if (LOG.isWarnEnabled()) {
3423                    LOG.warn(e.getLocalizedMessage(), e);
3424                }
3425            }
3426        }
3427    }
3428
3429    /**
3430     * Creates the Solr core container.<p>
3431     *
3432     * @return the created core container
3433     */
3434    private CoreContainer createCoreContainer() {
3435
3436        CoreContainer container = null;
3437        try {
3438            // get the core container
3439            // still no core container: create it
3440            container = CoreContainer.createAndLoad(
3441                Paths.get(m_solrConfig.getHome()),
3442                m_solrConfig.getSolrFile().toPath());
3443            if (CmsLog.INIT.isInfoEnabled()) {
3444                CmsLog.INIT.info(
3445                    Messages.get().getBundle().key(
3446                        Messages.INIT_SOLR_CORE_CONTAINER_CREATED_2,
3447                        m_solrConfig.getHome(),
3448                        m_solrConfig.getSolrFile().getName()));
3449            }
3450        } catch (Exception e) {
3451            LOG.error(
3452                Messages.get().getBundle().key(
3453                    Messages.ERR_SOLR_CORE_CONTAINER_NOT_CREATED_1,
3454                    m_solrConfig.getSolrFile().getAbsolutePath()),
3455                e);
3456        }
3457        return container;
3458
3459    }
3460
3461    /**
3462     * Remove write.lock file in the data directory to ensure the index is unlocked.
3463     * @param dataDir the data directory of the Solr index that should be unlocked.
3464     */
3465    private void ensureIndexIsUnlocked(String dataDir) {
3466
3467        Collection<File> lockFiles = new ArrayList<File>(2);
3468        lockFiles.add(
3469            new File(
3470                CmsFileUtil.addTrailingSeparator(CmsFileUtil.addTrailingSeparator(dataDir) + "index") + "write.lock"));
3471        lockFiles.add(
3472            new File(
3473                CmsFileUtil.addTrailingSeparator(CmsFileUtil.addTrailingSeparator(dataDir) + "spellcheck")
3474                    + "write.lock"));
3475        for (File lockFile : lockFiles) {
3476            if (lockFile.exists()) {
3477                lockFile.delete();
3478                LOG.warn(
3479                    "Forcely unlocking index with data dir \""
3480                        + dataDir
3481                        + "\" by removing file \""
3482                        + lockFile.getAbsolutePath()
3483                        + "\".");
3484            }
3485        }
3486    }
3487
3488    /**
3489     * Returns the report in the given event data, if <code>null</code>
3490     * a new log report is used.<p>
3491     *
3492     * @param event the event to get the report for
3493     *
3494     * @return the report
3495     */
3496    private I_CmsReport getEventReport(CmsEvent event) {
3497
3498        I_CmsReport report = null;
3499        if (event.getData() != null) {
3500            report = (I_CmsReport)event.getData().get(I_CmsEventListener.KEY_REPORT);
3501        }
3502        if (report == null) {
3503            report = new CmsLogReport(Locale.ENGLISH, getClass());
3504        }
3505        return report;
3506    }
3507
3508    /**
3509     * Gets all structure ids for which published resources of both states 'new' and 'deleted' exist in the given list.<p>
3510     *
3511     * @param publishedResources a list of published resources
3512     *
3513     * @return the set of structure ids that satisfy the condition above
3514     */
3515    private Set<CmsUUID> getIdsOfPublishResourcesWhichAreBothNewAndDeleted(
3516        List<CmsPublishedResource> publishedResources) {
3517
3518        Set<CmsUUID> result = new HashSet<CmsUUID>();
3519        Set<CmsUUID> deletedSet = new HashSet<CmsUUID>();
3520        for (CmsPublishedResource pubRes : publishedResources) {
3521            if (pubRes.getState().isNew()) {
3522                result.add(pubRes.getStructureId());
3523            }
3524            if (pubRes.getState().isDeleted()) {
3525                deletedSet.add(pubRes.getStructureId());
3526            }
3527        }
3528        result.retainAll(deletedSet);
3529        return result;
3530    }
3531
3532    /**
3533     * Checks if the given type id belongs to a group type.
3534     *
3535     * @param type the type id to check
3536     * @return true if the type is a group type
3537     */
3538    private boolean isGroup(int type) {
3539
3540        for (String groupType : groupTypes) {
3541            if (OpenCms.getResourceManager().matchResourceType(groupType, type)) {
3542                return true;
3543            }
3544        }
3545        return false;
3546
3547    }
3548
3549    /**
3550     * Shuts down the Solr core container.<p>
3551     */
3552    private void shutDownSolrContainer() {
3553
3554        if (m_coreContainer != null) {
3555            for (SolrCore core : m_coreContainer.getCores()) {
3556                // do not unload spellcheck core because otherwise the core.properties file is removed
3557                // even when calling m_coreContainer.unload(core.getName(), false, false, false);
3558                if (!core.getName().equals(CmsSolrSpellchecker.SPELLCHECKER_INDEX_CORE)) {
3559                    m_coreContainer.unload(core.getName(), false, false, true);
3560                }
3561            }
3562            m_coreContainer.shutdown();
3563            if (CmsLog.INIT.isInfoEnabled()) {
3564                CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SOLR_SHUTDOWN_SUCCESS_0));
3565            }
3566            m_coreContainer = null;
3567        }
3568    }
3569
3570}