001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.search;
029
030import org.opencms.ade.containerpage.CmsDetailOnlyContainerUtil;
031import org.opencms.configuration.CmsConfigurationException;
032import org.opencms.db.CmsDriverManager;
033import org.opencms.db.CmsPublishedResource;
034import org.opencms.db.CmsResourceState;
035import org.opencms.file.CmsObject;
036import org.opencms.file.CmsProject;
037import org.opencms.file.CmsResource;
038import org.opencms.file.CmsResourceFilter;
039import org.opencms.file.CmsUser;
040import org.opencms.file.types.CmsResourceTypeXmlContainerPage;
041import org.opencms.file.types.CmsResourceTypeXmlContent;
042import org.opencms.file.types.I_CmsResourceType;
043import org.opencms.i18n.CmsLocaleManager;
044import org.opencms.i18n.CmsMessageContainer;
045import org.opencms.loader.CmsLoaderException;
046import org.opencms.loader.CmsResourceManager;
047import org.opencms.main.CmsBroadcast.ContentMode;
048import org.opencms.main.CmsEvent;
049import org.opencms.main.CmsException;
050import org.opencms.main.CmsIllegalArgumentException;
051import org.opencms.main.CmsIllegalStateException;
052import org.opencms.main.CmsLog;
053import org.opencms.main.I_CmsEventListener;
054import org.opencms.main.OpenCms;
055import org.opencms.main.OpenCmsSolrHandler;
056import org.opencms.relations.CmsRelation;
057import org.opencms.relations.CmsRelationFilter;
058import org.opencms.relations.CmsRelationType;
059import org.opencms.report.CmsLogReport;
060import org.opencms.report.CmsShellLogReport;
061import org.opencms.report.I_CmsReport;
062import org.opencms.scheduler.I_CmsScheduledJob;
063import org.opencms.search.documents.A_CmsVfsDocument;
064import org.opencms.search.documents.CmsExtractionResultCache;
065import org.opencms.search.documents.I_CmsDocumentFactory;
066import org.opencms.search.documents.I_CmsTermHighlighter;
067import org.opencms.search.fields.CmsLuceneField;
068import org.opencms.search.fields.CmsLuceneFieldConfiguration;
069import org.opencms.search.fields.CmsSearchField;
070import org.opencms.search.fields.CmsSearchFieldConfiguration;
071import org.opencms.search.fields.CmsSearchFieldMapping;
072import org.opencms.search.fields.I_CmsSearchFieldConfiguration;
073import org.opencms.search.solr.CmsSolrConfiguration;
074import org.opencms.search.solr.CmsSolrFieldConfiguration;
075import org.opencms.search.solr.CmsSolrIndex;
076import org.opencms.search.solr.I_CmsSolrIndexWriter;
077import org.opencms.search.solr.spellchecking.CmsSolrSpellchecker;
078import org.opencms.search.solr.spellchecking.CmsSpellcheckDictionaryIndexer;
079import org.opencms.security.CmsRole;
080import org.opencms.security.CmsRoleViolationException;
081import org.opencms.util.A_CmsModeStringEnumeration;
082import org.opencms.util.CmsFileUtil;
083import org.opencms.util.CmsStringUtil;
084import org.opencms.util.CmsUUID;
085import org.opencms.util.CmsWaitHandle;
086
087import java.io.File;
088import java.io.IOException;
089import java.nio.file.FileSystems;
090import java.nio.file.Paths;
091import java.util.ArrayList;
092import java.util.Collection;
093import java.util.Collections;
094import java.util.HashMap;
095import java.util.HashSet;
096import java.util.Iterator;
097import java.util.LinkedHashMap;
098import java.util.List;
099import java.util.ListIterator;
100import java.util.Locale;
101import java.util.Map;
102import java.util.Set;
103import java.util.TreeMap;
104import java.util.concurrent.locks.ReentrantLock;
105import java.util.stream.Collectors;
106
107import org.apache.commons.logging.Log;
108import org.apache.lucene.analysis.Analyzer;
109import org.apache.lucene.analysis.CharArraySet;
110import org.apache.lucene.analysis.standard.StandardAnalyzer;
111import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
112import org.apache.solr.client.solrj.impl.HttpSolrClient.Builder;
113import org.apache.solr.core.CoreContainer;
114import org.apache.solr.core.CoreDescriptor;
115import org.apache.solr.core.SolrCore;
116
117/**
118 * Implements the general management and configuration of the search and
119 * indexing facilities in OpenCms.<p>
120 *
121 * @since 6.0.0
122 */
123public class CmsSearchManager implements I_CmsScheduledJob, I_CmsEventListener {
124
125    /**
126     *  Enumeration class for force unlock types.<p>
127     */
128    public static final class CmsSearchForceUnlockMode extends A_CmsModeStringEnumeration {
129
130        /** Force unlock type "always". */
131        public static final CmsSearchForceUnlockMode ALWAYS = new CmsSearchForceUnlockMode("always");
132
133        /** Force unlock type "never". */
134        public static final CmsSearchForceUnlockMode NEVER = new CmsSearchForceUnlockMode("never");
135
136        /** Force unlock type "only full". */
137        public static final CmsSearchForceUnlockMode ONLYFULL = new CmsSearchForceUnlockMode("onlyfull");
138
139        /** Serializable version id. */
140        private static final long serialVersionUID = 74746076708908673L;
141
142        /**
143         * Creates a new force unlock type with the given name.<p>
144         *
145         * @param mode the mode id to use
146         */
147        protected CmsSearchForceUnlockMode(String mode) {
148
149            super(mode);
150        }
151
152        /**
153         * Returns the lock type for the given type value.<p>
154         *
155         * @param type the type value to get the lock type for
156         *
157         * @return the lock type for the given type value
158         */
159        public static CmsSearchForceUnlockMode valueOf(String type) {
160
161            if (type.equals(ALWAYS.toString())) {
162                return ALWAYS;
163            } else if (type.equals(NEVER.toString())) {
164                return NEVER;
165            } else {
166                return ONLYFULL;
167            }
168        }
169    }
170
171    /**
172     * Handles offline index generation.<p>
173     */
174    protected class CmsSearchOfflineHandler implements I_CmsEventListener {
175
176        /** Indicates if the event handlers for the offline search have been already registered. */
177        private boolean m_isEventRegistered;
178
179        /** The list of resources to index. */
180        private List<CmsPublishedResource> m_resourcesToIndex;
181
182        /**
183         * Initializes the offline index handler.<p>
184         */
185        protected CmsSearchOfflineHandler() {
186
187            m_resourcesToIndex = new ArrayList<CmsPublishedResource>();
188        }
189
190        /**
191         * Implements the event listener of this class.<p>
192         *
193         * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent)
194         */
195        @SuppressWarnings("unchecked")
196        public void cmsEvent(CmsEvent event) {
197
198            Object change = event.getData().get(I_CmsEventListener.KEY_CHANGE);
199            switch (event.getType()) {
200                case I_CmsEventListener.EVENT_PROPERTY_MODIFIED:
201                case I_CmsEventListener.EVENT_RESOURCE_CREATED:
202                case I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED:
203                case I_CmsEventListener.EVENT_RESOURCE_MODIFIED:
204                    if ((change != null) && change.equals(new Integer(CmsDriverManager.NOTHING_CHANGED))) {
205                        // skip lock & unlock
206                        return;
207                    }
208                    // skip indexing if flag is set in event
209                    Object skip = event.getData().get(I_CmsEventListener.KEY_SKIPINDEX);
210                    if (skip != null) {
211                        return;
212                    }
213
214                    // a resource has been modified - offline indexes require (re)indexing
215                    List<CmsResource> resources = Collections.singletonList(
216                        (CmsResource)event.getData().get(I_CmsEventListener.KEY_RESOURCE));
217                    reIndexResources(resources);
218                    break;
219                case I_CmsEventListener.EVENT_RESOURCE_DELETED:
220                    List<CmsResource> eventResources = (List<CmsResource>)event.getData().get(
221                        I_CmsEventListener.KEY_RESOURCES);
222                    List<CmsResource> resourcesToDelete = new ArrayList<CmsResource>(eventResources);
223                    for (CmsResource res : resourcesToDelete) {
224                        if (res.getState().isNew()) {
225                            // if the resource is new and a delete action was performed
226                            // --> set the state of the resource to deleted
227                            res.setState(CmsResourceState.STATE_DELETED);
228                        }
229                    }
230                    reIndexResources(resourcesToDelete);
231                    break;
232                case I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED:
233                    if (I_CmsEventListener.VALUE_CREATE_SIBLING.equals(change)) {
234                        List<CmsResource> resList = (List<CmsResource>)event.getData().get(
235                            I_CmsEventListener.KEY_RESOURCES);
236                        if ((resList != null) && (resList.size() >= 3)) {
237                            System.out.println("Sibling creation case, resource = " + resList.get(1).getRootPath());
238                            reIndexResources(Collections.singletonList(resList.get(1)));
239
240                        }
241                    } else {
242                        reIndexResources((List<CmsResource>)event.getData().get(I_CmsEventListener.KEY_RESOURCES));
243                    }
244                    break;
245                case I_CmsEventListener.EVENT_RESOURCE_MOVED:
246                case I_CmsEventListener.EVENT_RESOURCE_COPIED:
247                case I_CmsEventListener.EVENT_RESOURCES_MODIFIED:
248
249                    // a list of resources has been modified - offline indexes require (re)indexing
250                    reIndexResources((List<CmsResource>)event.getData().get(I_CmsEventListener.KEY_RESOURCES));
251                    break;
252                default:
253                    // no operation
254            }
255        }
256
257        /**
258         * Adds a list of {@link CmsPublishedResource} objects to be indexed.<p>
259         *
260         * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to be indexed
261         */
262        protected synchronized void addResourcesToIndex(List<CmsPublishedResource> resourcesToIndex) {
263
264            m_resourcesToIndex.addAll(resourcesToIndex);
265        }
266
267        /**
268         * Returns the list of {@link CmsPublishedResource} objects to index.<p>
269         *
270         * @return the resources to index
271         */
272        protected List<CmsPublishedResource> getResourcesToIndex() {
273
274            List<CmsPublishedResource> result;
275            synchronized (this) {
276                result = m_resourcesToIndex;
277                m_resourcesToIndex = new ArrayList<CmsPublishedResource>();
278            }
279            try {
280                CmsObject cms = m_adminCms;
281                CmsProject offline = getOfflineIndexProject();
282                if (offline != null) {
283                    // switch to the offline project if available
284                    cms = OpenCms.initCmsObject(m_adminCms);
285                    cms.getRequestContext().setCurrentProject(offline);
286                }
287                addAdditionallyAffectedResources(cms, result);
288            } catch (CmsException e) {
289                LOG.error(e.getLocalizedMessage(), e);
290            }
291            return result;
292        }
293
294        /**
295         * Initializes this offline search handler, registering the event handlers if required.<p>
296         */
297        protected void initialize() {
298
299            if (m_offlineIndexes.size() > 0) {
300                // there is at least one offline index configured
301                if ((m_offlineIndexThread == null) || !m_offlineIndexThread.isAlive()) {
302                    // create the offline indexing thread
303                    m_offlineIndexThread = new CmsSearchOfflineIndexThread(this);
304                    // start the offline index thread
305                    m_offlineIndexThread.start();
306                }
307            } else {
308                if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) {
309                    // no offline indexes but thread still running, stop the thread
310                    m_offlineIndexThread.shutDown();
311                    m_offlineIndexThread = null;
312                }
313            }
314            // do this only in case there are offline indexes configured
315            if (!m_isEventRegistered && (m_offlineIndexes.size() > 0)) {
316                m_isEventRegistered = true;
317                // register this object as event listener
318                OpenCms.addCmsEventListener(
319                    this,
320                    new int[] {
321                        I_CmsEventListener.EVENT_PROPERTY_MODIFIED,
322                        I_CmsEventListener.EVENT_RESOURCE_CREATED,
323                        I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED,
324                        I_CmsEventListener.EVENT_RESOURCE_MODIFIED,
325                        I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED,
326                        I_CmsEventListener.EVENT_RESOURCE_MOVED,
327                        I_CmsEventListener.EVENT_RESOURCE_DELETED,
328                        I_CmsEventListener.EVENT_RESOURCE_COPIED,
329                        I_CmsEventListener.EVENT_RESOURCES_MODIFIED});
330            }
331        }
332
333        /**
334         * Updates all offline indexes for the given list of {@link CmsResource} objects.<p>
335         *
336         * @param resources a list of {@link CmsResource} objects to update in the offline indexes
337         */
338        protected synchronized void reIndexResources(List<CmsResource> resources) {
339
340            List<CmsPublishedResource> resourcesToIndex = new ArrayList<CmsPublishedResource>(resources.size());
341            for (CmsResource res : resources) {
342                CmsPublishedResource pubRes = new CmsPublishedResource(res);
343                resourcesToIndex.add(pubRes);
344            }
345            if (resourcesToIndex.size() > 0) {
346                // add the resources found to the offline index thread
347                addResourcesToIndex(resourcesToIndex);
348            }
349        }
350    }
351
352    /**
353     * The offline indexer thread runs periodically and indexes all resources added by the event handler.<p>
354     */
355    protected class CmsSearchOfflineIndexThread extends Thread {
356
357        /** The event handler that triggers this thread. */
358        CmsSearchOfflineHandler m_handler;
359
360        /** Indicates if this thread is still alive. */
361        boolean m_isAlive;
362
363        /** Indicates that an index update thread is currently running. */
364        private boolean m_isUpdating;
365
366        /** If true a manual update (after file upload) was triggered. */
367        private boolean m_updateTriggered;
368
369        /** The wait handle used for signalling when the worker thread has finished. */
370        private CmsWaitHandle m_waitHandle = new CmsWaitHandle();
371
372        /**
373         * Constructor.<p>
374         *
375         * @param handler the offline index event handler
376         */
377        protected CmsSearchOfflineIndexThread(CmsSearchOfflineHandler handler) {
378
379            super("OpenCms: Offline Search Indexer");
380            m_handler = handler;
381        }
382
383        /**
384         * Gets the wait handle used for signalling when the worker thread has finished.
385         *
386         * @return the wait handle
387         **/
388        public CmsWaitHandle getWaitHandle() {
389
390            return m_waitHandle;
391        }
392
393        /**
394         * @see java.lang.Thread#interrupt()
395         */
396        @Override
397        public void interrupt() {
398
399            super.interrupt();
400            m_updateTriggered = true;
401        }
402
403        /**
404         * @see java.lang.Thread#run()
405         */
406        @Override
407        public void run() {
408
409            // create a log report for the output
410            I_CmsReport report = new CmsLogReport(m_adminCms.getRequestContext().getLocale(), CmsSearchManager.class);
411            long offlineUpdateFrequency = getOfflineUpdateFrequency();
412            m_updateTriggered = false;
413            try {
414                while (m_isAlive) {
415                    if (!m_updateTriggered) {
416                        try {
417                            sleep(offlineUpdateFrequency);
418                        } catch (InterruptedException e) {
419                            // continue the thread after interruption
420                            if (!m_isAlive) {
421                                // the thread has been shut down while sleeping
422                                continue;
423                            }
424                            if (offlineUpdateFrequency != getOfflineUpdateFrequency()) {
425                                // offline update frequency change - clear interrupt status
426                                offlineUpdateFrequency = getOfflineUpdateFrequency();
427                            }
428                            LOG.info(e.getLocalizedMessage(), e);
429                        }
430                    }
431                    if (m_isAlive) {
432                        // set update trigger to false since we do the update now
433                        m_updateTriggered = false;
434                        // get list of resource to update
435                        List<CmsPublishedResource> resourcesToIndex = getResourcesToIndex();
436                        if (resourcesToIndex.size() > 0) {
437                            // only start indexing if there is at least one resource
438                            startOfflineUpdateThread(report, resourcesToIndex);
439                        } else {
440                            getWaitHandle().release();
441                        }
442                        // this is just called to clear the interrupt status of the thread
443                        interrupted();
444                    }
445                }
446            } finally {
447                // make sure that live status is reset in case of Exceptions
448                m_isAlive = false;
449            }
450
451        }
452
453        /**
454         * @see java.lang.Thread#start()
455         */
456        @Override
457        public synchronized void start() {
458
459            m_isAlive = true;
460            super.start();
461        }
462
463        /**
464         * Obtains the list of resource to update in the offline index,
465         * then optimizes the list by removing duplicate entries.<p>
466         *
467         * @return the list of resource to update in the offline index
468         */
469        protected List<CmsPublishedResource> getResourcesToIndex() {
470
471            List<CmsPublishedResource> resourcesToIndex = m_handler.getResourcesToIndex();
472            List<CmsPublishedResource> result = new ArrayList<CmsPublishedResource>(resourcesToIndex.size());
473
474            // Reverse to always keep the last list entries
475            Collections.reverse(resourcesToIndex);
476            for (CmsPublishedResource pubRes : resourcesToIndex) {
477                boolean addResource = true;
478                for (CmsPublishedResource resRes : result) {
479                    if (pubRes.equals(resRes)
480                        && (pubRes.getState() == resRes.getState())
481                        && (pubRes.getMovedState() == resRes.getMovedState())
482                        && pubRes.getRootPath().equals(resRes.getRootPath())) {
483                        // resource already in the update list
484                        addResource = false;
485                        break;
486                    }
487                }
488                if (addResource) {
489                    result.add(pubRes);
490                }
491
492            }
493            Collections.reverse(result);
494            return changeStateOfMoveOriginsToDeleted(result);
495        }
496
497        /**
498         * Shuts down this offline index thread.<p>
499         */
500        protected void shutDown() {
501
502            m_isAlive = false;
503            interrupt();
504            if (m_isUpdating) {
505                long waitTime = getOfflineUpdateFrequency() / 2;
506                int waitSteps = 0;
507                do {
508                    try {
509                        // wait half the time of the offline index frequency for the thread to finish
510                        Thread.sleep(waitTime);
511                    } catch (InterruptedException e) {
512                        // continue
513                        LOG.info(e.getLocalizedMessage(), e);
514                    }
515                    waitSteps++;
516                    // wait 5 times then stop waiting
517                } while ((waitSteps < 5) && m_isUpdating);
518            }
519        }
520
521        /**
522         * Updates the offline search indexes for the given list of resources.<p>
523         *
524         * @param report the report to write the index information to
525         * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index
526         */
527        protected void startOfflineUpdateThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) {
528
529            CmsSearchOfflineIndexWorkThread thread = new CmsSearchOfflineIndexWorkThread(report, resourcesToIndex);
530            long startTime = System.currentTimeMillis();
531            long waitTime = getOfflineUpdateFrequency() / 2;
532            if (LOG.isDebugEnabled()) {
533                LOG.debug(
534                    Messages.get().getBundle().key(
535                        Messages.LOG_OI_UPDATE_START_1,
536                        Integer.valueOf(resourcesToIndex.size())));
537            }
538
539            m_isUpdating = true;
540            thread.start();
541
542            do {
543                try {
544                    // wait half the time of the offline index frequency for the thread to finish
545                    thread.join(waitTime);
546                } catch (InterruptedException e) {
547                    // continue
548                    LOG.info(e.getLocalizedMessage(), e);
549                }
550                if (thread.isAlive()) {
551                    LOG.warn(
552                        Messages.get().getBundle().key(
553                            Messages.LOG_OI_UPDATE_LONG_2,
554                            Integer.valueOf(resourcesToIndex.size()),
555                            Long.valueOf(System.currentTimeMillis() - startTime)));
556                }
557            } while (thread.isAlive());
558            m_isUpdating = false;
559
560            if (LOG.isDebugEnabled()) {
561                LOG.debug(
562                    Messages.get().getBundle().key(
563                        Messages.LOG_OI_UPDATE_FINISH_2,
564                        Integer.valueOf(resourcesToIndex.size()),
565                        Long.valueOf(System.currentTimeMillis() - startTime)));
566            }
567        }
568
569        /**
570         * Helper method which changes the states of resources which are to be indexed but have the wrong path to 'deleted'.
571         * This is needed to deal with moved resources, since the documents with the old paths must be removed from the index,
572         *
573         * @param resourcesToIndex the resources to index
574         *
575         * @return the resources to index, but resource states are set to 'deleted' for resources with outdated paths
576         */
577        private List<CmsPublishedResource> changeStateOfMoveOriginsToDeleted(
578            List<CmsPublishedResource> resourcesToIndex) {
579
580            Map<CmsUUID, String> lastValidPaths = new HashMap<CmsUUID, String>();
581            for (CmsPublishedResource resource : resourcesToIndex) {
582                if (resource.getState().isDeleted()) {
583                    // we don't want the last path to be from a deleted resource
584                    continue;
585                }
586                lastValidPaths.put(resource.getStructureId(), resource.getRootPath());
587            }
588            List<CmsPublishedResource> result = new ArrayList<CmsPublishedResource>();
589            for (CmsPublishedResource resource : resourcesToIndex) {
590                if (resource.getState().isDeleted()) {
591                    result.add(resource);
592                    continue;
593                }
594                String lastValidPath = lastValidPaths.get(resource.getStructureId());
595                if (resource.getRootPath().equals(lastValidPath) || resource.getStructureId().isNullUUID()) {
596                    result.add(resource);
597                } else {
598                    result.add(
599                        new CmsPublishedResource(
600                            resource.getStructureId(),
601                            resource.getResourceId(),
602                            resource.getPublishTag(),
603                            resource.getRootPath(),
604                            resource.getType(),
605                            resource.isFolder(),
606                            CmsResource.STATE_DELETED, // make sure index entry with outdated path is deleted
607                            resource.getSiblingCount()));
608                }
609            }
610            return result;
611        }
612    }
613
614    /**
615     * An offline index worker Thread runs each time for every offline index update action.<p>
616     *
617     * This was decoupled from the main {@link CmsSearchOfflineIndexThread} in order to avoid
618     * problems if a single operation "hangs" the Tread.<p>
619     */
620    protected class CmsSearchOfflineIndexWorkThread extends Thread {
621
622        /** The report to write the index information to. */
623        I_CmsReport m_report;
624
625        /** The list of {@link CmsPublishedResource} objects to index. */
626        List<CmsPublishedResource> m_resourcesToIndex;
627
628        /**
629         * Updates the offline search indexes for the given list of resources.<p>
630         *
631         * @param report the report to write the index information to
632         * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index
633         */
634        protected CmsSearchOfflineIndexWorkThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) {
635
636            super("OpenCms: Offline Search Index Worker");
637            m_report = report;
638            m_resourcesToIndex = resourcesToIndex;
639        }
640
641        /**
642         * @see java.lang.Thread#run()
643         */
644        @Override
645        public void run() {
646
647            updateIndexOffline(m_report, m_resourcesToIndex);
648            if (m_offlineIndexThread != null) {
649                m_offlineIndexThread.getWaitHandle().release();
650            }
651        }
652    }
653
654    /** This needs to be a fair lock to preserve order of threads accessing the search manager. */
655    private static final ReentrantLock SEARCH_MANAGER_LOCK = new ReentrantLock(true);
656
657    /** The default value used for generating search result excerpts (1024 chars). */
658    public static final int DEFAULT_EXCERPT_LENGTH = 1024;
659
660    /** The default value used for keeping the extraction results in the cache (672 hours = 4 weeks). */
661    public static final float DEFAULT_EXTRACTION_CACHE_MAX_AGE = 672.0f;
662
663    /** Default for the maximum number of modifications before a commit in the search index is triggered (500). */
664    public static final int DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT = 500;
665
666    /** The default update frequency for offline indexes (15000 msec = 15 sec). */
667    public static final int DEFAULT_OFFLINE_UPDATE_FREQNENCY = 15000;
668
669    /** The default maximal wait time for re-indexing after editing a content. */
670    public static final int DEFAULT_MAX_INDEX_WAITTIME = 30000;
671
672    /** The default timeout value used for generating a document for the search index (60000 msec = 1 min). */
673    public static final int DEFAULT_TIMEOUT = 60000;
674
675    /** Scheduler parameter: Update only a specified list of indexes. */
676    public static final String JOB_PARAM_INDEXLIST = "indexList";
677
678    /** Scheduler parameter: Write the output of the update to the logfile. */
679    public static final String JOB_PARAM_WRITELOG = "writeLog";
680
681    /** Prefix for Lucene default analyzers package (<code>org.apache.lucene.analysis.core.</code>). */
682    public static final String LUCENE_ANALYZER = "org.apache.lucene.analysis.core.";
683
684    /** The log object for this class. */
685    protected static final Log LOG = CmsLog.getLog(CmsSearchManager.class);
686
687    /** Names of the resource types which represent groups of elements. */
688    private static final String[] groupTypes = {
689        CmsResourceTypeXmlContainerPage.MODEL_GROUP_TYPE_NAME,
690        CmsResourceTypeXmlContainerPage.GROUP_CONTAINER_TYPE_NAME,
691        CmsResourceTypeXmlContainerPage.INHERIT_CONTAINER_TYPE_NAME};
692
693    /** The administrator OpenCms user context to access OpenCms VFS resources. */
694    protected CmsObject m_adminCms;
695
696    /** The list of indexes that are configured for offline index mode. */
697    protected List<I_CmsSearchIndex> m_offlineIndexes;
698
699    /** The thread used for offline indexing. */
700    protected CmsSearchOfflineIndexThread m_offlineIndexThread;
701
702    /** Configured analyzers for languages using &lt;analyzer&gt;. */
703    private HashMap<Locale, CmsSearchAnalyzer> m_analyzers;
704
705    /** Stores the offline update frequency while indexing is paused. */
706    private long m_configuredOfflineIndexingFrequency;
707
708    /** The Solr core container. */
709    private CoreContainer m_coreContainer;
710
711    /** A list of document factory configurations. */
712    private List<CmsSearchDocumentType> m_documentTypeConfigs;
713
714    /** A map of document factories keyed first by their name and then by their extraction keys. */
715    private Map<String, Map<String, I_CmsDocumentFactory>> m_documentTypes;
716
717    /** The set of all globally available extraction keys for document factories. */
718    private Set<String> m_extractionKeys;
719
720    /** The max age for extraction results to remain in the cache. */
721    private float m_extractionCacheMaxAge;
722
723    /** The cache for the extraction results. */
724    private CmsExtractionResultCache m_extractionResultCache;
725
726    /** Contains the available field configurations. */
727    private Map<String, I_CmsSearchFieldConfiguration> m_fieldConfigurations;
728
729    /** The force unlock type. */
730    private CmsSearchForceUnlockMode m_forceUnlockMode;
731
732    /** The class used to highlight the search terms in the excerpt of a search result. */
733    private I_CmsTermHighlighter m_highlighter;
734
735    /** A list of search indexes. */
736    private List<I_CmsSearchIndex> m_indexes;
737
738    /** Seconds to wait for an index lock. */
739    private int m_indexLockMaxWaitSeconds = 10;
740
741    /** Configured index sources. */
742    private Map<String, CmsSearchIndexSource> m_indexSources;
743
744    /** The max. char. length of the excerpt in the search result. */
745    private int m_maxExcerptLength;
746
747    /** The maximum number of modifications before a commit in the search index is triggered. */
748    private int m_maxModificationsBeforeCommit;
749
750    /** The offline index search handler. */
751    private CmsSearchOfflineHandler m_offlineHandler;
752
753    /** The update frequency of the offline indexer in milliseconds. */
754    private long m_offlineUpdateFrequency;
755
756    /** The maximal time to wait for re-indexing after a content is edited (in milliseconds). */
757    private long m_maxIndexWaitTime;
758
759    /** Path to index files below WEB-INF/. */
760    private String m_path;
761
762    /** The Solr configuration. */
763    private CmsSolrConfiguration m_solrConfig;
764
765    /** Timeout for abandoning indexing thread. */
766    private long m_timeout;
767
768    /**
769     * Default constructor when called as cron job.<p>
770     */
771    public CmsSearchManager() {
772
773        m_documentTypes = new HashMap<String, Map<String, I_CmsDocumentFactory>>();
774        m_extractionKeys = new HashSet<String>();
775        m_documentTypeConfigs = new ArrayList<CmsSearchDocumentType>();
776        m_analyzers = new HashMap<Locale, CmsSearchAnalyzer>();
777        m_indexes = new ArrayList<I_CmsSearchIndex>();
778        m_indexSources = new TreeMap<String, CmsSearchIndexSource>();
779        m_offlineHandler = new CmsSearchOfflineHandler();
780        m_extractionCacheMaxAge = DEFAULT_EXTRACTION_CACHE_MAX_AGE;
781        m_maxExcerptLength = DEFAULT_EXCERPT_LENGTH;
782        m_offlineUpdateFrequency = DEFAULT_OFFLINE_UPDATE_FREQNENCY;
783        m_maxIndexWaitTime = DEFAULT_MAX_INDEX_WAITTIME;
784        m_maxModificationsBeforeCommit = DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT;
785
786        m_fieldConfigurations = new HashMap<String, I_CmsSearchFieldConfiguration>();
787        // make sure we have a "standard" field configuration
788        addFieldConfiguration(CmsLuceneFieldConfiguration.DEFAULT_STANDARD);
789
790        if (CmsLog.INIT.isInfoEnabled()) {
791            CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_START_SEARCH_CONFIG_0));
792        }
793    }
794
795    /**
796     * Returns an analyzer for the given class name.<p>
797     *
798     * @param className the class name of the analyzer
799     *
800     * @return the appropriate lucene analyzer
801     *
802     * @throws Exception if something goes wrong
803     */
804    public static Analyzer getAnalyzer(String className) throws Exception {
805
806        Analyzer analyzer = null;
807        Class<?> analyzerClass;
808        try {
809            analyzerClass = Class.forName(className);
810        } catch (ClassNotFoundException e) {
811            // allow Lucene standard classes to be written in a short form
812            analyzerClass = Class.forName(LUCENE_ANALYZER + className);
813        }
814
815        // since Lucene 3.0 most analyzers need a "version" parameter and don't support an empty constructor
816        if (StandardAnalyzer.class.equals(analyzerClass)) {
817            // the Lucene standard analyzer is used - but without any stopwords.
818            analyzer = new StandardAnalyzer(new CharArraySet(0, false));
819        } else {
820            analyzer = (Analyzer)analyzerClass.newInstance();
821        }
822        return analyzer;
823    }
824
825    /**
826     * Returns the Solr index configured with the parameters name.
827     * The parameters must contain a key/value pair with an existing
828     * Solr index, otherwise <code>null</code> is returned.<p>
829     *
830     * @param cms the current context
831     * @param params the parameter map
832     *
833     * @return the best matching Solr index
834     */
835    public static final CmsSolrIndex getIndexSolr(CmsObject cms, Map<String, String[]> params) {
836
837        String indexName = null;
838        CmsSolrIndex index = null;
839        // try to get the index name from the parameters: 'core' or 'index'
840        if (params != null) {
841            indexName = params.get(OpenCmsSolrHandler.PARAM_CORE) != null
842            ? params.get(OpenCmsSolrHandler.PARAM_CORE)[0]
843            : (params.get(OpenCmsSolrHandler.PARAM_INDEX) != null
844            ? params.get(OpenCmsSolrHandler.PARAM_INDEX)[0]
845            : null);
846        }
847        if (indexName == null) {
848            // if no parameter is specified try to use the default online/offline indexes by context
849            indexName = cms.getRequestContext().getCurrentProject().isOnlineProject()
850            ? CmsSolrIndex.DEFAULT_INDEX_NAME_ONLINE
851            : CmsSolrIndex.DEFAULT_INDEX_NAME_OFFLINE;
852        }
853        // try to get the index
854        index = OpenCms.getSearchManager().getIndexSolr(indexName);
855        if (index == null) {
856            // if there is exactly one index, a missing core / index parameter doesn't matter, since there is no choice.
857            List<CmsSolrIndex> solrs = OpenCms.getSearchManager().getAllSolrIndexes();
858            if ((solrs != null) && !solrs.isEmpty() && (solrs.size() == 1)) {
859                index = solrs.get(0);
860            }
861        }
862        return index;
863    }
864
865    /**
866     * Returns <code>true</code> if the index for the given name is a Lucene index, <code>false</code> otherwise.<p>
867     *
868     * @param indexName the name of the index to check
869     *
870     * @return <code>true</code> if the index for the given name is a Lucene index
871     */
872    public static boolean isLuceneIndex(String indexName) {
873
874        I_CmsSearchIndex i = OpenCms.getSearchManager().getIndex(indexName);
875        return (i instanceof CmsSearchIndex) && (!(i instanceof CmsSolrIndex));
876    }
877
878    /**
879     * Adds an analyzer.<p>
880     *
881     * @param analyzer an analyzer
882     */
883    public void addAnalyzer(CmsSearchAnalyzer analyzer) {
884
885        m_analyzers.put(analyzer.getLocale(), analyzer);
886
887        if (CmsLog.INIT.isInfoEnabled()) {
888            CmsLog.INIT.info(
889                Messages.get().getBundle().key(
890                    Messages.INIT_ADD_ANALYZER_2,
891                    analyzer.getLocale(),
892                    analyzer.getClassName()));
893        }
894    }
895
896    /**
897     * Adds a document type.<p>
898     *
899     * @param documentType a document type
900     */
901    public void addDocumentTypeConfig(CmsSearchDocumentType documentType) {
902
903        m_documentTypeConfigs.add(documentType);
904
905        if (CmsLog.INIT.isInfoEnabled()) {
906            CmsLog.INIT.info(
907                Messages.get().getBundle().key(
908                    Messages.INIT_SEARCH_DOC_TYPES_2,
909                    documentType.getName(),
910                    documentType.getClassName()));
911        }
912    }
913
914    /**
915     * Adds a search field configuration to the search manager.<p>
916     *
917     * @param fieldConfiguration the search field configuration to add
918     */
919    public void addFieldConfiguration(I_CmsSearchFieldConfiguration fieldConfiguration) {
920
921        m_fieldConfigurations.put(fieldConfiguration.getName(), fieldConfiguration);
922    }
923
924    /**
925     * Adds a search index to the configuration.<p>
926     *
927     * @param searchIndex the search index to add
928     */
929    public void addSearchIndex(I_CmsSearchIndex searchIndex) {
930
931        if (!searchIndex.isInitialized()) {
932            if (OpenCms.getRunLevel() > OpenCms.RUNLEVEL_2_INITIALIZING) {
933                try {
934                    searchIndex.initialize();
935                } catch (CmsException e) {
936                    // should never happen
937                    LOG.error(e.getMessage(), e);
938                }
939            }
940        }
941
942        // name: not null or emtpy and unique
943        String name = searchIndex.getName();
944        if (CmsStringUtil.isEmptyOrWhitespaceOnly(name)) {
945            throw new CmsIllegalArgumentException(
946                Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_MISSING_NAME_0));
947        }
948        if (m_indexSources.keySet().contains(name)) {
949            throw new CmsIllegalArgumentException(
950                Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_INVALID_NAME_1, name));
951        }
952
953        m_indexes.add(searchIndex);
954        if (m_adminCms != null) {
955            initOfflineIndexes();
956        }
957
958        if (CmsLog.INIT.isInfoEnabled()) {
959            CmsLog.INIT.info(
960                Messages.get().getBundle().key(
961                    Messages.INIT_ADD_SEARCH_INDEX_2,
962                    searchIndex.getName(),
963                    searchIndex.getProject()));
964        }
965    }
966
967    /**
968     * Adds a search index source configuration.<p>
969     *
970     * @param searchIndexSource a search index source configuration
971     */
972    public void addSearchIndexSource(CmsSearchIndexSource searchIndexSource) {
973
974        m_indexSources.put(searchIndexSource.getName(), searchIndexSource);
975
976        if (CmsLog.INIT.isInfoEnabled()) {
977            CmsLog.INIT.info(
978                Messages.get().getBundle().key(
979                    Messages.INIT_SEARCH_INDEX_SOURCE_2,
980                    searchIndexSource.getName(),
981                    searchIndexSource.getIndexerClassName()));
982        }
983    }
984
985    /**
986     * Implements the event listener of this class.<p>
987     *
988     * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent)
989     */
990    public void cmsEvent(CmsEvent event) {
991
992        switch (event.getType()) {
993            case I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES:
994                List<String> indexNames = null;
995                if ((event.getData() != null)
996                    && CmsStringUtil.isNotEmptyOrWhitespaceOnly(
997                        (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES))) {
998                    indexNames = CmsStringUtil.splitAsList(
999                        (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES),
1000                        ",",
1001                        true);
1002                }
1003                try {
1004                    if (LOG.isDebugEnabled()) {
1005                        LOG.debug(
1006                            Messages.get().getBundle().key(
1007                                Messages.LOG_EVENT_REBUILD_SEARCHINDEX_1,
1008                                indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")),
1009                            new Exception());
1010                    }
1011                    if (indexNames == null) {
1012                        rebuildAllIndexes(getEventReport(event));
1013                    } else {
1014                        rebuildIndexes(indexNames, getEventReport(event));
1015                    }
1016                } catch (CmsException e) {
1017                    if (LOG.isErrorEnabled()) {
1018                        LOG.error(
1019                            Messages.get().getBundle().key(
1020                                Messages.ERR_EVENT_REBUILD_SEARCHINDEX_1,
1021                                indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")),
1022                            e);
1023                    }
1024                }
1025                break;
1026            case I_CmsEventListener.EVENT_CLEAR_CACHES:
1027                if (LOG.isDebugEnabled()) {
1028                    LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_CLEAR_CACHES_0), new Exception());
1029                }
1030                break;
1031            case I_CmsEventListener.EVENT_PUBLISH_PROJECT:
1032                // event data contains a list of the published resources
1033                CmsUUID publishHistoryId = new CmsUUID((String)event.getData().get(I_CmsEventListener.KEY_PUBLISHID));
1034                if (LOG.isDebugEnabled()) {
1035                    LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_PUBLISH_PROJECT_1, publishHistoryId));
1036                }
1037                updateAllIndexes(m_adminCms, publishHistoryId, getEventReport(event));
1038                if (LOG.isDebugEnabled()) {
1039                    LOG.debug(
1040                        Messages.get().getBundle().key(
1041                            Messages.LOG_EVENT_PUBLISH_PROJECT_FINISHED_1,
1042                            publishHistoryId));
1043                }
1044                break;
1045            case I_CmsEventListener.EVENT_REINDEX_OFFLINE:
1046            case I_CmsEventListener.EVENT_REINDEX_ONLINE:
1047                boolean isOnline = I_CmsEventListener.EVENT_REINDEX_ONLINE == event.getType();
1048                Map<String, Object> eventData = event.getData();
1049                CmsUUID userId = (CmsUUID)eventData.get(I_CmsEventListener.KEY_USER_ID);
1050                CmsUser user = null;
1051                try {
1052                    user = m_adminCms.readUser(userId);
1053                } catch (Throwable t) {
1054                    // should never happen
1055                }
1056                try {
1057                    SEARCH_MANAGER_LOCK.lock();
1058                    if (LOG.isDebugEnabled()) {
1059                        LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_REINDEX_STARTED_0));
1060                    }
1061                    CmsObject cms = m_adminCms;
1062                    if (!isOnline) {
1063                        OpenCms.initCmsObject(m_adminCms);
1064                        cms.getRequestContext().setCurrentProject(
1065                            cms.readProject((CmsUUID)eventData.get(I_CmsEventListener.KEY_PROJECTID)));
1066                    }
1067                    @SuppressWarnings("unchecked")
1068                    List<CmsResource> resources = (List<CmsResource>)eventData.get(I_CmsEventListener.KEY_RESOURCES);
1069                    I_CmsReport report = (I_CmsReport)eventData.get(I_CmsEventListener.KEY_REPORT);
1070                    List<CmsResource> resourcesToIndex = new ArrayList<>();
1071                    for (CmsResource res : resources) {
1072                        if (res.isFile()) {
1073                            resourcesToIndex.add(res);
1074                        } else {
1075                            try {
1076                                resourcesToIndex.addAll(
1077                                    cms.readResources(res, CmsResourceFilter.IGNORE_EXPIRATION, true));
1078                            } catch (CmsException e) {
1079                                LOG.error(e, e);
1080                            }
1081                        }
1082                    }
1083                    // we reindex and prevent using cached results
1084                    cleanExtractionCache();
1085                    List<CmsPublishedResource> publishedResourcesToIndex = resourcesToIndex.stream().map(
1086                        res -> new CmsPublishedResource(res)).collect(Collectors.toList());
1087                    if (Boolean.TRUE.equals(eventData.get(I_CmsEventListener.KEY_REINDEX_RELATED))) {
1088                        addAdditionallyAffectedResources(cms, publishedResourcesToIndex);
1089                    }
1090                    if (isOnline) {
1091                        updateAllIndexes(
1092                            m_adminCms,
1093                            publishedResourcesToIndex,
1094                            new CmsShellLogReport(CmsLocaleManager.MASTER_LOCALE));
1095                    } else {
1096                        updateIndexOffline(report, publishedResourcesToIndex);
1097                    }
1098                    cms = null;
1099                    SEARCH_MANAGER_LOCK.unlock();
1100                    if (null != user) {
1101                        Locale l = OpenCms.getWorkplaceManager().getWorkplaceLocale(user);
1102                        OpenCms.getSessionManager().sendBroadcast(
1103                            null,
1104                            Messages.get().getBundle(l).key(Messages.GUI_REINDEXING_SUCCESS_0),
1105                            user,
1106                            ContentMode.html);
1107                    }
1108                    if (LOG.isDebugEnabled()) {
1109                        LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_REINDEX_FINISHED_0));
1110                    }
1111
1112                } catch (Throwable e) {
1113                    if (SEARCH_MANAGER_LOCK.isHeldByCurrentThread()) {
1114                        SEARCH_MANAGER_LOCK.unlock();
1115                    }
1116                    if (null != user) {
1117                        Locale l = OpenCms.getWorkplaceManager().getWorkplaceLocale(user);
1118                        OpenCms.getSessionManager().sendBroadcast(
1119                            null,
1120                            Messages.get().getBundle(l).key(Messages.GUI_REINDEXING_FAILED_0),
1121                            user,
1122                            ContentMode.html);
1123                    }
1124                    if (LOG.isDebugEnabled()) {
1125                        LOG.error(
1126                            Messages.get().getBundle().key(Messages.ERR_EVENT_REINDEX_FAILED_1, event.getData()),
1127                            e);
1128                    } else if (LOG.isErrorEnabled()) {
1129                        LOG.error(Messages.get().getBundle().key(Messages.ERR_EVENT_REINDEX_FAILED_1, event.getData()));
1130                    }
1131                }
1132                break;
1133            default:
1134                // no operation
1135        }
1136    }
1137
1138    /**
1139     * Returns all Solr index.<p>
1140     *
1141     * @return all Solr indexes
1142     */
1143    public List<CmsSolrIndex> getAllSolrIndexes() {
1144
1145        List<CmsSolrIndex> result = new ArrayList<CmsSolrIndex>();
1146        for (String indexName : getIndexNames()) {
1147            CmsSolrIndex index = getIndexSolr(indexName);
1148            if (index != null) {
1149                result.add(index);
1150            }
1151        }
1152        return result;
1153    }
1154
1155    /**
1156     * Returns an analyzer for the given language.<p>
1157     *
1158     * The analyzer is selected according to the analyzer configuration.<p>
1159     *
1160     * @param locale the locale to get the analyzer for
1161     * @return the appropriate lucene analyzer
1162     *
1163     * @throws CmsSearchException if something goes wrong
1164     */
1165    public Analyzer getAnalyzer(Locale locale) throws CmsSearchException {
1166
1167        Analyzer analyzer = null;
1168        String className = null;
1169
1170        CmsSearchAnalyzer analyzerConf = m_analyzers.get(locale);
1171        if (analyzerConf == null) {
1172            throw new CmsSearchException(Messages.get().container(Messages.ERR_ANALYZER_NOT_FOUND_1, locale));
1173        }
1174
1175        try {
1176            analyzer = getAnalyzer(analyzerConf.getClassName());
1177        } catch (Exception e) {
1178            throw new CmsSearchException(Messages.get().container(Messages.ERR_LOAD_ANALYZER_1, className), e);
1179        }
1180
1181        return analyzer;
1182    }
1183
1184    /**
1185     * Returns an unmodifiable view of the map that contains the {@link CmsSearchAnalyzer} list.<p>
1186     *
1187     * The keys in the map are {@link Locale} objects, and the values are {@link CmsSearchAnalyzer} objects.
1188     *
1189     * @return an unmodifiable view of the Analyzers Map
1190     */
1191    public Map<Locale, CmsSearchAnalyzer> getAnalyzers() {
1192
1193        return Collections.unmodifiableMap(m_analyzers);
1194    }
1195
1196    /**
1197     * Returns the search analyzer for the given locale.<p>
1198     *
1199     * @param locale the locale to get the analyzer for
1200     *
1201     * @return the search analyzer for the given locale
1202     */
1203    public CmsSearchAnalyzer getCmsSearchAnalyzer(Locale locale) {
1204
1205        return m_analyzers.get(locale);
1206    }
1207
1208    /**
1209     * Returns the name of the directory below WEB-INF/ where the search indexes are stored.<p>
1210     *
1211     * @return the name of the directory below WEB-INF/ where the search indexes are stored
1212     */
1213    public String getDirectory() {
1214
1215        return m_path;
1216    }
1217
1218    /**
1219     * Returns the configured Solr home directory <code>null</code> if not set.<p>
1220     *
1221     * @return the Solr home directory
1222     */
1223    public String getDirectorySolr() {
1224
1225        return m_solrConfig != null ? m_solrConfig.getHome() : null;
1226    }
1227
1228    /**
1229     * Returns the document factory configured under the provided name.
1230     * @param docTypeName the name of the document type.
1231     * @return the factory for the provided name.
1232     */
1233    public I_CmsDocumentFactory getDocumentFactoryForName(String docTypeName) {
1234
1235        Map<String, I_CmsDocumentFactory> factoryMap = m_documentTypes.get(docTypeName);
1236        if (factoryMap != null) {
1237            Iterator<I_CmsDocumentFactory> factoryIt = factoryMap.values().iterator();
1238            if (factoryIt.hasNext()) {
1239                return factoryMap.values().iterator().next();
1240            }
1241        }
1242        return null;
1243    }
1244
1245    /**
1246     * Returns a document type config.<p>
1247     *
1248     * @param name the name of the document type config
1249     * @return the document type config.
1250     */
1251    public CmsSearchDocumentType getDocumentTypeConfig(String name) {
1252
1253        // this is really used only for the search manager GUI,
1254        // so performance is not an issue and no lookup map is generated
1255        for (int i = 0; i < m_documentTypeConfigs.size(); i++) {
1256            CmsSearchDocumentType type = m_documentTypeConfigs.get(i);
1257            if (type.getName().equals(name)) {
1258                return type;
1259            }
1260        }
1261        return null;
1262    }
1263
1264    /**
1265     * Returns an unmodifiable view (read-only) of the DocumentTypeConfigs Map.<p>
1266     *
1267     * @return an unmodifiable view (read-only) of the DocumentTypeConfigs Map
1268     */
1269    public List<CmsSearchDocumentType> getDocumentTypeConfigs() {
1270
1271        return Collections.unmodifiableList(m_documentTypeConfigs);
1272    }
1273
1274    /**
1275     * Returns the document type keys used to specify the correct document factory.
1276     *
1277     * @see #getDocumentTypeKeys(String, String) for detailed information on the returned keys.
1278     *
1279     * @param resource the resource to generate the list of document type keys for.
1280     * @return the document type keys.
1281     */
1282    public List<String> getDocumentTypeKeys(CmsResource resource) {
1283
1284        // first get the MIME type of the resource
1285        String mimeType = OpenCms.getResourceManager().getMimeType(resource.getRootPath(), null, "unknown");
1286        String resourceType = null;
1287        try {
1288            resourceType = OpenCms.getResourceManager().getResourceType(resource.getTypeId()).getTypeName();
1289        } catch (CmsLoaderException e) {
1290            // ignore, unknown resource type, resource can not be indexed
1291            LOG.info(e.getLocalizedMessage(), e);
1292        }
1293        return getDocumentTypeKeys(resourceType, mimeType);
1294    }
1295
1296    /**
1297     * Returns the document type keys used to specify the correct document factory.
1298     * One resource typically has more than one key. The document factories are matched
1299     * in the provided order and the first matching factory is used.
1300     *
1301     * The keys for type name "typename" and mimetype "mimetype" would be a subset of:
1302     * <ul>
1303     *  <li><code>typename_mimetype</code></li>
1304     *  <li><code>typename</code></li>
1305     *  <li>if <code>typename</code> is a sub-type of <code>containerpage</code>
1306     *      <ul>
1307     *          <li><code>containerpage_mimetype</code></li>
1308     *          <li><code>containerpage</code></li>
1309     *      </ul>
1310     *  </li>
1311     *  <li>if <code>typename</code> is a sub-type of <code>xmlcontent</code>
1312     *      <ul>
1313     *          <li><code>xmlcontent_mimetype</code></li>
1314     *          <li><code>xmlcontent</code></li>
1315     *      </ul>
1316     *  </li>
1317     *  <li><code>__unconfigured___mimetype</code></li>
1318     *  <li><code>__unconfigured__</code></li>
1319     *  <li><code>__all___mimetype</code></li>
1320     *  <li><code>__all__</code></li>
1321     * <ul>
1322     * Note that all keys except the "__all__"-keys are only added as long as globally
1323     * there is no matching factory for the key.
1324     * This in particular means that a factory matching "typename" will never be used
1325     * if you have a factory for "typename__mimetype" - even if this is not configured
1326     * for the used index source. Eventually, the content will not be indexed in such cases.
1327     * @param resourceType the resource type to generate the list of document type keys for.
1328     * @param mimeType the mime type to generate the list of document type keys for.
1329     * @return the document type keys.
1330     */
1331    public List<String> getDocumentTypeKeys(String resourceType, String mimeType) {
1332
1333        List<String> result = new ArrayList<>(8);
1334        if (null != resourceType) {
1335            String currentKey = A_CmsVfsDocument.getDocumentKey(resourceType, mimeType);
1336            result.add(currentKey);
1337            if (!m_extractionKeys.contains(currentKey)) {
1338                currentKey = A_CmsVfsDocument.getDocumentKey(resourceType, null);
1339                result.add(currentKey);
1340                if (!m_extractionKeys.contains(currentKey)) {
1341                    boolean hasGlobalMatch = false;
1342                    try {
1343                        String containerpageTypeName = CmsResourceTypeXmlContainerPage.getStaticTypeName();
1344                        I_CmsResourceType type = OpenCms.getResourceManager().getResourceType(resourceType);
1345                        if (!resourceType.equals(containerpageTypeName)) {
1346                            if (type instanceof CmsResourceTypeXmlContainerPage) {
1347                                if (!resourceType.equals(CmsResourceTypeXmlContainerPage.getStaticTypeName())) {
1348                                    currentKey = A_CmsVfsDocument.getDocumentKey(containerpageTypeName, mimeType);
1349                                    result.add(currentKey);
1350                                    hasGlobalMatch = m_extractionKeys.contains(currentKey);
1351                                    if (!hasGlobalMatch) {
1352                                        currentKey = A_CmsVfsDocument.getDocumentKey(containerpageTypeName, null);
1353                                        result.add(currentKey);
1354                                        hasGlobalMatch = m_extractionKeys.contains(currentKey);
1355                                    }
1356                                }
1357                            }
1358                        }
1359                        String xmlcontentTypeName = CmsResourceTypeXmlContent.getStaticTypeName();
1360                        if (!resourceType.equals(containerpageTypeName)) {
1361                            if (!hasGlobalMatch && (type instanceof CmsResourceTypeXmlContent)) {
1362                                currentKey = A_CmsVfsDocument.getDocumentKey(xmlcontentTypeName, mimeType);
1363                                result.add(currentKey);
1364                                hasGlobalMatch = m_extractionKeys.contains(currentKey);
1365                                if (!hasGlobalMatch) {
1366                                    currentKey = A_CmsVfsDocument.getDocumentKey(xmlcontentTypeName, null);
1367                                    result.add(currentKey);
1368                                    hasGlobalMatch = m_extractionKeys.contains(currentKey);
1369                                }
1370                            }
1371                        }
1372                    } catch (Throwable t) {
1373                        LOG.warn("Could not read type for name \"" + resourceType + "\".", t);
1374                    }
1375                    if (!hasGlobalMatch) {
1376                        result.add(
1377                            A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_UNCONFIGURED_TYPES, mimeType));
1378                        result.add(
1379                            A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_UNCONFIGURED_TYPES, null));
1380                    }
1381                }
1382            }
1383            result.add(A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_TYPES, mimeType));
1384            result.add(A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_TYPES, null));
1385        }
1386        return result;
1387
1388    }
1389
1390    /**
1391     * Returns the map from document type keys to document factories with all entries for the provided document type names.
1392     * @param documentTypeNames list of document type names to generate the map for.
1393     * @return the map from document type keys to document factories.
1394     */
1395    public Map<String, I_CmsDocumentFactory> getDocumentTypeMapForTypeNames(List<String> documentTypeNames) {
1396
1397        Map<String, I_CmsDocumentFactory> result = new LinkedHashMap<>();
1398        if (null != documentTypeNames) {
1399            // Iterate the list in reverse order to prefer factories that are added by document types listed earlier.
1400            ListIterator<String> typesIterator = documentTypeNames.listIterator(documentTypeNames.size());
1401            while (typesIterator.hasPrevious()) {
1402                Map<String, I_CmsDocumentFactory> factories = m_documentTypes.get(typesIterator.previous());
1403                if (null != factories) {
1404                    result.putAll(factories);
1405                }
1406            }
1407        }
1408        return result;
1409    }
1410
1411    /**
1412     * Returns the maximum age a text extraction result is kept in the cache (in hours).<p>
1413     *
1414     * @return the maximum age a text extraction result is kept in the cache (in hours)
1415     */
1416    public float getExtractionCacheMaxAge() {
1417
1418        return m_extractionCacheMaxAge;
1419    }
1420
1421    /**
1422     * Returns the search field configuration with the given name.<p>
1423     *
1424     * In case no configuration is available with the given name, <code>null</code> is returned.<p>
1425     *
1426     * @param name the name to get the search field configuration for
1427     *
1428     * @return the search field configuration with the given name
1429     */
1430    public I_CmsSearchFieldConfiguration getFieldConfiguration(String name) {
1431
1432        return m_fieldConfigurations.get(name);
1433    }
1434
1435    /**
1436     * Returns the unmodifieable List of configured {@link I_CmsSearchFieldConfiguration} entries.<p>
1437     *
1438     * @return the unmodifieable List of configured {@link I_CmsSearchFieldConfiguration} entries
1439     */
1440    public List<I_CmsSearchFieldConfiguration> getFieldConfigurations() {
1441
1442        List<I_CmsSearchFieldConfiguration> result = new ArrayList<I_CmsSearchFieldConfiguration>(
1443            m_fieldConfigurations.values());
1444        Collections.sort(result);
1445        return Collections.unmodifiableList(result);
1446    }
1447
1448    /**
1449     * Returns the Lucene search field configurations only.<p>
1450     *
1451     * @return the Lucene search field configurations
1452     */
1453    public List<CmsLuceneFieldConfiguration> getFieldConfigurationsLucene() {
1454
1455        List<CmsLuceneFieldConfiguration> result = new ArrayList<CmsLuceneFieldConfiguration>();
1456        for (I_CmsSearchFieldConfiguration conf : m_fieldConfigurations.values()) {
1457            if (conf instanceof CmsLuceneFieldConfiguration) {
1458                result.add((CmsLuceneFieldConfiguration)conf);
1459            }
1460        }
1461        Collections.sort(result);
1462        return Collections.unmodifiableList(result);
1463    }
1464
1465    /**
1466     * Returns the Solr search field configurations only.<p>
1467     *
1468     * @return the Solr search field configurations
1469     */
1470    public List<CmsSolrFieldConfiguration> getFieldConfigurationsSolr() {
1471
1472        List<CmsSolrFieldConfiguration> result = new ArrayList<CmsSolrFieldConfiguration>();
1473        for (I_CmsSearchFieldConfiguration conf : m_fieldConfigurations.values()) {
1474            if (conf instanceof CmsSolrFieldConfiguration) {
1475                result.add((CmsSolrFieldConfiguration)conf);
1476            }
1477        }
1478        Collections.sort(result);
1479        return Collections.unmodifiableList(result);
1480    }
1481
1482    /**
1483     * Returns the force unlock mode during indexing.<p>
1484     *
1485     * @return the force unlock mode during indexing
1486     */
1487    public CmsSearchForceUnlockMode getForceunlock() {
1488
1489        return m_forceUnlockMode;
1490    }
1491
1492    /**
1493     * Returns the highlighter.<p>
1494     *
1495     * @return the highlighter
1496     */
1497    public I_CmsTermHighlighter getHighlighter() {
1498
1499        return m_highlighter;
1500    }
1501
1502    /**
1503     * Returns the Lucene search index configured with the given name.<p>
1504     * The index must exist, otherwise <code>null</code> is returned.
1505     *
1506     * @param indexName then name of the requested search index
1507     *
1508     * @return the Lucene search index configured with the given name
1509     */
1510    public I_CmsSearchIndex getIndex(String indexName) {
1511
1512        for (I_CmsSearchIndex index : m_indexes) {
1513            if (indexName.equalsIgnoreCase(index.getName())) {
1514                return index;
1515            }
1516        }
1517        return null;
1518    }
1519
1520    /**
1521     * Returns the seconds to wait for an index lock during an update operation.<p>
1522     *
1523     * @return the seconds to wait for an index lock during an update operation
1524     */
1525    public int getIndexLockMaxWaitSeconds() {
1526
1527        return m_indexLockMaxWaitSeconds;
1528    }
1529
1530    /**
1531     * Returns the names of all configured indexes.<p>
1532     *
1533     * @return list of names
1534     */
1535    public List<String> getIndexNames() {
1536
1537        List<String> indexNames = new ArrayList<String>();
1538        for (int i = 0, n = m_indexes.size(); i < n; i++) {
1539            indexNames.add((m_indexes.get(i)).getName());
1540        }
1541
1542        return indexNames;
1543    }
1544
1545    /**
1546     * Returns the Solr index configured with the given name.<p>
1547     * The index must exist, otherwise <code>null</code> is returned.
1548     *
1549     * @param indexName then name of the requested Solr index
1550     * @return the Solr index configured with the given name
1551     */
1552    public CmsSolrIndex getIndexSolr(String indexName) {
1553
1554        I_CmsSearchIndex index = getIndex(indexName);
1555        if (index instanceof CmsSolrIndex) {
1556            return (CmsSolrIndex)index;
1557        }
1558        return null;
1559    }
1560
1561    /**
1562     * Returns a search index source for a specified source name.<p>
1563     *
1564     * @param sourceName the name of the index source
1565     * @return a search index source
1566     */
1567    public CmsSearchIndexSource getIndexSource(String sourceName) {
1568
1569        return m_indexSources.get(sourceName);
1570    }
1571
1572    /**
1573     * Returns the max. excerpt length.<p>
1574     *
1575     * @return the max excerpt length
1576     */
1577    public int getMaxExcerptLength() {
1578
1579        return m_maxExcerptLength;
1580    }
1581
1582    /**
1583     * Returns the maximal time to wait for re-indexing after a content is edited (in milliseconds).<p>
1584     *
1585     * @return the maximal time to wait for re-indexing after a content is edited (in milliseconds)
1586     */
1587    public long getMaxIndexWaitTime() {
1588
1589        return m_maxIndexWaitTime;
1590    }
1591
1592    /**
1593     * Returns the maximum number of modifications before a commit in the search index is triggered.<p>
1594     *
1595     * @return the maximum number of modifications before a commit in the search index is triggered
1596     */
1597    public int getMaxModificationsBeforeCommit() {
1598
1599        return m_maxModificationsBeforeCommit;
1600    }
1601
1602    /**
1603     * Returns the update frequency of the offline indexer in milliseconds.<p>
1604     *
1605     * @return the update frequency of the offline indexer in milliseconds
1606     */
1607    public long getOfflineUpdateFrequency() {
1608
1609        return m_offlineUpdateFrequency;
1610    }
1611
1612    /**
1613     * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p>
1614     *
1615     * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances
1616     */
1617    public List<I_CmsSearchIndex> getSearchIndexes() {
1618
1619        return Collections.unmodifiableList(m_indexes);
1620    }
1621
1622    /**
1623     * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p>
1624     *
1625     * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances
1626     */
1627    public List<I_CmsSearchIndex> getSearchIndexesAll() {
1628
1629        return Collections.unmodifiableList(m_indexes);
1630    }
1631
1632    /**
1633     * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p>
1634     *
1635     * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances
1636     */
1637    public List<CmsSolrIndex> getSearchIndexesSolr() {
1638
1639        List<CmsSolrIndex> indexes = new ArrayList<CmsSolrIndex>();
1640        for (I_CmsSearchIndex index : m_indexes) {
1641            if (index instanceof CmsSolrIndex) {
1642                indexes.add((CmsSolrIndex)index);
1643            }
1644        }
1645        return Collections.unmodifiableList(indexes);
1646    }
1647
1648    /**
1649     * Returns an unmodifiable view (read-only) of the SearchIndexSources Map.<p>
1650     *
1651     * @return an unmodifiable view (read-only) of the SearchIndexSources Map
1652     */
1653    public Map<String, CmsSearchIndexSource> getSearchIndexSources() {
1654
1655        return Collections.unmodifiableMap(m_indexSources);
1656    }
1657
1658    /**
1659     * Return singleton instance of the OpenCms spellchecker.<p>
1660     *
1661     * @return instance of CmsSolrSpellchecker.
1662     */
1663    public CmsSolrSpellchecker getSolrDictionary() {
1664
1665        // get the core container that contains one core for each configured index
1666        if (m_coreContainer == null) {
1667            m_coreContainer = createCoreContainer();
1668        }
1669        return CmsSolrSpellchecker.getInstance(m_coreContainer);
1670    }
1671
1672    /**
1673     * Returns the Solr configuration.<p>
1674     *
1675     * @return the Solr configuration
1676     */
1677    public CmsSolrConfiguration getSolrServerConfiguration() {
1678
1679        return m_solrConfig;
1680    }
1681
1682    /**
1683     * Returns the timeout to abandon threads indexing a resource.<p>
1684     *
1685     * @return the timeout to abandon threads indexing a resource
1686     */
1687    public long getTimeout() {
1688
1689        return m_timeout;
1690    }
1691
1692    /**
1693     * Initializes the search manager.<p>
1694     *
1695     * @param cms the cms object
1696     *
1697     * @throws CmsRoleViolationException in case the given opencms object does not have <code>{@link CmsRole#WORKPLACE_MANAGER}</code> permissions
1698     */
1699    public void initialize(CmsObject cms) throws CmsRoleViolationException {
1700
1701        OpenCms.getRoleManager().checkRole(cms, CmsRole.WORKPLACE_MANAGER);
1702        try {
1703            // store the Admin cms to index Cms resources
1704            m_adminCms = OpenCms.initCmsObject(cms);
1705        } catch (CmsException e) {
1706            // this should never happen
1707            LOG.error(e.getLocalizedMessage(), e);
1708        }
1709        // make sure the site root is the root site
1710        m_adminCms.getRequestContext().setSiteRoot("/");
1711
1712        // create the extraction result cache
1713        m_extractionResultCache = new CmsExtractionResultCache(
1714            OpenCms.getSystemInfo().getAbsoluteRfsPathRelativeToWebInf(getDirectory()),
1715            "/extractCache");
1716        initializeFieldConfigurations();
1717        initializeIndexes();
1718        initOfflineIndexes();
1719
1720        // register this object as event listener
1721        OpenCms.addCmsEventListener(
1722            this,
1723            new int[] {
1724                I_CmsEventListener.EVENT_CLEAR_CACHES,
1725                I_CmsEventListener.EVENT_PUBLISH_PROJECT,
1726                I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES,
1727                I_CmsEventListener.EVENT_REINDEX_OFFLINE,
1728                I_CmsEventListener.EVENT_REINDEX_ONLINE});
1729    }
1730
1731    /**
1732     * Calls {@link I_CmsSearchFieldConfiguration#init()} for all registered field configurations.
1733     */
1734    public void initializeFieldConfigurations() {
1735
1736        for (I_CmsSearchFieldConfiguration config : m_fieldConfigurations.values()) {
1737            config.init();
1738        }
1739
1740    }
1741
1742    /**
1743     * Initializes all configured document types, index sources and search indexes.<p>
1744     *
1745     * This methods needs to be called if after a change in the index configuration has been made.
1746     */
1747    public void initializeIndexes() {
1748
1749        initAvailableDocumentTypes();
1750        initIndexSources();
1751        initSearchIndexes();
1752    }
1753
1754    /**
1755     * Initialize the offline index handler, require after an offline index has been added.<p>
1756     */
1757    public void initOfflineIndexes() {
1758
1759        // check which indexes are configured as offline indexes
1760        List<I_CmsSearchIndex> offlineIndexes = new ArrayList<I_CmsSearchIndex>();
1761        Iterator<I_CmsSearchIndex> i = m_indexes.iterator();
1762        while (i.hasNext()) {
1763            I_CmsSearchIndex index = i.next();
1764            if (I_CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) {
1765                // this is an offline index
1766                offlineIndexes.add(index);
1767            }
1768        }
1769        m_offlineIndexes = offlineIndexes;
1770        m_offlineHandler.initialize();
1771
1772    }
1773
1774    /**
1775     * Initializes the spell check index.<p>
1776     *
1777     * @param adminCms the ROOT_ADMIN cms context
1778     */
1779    public void initSpellcheckIndex(CmsObject adminCms) {
1780
1781        if (CmsSpellcheckDictionaryIndexer.updatingIndexNecessesary(adminCms)) {
1782            final CmsSolrSpellchecker spellchecker = OpenCms.getSearchManager().getSolrDictionary();
1783            if (spellchecker != null) {
1784
1785                Runnable initRunner = new Runnable() {
1786
1787                    public void run() {
1788
1789                        try {
1790                            spellchecker.parseAndAddDictionaries(adminCms);
1791                        } catch (CmsRoleViolationException e) {
1792                            LOG.error(e.getLocalizedMessage(), e);
1793                        }
1794                    }
1795                };
1796                new Thread(initRunner).start();
1797            }
1798        }
1799    }
1800
1801    /**
1802     * Returns if the offline indexing is paused.<p>
1803     *
1804     * @return <code>true</code> if the offline indexing is paused
1805     */
1806    public boolean isOfflineIndexingPaused() {
1807
1808        return m_offlineUpdateFrequency == Long.MAX_VALUE;
1809    }
1810
1811    /**
1812     * Updates the indexes from as a scheduled job.<p>
1813     *
1814     * @param cms the OpenCms user context to use when reading resources from the VFS
1815     * @param parameters the parameters for the scheduled job
1816     *
1817     * @throws Exception if something goes wrong
1818     *
1819     * @return the String to write in the scheduler log
1820     *
1821     * @see org.opencms.scheduler.I_CmsScheduledJob#launch(CmsObject, Map)
1822     */
1823    public String launch(CmsObject cms, Map<String, String> parameters) throws Exception {
1824
1825        CmsSearchManager manager = OpenCms.getSearchManager();
1826
1827        I_CmsReport report = null;
1828        boolean writeLog = Boolean.valueOf(parameters.get(JOB_PARAM_WRITELOG)).booleanValue();
1829
1830        if (writeLog) {
1831            report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class);
1832        }
1833
1834        List<String> updateList = null;
1835        String indexList = parameters.get(JOB_PARAM_INDEXLIST);
1836        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(indexList)) {
1837            // index list has been provided as job parameter
1838            updateList = new ArrayList<String>();
1839            String[] indexNames = CmsStringUtil.splitAsArray(indexList, '|');
1840            for (int i = 0; i < indexNames.length; i++) {
1841                // check if the index actually exists
1842                if (manager.getIndex(indexNames[i]) != null) {
1843                    updateList.add(indexNames[i]);
1844                } else {
1845                    if (LOG.isWarnEnabled()) {
1846                        LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexNames[i]));
1847                    }
1848                }
1849            }
1850        }
1851
1852        long startTime = System.currentTimeMillis();
1853
1854        if (updateList == null) {
1855            // all indexes need to be updated
1856            manager.rebuildAllIndexes(report);
1857        } else {
1858            // rebuild only the selected indexes
1859            manager.rebuildIndexes(updateList, report);
1860        }
1861
1862        long runTime = System.currentTimeMillis() - startTime;
1863
1864        String finishMessage = Messages.get().getBundle().key(
1865            Messages.LOG_REBUILD_INDEXES_FINISHED_1,
1866            CmsStringUtil.formatRuntime(runTime));
1867
1868        if (LOG.isInfoEnabled()) {
1869            LOG.info(finishMessage);
1870        }
1871        return finishMessage;
1872    }
1873
1874    /**
1875     * Pauses the offline indexing.<p>
1876     * May take some time, because the indexes are updated first.<p>
1877     */
1878    public void pauseOfflineIndexing() {
1879
1880        if (m_offlineUpdateFrequency != Long.MAX_VALUE) {
1881            m_configuredOfflineIndexingFrequency = m_offlineUpdateFrequency;
1882            m_offlineUpdateFrequency = Long.MAX_VALUE;
1883            updateOfflineIndexes(0);
1884        }
1885    }
1886
1887    /**
1888     * Rebuilds (if required creates) all configured indexes.<p>
1889     *
1890     * @param report the report object to write messages (or <code>null</code>)
1891     *
1892     * @throws CmsException if something goes wrong
1893     */
1894    public void rebuildAllIndexes(I_CmsReport report) throws CmsException {
1895
1896        try {
1897            SEARCH_MANAGER_LOCK.lock();
1898
1899            CmsMessageContainer container = null;
1900            for (int i = 0, n = m_indexes.size(); i < n; i++) {
1901                // iterate all configured search indexes
1902                I_CmsSearchIndex searchIndex = m_indexes.get(i);
1903                try {
1904                    // update the index
1905                    updateIndex(searchIndex, report, null);
1906                } catch (CmsException e) {
1907                    container = new CmsMessageContainer(
1908                        Messages.get(),
1909                        Messages.ERR_INDEX_REBUILD_ALL_1,
1910                        new Object[] {searchIndex.getName()});
1911                    LOG.error(
1912                        Messages.get().getBundle().key(Messages.ERR_INDEX_REBUILD_ALL_1, searchIndex.getName()),
1913                        e);
1914                }
1915            }
1916            // clean up the extraction result cache
1917            cleanExtractionCache();
1918            if (container != null) {
1919                // throw stored exception
1920                throw new CmsSearchException(container);
1921            }
1922        } finally {
1923            SEARCH_MANAGER_LOCK.unlock();
1924        }
1925    }
1926
1927    /**
1928     * Rebuilds (if required creates) the index with the given name.<p>
1929     *
1930     * @param indexName the name of the index to rebuild
1931     * @param report the report object to write messages (or <code>null</code>)
1932     *
1933     * @throws CmsException if something goes wrong
1934     */
1935    public void rebuildIndex(String indexName, I_CmsReport report) throws CmsException {
1936
1937        try {
1938            SEARCH_MANAGER_LOCK.lock();
1939            // get the search index by name
1940            I_CmsSearchIndex index = getIndex(indexName);
1941            // update the index
1942            updateIndex(index, report, null);
1943            // clean up the extraction result cache
1944            cleanExtractionCache();
1945        } finally {
1946            SEARCH_MANAGER_LOCK.unlock();
1947        }
1948    }
1949
1950    /**
1951     * Rebuilds (if required creates) the List of indexes with the given name.<p>
1952     *
1953     * @param indexNames the names (String) of the index to rebuild
1954     * @param report the report object to write messages (or <code>null</code>)
1955     *
1956     * @throws CmsException if something goes wrong
1957     */
1958    public void rebuildIndexes(List<String> indexNames, I_CmsReport report) throws CmsException {
1959
1960        try {
1961            SEARCH_MANAGER_LOCK.lock();
1962            Iterator<String> i = indexNames.iterator();
1963            while (i.hasNext()) {
1964                String indexName = i.next();
1965                // get the search index by name
1966                I_CmsSearchIndex index = getIndex(indexName);
1967                if (index != null) {
1968                    // update the index
1969                    updateIndex(index, report, null);
1970                } else {
1971                    if (LOG.isWarnEnabled()) {
1972                        LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName));
1973                    }
1974                }
1975            }
1976            // clean up the extraction result cache
1977            cleanExtractionCache();
1978        } finally {
1979            SEARCH_MANAGER_LOCK.unlock();
1980        }
1981    }
1982
1983    /**
1984     * Registers a new Solr core for the given index.<p>
1985     *
1986     * @param index the index to register a new Solr core for
1987     *
1988     * @throws CmsConfigurationException if no Solr server is configured
1989     */
1990    @SuppressWarnings("resource")
1991    public void registerSolrIndex(CmsSolrIndex index) throws CmsConfigurationException {
1992
1993        if ((m_solrConfig == null) || !m_solrConfig.isEnabled()) {
1994            // No solr server configured
1995            throw new CmsConfigurationException(Messages.get().container(Messages.ERR_SOLR_NOT_ENABLED_0));
1996        }
1997
1998        if (index.getServerUrl() != null) { // Use the index-specific Solr-Server if present.
1999            index.setSolrServer(new Builder().withBaseSolrUrl(index.getServerUrl()).build());
2000        } else if (m_solrConfig.getServerUrl() != null) { // Use the globally configured external Solr-Server if present.
2001            // HTTP Server configured
2002            // TODO Implement multi core support for HTTP server
2003            // @see http://lucidworks.lucidimagination.com/display/solr/Configuring+solr.xml
2004            index.setSolrServer(new Builder().withBaseSolrUrl(m_solrConfig.getServerUrl()).build());
2005        } else { // Default to the embedded Solr Server
2006
2007            // get the core container that contains one core for each configured index
2008            if (m_coreContainer == null) {
2009                m_coreContainer = createCoreContainer();
2010            }
2011
2012            // unload the existing core if it exists to avoid problems with forced unlock.
2013            if (m_coreContainer.getAllCoreNames().contains(index.getCoreName())) {
2014                m_coreContainer.unload(index.getCoreName(), false, false, true);
2015            }
2016            // ensure that all locks on the index are gone
2017            ensureIndexIsUnlocked(index.getPath());
2018
2019            // load the core to the container
2020            File dataDir = new File(index.getPath());
2021            if (!dataDir.exists()) {
2022                dataDir.mkdirs();
2023                if (CmsLog.INIT.isInfoEnabled()) {
2024                    CmsLog.INIT.info(
2025                        Messages.get().getBundle().key(
2026                            Messages.INIT_SOLR_INDEX_DIR_CREATED_2,
2027                            index.getName(),
2028                            index.getPath()));
2029                }
2030            }
2031            File instanceDir = new File(
2032                m_solrConfig.getHome() + FileSystems.getDefault().getSeparator() + index.getName());
2033            if (!instanceDir.exists()) {
2034                instanceDir.mkdirs();
2035                if (CmsLog.INIT.isInfoEnabled()) {
2036                    CmsLog.INIT.info(
2037                        Messages.get().getBundle().key(
2038                            Messages.INIT_SOLR_INDEX_DIR_CREATED_2,
2039                            index.getName(),
2040                            index.getPath()));
2041                }
2042            }
2043
2044            // create the core
2045            // TODO: suboptimal - forces always the same schema
2046            SolrCore core = null;
2047            try {
2048                // creation includes registration.
2049                // TODO: this was the old code: core = m_coreContainer.create(descriptor, false);
2050                Map<String, String> properties = new HashMap<String, String>(3);
2051                properties.put(CoreDescriptor.CORE_DATADIR, dataDir.getAbsolutePath());
2052                properties.put(CoreDescriptor.CORE_CONFIGSET, "default");
2053                core = m_coreContainer.create(index.getCoreName(), instanceDir.toPath(), properties, false);
2054            } catch (NullPointerException e) {
2055                if (core != null) {
2056                    core.close();
2057                }
2058                throw new CmsConfigurationException(
2059                    Messages.get().container(
2060                        Messages.ERR_SOLR_SERVER_NOT_CREATED_3,
2061                        index.getName() + " (" + index.getCoreName() + ")",
2062                        index.getPath(),
2063                        m_solrConfig.getSolrConfigFile().getAbsolutePath()),
2064                    e);
2065            }
2066
2067            if (index.isNoSolrServerSet()) {
2068                index.setSolrServer(new EmbeddedSolrServer(m_coreContainer, index.getCoreName()));
2069            }
2070            if (CmsLog.INIT.isInfoEnabled()) {
2071                CmsLog.INIT.info(
2072                    Messages.get().getBundle().key(
2073                        Messages.INIT_SOLR_SERVER_CREATED_1,
2074                        index.getName() + " (" + index.getCoreName() + ")"));
2075            }
2076        }
2077    }
2078
2079    /**
2080     * Removes this field configuration from the OpenCms configuration (if it is not used any more).<p>
2081     *
2082     * @param fieldConfiguration the field configuration to remove from the configuration
2083     *
2084     * @return true if remove was successful, false if preconditions for removal are ok but the given
2085     *         field configuration was unknown to the manager.
2086     *
2087     * @throws CmsIllegalStateException if the given field configuration is still used by at least one
2088     *         <code>{@link I_CmsSearchIndex}</code>.
2089     *
2090     */
2091    public boolean removeSearchFieldConfiguration(I_CmsSearchFieldConfiguration fieldConfiguration)
2092    throws CmsIllegalStateException {
2093
2094        // never remove the standard field configuration
2095        if (fieldConfiguration.getName().equals(CmsSearchFieldConfiguration.STR_STANDARD)) {
2096            throw new CmsIllegalStateException(
2097                Messages.get().container(
2098                    Messages.ERR_INDEX_CONFIGURATION_DELETE_STANDARD_1,
2099                    fieldConfiguration.getName()));
2100        }
2101        // validation if removal will be granted
2102        Iterator<I_CmsSearchIndex> itIndexes = m_indexes.iterator();
2103        I_CmsSearchIndex idx;
2104        // the list for collecting indexes that use the given field configuration
2105        List<I_CmsSearchIndex> referrers = new ArrayList<I_CmsSearchIndex>();
2106        I_CmsSearchFieldConfiguration refFieldConfig;
2107        while (itIndexes.hasNext()) {
2108            idx = itIndexes.next();
2109            refFieldConfig = idx.getFieldConfiguration();
2110            if (refFieldConfig.equals(fieldConfiguration)) {
2111                referrers.add(idx);
2112            }
2113        }
2114        if (referrers.size() > 0) {
2115            throw new CmsIllegalStateException(
2116                Messages.get().container(
2117                    Messages.ERR_INDEX_CONFIGURATION_DELETE_2,
2118                    fieldConfiguration.getName(),
2119                    referrers.toString()));
2120        }
2121
2122        // remove operation (no exception)
2123        return m_fieldConfigurations.remove(fieldConfiguration.getName()) != null;
2124
2125    }
2126
2127    /**
2128     * Removes a search field from the field configuration.<p>
2129     *
2130     * @param fieldConfiguration the field configuration
2131     * @param field field to remove from the field configuration
2132     *
2133     * @return true if remove was successful, false if preconditions for removal are ok but the given
2134     *         field was unknown.
2135     */
2136    public boolean removeSearchFieldConfigurationField(
2137        I_CmsSearchFieldConfiguration fieldConfiguration,
2138        CmsSearchField field) {
2139
2140        if (LOG.isInfoEnabled()) {
2141            LOG.info(
2142                Messages.get().getBundle().key(
2143                    Messages.LOG_REMOVE_FIELDCONFIGURATION_FIELD_INDEX_2,
2144                    field.getName(),
2145                    fieldConfiguration.getName()));
2146        }
2147
2148        return fieldConfiguration.getFields().remove(field);
2149    }
2150
2151    /**
2152     * Removes a search field mapping from the given field.<p>
2153     *
2154     * @param field the field
2155     * @param mapping mapping to remove from the field
2156     *
2157     * @return true if remove was successful, false if preconditions for removal are ok but the given
2158     *         mapping was unknown.
2159     *
2160     * @throws CmsIllegalStateException if the given mapping is the last mapping inside the given field.
2161     */
2162    public boolean removeSearchFieldMapping(CmsLuceneField field, CmsSearchFieldMapping mapping)
2163    throws CmsIllegalStateException {
2164
2165        if (field.getMappings().size() < 2) {
2166            throw new CmsIllegalStateException(
2167                Messages.get().container(
2168                    Messages.ERR_FIELD_MAPPING_DELETE_2,
2169                    mapping.getType().toString(),
2170                    field.getName()));
2171        } else {
2172
2173            if (LOG.isInfoEnabled()) {
2174                LOG.info(
2175                    Messages.get().getBundle().key(
2176                        Messages.LOG_REMOVE_FIELD_MAPPING_INDEX_2,
2177                        mapping.toString(),
2178                        field.getName()));
2179            }
2180            return field.getMappings().remove(mapping);
2181        }
2182    }
2183
2184    /**
2185     * Removes a search index from the configuration.<p>
2186     *
2187     * @param searchIndex the search index to remove
2188     */
2189    public void removeSearchIndex(I_CmsSearchIndex searchIndex) {
2190
2191        // shut down index to remove potential config files of Solr indexes
2192        searchIndex.shutDown();
2193        if (searchIndex instanceof CmsSolrIndex) {
2194            CmsSolrIndex solrIndex = (CmsSolrIndex)searchIndex;
2195            m_coreContainer.unload(solrIndex.getCoreName(), true, true, true);
2196        }
2197        m_indexes.remove(searchIndex);
2198        initOfflineIndexes();
2199
2200        if (LOG.isInfoEnabled()) {
2201            LOG.info(
2202                Messages.get().getBundle().key(
2203                    Messages.LOG_REMOVE_SEARCH_INDEX_2,
2204                    searchIndex.getName(),
2205                    searchIndex.getProject()));
2206        }
2207    }
2208
2209    /**
2210     * Removes all indexes included in the given list (which must contain the name of an index to remove).<p>
2211     *
2212     * @param indexNames the names of the index to remove
2213     */
2214    public void removeSearchIndexes(List<String> indexNames) {
2215
2216        Iterator<String> i = indexNames.iterator();
2217        while (i.hasNext()) {
2218            String indexName = i.next();
2219            // get the search index by name
2220            I_CmsSearchIndex index = getIndex(indexName);
2221            if (index != null) {
2222                // remove the index
2223                removeSearchIndex(index);
2224            } else {
2225                if (LOG.isWarnEnabled()) {
2226                    LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName));
2227                }
2228            }
2229        }
2230    }
2231
2232    /**
2233     * Removes this indexsource from the OpenCms configuration (if it is not used any more).<p>
2234     *
2235     * @param indexsource the indexsource to remove from the configuration
2236     *
2237     * @return true if remove was successful, false if preconditions for removal are ok but the given
2238     *         searchindex was unknown to the manager.
2239     *
2240     * @throws CmsIllegalStateException if the given indexsource is still used by at least one
2241     *         <code>{@link I_CmsSearchIndex}</code>.
2242     *
2243     */
2244    public boolean removeSearchIndexSource(CmsSearchIndexSource indexsource) throws CmsIllegalStateException {
2245
2246        // validation if removal will be granted
2247        Iterator<I_CmsSearchIndex> itIndexes = m_indexes.iterator();
2248        I_CmsSearchIndex idx;
2249        // the list for collecting indexes that use the given index source
2250        List<I_CmsSearchIndex> referrers = new ArrayList<I_CmsSearchIndex>();
2251        // the current list of referred index sources of the iterated index
2252        List<CmsSearchIndexSource> refsources;
2253        while (itIndexes.hasNext()) {
2254            idx = itIndexes.next();
2255            refsources = idx.getSources();
2256            if (refsources != null) {
2257                if (refsources.contains(indexsource)) {
2258                    referrers.add(idx);
2259                }
2260            }
2261        }
2262        if (referrers.size() > 0) {
2263            throw new CmsIllegalStateException(
2264                Messages.get().container(
2265                    Messages.ERR_INDEX_SOURCE_DELETE_2,
2266                    indexsource.getName(),
2267                    referrers.toString()));
2268        }
2269
2270        // remove operation (no exception)
2271        return m_indexSources.remove(indexsource.getName()) != null;
2272
2273    }
2274
2275    /**
2276     * Resumes offline indexing if it was paused.<p>
2277     */
2278    public void resumeOfflineIndexing() {
2279
2280        if (m_offlineUpdateFrequency == Long.MAX_VALUE) {
2281            setOfflineUpdateFrequency(
2282                m_configuredOfflineIndexingFrequency > 0
2283                ? m_configuredOfflineIndexingFrequency
2284                : DEFAULT_OFFLINE_UPDATE_FREQNENCY);
2285        }
2286    }
2287
2288    /**
2289     * Sets the name of the directory below WEB-INF/ where the search indexes are stored.<p>
2290     *
2291     * @param value the name of the directory below WEB-INF/ where the search indexes are stored
2292     */
2293    public void setDirectory(String value) {
2294
2295        m_path = value;
2296    }
2297
2298    /**
2299     * Sets the maximum age a text extraction result is kept in the cache (in hours).<p>
2300     *
2301     * @param extractionCacheMaxAge the maximum age for a text extraction result to set
2302     */
2303    public void setExtractionCacheMaxAge(float extractionCacheMaxAge) {
2304
2305        m_extractionCacheMaxAge = extractionCacheMaxAge;
2306    }
2307
2308    /**
2309     * Sets the maximum age a text extraction result is kept in the cache (in hours) as a String.<p>
2310     *
2311     * @param extractionCacheMaxAge the maximum age for a text extraction result to set
2312     */
2313    public void setExtractionCacheMaxAge(String extractionCacheMaxAge) {
2314
2315        try {
2316            setExtractionCacheMaxAge(Float.parseFloat(extractionCacheMaxAge));
2317        } catch (NumberFormatException e) {
2318            LOG.error(
2319                Messages.get().getBundle().key(
2320                    Messages.LOG_PARSE_EXTRACTION_CACHE_AGE_FAILED_2,
2321                    extractionCacheMaxAge,
2322                    new Float(DEFAULT_EXTRACTION_CACHE_MAX_AGE)),
2323                e);
2324            setExtractionCacheMaxAge(DEFAULT_EXTRACTION_CACHE_MAX_AGE);
2325        }
2326    }
2327
2328    /**
2329     * Sets the unlock mode during indexing.<p>
2330     *
2331     * @param value the value
2332     */
2333    public void setForceunlock(String value) {
2334
2335        m_forceUnlockMode = CmsSearchForceUnlockMode.valueOf(value);
2336    }
2337
2338    /**
2339     * Sets the highlighter.<p>
2340     *
2341     * A highlighter is a class implementing org.opencms.search.documents.I_TermHighlighter.<p>
2342     *
2343     * @param highlighter the package/class name of the highlighter
2344     */
2345    public void setHighlighter(String highlighter) {
2346
2347        try {
2348            m_highlighter = (I_CmsTermHighlighter)Class.forName(highlighter).newInstance();
2349        } catch (Exception e) {
2350            m_highlighter = null;
2351            LOG.error(e.getLocalizedMessage(), e);
2352        }
2353    }
2354
2355    /**
2356     * Sets the seconds to wait for an index lock during an update operation.<p>
2357     *
2358     * @param value the seconds to wait for an index lock during an update operation
2359     */
2360    public void setIndexLockMaxWaitSeconds(int value) {
2361
2362        m_indexLockMaxWaitSeconds = value;
2363    }
2364
2365    /**
2366     * Sets the max. excerpt length.<p>
2367     *
2368     * @param maxExcerptLength the max. excerpt length to set
2369     */
2370    public void setMaxExcerptLength(int maxExcerptLength) {
2371
2372        m_maxExcerptLength = maxExcerptLength;
2373    }
2374
2375    /**
2376     * Sets the max. excerpt length as a String.<p>
2377     *
2378     * @param maxExcerptLength the max. excerpt length to set
2379     */
2380    public void setMaxExcerptLength(String maxExcerptLength) {
2381
2382        try {
2383            setMaxExcerptLength(Integer.parseInt(maxExcerptLength));
2384        } catch (Exception e) {
2385            LOG.error(
2386                Messages.get().getBundle().key(
2387                    Messages.LOG_PARSE_EXCERPT_LENGTH_FAILED_2,
2388                    maxExcerptLength,
2389                    new Integer(DEFAULT_EXCERPT_LENGTH)),
2390                e);
2391            setMaxExcerptLength(DEFAULT_EXCERPT_LENGTH);
2392        }
2393    }
2394
2395    /**
2396     * Sets the maximal wait time for offline index updates after edit operations.<p>
2397     *
2398     * @param maxIndexWaitTime  the maximal wait time to set in milliseconds
2399     */
2400    public void setMaxIndexWaitTime(long maxIndexWaitTime) {
2401
2402        m_maxIndexWaitTime = maxIndexWaitTime;
2403    }
2404
2405    /**
2406     * Sets the maximal wait time for offline index updates after edit operations.<p>
2407     *
2408     * @param maxIndexWaitTime the maximal wait time to set in milliseconds
2409     */
2410    public void setMaxIndexWaitTime(String maxIndexWaitTime) {
2411
2412        try {
2413            setMaxIndexWaitTime(Long.parseLong(maxIndexWaitTime));
2414        } catch (Exception e) {
2415            LOG.error(
2416                Messages.get().getBundle().key(
2417                    Messages.LOG_PARSE_MAX_INDEX_WAITTIME_FAILED_2,
2418                    maxIndexWaitTime,
2419                    new Long(DEFAULT_MAX_INDEX_WAITTIME)),
2420                e);
2421            setMaxIndexWaitTime(DEFAULT_MAX_INDEX_WAITTIME);
2422        }
2423    }
2424
2425    /**
2426     * Sets the maximum number of modifications before a commit in the search index is triggered.<p>
2427     *
2428     * @param maxModificationsBeforeCommit the maximum number of modifications to set
2429     */
2430    public void setMaxModificationsBeforeCommit(int maxModificationsBeforeCommit) {
2431
2432        m_maxModificationsBeforeCommit = maxModificationsBeforeCommit;
2433    }
2434
2435    /**
2436     * Sets the maximum number of modifications before a commit in the search index is triggered as a string.<p>
2437     *
2438     * @param value the maximum number of modifications to set
2439     */
2440    public void setMaxModificationsBeforeCommit(String value) {
2441
2442        try {
2443            setMaxModificationsBeforeCommit(Integer.parseInt(value));
2444        } catch (Exception e) {
2445            LOG.error(
2446                Messages.get().getBundle().key(
2447                    Messages.LOG_PARSE_MAXCOMMIT_FAILED_2,
2448                    value,
2449                    new Integer(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT)),
2450                e);
2451            setMaxModificationsBeforeCommit(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT);
2452        }
2453    }
2454
2455    /**
2456     * Sets the update frequency of the offline indexer in milliseconds.<p>
2457     *
2458     * @param offlineUpdateFrequency the update frequency in milliseconds to set
2459     */
2460    public void setOfflineUpdateFrequency(long offlineUpdateFrequency) {
2461
2462        m_offlineUpdateFrequency = offlineUpdateFrequency;
2463        updateOfflineIndexes(0);
2464    }
2465
2466    /**
2467     * Sets the update frequency of the offline indexer in milliseconds.<p>
2468     *
2469     * @param offlineUpdateFrequency the update frequency in milliseconds to set
2470     */
2471    public void setOfflineUpdateFrequency(String offlineUpdateFrequency) {
2472
2473        try {
2474            setOfflineUpdateFrequency(Long.parseLong(offlineUpdateFrequency));
2475        } catch (Exception e) {
2476            LOG.error(
2477                Messages.get().getBundle().key(
2478                    Messages.LOG_PARSE_OFFLINE_UPDATE_FAILED_2,
2479                    offlineUpdateFrequency,
2480                    new Long(DEFAULT_OFFLINE_UPDATE_FREQNENCY)),
2481                e);
2482            setOfflineUpdateFrequency(DEFAULT_OFFLINE_UPDATE_FREQNENCY);
2483        }
2484    }
2485
2486    /**
2487     * Sets the Solr configuration.<p>
2488     *
2489     * @param config the Solr configuration
2490     */
2491    public void setSolrServerConfiguration(CmsSolrConfiguration config) {
2492
2493        m_solrConfig = config;
2494    }
2495
2496    /**
2497     * Sets the timeout to abandon threads indexing a resource.<p>
2498     *
2499     * @param value the timeout in milliseconds
2500     */
2501    public void setTimeout(long value) {
2502
2503        m_timeout = value;
2504    }
2505
2506    /**
2507     * Sets the timeout to abandon threads indexing a resource as a String.<p>
2508     *
2509     * @param value the timeout in milliseconds
2510     */
2511    public void setTimeout(String value) {
2512
2513        try {
2514            setTimeout(Long.parseLong(value));
2515        } catch (Exception e) {
2516            LOG.error(
2517                Messages.get().getBundle().key(Messages.LOG_PARSE_TIMEOUT_FAILED_2, value, new Long(DEFAULT_TIMEOUT)),
2518                e);
2519            setTimeout(DEFAULT_TIMEOUT);
2520        }
2521    }
2522
2523    /**
2524     * Shuts down the search manager.<p>
2525     *
2526     * This will cause all search indices to be shut down.<p>
2527     */
2528    public void shutDown() {
2529
2530        if (m_offlineIndexThread != null) {
2531            m_offlineIndexThread.shutDown();
2532        }
2533
2534        if (m_offlineHandler != null) {
2535            OpenCms.removeCmsEventListener(m_offlineHandler);
2536        }
2537
2538        Iterator<I_CmsSearchIndex> i = m_indexes.iterator();
2539        while (i.hasNext()) {
2540            I_CmsSearchIndex index = i.next();
2541            index.shutDown();
2542            index = null;
2543        }
2544        m_indexes.clear();
2545
2546        shutDownSolrContainer();
2547
2548        if (CmsLog.INIT.isInfoEnabled()) {
2549            CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SHUTDOWN_MANAGER_0));
2550        }
2551    }
2552
2553    /**
2554     * Updates all offline indexes.<p>
2555     *
2556     * Can be used to force an index update when it's not convenient to wait until the
2557     * offline update interval has eclipsed.<p>
2558     *
2559     * Since the offline indexes still need some time to update the new resources,
2560     * the method waits for at most the configurable <code>maxIndexWaitTime</code>
2561     * to ensure that updating is finished.
2562     *
2563     * @see #updateOfflineIndexes(long)
2564     *
2565     */
2566    public void updateOfflineIndexes() {
2567
2568        updateOfflineIndexes(getMaxIndexWaitTime());
2569    }
2570
2571    /**
2572     * Updates all offline indexes.<p>
2573     *
2574     * Can be used to force an index update when it's not convenient to wait until the
2575     * offline update interval has eclipsed.<p>
2576     *
2577     * Since the offline index will still need some time to update the new resources even if it runs directly,
2578     * a wait time of 2500 or so should be given in order to make sure the index finished updating.
2579     *
2580     * @param waitTime milliseconds to wait after the offline update index was notified of the changes
2581     */
2582    public void updateOfflineIndexes(long waitTime) {
2583
2584        if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) {
2585            // notify existing thread of update frequency change
2586            if (LOG.isDebugEnabled()) {
2587                LOG.debug(Messages.get().getBundle().key(Messages.LOG_OI_UPDATE_INTERRUPT_0));
2588            }
2589            m_offlineIndexThread.interrupt();
2590            if (waitTime > 0) {
2591                m_offlineIndexThread.getWaitHandle().enter(waitTime);
2592            }
2593        }
2594    }
2595
2596    /**
2597     * Collects the resources whose indexed document depends on one of the updated resources.<p>
2598     * We take transitive dependencies into account and handle cyclic dependencies correctly as well.
2599     *
2600     * @param adminCms an OpenCms user context with Admin permissions
2601     * @param updateResources the resources to be re-indexed
2602     *
2603     * @return the updated list of resource to re-index
2604     */
2605    protected List<CmsPublishedResource> addAdditionallyAffectedResources(
2606        CmsObject adminCms,
2607        List<CmsPublishedResource> updateResources) {
2608
2609        if (updateResources.size() > 0) {
2610            Set<CmsPublishedResource> updateResourceSet = new HashSet<>(updateResources);
2611            Collection<CmsPublishedResource> resourcesToCheck = updateResourceSet;
2612            Collection<CmsPublishedResource> additionalResources = Collections.emptySet();
2613            do {
2614                additionalResources = findRelatedContainerPages(adminCms, updateResourceSet, resourcesToCheck);
2615                additionalResources.addAll(
2616                    addIndexContentRelatedResources(adminCms, updateResourceSet, resourcesToCheck));
2617                updateResources.addAll(additionalResources);
2618                updateResourceSet.addAll(additionalResources);
2619                resourcesToCheck = additionalResources;
2620            } while (resourcesToCheck.size() > 0);
2621        }
2622        return updateResources;
2623    }
2624
2625    /**
2626     * Collects the resources whose indexed document depends on one of the updated resources.<p>
2627     *
2628     * @param adminCms an OpenCms user context with Admin permissions
2629     * @param updateResources the resources to be re-indexed
2630     * @param updateResourcesToCheck the resources to check additionally affected resources for, subset of updateResources
2631     *
2632     * @return the list of resources that need to be additionally re-index
2633     */
2634    protected Collection<CmsPublishedResource> addIndexContentRelatedResources(
2635        CmsObject adminCms,
2636        Collection<CmsPublishedResource> updateResources,
2637        Collection<CmsPublishedResource> updateResourcesToCheck) {
2638
2639        Collection<CmsPublishedResource> additionalUpdateResources = new HashSet<>();
2640        for (CmsPublishedResource checkedRes : updateResourcesToCheck) {
2641            try {
2642                CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(checkedRes.getStructureId());
2643                filter = filter.filterType(CmsRelationType.INDEX_CONTENT);
2644                List<CmsRelation> relations = adminCms.readRelations(filter);
2645                for (CmsRelation relation : relations) {
2646                    CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL);
2647                    CmsPublishedResource additionalPubRes = new CmsPublishedResource(res);
2648                    if (!updateResources.contains(additionalPubRes)) {
2649                        additionalUpdateResources.add(additionalPubRes);
2650                    }
2651                }
2652            } catch (CmsException e) {
2653                LOG.error(e.getLocalizedMessage(), e);
2654            }
2655        }
2656        return additionalUpdateResources;
2657    }
2658
2659    /**
2660     * Cleans up the extraction result cache.<p>
2661     */
2662    protected void cleanExtractionCache() {
2663
2664        // clean up the extraction result cache
2665        m_extractionResultCache.cleanCache(m_extractionCacheMaxAge);
2666    }
2667
2668    /**
2669     * Collects the related containerpages to the resources that have been published.<p>
2670     *
2671     * @param adminCms an OpenCms user context with Admin permissions
2672     * @param updateResources the resources to be re-indexed
2673     * @param updateResourcesToCheck the resources to check additionally affected resources for, subset of updateResources
2674     *
2675     * @return the list of resources that need to be additionally re-index
2676     */
2677    protected Collection<CmsPublishedResource> findRelatedContainerPages(
2678        CmsObject adminCms,
2679        Collection<CmsPublishedResource> updateResources,
2680        Collection<CmsPublishedResource> updateResourcesToCheck) {
2681
2682        CmsResourceManager resMan = OpenCms.getResourceManager();
2683        Collection<CmsPublishedResource> additionalUpdateResources = new HashSet<>();
2684
2685        Set<CmsResource> containerPages = new HashSet<CmsResource>();
2686        int containerPageTypeId = -1;
2687        try {
2688            containerPageTypeId = CmsResourceTypeXmlContainerPage.getContainerPageTypeId();
2689        } catch (CmsLoaderException e) {
2690            // will happen during setup, when container page type is not available yet
2691            LOG.info(e.getLocalizedMessage(), e);
2692        }
2693        if (containerPageTypeId != -1) {
2694            for (CmsPublishedResource pubRes : updateResourcesToCheck) {
2695                try {
2696                    if (resMan.getResourceType(pubRes.getType()) instanceof CmsResourceTypeXmlContent) {
2697                        if (!isGroup(pubRes.getType())) {
2698                            CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(
2699                                pubRes.getStructureId()).filterStrong();
2700                            List<CmsRelation> relations = adminCms.readRelations(filter);
2701                            for (CmsRelation relation : relations) {
2702                                CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL);
2703                                if (CmsResourceTypeXmlContainerPage.isContainerPage(res)) {
2704                                    containerPages.add(res);
2705                                    if (CmsDetailOnlyContainerUtil.isDetailContainersPage(
2706                                        adminCms,
2707                                        adminCms.getSitePath(res))) {
2708                                        addDetailContent(adminCms, containerPages, adminCms.getSitePath(res));
2709                                    }
2710                                }
2711                            }
2712                        }
2713                    }
2714                    if (containerPageTypeId == pubRes.getType()) {
2715                        addDetailContent(
2716                            adminCms,
2717                            containerPages,
2718                            adminCms.getRequestContext().removeSiteRoot(pubRes.getRootPath()));
2719                    }
2720                } catch (CmsException e) {
2721                    LOG.error(e.getLocalizedMessage(), e);
2722                }
2723            }
2724            // add all found container pages as published resource objects to the list
2725            for (CmsResource page : containerPages) {
2726                CmsPublishedResource pubCont = new CmsPublishedResource(page);
2727                if (!updateResources.contains(pubCont)) {
2728                    // ensure container page is added only once
2729                    additionalUpdateResources.add(pubCont);
2730                }
2731            }
2732        }
2733        return additionalUpdateResources;
2734    }
2735
2736    /**
2737     * Returns the set of names of all configured document types.<p>
2738     *
2739     * @return the set of names of all configured document types
2740     */
2741    protected List<String> getDocumentTypes() {
2742
2743        return Collections.unmodifiableList(new ArrayList<String>(m_documentTypes.keySet()));
2744    }
2745
2746    /**
2747     * Returns the a offline project used for offline indexing.<p>
2748     *
2749     * @return the offline project if available
2750     */
2751    protected CmsProject getOfflineIndexProject() {
2752
2753        CmsProject result = null;
2754        for (I_CmsSearchIndex index : m_offlineIndexes) {
2755            try {
2756                result = m_adminCms.readProject(index.getProject());
2757
2758                if (!result.isOnlineProject()) {
2759                    break;
2760                }
2761            } catch (Exception e) {
2762                // may be a missconfigured index, ignore
2763                LOG.error(e.getLocalizedMessage(), e);
2764            }
2765        }
2766        return result;
2767    }
2768
2769    /**
2770     * Returns a new thread manager for the indexing threads.<p>
2771     *
2772     * @return a new thread manager for the indexing threads
2773     */
2774    protected CmsIndexingThreadManager getThreadManager() {
2775
2776        return new CmsIndexingThreadManager(m_timeout, m_maxModificationsBeforeCommit);
2777    }
2778
2779    /**
2780     * Initializes the available Cms resource types to be indexed.<p>
2781     *
2782     * A map stores document factories keyed by a string representing
2783     * a colon separated list of Cms resource types and/or mimetypes.<p>
2784     *
2785     * The keys of this map are used to trigger a document factory to convert
2786     * a Cms resource into a Lucene index document.<p>
2787     *
2788     * A document factory is a class implementing the interface
2789     * {@link org.opencms.search.documents.I_CmsDocumentFactory}.<p>
2790     */
2791    protected void initAvailableDocumentTypes() {
2792
2793        CmsSearchDocumentType documenttype = null;
2794        String className = null;
2795        String name = null;
2796        I_CmsDocumentFactory documentFactory = null;
2797        List<String> resourceTypes = null;
2798        List<String> mimeTypes = null;
2799        Class<?> c = null;
2800
2801        m_documentTypes = new LinkedHashMap<String, Map<String, I_CmsDocumentFactory>>();
2802
2803        for (int i = 0, n = m_documentTypeConfigs.size(); i < n; i++) {
2804
2805            documenttype = m_documentTypeConfigs.get(i);
2806            name = documenttype.getName();
2807
2808            try {
2809                className = documenttype.getClassName();
2810                resourceTypes = documenttype.getResourceTypes();
2811                mimeTypes = documenttype.getMimeTypes();
2812
2813                if (name == null) {
2814                    throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_NAME_0));
2815                }
2816                if (className == null) {
2817                    throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_CLASS_DEF_0));
2818                }
2819                if (resourceTypes.size() == 0) {
2820                    throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_RESOURCETYPE_DEF_0));
2821                }
2822
2823                try {
2824                    c = Class.forName(className);
2825                    documentFactory = (I_CmsDocumentFactory)c.getConstructor(new Class[] {String.class}).newInstance(
2826                        new Object[] {name});
2827                } catch (ClassNotFoundException exc) {
2828                    throw new CmsIndexException(
2829                        Messages.get().container(Messages.ERR_DOCCLASS_NOT_FOUND_1, className),
2830                        exc);
2831                } catch (Exception exc) {
2832                    throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCCLASS_INIT_1, className), exc);
2833                }
2834
2835                if (documentFactory.isUsingCache()) {
2836                    // init cache if used by the factory
2837                    documentFactory.setCache(m_extractionResultCache);
2838                }
2839
2840                Map<String, I_CmsDocumentFactory> matchingTypes = new HashMap<>();
2841                for (Iterator<String> keyIt = documentFactory.getDocumentKeys(
2842                    resourceTypes,
2843                    mimeTypes).iterator(); keyIt.hasNext();) {
2844                    String key = keyIt.next();
2845                    matchingTypes.put(key, documentFactory);
2846                    m_extractionKeys.add(key);
2847                }
2848                m_documentTypes.put(name, matchingTypes);
2849
2850            } catch (CmsException e) {
2851                if (LOG.isWarnEnabled()) {
2852                    LOG.warn(Messages.get().getBundle().key(Messages.LOG_DOCTYPE_CONFIG_FAILED_1, name), e);
2853                }
2854            }
2855        }
2856    }
2857
2858    /**
2859     * Initializes the index sources.
2860     */
2861    protected void initIndexSources() {
2862
2863        for (CmsSearchIndexSource source : m_indexSources.values()) {
2864            source.init();
2865        }
2866    }
2867
2868    /**
2869     * Initializes the configured search indexes.<p>
2870     *
2871     * This initializes also the list of Cms resources types
2872     * to be indexed by an index source.<p>
2873     */
2874    protected void initSearchIndexes() {
2875
2876        I_CmsSearchIndex index = null;
2877        for (int i = 0, n = m_indexes.size(); i < n; i++) {
2878            index = m_indexes.get(i);
2879            // reset disabled flag
2880            index.setEnabled(true);
2881            // check if the index has been configured correctly
2882            if (index.checkConfiguration(m_adminCms)) {
2883                // the index is configured correctly
2884                try {
2885                    index.initialize();
2886                } catch (Exception e) {
2887                    if (CmsLog.INIT.isWarnEnabled()) {
2888                        // in this case the index will be disabled
2889                        CmsLog.INIT.warn(Messages.get().getBundle().key(Messages.INIT_SEARCH_INIT_FAILED_1, index), e);
2890                    }
2891                }
2892            }
2893            // output a log message if the index was successfully configured or not
2894            if (CmsLog.INIT.isInfoEnabled()) {
2895                if (index.isEnabled()) {
2896                    CmsLog.INIT.info(
2897                        Messages.get().getBundle().key(Messages.INIT_INDEX_CONFIGURED_2, index, index.getProject()));
2898                } else {
2899                    CmsLog.INIT.warn(
2900                        Messages.get().getBundle().key(
2901                            Messages.INIT_INDEX_NOT_CONFIGURED_2,
2902                            index,
2903                            index.getProject()));
2904                }
2905            }
2906        }
2907    }
2908
2909    /**
2910     * Checks, if the index should be rebuilt/updated at all by the search manager.
2911     * @param index the index to check.
2912     * @return a flag, indicating if the index should be rebuilt/updated at all.
2913     */
2914    protected boolean shouldUpdateAtAll(I_CmsSearchIndex index) {
2915
2916        if (I_CmsSearchIndex.REBUILD_MODE_NEVER.equals(index.getRebuildMode())) {
2917            LOG.debug(Messages.get().getBundle().key(Messages.LOG_SKIP_REBUILD_FOR_MODE_NEVER_1, index.getName()));
2918            return false;
2919        } else {
2920            return true;
2921        }
2922
2923    }
2924
2925    /**
2926     * Incrementally updates all indexes that have their rebuild mode set to <code>"auto"</code>
2927     * after resources have been published.<p>
2928     *
2929     * @param adminCms an OpenCms user context with Admin permissions
2930     * @param publishHistoryId the history ID of the published project
2931     * @param report the report to write the output to
2932     */
2933    protected void updateAllIndexes(CmsObject adminCms, CmsUUID publishHistoryId, I_CmsReport report) {
2934
2935        int oldPriority = Thread.currentThread().getPriority();
2936        try {
2937            SEARCH_MANAGER_LOCK.lock();
2938            Thread.currentThread().setPriority(Thread.MIN_PRIORITY);
2939            List<CmsPublishedResource> publishedResources;
2940            try {
2941                // read the list of all published resources
2942                publishedResources = adminCms.readPublishedResources(publishHistoryId);
2943            } catch (CmsException e) {
2944                LOG.error(
2945                    Messages.get().getBundle().key(Messages.LOG_READING_CHANGED_RESOURCES_FAILED_1, publishHistoryId),
2946                    e);
2947                return;
2948            }
2949            Set<CmsUUID> bothNewAndDeleted = getIdsOfPublishResourcesWhichAreBothNewAndDeleted(publishedResources);
2950            // When published resources with both states 'new' and 'deleted' exist in the same publish job history, the resource has been moved
2951
2952            List<CmsPublishedResource> updateResources = new ArrayList<CmsPublishedResource>();
2953            for (CmsPublishedResource res : publishedResources) {
2954                if (res.isFolder() || res.getState().isUnchanged()) {
2955                    // folders and unchanged resources don't need to be indexed after publish
2956                    continue;
2957                }
2958                if (res.getState().isDeleted() || res.getState().isNew() || res.getState().isChanged()) {
2959                    if (updateResources.contains(res)) {
2960                        // resource may have been added as a sibling of another resource
2961                        // in this case we make sure to use the value from the publish list because of the "deleted" flag
2962                        boolean hasMoved = bothNewAndDeleted.contains(res.getStructureId())
2963                            || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_DESTINATION)
2964                            || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_SOURCE);
2965                        // check it this is a moved resource with source / target info, in this case we need both entries
2966                        if (!hasMoved) {
2967                            // if the resource was moved, we must contain both entries
2968                            updateResources.remove(res);
2969                        }
2970                        // "equals()" implementation of published resource checks for id,
2971                        // so the removed value may have a different "deleted" or "modified" status value
2972                        updateResources.add(res);
2973                    } else {
2974                        // resource not yet contained in the list
2975                        updateResources.add(res);
2976                        // check for the siblings (not for deleted resources, these are already gone)
2977                        if (!res.getState().isDeleted() && (res.getSiblingCount() > 1)) {
2978                            // this resource has siblings
2979                            try {
2980                                // read siblings from the online project
2981                                List<CmsResource> siblings = adminCms.readSiblings(
2982                                    res.getRootPath(),
2983                                    CmsResourceFilter.ALL);
2984                                Iterator<CmsResource> itSib = siblings.iterator();
2985                                while (itSib.hasNext()) {
2986                                    // check all siblings
2987                                    CmsResource sibling = itSib.next();
2988                                    CmsPublishedResource sib = new CmsPublishedResource(sibling);
2989                                    if (!updateResources.contains(sib)) {
2990                                        // ensure sibling is added only once
2991                                        updateResources.add(sib);
2992                                    }
2993                                }
2994                            } catch (CmsException e) {
2995                                // ignore, just use the original resource
2996                                if (LOG.isWarnEnabled()) {
2997                                    LOG.warn(
2998                                        Messages.get().getBundle().key(
2999                                            Messages.LOG_UNABLE_TO_READ_SIBLINGS_1,
3000                                            res.getRootPath()),
3001                                        e);
3002                                }
3003                            }
3004                        }
3005                    }
3006                }
3007            }
3008
3009            addAdditionallyAffectedResources(adminCms, updateResources);
3010            updateAllIndexes(adminCms, updateResources, report);
3011        } finally {
3012            SEARCH_MANAGER_LOCK.unlock();
3013            Thread.currentThread().setPriority(oldPriority);
3014        }
3015    }
3016
3017    /**
3018     * Incrementally updates all indexes that have their rebuild mode set to <code>"auto"</code>.<p>
3019     *
3020     * @param adminCms an OpenCms user context with Admin permissions
3021     * @param updateResources the resources to update
3022     * @param report the report to write the output to
3023     */
3024    protected void updateAllIndexes(
3025        CmsObject adminCms,
3026        List<CmsPublishedResource> updateResources,
3027        I_CmsReport report) {
3028
3029        try {
3030            SEARCH_MANAGER_LOCK.lock();
3031            if (!updateResources.isEmpty()) {
3032                // sort the resource to update
3033                Collections.sort(updateResources);
3034                // only update the indexes if the list of remaining published resources is not empty
3035                Iterator<I_CmsSearchIndex> i = m_indexes.iterator();
3036                while (i.hasNext()) {
3037                    I_CmsSearchIndex index = i.next();
3038                    if (I_CmsSearchIndex.REBUILD_MODE_AUTO.equals(index.getRebuildMode())) {
3039                        // only update indexes which have the rebuild mode set to "auto"
3040                        try {
3041                            updateIndex(index, report, updateResources);
3042                        } catch (CmsException e) {
3043                            LOG.error(
3044                                Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()),
3045                                e);
3046                        }
3047                    }
3048                }
3049            }
3050            // clean up the extraction result cache
3051            cleanExtractionCache();
3052        } finally {
3053            SEARCH_MANAGER_LOCK.unlock();
3054        }
3055
3056    }
3057
3058    /**
3059     * Updates (if required creates) the index with the given name.<p>
3060     *
3061     * If the optional List of <code>{@link CmsPublishedResource}</code> instances is provided, the index will be
3062     * incrementally updated for these resources only. If this List is <code>null</code> or empty,
3063     * the index will be fully rebuild.<p>
3064     *
3065     * @param index the index to update or rebuild
3066     * @param report the report to write output messages to
3067     * @param resourcesToIndex an (optional) list of <code>{@link CmsPublishedResource}</code> objects to update in the index
3068     *
3069     * @throws CmsException if something goes wrong
3070     */
3071    protected void updateIndex(I_CmsSearchIndex index, I_CmsReport report, List<CmsPublishedResource> resourcesToIndex)
3072    throws CmsException {
3073
3074        if (shouldUpdateAtAll(index)) {
3075            try {
3076                SEARCH_MANAGER_LOCK.lock();
3077
3078                // copy the stored admin context for the indexing
3079                CmsObject cms = OpenCms.initCmsObject(m_adminCms);
3080                // make sure a report is available
3081                if (report == null) {
3082                    report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class);
3083                }
3084
3085                // check if the index has been configured correctly
3086                if (!index.checkConfiguration(cms)) {
3087                    // the index is disabled
3088                    return;
3089                }
3090
3091                // set site root and project for this index
3092                cms.getRequestContext().setSiteRoot("/");
3093                // switch to the index project
3094                cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject()));
3095
3096                if ((resourcesToIndex == null) || resourcesToIndex.isEmpty()) {
3097                    // rebuild the complete index
3098
3099                    updateIndexCompletely(cms, index, report);
3100                } else {
3101                    updateIndexIncremental(cms, index, report, resourcesToIndex);
3102                }
3103            } finally {
3104                SEARCH_MANAGER_LOCK.unlock();
3105            }
3106        }
3107    }
3108
3109    /**
3110     * The method updates all OpenCms documents that are indexed.
3111     * @param cms the OpenCms user context to use for accessing the VFS
3112     * @param index the index to update
3113     * @param report the report to write output messages to
3114     * @throws CmsIndexException thrown if indexing fails for some reason
3115     */
3116    @SuppressWarnings("null")
3117    protected void updateIndexCompletely(CmsObject cms, I_CmsSearchIndex index, I_CmsReport report)
3118    throws CmsIndexException {
3119
3120        // create a new thread manager for the indexing threads
3121        CmsIndexingThreadManager threadManager = getThreadManager();
3122
3123        boolean isOfflineIndex = false;
3124        if (I_CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) {
3125            // disable offline indexing while the complete index is rebuild
3126            isOfflineIndex = true;
3127            index.setRebuildMode(I_CmsSearchIndex.REBUILD_MODE_MANUAL);
3128            // re-initialize the offline indexes, this will disable this offline index
3129            initOfflineIndexes();
3130        }
3131
3132        I_CmsIndexWriter writer = null;
3133        try {
3134            // create a backup of the existing index
3135            CmsSearchIndex indexInternal = null;
3136            String backup = null;
3137            if (index instanceof CmsSearchIndex) {
3138                indexInternal = (CmsSearchIndex)index;
3139                backup = indexInternal.createIndexBackup();
3140                if (backup != null) {
3141                    indexInternal.indexSearcherOpen(backup);
3142                }
3143            }
3144
3145            // create a new index writer
3146            writer = index.getIndexWriter(report, true);
3147            if (writer instanceof I_CmsSolrIndexWriter) {
3148                try {
3149                    ((I_CmsSolrIndexWriter)writer).deleteAllDocuments();
3150                } catch (IOException e) {
3151                    LOG.error(e.getMessage(), e);
3152                }
3153            }
3154
3155            // output start information on the report
3156            report.println(
3157                Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_BEGIN_1, index.getName()),
3158                I_CmsReport.FORMAT_HEADLINE);
3159
3160            // iterate all configured index sources of this index
3161            Iterator<CmsSearchIndexSource> sources = index.getSources().iterator();
3162            while (sources.hasNext()) {
3163                // get the next index source
3164                CmsSearchIndexSource source = sources.next();
3165                // create the indexer
3166                I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index);
3167                // new index creation, use all resources from the index source
3168                indexer.rebuildIndex(writer, threadManager, source);
3169
3170                // wait for indexing threads to finish
3171                while (threadManager.isRunning()) {
3172                    try {
3173                        Thread.sleep(500);
3174                    } catch (InterruptedException e) {
3175                        // just continue with the loop after interruption
3176                        LOG.info(e.getLocalizedMessage(), e);
3177                    }
3178                }
3179
3180                // commit and optimize the index after each index source has been finished
3181                try {
3182                    writer.commit();
3183                } catch (IOException e) {
3184                    if (LOG.isWarnEnabled()) {
3185                        LOG.warn(
3186                            Messages.get().getBundle().key(
3187                                Messages.LOG_IO_INDEX_WRITER_COMMIT_2,
3188                                index.getName(),
3189                                index.getPath()),
3190                            e);
3191                    }
3192                }
3193                try {
3194                    writer.optimize();
3195                } catch (IOException e) {
3196                    if (LOG.isWarnEnabled()) {
3197                        LOG.warn(
3198                            Messages.get().getBundle().key(
3199                                Messages.LOG_IO_INDEX_WRITER_OPTIMIZE_2,
3200                                index.getName(),
3201                                index.getPath()),
3202                            e);
3203                    }
3204                }
3205            }
3206
3207            // we are sure here that indexInternal is not null
3208            if (backup != null) {
3209                // remove the backup after the files have been re-indexed
3210                indexInternal.indexSearcherClose();
3211                indexInternal.removeIndexBackup(backup);
3212            }
3213
3214            // output finish information on the report
3215            report.println(
3216                Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_END_1, index.getName()),
3217                I_CmsReport.FORMAT_HEADLINE);
3218
3219        } finally {
3220            if (writer != null) {
3221                try {
3222                    writer.close();
3223                } catch (IOException e) {
3224                    if (LOG.isWarnEnabled()) {
3225                        LOG.warn(
3226                            Messages.get().getBundle().key(
3227                                Messages.LOG_IO_INDEX_WRITER_CLOSE_2,
3228                                index.getPath(),
3229                                index.getName()),
3230                            e);
3231                    }
3232                }
3233            }
3234            if (isOfflineIndex) {
3235                // reset the mode of the offline index
3236                index.setRebuildMode(I_CmsSearchIndex.REBUILD_MODE_OFFLINE);
3237                // re-initialize the offline indexes, this will re-enable this index
3238                initOfflineIndexes();
3239            }
3240            // index has changed - initialize the index searcher instance
3241            index.onIndexChanged(true);
3242        }
3243
3244        // show information about indexing runtime
3245        threadManager.reportStatistics(report);
3246    }
3247
3248    /**
3249     * Incrementally updates the given index.<p>
3250     *
3251     * @param cms the OpenCms user context to use for accessing the VFS
3252     * @param index the index to update
3253     * @param report the report to write output messages to
3254     * @param resourcesToIndex a list of <code>{@link CmsPublishedResource}</code> objects to update in the index
3255     *
3256     * @throws CmsException if something goes wrong
3257     */
3258    protected void updateIndexIncremental(
3259        CmsObject cms,
3260        I_CmsSearchIndex index,
3261        I_CmsReport report,
3262        List<CmsPublishedResource> resourcesToIndex)
3263    throws CmsException {
3264
3265        try {
3266            SEARCH_MANAGER_LOCK.lock();
3267
3268            // update the existing index
3269            List<CmsSearchIndexUpdateData> updateCollections = new ArrayList<CmsSearchIndexUpdateData>();
3270
3271            boolean hasResourcesToDelete = false;
3272            boolean hasResourcesToUpdate = false;
3273
3274            // iterate all configured index sources of this index
3275            Iterator<CmsSearchIndexSource> sources = index.getSources().iterator();
3276            while (sources.hasNext()) {
3277                // get the next index source
3278                CmsSearchIndexSource source = sources.next();
3279                // create the indexer
3280                I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index);
3281                // collect the resources to update
3282                CmsSearchIndexUpdateData updateData = indexer.getUpdateData(source, resourcesToIndex);
3283                if (!updateData.isEmpty()) {
3284                    // add the update collection to the internal pipeline
3285                    updateCollections.add(updateData);
3286                    hasResourcesToDelete = hasResourcesToDelete | updateData.hasResourcesToDelete();
3287                    hasResourcesToUpdate = hasResourcesToUpdate | updateData.hasResourceToUpdate();
3288                }
3289            }
3290
3291            // only start index modification if required
3292            if (hasResourcesToDelete || hasResourcesToUpdate) {
3293                // output start information on the report
3294                report.println(
3295                    Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_BEGIN_1, index.getName()),
3296                    I_CmsReport.FORMAT_HEADLINE);
3297
3298                I_CmsIndexWriter writer = null;
3299                try {
3300                    // obtain an index writer that updates the current index
3301                    writer = index.getIndexWriter(report, false);
3302
3303                    if (hasResourcesToDelete) {
3304                        // delete the resource from the index
3305                        Iterator<CmsSearchIndexUpdateData> i = updateCollections.iterator();
3306                        while (i.hasNext()) {
3307                            CmsSearchIndexUpdateData updateCollection = i.next();
3308                            if (updateCollection.hasResourcesToDelete()) {
3309                                updateCollection.getIndexer().deleteResources(
3310                                    writer,
3311                                    updateCollection.getResourcesToDelete());
3312                            }
3313                        }
3314                    }
3315
3316                    if (hasResourcesToUpdate) {
3317                        // create a new thread manager
3318                        CmsIndexingThreadManager threadManager = getThreadManager();
3319
3320                        Iterator<CmsSearchIndexUpdateData> i = updateCollections.iterator();
3321                        while (i.hasNext()) {
3322                            CmsSearchIndexUpdateData updateCollection = i.next();
3323                            if (updateCollection.hasResourceToUpdate()) {
3324                                updateCollection.getIndexer().updateResources(
3325                                    writer,
3326                                    threadManager,
3327                                    updateCollection.getResourcesToUpdate());
3328                            }
3329                        }
3330
3331                        // wait for indexing threads to finish
3332                        while (threadManager.isRunning()) {
3333                            try {
3334                                Thread.sleep(500);
3335                            } catch (InterruptedException e) {
3336                                // just continue with the loop after interruption
3337                                LOG.info(e.getLocalizedMessage(), e);
3338                            }
3339                        }
3340                    }
3341                } finally {
3342                    // close the index writer
3343                    if (writer != null) {
3344                        try {
3345                            writer.commit();
3346                        } catch (IOException e) {
3347                            LOG.error(
3348                                Messages.get().getBundle().key(
3349                                    Messages.LOG_IO_INDEX_WRITER_COMMIT_2,
3350                                    index.getName(),
3351                                    index.getPath()),
3352                                e);
3353                        }
3354                    }
3355                    // index has changed - initialize the index searcher instance
3356                    index.onIndexChanged(false);
3357                }
3358
3359                // output finish information on the report
3360                report.println(
3361                    Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_END_1, index.getName()),
3362                    I_CmsReport.FORMAT_HEADLINE);
3363            }
3364        } finally {
3365            SEARCH_MANAGER_LOCK.unlock();
3366        }
3367    }
3368
3369    /**
3370     * Updates the offline search indexes for the given list of resources.<p>
3371     *
3372     * @param report the report to write the index information to
3373     * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index
3374     */
3375    protected void updateIndexOffline(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) {
3376
3377        CmsObject cms = m_adminCms;
3378        try {
3379            // copy the administration context for the indexing
3380            cms = OpenCms.initCmsObject(m_adminCms);
3381            // set site root and project for this index
3382            cms.getRequestContext().setSiteRoot("/");
3383        } catch (CmsException e) {
3384            LOG.error(e.getLocalizedMessage(), e);
3385        }
3386
3387        Iterator<I_CmsSearchIndex> j = m_offlineIndexes.iterator();
3388        while (j.hasNext()) {
3389            I_CmsSearchIndex index = j.next();
3390            if (index.getSources() != null) {
3391                try {
3392                    // switch to the index project
3393                    cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject()));
3394                    updateIndexIncremental(cms, index, report, resourcesToIndex);
3395                } catch (CmsException e) {
3396                    LOG.error(Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()), e);
3397                }
3398            }
3399        }
3400    }
3401
3402    /**
3403     * Checks if the given containerpage is used as a detail containers and adds the related detail content to the resource set.<p>
3404     *
3405     * @param adminCms the cms context
3406     * @param containerPages the containerpages
3407     * @param containerPage the container page site path
3408     */
3409    private void addDetailContent(CmsObject adminCms, Set<CmsResource> containerPages, String containerPage) {
3410
3411        if (CmsDetailOnlyContainerUtil.isDetailContainersPage(adminCms, containerPage)) {
3412
3413            try {
3414                CmsResource detailRes = adminCms.readResource(
3415                    CmsDetailOnlyContainerUtil.getDetailContentPath(containerPage),
3416                    CmsResourceFilter.IGNORE_EXPIRATION);
3417                containerPages.add(detailRes);
3418            } catch (Throwable e) {
3419                if (LOG.isWarnEnabled()) {
3420                    LOG.warn(e.getLocalizedMessage(), e);
3421                }
3422            }
3423        }
3424    }
3425
3426    /**
3427     * Creates the Solr core container.<p>
3428     *
3429     * @return the created core container
3430     */
3431    private CoreContainer createCoreContainer() {
3432
3433        CoreContainer container = null;
3434        try {
3435            // get the core container
3436            // still no core container: create it
3437            container = CoreContainer.createAndLoad(
3438                Paths.get(m_solrConfig.getHome()),
3439                m_solrConfig.getSolrFile().toPath());
3440            if (CmsLog.INIT.isInfoEnabled()) {
3441                CmsLog.INIT.info(
3442                    Messages.get().getBundle().key(
3443                        Messages.INIT_SOLR_CORE_CONTAINER_CREATED_2,
3444                        m_solrConfig.getHome(),
3445                        m_solrConfig.getSolrFile().getName()));
3446            }
3447        } catch (Exception e) {
3448            LOG.error(
3449                Messages.get().getBundle().key(
3450                    Messages.ERR_SOLR_CORE_CONTAINER_NOT_CREATED_1,
3451                    m_solrConfig.getSolrFile().getAbsolutePath()),
3452                e);
3453        }
3454        return container;
3455
3456    }
3457
3458    /**
3459     * Remove write.lock file in the data directory to ensure the index is unlocked.
3460     * @param dataDir the data directory of the Solr index that should be unlocked.
3461     */
3462    private void ensureIndexIsUnlocked(String dataDir) {
3463
3464        Collection<File> lockFiles = new ArrayList<File>(2);
3465        lockFiles.add(
3466            new File(
3467                CmsFileUtil.addTrailingSeparator(CmsFileUtil.addTrailingSeparator(dataDir) + "index") + "write.lock"));
3468        lockFiles.add(
3469            new File(
3470                CmsFileUtil.addTrailingSeparator(CmsFileUtil.addTrailingSeparator(dataDir) + "spellcheck")
3471                    + "write.lock"));
3472        for (File lockFile : lockFiles) {
3473            if (lockFile.exists()) {
3474                lockFile.delete();
3475                LOG.warn(
3476                    "Forcely unlocking index with data dir \""
3477                        + dataDir
3478                        + "\" by removing file \""
3479                        + lockFile.getAbsolutePath()
3480                        + "\".");
3481            }
3482        }
3483    }
3484
3485    /**
3486     * Returns the report in the given event data, if <code>null</code>
3487     * a new log report is used.<p>
3488     *
3489     * @param event the event to get the report for
3490     *
3491     * @return the report
3492     */
3493    private I_CmsReport getEventReport(CmsEvent event) {
3494
3495        I_CmsReport report = null;
3496        if (event.getData() != null) {
3497            report = (I_CmsReport)event.getData().get(I_CmsEventListener.KEY_REPORT);
3498        }
3499        if (report == null) {
3500            report = new CmsLogReport(Locale.ENGLISH, getClass());
3501        }
3502        return report;
3503    }
3504
3505    /**
3506     * Gets all structure ids for which published resources of both states 'new' and 'deleted' exist in the given list.<p>
3507     *
3508     * @param publishedResources a list of published resources
3509     *
3510     * @return the set of structure ids that satisfy the condition above
3511     */
3512    private Set<CmsUUID> getIdsOfPublishResourcesWhichAreBothNewAndDeleted(
3513        List<CmsPublishedResource> publishedResources) {
3514
3515        Set<CmsUUID> result = new HashSet<CmsUUID>();
3516        Set<CmsUUID> deletedSet = new HashSet<CmsUUID>();
3517        for (CmsPublishedResource pubRes : publishedResources) {
3518            if (pubRes.getState().isNew()) {
3519                result.add(pubRes.getStructureId());
3520            }
3521            if (pubRes.getState().isDeleted()) {
3522                deletedSet.add(pubRes.getStructureId());
3523            }
3524        }
3525        result.retainAll(deletedSet);
3526        return result;
3527    }
3528
3529    /**
3530     * Checks if the given type id belongs to a group type.
3531     *
3532     * @param type the type id to check
3533     * @return true if the type is a group type
3534     */
3535    private boolean isGroup(int type) {
3536
3537        for (String groupType : groupTypes) {
3538            if (OpenCms.getResourceManager().matchResourceType(groupType, type)) {
3539                return true;
3540            }
3541        }
3542        return false;
3543
3544    }
3545
3546    /**
3547     * Shuts down the Solr core container.<p>
3548     */
3549    private void shutDownSolrContainer() {
3550
3551        if (m_coreContainer != null) {
3552            for (SolrCore core : m_coreContainer.getCores()) {
3553                // do not unload spellcheck core because otherwise the core.properties file is removed
3554                // even when calling m_coreContainer.unload(core.getName(), false, false, false);
3555                if (!core.getName().equals(CmsSolrSpellchecker.SPELLCHECKER_INDEX_CORE)) {
3556                    m_coreContainer.unload(core.getName(), false, false, true);
3557                }
3558            }
3559            m_coreContainer.shutdown();
3560            if (CmsLog.INIT.isInfoEnabled()) {
3561                CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SOLR_SHUTDOWN_SUCCESS_0));
3562            }
3563            m_coreContainer = null;
3564        }
3565    }
3566
3567}