001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.search;
029
030import org.opencms.ade.containerpage.CmsDetailOnlyContainerUtil;
031import org.opencms.configuration.CmsConfigurationException;
032import org.opencms.db.CmsDriverManager;
033import org.opencms.db.CmsPublishedResource;
034import org.opencms.db.CmsResourceState;
035import org.opencms.file.CmsObject;
036import org.opencms.file.CmsProject;
037import org.opencms.file.CmsResource;
038import org.opencms.file.CmsResourceFilter;
039import org.opencms.file.CmsUser;
040import org.opencms.file.types.CmsResourceTypeXmlContainerPage;
041import org.opencms.file.types.CmsResourceTypeXmlContent;
042import org.opencms.file.types.I_CmsResourceType;
043import org.opencms.i18n.CmsLocaleManager;
044import org.opencms.i18n.CmsMessageContainer;
045import org.opencms.loader.CmsLoaderException;
046import org.opencms.loader.CmsResourceManager;
047import org.opencms.main.CmsBroadcast.ContentMode;
048import org.opencms.main.CmsEvent;
049import org.opencms.main.CmsException;
050import org.opencms.main.CmsIllegalArgumentException;
051import org.opencms.main.CmsIllegalStateException;
052import org.opencms.main.CmsLog;
053import org.opencms.main.I_CmsEventListener;
054import org.opencms.main.OpenCms;
055import org.opencms.main.OpenCmsSolrHandler;
056import org.opencms.relations.CmsRelation;
057import org.opencms.relations.CmsRelationFilter;
058import org.opencms.relations.CmsRelationType;
059import org.opencms.report.CmsLogReport;
060import org.opencms.report.CmsShellLogReport;
061import org.opencms.report.I_CmsReport;
062import org.opencms.scheduler.I_CmsScheduledJob;
063import org.opencms.search.documents.A_CmsVfsDocument;
064import org.opencms.search.documents.CmsExtractionResultCache;
065import org.opencms.search.documents.I_CmsDocumentFactory;
066import org.opencms.search.documents.I_CmsTermHighlighter;
067import org.opencms.search.fields.CmsLuceneField;
068import org.opencms.search.fields.CmsLuceneFieldConfiguration;
069import org.opencms.search.fields.CmsSearchField;
070import org.opencms.search.fields.CmsSearchFieldConfiguration;
071import org.opencms.search.fields.CmsSearchFieldMapping;
072import org.opencms.search.fields.I_CmsSearchFieldConfiguration;
073import org.opencms.search.solr.CmsSolrConfiguration;
074import org.opencms.search.solr.CmsSolrFieldConfiguration;
075import org.opencms.search.solr.CmsSolrIndex;
076import org.opencms.search.solr.I_CmsSolrIndexWriter;
077import org.opencms.search.solr.spellchecking.CmsSolrSpellchecker;
078import org.opencms.search.solr.spellchecking.CmsSpellcheckDictionaryIndexer;
079import org.opencms.security.CmsRole;
080import org.opencms.security.CmsRoleViolationException;
081import org.opencms.util.A_CmsModeStringEnumeration;
082import org.opencms.util.CmsFileUtil;
083import org.opencms.util.CmsStringUtil;
084import org.opencms.util.CmsUUID;
085import org.opencms.util.CmsWaitHandle;
086
087import java.io.File;
088import java.io.IOException;
089import java.nio.file.FileSystems;
090import java.nio.file.Paths;
091import java.util.ArrayList;
092import java.util.Collection;
093import java.util.Collections;
094import java.util.HashMap;
095import java.util.HashSet;
096import java.util.Iterator;
097import java.util.LinkedHashMap;
098import java.util.List;
099import java.util.ListIterator;
100import java.util.Locale;
101import java.util.Map;
102import java.util.Set;
103import java.util.TreeMap;
104import java.util.concurrent.locks.ReentrantLock;
105import java.util.stream.Collectors;
106
107import org.apache.commons.logging.Log;
108import org.apache.lucene.analysis.Analyzer;
109import org.apache.lucene.analysis.CharArraySet;
110import org.apache.lucene.analysis.standard.StandardAnalyzer;
111import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
112import org.apache.solr.client.solrj.impl.HttpSolrClient.Builder;
113import org.apache.solr.core.CoreContainer;
114import org.apache.solr.core.CoreDescriptor;
115import org.apache.solr.core.SolrCore;
116
117/**
118 * Implements the general management and configuration of the search and
119 * indexing facilities in OpenCms.<p>
120 *
121 * @since 6.0.0
122 */
123public class CmsSearchManager implements I_CmsScheduledJob, I_CmsEventListener {
124
125    /**
126     *  Enumeration class for force unlock types.<p>
127     */
128    public static final class CmsSearchForceUnlockMode extends A_CmsModeStringEnumeration {
129
130        /** Force unlock type "always". */
131        public static final CmsSearchForceUnlockMode ALWAYS = new CmsSearchForceUnlockMode("always");
132
133        /** Force unlock type "never". */
134        public static final CmsSearchForceUnlockMode NEVER = new CmsSearchForceUnlockMode("never");
135
136        /** Force unlock type "only full". */
137        public static final CmsSearchForceUnlockMode ONLYFULL = new CmsSearchForceUnlockMode("onlyfull");
138
139        /** Serializable version id. */
140        private static final long serialVersionUID = 74746076708908673L;
141
142        /**
143         * Creates a new force unlock type with the given name.<p>
144         *
145         * @param mode the mode id to use
146         */
147        protected CmsSearchForceUnlockMode(String mode) {
148
149            super(mode);
150        }
151
152        /**
153         * Returns the lock type for the given type value.<p>
154         *
155         * @param type the type value to get the lock type for
156         *
157         * @return the lock type for the given type value
158         */
159        public static CmsSearchForceUnlockMode valueOf(String type) {
160
161            if (type.equals(ALWAYS.toString())) {
162                return ALWAYS;
163            } else if (type.equals(NEVER.toString())) {
164                return NEVER;
165            } else {
166                return ONLYFULL;
167            }
168        }
169    }
170
171    /**
172     * Handles offline index generation.<p>
173     */
174    protected class CmsSearchOfflineHandler implements I_CmsEventListener {
175
176        /** Indicates if the event handlers for the offline search have been already registered. */
177        private boolean m_isEventRegistered;
178
179        /** The list of resources to index. */
180        private List<CmsPublishedResource> m_resourcesToIndex;
181
182        /**
183         * Initializes the offline index handler.<p>
184         */
185        protected CmsSearchOfflineHandler() {
186
187            m_resourcesToIndex = new ArrayList<CmsPublishedResource>();
188        }
189
190        /**
191         * Implements the event listener of this class.<p>
192         *
193         * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent)
194         */
195        @SuppressWarnings("unchecked")
196        public void cmsEvent(CmsEvent event) {
197
198            Object change = event.getData().get(I_CmsEventListener.KEY_CHANGE);
199            switch (event.getType()) {
200                case I_CmsEventListener.EVENT_PROPERTY_MODIFIED:
201                case I_CmsEventListener.EVENT_RESOURCE_CREATED:
202                case I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED:
203                case I_CmsEventListener.EVENT_RESOURCE_MODIFIED:
204                    if ((change != null) && change.equals(new Integer(CmsDriverManager.NOTHING_CHANGED))) {
205                        // skip lock & unlock
206                        return;
207                    }
208                    // skip indexing if flag is set in event
209                    Object skip = event.getData().get(I_CmsEventListener.KEY_SKIPINDEX);
210                    if (skip != null) {
211                        return;
212                    }
213
214                    // a resource has been modified - offline indexes require (re)indexing
215                    List<CmsResource> resources = Collections.singletonList(
216                        (CmsResource)event.getData().get(I_CmsEventListener.KEY_RESOURCE));
217                    reIndexResources(resources);
218                    break;
219                case I_CmsEventListener.EVENT_RESOURCE_DELETED:
220                    List<CmsResource> eventResources = (List<CmsResource>)event.getData().get(
221                        I_CmsEventListener.KEY_RESOURCES);
222                    List<CmsResource> resourcesToDelete = new ArrayList<CmsResource>(eventResources);
223                    for (CmsResource res : resourcesToDelete) {
224                        if (res.getState().isNew()) {
225                            // if the resource is new and a delete action was performed
226                            // --> set the state of the resource to deleted
227                            res.setState(CmsResourceState.STATE_DELETED);
228                        }
229                    }
230                    reIndexResources(resourcesToDelete);
231                    break;
232                case I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED:
233                    if (I_CmsEventListener.VALUE_CREATE_SIBLING.equals(change)) {
234                        List<CmsResource> resList = (List<CmsResource>)event.getData().get(
235                            I_CmsEventListener.KEY_RESOURCES);
236                        if ((resList != null) && (resList.size() >= 3)) {
237                            System.out.println("Sibling creation case, resource = " + resList.get(1).getRootPath());
238                            reIndexResources(Collections.singletonList(resList.get(1)));
239
240                        }
241                    } else {
242                        reIndexResources((List<CmsResource>)event.getData().get(I_CmsEventListener.KEY_RESOURCES));
243                    }
244                    break;
245                case I_CmsEventListener.EVENT_RESOURCE_MOVED:
246                case I_CmsEventListener.EVENT_RESOURCE_COPIED:
247                case I_CmsEventListener.EVENT_RESOURCES_MODIFIED:
248
249                    // a list of resources has been modified - offline indexes require (re)indexing
250                    reIndexResources((List<CmsResource>)event.getData().get(I_CmsEventListener.KEY_RESOURCES));
251                    break;
252                default:
253                    // no operation
254            }
255        }
256
257        /**
258         * Adds a list of {@link CmsPublishedResource} objects to be indexed.<p>
259         *
260         * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to be indexed
261         */
262        protected synchronized void addResourcesToIndex(List<CmsPublishedResource> resourcesToIndex) {
263
264            m_resourcesToIndex.addAll(resourcesToIndex);
265        }
266
267        /**
268         * Returns the list of {@link CmsPublishedResource} objects to index.<p>
269         *
270         * @return the resources to index
271         */
272        protected List<CmsPublishedResource> getResourcesToIndex() {
273
274            List<CmsPublishedResource> result;
275            synchronized (this) {
276                result = m_resourcesToIndex;
277                m_resourcesToIndex = new ArrayList<CmsPublishedResource>();
278            }
279            try {
280                CmsObject cms = m_adminCms;
281                CmsProject offline = getOfflineIndexProject();
282                if (offline != null) {
283                    // switch to the offline project if available
284                    cms = OpenCms.initCmsObject(m_adminCms);
285                    cms.getRequestContext().setCurrentProject(offline);
286                }
287                addAdditionallyAffectedResources(cms, result);
288            } catch (CmsException e) {
289                LOG.error(e.getLocalizedMessage(), e);
290            }
291            return result;
292        }
293
294        /**
295         * Initializes this offline search handler, registering the event handlers if required.<p>
296         */
297        protected void initialize() {
298
299            if (m_offlineIndexes.size() > 0) {
300                // there is at least one offline index configured
301                if ((m_offlineIndexThread == null) || !m_offlineIndexThread.isAlive()) {
302                    // create the offline indexing thread
303                    m_offlineIndexThread = new CmsSearchOfflineIndexThread(this);
304                    // start the offline index thread
305                    m_offlineIndexThread.start();
306                }
307            } else {
308                if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) {
309                    // no offline indexes but thread still running, stop the thread
310                    m_offlineIndexThread.shutDown();
311                    m_offlineIndexThread = null;
312                }
313            }
314            // do this only in case there are offline indexes configured
315            if (!m_isEventRegistered && (m_offlineIndexes.size() > 0)) {
316                m_isEventRegistered = true;
317                // register this object as event listener
318                OpenCms.addCmsEventListener(
319                    this,
320                    new int[] {
321                        I_CmsEventListener.EVENT_PROPERTY_MODIFIED,
322                        I_CmsEventListener.EVENT_RESOURCE_CREATED,
323                        I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED,
324                        I_CmsEventListener.EVENT_RESOURCE_MODIFIED,
325                        I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED,
326                        I_CmsEventListener.EVENT_RESOURCE_MOVED,
327                        I_CmsEventListener.EVENT_RESOURCE_DELETED,
328                        I_CmsEventListener.EVENT_RESOURCE_COPIED,
329                        I_CmsEventListener.EVENT_RESOURCES_MODIFIED});
330            }
331        }
332
333        /**
334         * Updates all offline indexes for the given list of {@link CmsResource} objects.<p>
335         *
336         * @param resources a list of {@link CmsResource} objects to update in the offline indexes
337         */
338        protected synchronized void reIndexResources(List<CmsResource> resources) {
339
340            List<CmsPublishedResource> resourcesToIndex = new ArrayList<CmsPublishedResource>(resources.size());
341            for (CmsResource res : resources) {
342                CmsPublishedResource pubRes = new CmsPublishedResource(res);
343                resourcesToIndex.add(pubRes);
344            }
345            if (resourcesToIndex.size() > 0) {
346                // add the resources found to the offline index thread
347                addResourcesToIndex(resourcesToIndex);
348            }
349        }
350    }
351
352    /**
353     * The offline indexer thread runs periodically and indexes all resources added by the event handler.<p>
354     */
355    protected class CmsSearchOfflineIndexThread extends Thread {
356
357        /** The event handler that triggers this thread. */
358        CmsSearchOfflineHandler m_handler;
359
360        /** Indicates if this thread is still alive. */
361        boolean m_isAlive;
362
363        /** Indicates that an index update thread is currently running. */
364        private boolean m_isUpdating;
365
366        /** If true a manual update (after file upload) was triggered. */
367        private boolean m_updateTriggered;
368
369        /** The wait handle used for signalling when the worker thread has finished. */
370        private CmsWaitHandle m_waitHandle = new CmsWaitHandle();
371
372        /**
373         * Constructor.<p>
374         *
375         * @param handler the offline index event handler
376         */
377        protected CmsSearchOfflineIndexThread(CmsSearchOfflineHandler handler) {
378
379            super("OpenCms: Offline Search Indexer");
380            m_handler = handler;
381        }
382
383        /**
384         * Gets the wait handle used for signalling when the worker thread has finished.
385         *
386         * @return the wait handle
387         **/
388        public CmsWaitHandle getWaitHandle() {
389
390            return m_waitHandle;
391        }
392
393        /**
394         * @see java.lang.Thread#interrupt()
395         */
396        @Override
397        public void interrupt() {
398
399            super.interrupt();
400            m_updateTriggered = true;
401        }
402
403        /**
404         * @see java.lang.Thread#run()
405         */
406        @Override
407        public void run() {
408
409            // create a log report for the output
410            I_CmsReport report = new CmsLogReport(m_adminCms.getRequestContext().getLocale(), CmsSearchManager.class);
411            long offlineUpdateFrequency = getOfflineUpdateFrequency();
412            m_updateTriggered = false;
413            try {
414                while (m_isAlive) {
415                    if (!m_updateTriggered) {
416                        try {
417                            sleep(offlineUpdateFrequency);
418                        } catch (InterruptedException e) {
419                            // continue the thread after interruption
420                            if (!m_isAlive) {
421                                // the thread has been shut down while sleeping
422                                continue;
423                            }
424                            if (offlineUpdateFrequency != getOfflineUpdateFrequency()) {
425                                // offline update frequency change - clear interrupt status
426                                offlineUpdateFrequency = getOfflineUpdateFrequency();
427                            }
428                            LOG.info(e.getLocalizedMessage(), e);
429                        }
430                    }
431                    if (m_isAlive) {
432                        // set update trigger to false since we do the update now
433                        m_updateTriggered = false;
434                        // get list of resource to update
435                        List<CmsPublishedResource> resourcesToIndex = getResourcesToIndex();
436                        if (resourcesToIndex.size() > 0) {
437                            // only start indexing if there is at least one resource
438                            startOfflineUpdateThread(report, resourcesToIndex);
439                        } else {
440                            getWaitHandle().release();
441                        }
442                        // this is just called to clear the interrupt status of the thread
443                        interrupted();
444                    }
445                }
446            } finally {
447                // make sure that live status is reset in case of Exceptions
448                m_isAlive = false;
449            }
450
451        }
452
453        /**
454         * @see java.lang.Thread#start()
455         */
456        @Override
457        public synchronized void start() {
458
459            m_isAlive = true;
460            super.start();
461        }
462
463        /**
464         * Obtains the list of resource to update in the offline index,
465         * then optimizes the list by removing duplicate entries.<p>
466         *
467         * @return the list of resource to update in the offline index
468         */
469        protected List<CmsPublishedResource> getResourcesToIndex() {
470
471            List<CmsPublishedResource> resourcesToIndex = m_handler.getResourcesToIndex();
472            List<CmsPublishedResource> result = new ArrayList<CmsPublishedResource>(resourcesToIndex.size());
473
474            // Reverse to always keep the last list entries
475            Collections.reverse(resourcesToIndex);
476            for (CmsPublishedResource pubRes : resourcesToIndex) {
477                boolean addResource = true;
478                for (CmsPublishedResource resRes : result) {
479                    if (pubRes.equals(resRes)
480                        && (pubRes.getState() == resRes.getState())
481                        && (pubRes.getMovedState() == resRes.getMovedState())
482                        && pubRes.getRootPath().equals(resRes.getRootPath())) {
483                        // resource already in the update list
484                        addResource = false;
485                        break;
486                    }
487                }
488                if (addResource) {
489                    result.add(pubRes);
490                }
491
492            }
493            Collections.reverse(result);
494            return changeStateOfMoveOriginsToDeleted(result);
495        }
496
497        /**
498         * Shuts down this offline index thread.<p>
499         */
500        protected void shutDown() {
501
502            m_isAlive = false;
503            interrupt();
504            if (m_isUpdating) {
505                long waitTime = getOfflineUpdateFrequency() / 2;
506                int waitSteps = 0;
507                do {
508                    try {
509                        // wait half the time of the offline index frequency for the thread to finish
510                        Thread.sleep(waitTime);
511                    } catch (InterruptedException e) {
512                        // continue
513                        LOG.info(e.getLocalizedMessage(), e);
514                    }
515                    waitSteps++;
516                    // wait 5 times then stop waiting
517                } while ((waitSteps < 5) && m_isUpdating);
518            }
519        }
520
521        /**
522         * Updates the offline search indexes for the given list of resources.<p>
523         *
524         * @param report the report to write the index information to
525         * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index
526         */
527        protected void startOfflineUpdateThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) {
528
529            CmsSearchOfflineIndexWorkThread thread = new CmsSearchOfflineIndexWorkThread(report, resourcesToIndex);
530            long startTime = System.currentTimeMillis();
531            long waitTime = getOfflineUpdateFrequency() / 2;
532            if (LOG.isDebugEnabled()) {
533                LOG.debug(
534                    Messages.get().getBundle().key(
535                        Messages.LOG_OI_UPDATE_START_1,
536                        Integer.valueOf(resourcesToIndex.size())));
537            }
538
539            m_isUpdating = true;
540            thread.start();
541
542            do {
543                try {
544                    // wait half the time of the offline index frequency for the thread to finish
545                    thread.join(waitTime);
546                } catch (InterruptedException e) {
547                    // continue
548                    LOG.info(e.getLocalizedMessage(), e);
549                }
550                if (thread.isAlive()) {
551                    LOG.warn(
552                        Messages.get().getBundle().key(
553                            Messages.LOG_OI_UPDATE_LONG_2,
554                            Integer.valueOf(resourcesToIndex.size()),
555                            Long.valueOf(System.currentTimeMillis() - startTime)));
556                }
557            } while (thread.isAlive());
558            m_isUpdating = false;
559
560            if (LOG.isDebugEnabled()) {
561                LOG.debug(
562                    Messages.get().getBundle().key(
563                        Messages.LOG_OI_UPDATE_FINISH_2,
564                        Integer.valueOf(resourcesToIndex.size()),
565                        Long.valueOf(System.currentTimeMillis() - startTime)));
566            }
567        }
568
569        /**
570         * Helper method which changes the states of resources which are to be indexed but have the wrong path to 'deleted'.
571         * This is needed to deal with moved resources, since the documents with the old paths must be removed from the index,
572         *
573         * @param resourcesToIndex the resources to index
574         *
575         * @return the resources to index, but resource states are set to 'deleted' for resources with outdated paths
576         */
577        private List<CmsPublishedResource> changeStateOfMoveOriginsToDeleted(
578            List<CmsPublishedResource> resourcesToIndex) {
579
580            Map<CmsUUID, String> lastValidPaths = new HashMap<CmsUUID, String>();
581            for (CmsPublishedResource resource : resourcesToIndex) {
582                if (resource.getState().isDeleted()) {
583                    // we don't want the last path to be from a deleted resource
584                    continue;
585                }
586                lastValidPaths.put(resource.getStructureId(), resource.getRootPath());
587            }
588            List<CmsPublishedResource> result = new ArrayList<CmsPublishedResource>();
589            for (CmsPublishedResource resource : resourcesToIndex) {
590                if (resource.getState().isDeleted()) {
591                    result.add(resource);
592                    continue;
593                }
594                String lastValidPath = lastValidPaths.get(resource.getStructureId());
595                if (resource.getRootPath().equals(lastValidPath) || resource.getStructureId().isNullUUID()) {
596                    result.add(resource);
597                } else {
598                    result.add(
599                        new CmsPublishedResource(
600                            resource.getStructureId(),
601                            resource.getResourceId(),
602                            resource.getPublishTag(),
603                            resource.getRootPath(),
604                            resource.getType(),
605                            resource.isFolder(),
606                            CmsResource.STATE_DELETED, // make sure index entry with outdated path is deleted
607                            resource.getSiblingCount()));
608                }
609            }
610            return result;
611        }
612    }
613
614    /**
615     * An offline index worker Thread runs each time for every offline index update action.<p>
616     *
617     * This was decoupled from the main {@link CmsSearchOfflineIndexThread} in order to avoid
618     * problems if a single operation "hangs" the Tread.<p>
619     */
620    protected class CmsSearchOfflineIndexWorkThread extends Thread {
621
622        /** The report to write the index information to. */
623        I_CmsReport m_report;
624
625        /** The list of {@link CmsPublishedResource} objects to index. */
626        List<CmsPublishedResource> m_resourcesToIndex;
627
628        /**
629         * Updates the offline search indexes for the given list of resources.<p>
630         *
631         * @param report the report to write the index information to
632         * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index
633         */
634        protected CmsSearchOfflineIndexWorkThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) {
635
636            super("OpenCms: Offline Search Index Worker");
637            m_report = report;
638            m_resourcesToIndex = resourcesToIndex;
639        }
640
641        /**
642         * @see java.lang.Thread#run()
643         */
644        @Override
645        public void run() {
646
647            updateIndexOffline(m_report, m_resourcesToIndex);
648            if (m_offlineIndexThread != null) {
649                m_offlineIndexThread.getWaitHandle().release();
650            }
651        }
652    }
653
654    /** This needs to be a fair lock to preserve order of threads accessing the search manager. */
655    private static final ReentrantLock SEARCH_MANAGER_LOCK = new ReentrantLock(true);
656
657    /** The default value used for generating search result excerpts (1024 chars). */
658    public static final int DEFAULT_EXCERPT_LENGTH = 1024;
659
660    /** The default value used for keeping the extraction results in the cache (672 hours = 4 weeks). */
661    public static final float DEFAULT_EXTRACTION_CACHE_MAX_AGE = 672.0f;
662
663    /** Default for the maximum number of modifications before a commit in the search index is triggered (500). */
664    public static final int DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT = 500;
665
666    /** The default update frequency for offline indexes (15000 msec = 15 sec). */
667    public static final int DEFAULT_OFFLINE_UPDATE_FREQNENCY = 15000;
668
669    /** The default maximal wait time for re-indexing after editing a content. */
670    public static final int DEFAULT_MAX_INDEX_WAITTIME = 30000;
671
672    /** The default timeout value used for generating a document for the search index (60000 msec = 1 min). */
673    public static final int DEFAULT_TIMEOUT = 60000;
674
675    /** Scheduler parameter: Update only a specified list of indexes. */
676    public static final String JOB_PARAM_INDEXLIST = "indexList";
677
678    /** Scheduler parameter: Write the output of the update to the logfile. */
679    public static final String JOB_PARAM_WRITELOG = "writeLog";
680
681    /** Prefix for Lucene default analyzers package (<code>org.apache.lucene.analysis.</code>). */
682    public static final String LUCENE_ANALYZER = "org.apache.lucene.analysis.core.";
683
684    /** The log object for this class. */
685    protected static final Log LOG = CmsLog.getLog(CmsSearchManager.class);
686
687    /** List of resource types which represent groups of elements. */
688    private static final String[] groupTypes = {
689        CmsResourceTypeXmlContainerPage.MODEL_GROUP_TYPE_NAME,
690        CmsResourceTypeXmlContainerPage.GROUP_CONTAINER_TYPE_NAME,
691        CmsResourceTypeXmlContainerPage.INHERIT_CONTAINER_TYPE_NAME};
692
    /** The administrator OpenCms user context to access OpenCms VFS resources. */
    protected CmsObject m_adminCms;

    /** The list of indexes that are configured for offline index mode. */
    protected List<I_CmsSearchIndex> m_offlineIndexes;

    /** The thread used for offline indexing. */
    protected CmsSearchOfflineIndexThread m_offlineIndexThread;

    /** Configured analyzers for languages using &lt;analyzer&gt;, keyed by locale. */
    private HashMap<Locale, CmsSearchAnalyzer> m_analyzers;

    /** Stores the offline update frequency while indexing is paused. */
    private long m_configuredOfflineIndexingFrequency;

    /** The Solr core container. */
    private CoreContainer m_coreContainer;

    /** The list of document factory configurations. */
    private List<CmsSearchDocumentType> m_documentTypeConfigs;

    /** A map of document factories keyed first by their name and then by their extraction keys. */
    private Map<String, Map<String, I_CmsDocumentFactory>> m_documentTypes;

    /** The set of all globally available extraction keys for document factories. */
    private Set<String> m_extractionKeys;

    /** The max age for extraction results to remain in the cache (in hours). */
    private float m_extractionCacheMaxAge;

    /** The cache for the extraction results. */
    private CmsExtractionResultCache m_extractionResultCache;

    /** Contains the available field configurations, keyed by their name. */
    private Map<String, I_CmsSearchFieldConfiguration> m_fieldConfigurations;

    /** The force unlock type. */
    private CmsSearchForceUnlockMode m_forceUnlockMode;

    /** The class used to highlight the search terms in the excerpt of a search result. */
    private I_CmsTermHighlighter m_highlighter;

    /** A list of search indexes. */
    private List<I_CmsSearchIndex> m_indexes;

    /** Seconds to wait for an index lock. */
    private int m_indexLockMaxWaitSeconds = 10;

    /** Configured index sources, keyed by their name. */
    private Map<String, CmsSearchIndexSource> m_indexSources;

    /** The max. char. length of the excerpt in the search result. */
    private int m_maxExcerptLength;

    /** The maximum number of modifications before a commit in the search index is triggered. */
    private int m_maxModificationsBeforeCommit;

    /** The offline index search handler. */
    private CmsSearchOfflineHandler m_offlineHandler;

    /** The update frequency of the offline indexer in milliseconds. */
    private long m_offlineUpdateFrequency;

    /** The maximal time to wait for re-indexing after a content is edited (in milliseconds). */
    private long m_maxIndexWaitTime;

    /** Path to index files below WEB-INF/. */
    private String m_path;

    /** The Solr configuration. */
    private CmsSolrConfiguration m_solrConfig;

    /** Timeout for abandoning indexing thread. */
    private long m_timeout;

    /** The ids of the currently registered offline indexing pause requests. */
    private final Set<CmsUUID> m_pauseRequests = new HashSet<>();
770
771    /**
772     * Default constructor when called as cron job.<p>
773     */
774    public CmsSearchManager() {
775
776        m_documentTypes = new HashMap<String, Map<String, I_CmsDocumentFactory>>();
777        m_extractionKeys = new HashSet<String>();
778        m_documentTypeConfigs = new ArrayList<CmsSearchDocumentType>();
779        m_analyzers = new HashMap<Locale, CmsSearchAnalyzer>();
780        m_indexes = new ArrayList<I_CmsSearchIndex>();
781        m_indexSources = new TreeMap<String, CmsSearchIndexSource>();
782        m_offlineHandler = new CmsSearchOfflineHandler();
783        m_extractionCacheMaxAge = DEFAULT_EXTRACTION_CACHE_MAX_AGE;
784        m_maxExcerptLength = DEFAULT_EXCERPT_LENGTH;
785        m_offlineUpdateFrequency = DEFAULT_OFFLINE_UPDATE_FREQNENCY;
786        m_maxIndexWaitTime = DEFAULT_MAX_INDEX_WAITTIME;
787        m_maxModificationsBeforeCommit = DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT;
788
789        m_fieldConfigurations = new HashMap<String, I_CmsSearchFieldConfiguration>();
790        // make sure we have a "standard" field configuration
791        addFieldConfiguration(CmsLuceneFieldConfiguration.DEFAULT_STANDARD);
792
793        if (CmsLog.INIT.isInfoEnabled()) {
794            CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_START_SEARCH_CONFIG_0));
795        }
796    }
797
798    /**
799     * Returns an analyzer for the given class name.<p>
800     *
801     * @param className the class name of the analyzer
802     *
803     * @return the appropriate lucene analyzer
804     *
805     * @throws Exception if something goes wrong
806     */
807    public static Analyzer getAnalyzer(String className) throws Exception {
808
809        Analyzer analyzer = null;
810        Class<?> analyzerClass;
811        try {
812            analyzerClass = Class.forName(className);
813        } catch (ClassNotFoundException e) {
814            // allow Lucene standard classes to be written in a short form
815            analyzerClass = Class.forName(LUCENE_ANALYZER + className);
816        }
817
818        // since Lucene 3.0 most analyzers need a "version" parameter and don't support an empty constructor
819        if (StandardAnalyzer.class.equals(analyzerClass)) {
820            // the Lucene standard analyzer is used - but without any stopwords.
821            analyzer = new StandardAnalyzer(new CharArraySet(0, false));
822        } else {
823            analyzer = (Analyzer)analyzerClass.newInstance();
824        }
825        return analyzer;
826    }
827
828    /**
829     * Returns the Solr index configured with the parameters name.
830     * The parameters must contain a key/value pair with an existing
831     * Solr index, otherwise <code>null</code> is returned.<p>
832     *
833     * @param cms the current context
834     * @param params the parameter map
835     *
836     * @return the best matching Solr index
837     */
838    public static final CmsSolrIndex getIndexSolr(CmsObject cms, Map<String, String[]> params) {
839
840        String indexName = null;
841        CmsSolrIndex index = null;
842        // try to get the index name from the parameters: 'core' or 'index'
843        if (params != null) {
844            indexName = params.get(OpenCmsSolrHandler.PARAM_CORE) != null
845            ? params.get(OpenCmsSolrHandler.PARAM_CORE)[0]
846            : (params.get(OpenCmsSolrHandler.PARAM_INDEX) != null
847            ? params.get(OpenCmsSolrHandler.PARAM_INDEX)[0]
848            : null);
849        }
850        if (indexName == null) {
851            // if no parameter is specified try to use the default online/offline indexes by context
852            indexName = cms.getRequestContext().getCurrentProject().isOnlineProject()
853            ? CmsSolrIndex.DEFAULT_INDEX_NAME_ONLINE
854            : CmsSolrIndex.DEFAULT_INDEX_NAME_OFFLINE;
855        }
856        // try to get the index
857        index = OpenCms.getSearchManager().getIndexSolr(indexName);
858        if (index == null) {
859            // if there is exactly one index, a missing core / index parameter doesn't matter, since there is no choice.
860            List<CmsSolrIndex> solrs = OpenCms.getSearchManager().getAllSolrIndexes();
861            if ((solrs != null) && !solrs.isEmpty() && (solrs.size() == 1)) {
862                index = solrs.get(0);
863            }
864        }
865        return index;
866    }
867
868    /**
869     * Returns <code>true</code> if the index for the given name is a Lucene index, <code>false</code> otherwise.<p>
870     *
871     * @param indexName the name of the index to check
872     *
873     * @return <code>true</code> if the index for the given name is a Lucene index
874     */
875    public static boolean isLuceneIndex(String indexName) {
876
877        I_CmsSearchIndex i = OpenCms.getSearchManager().getIndex(indexName);
878        return (i instanceof CmsSearchIndex) && (!(i instanceof CmsSolrIndex));
879    }
880
881    /**
882     * Adds an analyzer.<p>
883     *
884     * @param analyzer an analyzer
885     */
886    public void addAnalyzer(CmsSearchAnalyzer analyzer) {
887
888        m_analyzers.put(analyzer.getLocale(), analyzer);
889
890        if (CmsLog.INIT.isInfoEnabled()) {
891            CmsLog.INIT.info(
892                Messages.get().getBundle().key(
893                    Messages.INIT_ADD_ANALYZER_2,
894                    analyzer.getLocale(),
895                    analyzer.getClassName()));
896        }
897    }
898
899    /**
900     * Adds a document type.<p>
901     *
902     * @param documentType a document type
903     */
904    public void addDocumentTypeConfig(CmsSearchDocumentType documentType) {
905
906        m_documentTypeConfigs.add(documentType);
907
908        if (CmsLog.INIT.isInfoEnabled()) {
909            CmsLog.INIT.info(
910                Messages.get().getBundle().key(
911                    Messages.INIT_SEARCH_DOC_TYPES_2,
912                    documentType.getName(),
913                    documentType.getClassName()));
914        }
915    }
916
917    /**
918     * Adds a search field configuration to the search manager.<p>
919     *
920     * @param fieldConfiguration the search field configuration to add
921     */
922    public void addFieldConfiguration(I_CmsSearchFieldConfiguration fieldConfiguration) {
923
924        m_fieldConfigurations.put(fieldConfiguration.getName(), fieldConfiguration);
925    }
926
927    /**
928     * Adds a search index to the configuration.<p>
929     *
930     * @param searchIndex the search index to add
931     */
932    public void addSearchIndex(I_CmsSearchIndex searchIndex) {
933
934        if (!searchIndex.isInitialized()) {
935            if (OpenCms.getRunLevel() > OpenCms.RUNLEVEL_2_INITIALIZING) {
936                try {
937                    searchIndex.initialize();
938                } catch (CmsException e) {
939                    // should never happen
940                    LOG.error(e.getMessage(), e);
941                }
942            }
943        }
944
945        // name: not null or emtpy and unique
946        String name = searchIndex.getName();
947        if (CmsStringUtil.isEmptyOrWhitespaceOnly(name)) {
948            throw new CmsIllegalArgumentException(
949                Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_MISSING_NAME_0));
950        }
951        if (m_indexSources.keySet().contains(name)) {
952            throw new CmsIllegalArgumentException(
953                Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_INVALID_NAME_1, name));
954        }
955
956        m_indexes.add(searchIndex);
957        if (m_adminCms != null) {
958            initOfflineIndexes();
959        }
960
961        if (CmsLog.INIT.isInfoEnabled()) {
962            CmsLog.INIT.info(
963                Messages.get().getBundle().key(
964                    Messages.INIT_ADD_SEARCH_INDEX_2,
965                    searchIndex.getName(),
966                    searchIndex.getProject()));
967        }
968    }
969
970    /**
971     * Adds a search index source configuration.<p>
972     *
973     * @param searchIndexSource a search index source configuration
974     */
975    public void addSearchIndexSource(CmsSearchIndexSource searchIndexSource) {
976
977        m_indexSources.put(searchIndexSource.getName(), searchIndexSource);
978
979        if (CmsLog.INIT.isInfoEnabled()) {
980            CmsLog.INIT.info(
981                Messages.get().getBundle().key(
982                    Messages.INIT_SEARCH_INDEX_SOURCE_2,
983                    searchIndexSource.getName(),
984                    searchIndexSource.getIndexerClassName()));
985        }
986    }
987
988    /**
989     * Implements the event listener of this class.<p>
990     *
991     * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent)
992     */
993    public void cmsEvent(CmsEvent event) {
994
995        switch (event.getType()) {
996            case I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES:
997                List<String> indexNames = null;
998                if ((event.getData() != null)
999                    && CmsStringUtil.isNotEmptyOrWhitespaceOnly(
1000                        (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES))) {
1001                    indexNames = CmsStringUtil.splitAsList(
1002                        (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES),
1003                        ",",
1004                        true);
1005                }
1006                try {
1007                    if (LOG.isDebugEnabled()) {
1008                        LOG.debug(
1009                            Messages.get().getBundle().key(
1010                                Messages.LOG_EVENT_REBUILD_SEARCHINDEX_1,
1011                                indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")),
1012                            new Exception());
1013                    }
1014                    if (indexNames == null) {
1015                        rebuildAllIndexes(getEventReport(event));
1016                    } else {
1017                        rebuildIndexes(indexNames, getEventReport(event));
1018                    }
1019                } catch (CmsException e) {
1020                    if (LOG.isErrorEnabled()) {
1021                        LOG.error(
1022                            Messages.get().getBundle().key(
1023                                Messages.ERR_EVENT_REBUILD_SEARCHINDEX_1,
1024                                indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")),
1025                            e);
1026                    }
1027                }
1028                break;
1029            case I_CmsEventListener.EVENT_CLEAR_CACHES:
1030                if (LOG.isDebugEnabled()) {
1031                    LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_CLEAR_CACHES_0), new Exception());
1032                }
1033                break;
1034            case I_CmsEventListener.EVENT_PUBLISH_PROJECT:
1035                // event data contains a list of the published resources
1036                CmsUUID publishHistoryId = new CmsUUID((String)event.getData().get(I_CmsEventListener.KEY_PUBLISHID));
1037                if (LOG.isDebugEnabled()) {
1038                    LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_PUBLISH_PROJECT_1, publishHistoryId));
1039                }
1040                updateAllIndexes(m_adminCms, publishHistoryId, getEventReport(event));
1041                if (LOG.isDebugEnabled()) {
1042                    LOG.debug(
1043                        Messages.get().getBundle().key(
1044                            Messages.LOG_EVENT_PUBLISH_PROJECT_FINISHED_1,
1045                            publishHistoryId));
1046                }
1047                break;
1048            case I_CmsEventListener.EVENT_REINDEX_OFFLINE:
1049            case I_CmsEventListener.EVENT_REINDEX_ONLINE:
1050                boolean isOnline = I_CmsEventListener.EVENT_REINDEX_ONLINE == event.getType();
1051                Map<String, Object> eventData = event.getData();
1052                CmsUUID userId = (CmsUUID)eventData.get(I_CmsEventListener.KEY_USER_ID);
1053                CmsUser user = null;
1054                if (userId != null) {
1055                    try {
1056                        user = m_adminCms.readUser(userId);
1057                    } catch (Throwable t) {
1058                        // should not normally happen
1059                        LOG.debug(t.getMessage(), t);
1060                    }
1061                }
1062                try {
1063                    SEARCH_MANAGER_LOCK.lock();
1064                    if (LOG.isDebugEnabled()) {
1065                        LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_REINDEX_STARTED_0));
1066                    }
1067                    CmsObject cms = m_adminCms;
1068                    if (!isOnline) {
1069                        OpenCms.initCmsObject(m_adminCms);
1070                        cms.getRequestContext().setCurrentProject(
1071                            cms.readProject((CmsUUID)eventData.get(I_CmsEventListener.KEY_PROJECTID)));
1072                    }
1073                    @SuppressWarnings("unchecked")
1074                    List<CmsResource> resources = (List<CmsResource>)eventData.get(I_CmsEventListener.KEY_RESOURCES);
1075                    I_CmsReport report = (I_CmsReport)eventData.get(I_CmsEventListener.KEY_REPORT);
1076                    List<CmsResource> resourcesToIndex = new ArrayList<>();
1077                    for (CmsResource res : resources) {
1078                        if (res.isFile()) {
1079                            resourcesToIndex.add(res);
1080                        } else {
1081                            try {
1082                                resourcesToIndex.addAll(
1083                                    cms.readResources(res, CmsResourceFilter.IGNORE_EXPIRATION, true));
1084                            } catch (CmsException e) {
1085                                LOG.error(e, e);
1086                            }
1087                        }
1088                    }
1089                    // we reindex and prevent using cached results
1090                    cleanExtractionCache();
1091                    List<CmsPublishedResource> publishedResourcesToIndex = resourcesToIndex.stream().map(
1092                        res -> new CmsPublishedResource(res)).collect(Collectors.toList());
1093                    if (Boolean.TRUE.equals(eventData.get(I_CmsEventListener.KEY_REINDEX_RELATED))) {
1094                        addAdditionallyAffectedResources(cms, publishedResourcesToIndex);
1095                    }
1096                    if (isOnline) {
1097                        updateAllIndexes(
1098                            m_adminCms,
1099                            publishedResourcesToIndex,
1100                            new CmsShellLogReport(CmsLocaleManager.MASTER_LOCALE));
1101                    } else {
1102                        updateIndexOffline(report, publishedResourcesToIndex);
1103                    }
1104                    cms = null;
1105                    SEARCH_MANAGER_LOCK.unlock();
1106                    if (null != user) {
1107                        Locale l = OpenCms.getWorkplaceManager().getWorkplaceLocale(user);
1108                        OpenCms.getSessionManager().sendBroadcast(
1109                            null,
1110                            Messages.get().getBundle(l).key(Messages.GUI_REINDEXING_SUCCESS_0),
1111                            user,
1112                            ContentMode.html);
1113                    }
1114                    if (LOG.isDebugEnabled()) {
1115                        LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_REINDEX_FINISHED_0));
1116                    }
1117
1118                } catch (Throwable e) {
1119                    if (SEARCH_MANAGER_LOCK.isHeldByCurrentThread()) {
1120                        SEARCH_MANAGER_LOCK.unlock();
1121                    }
1122                    if (null != user) {
1123                        Locale l = OpenCms.getWorkplaceManager().getWorkplaceLocale(user);
1124                        OpenCms.getSessionManager().sendBroadcast(
1125                            null,
1126                            Messages.get().getBundle(l).key(Messages.GUI_REINDEXING_FAILED_0),
1127                            user,
1128                            ContentMode.html);
1129                    }
1130                    if (LOG.isDebugEnabled()) {
1131                        LOG.error(
1132                            Messages.get().getBundle().key(Messages.ERR_EVENT_REINDEX_FAILED_1, event.getData()),
1133                            e);
1134                    } else if (LOG.isErrorEnabled()) {
1135                        LOG.error(Messages.get().getBundle().key(Messages.ERR_EVENT_REINDEX_FAILED_1, event.getData()));
1136                    }
1137                }
1138                break;
1139            default:
1140                // no operation
1141        }
1142    }
1143
1144    /**
1145     * Returns all Solr index.<p>
1146     *
1147     * @return all Solr indexes
1148     */
1149    public List<CmsSolrIndex> getAllSolrIndexes() {
1150
1151        List<CmsSolrIndex> result = new ArrayList<CmsSolrIndex>();
1152        for (String indexName : getIndexNames()) {
1153            CmsSolrIndex index = getIndexSolr(indexName);
1154            if (index != null) {
1155                result.add(index);
1156            }
1157        }
1158        return result;
1159    }
1160
1161    /**
1162     * Returns an analyzer for the given language.<p>
1163     *
1164     * The analyzer is selected according to the analyzer configuration.<p>
1165     *
1166     * @param locale the locale to get the analyzer for
1167     * @return the appropriate lucene analyzer
1168     *
1169     * @throws CmsSearchException if something goes wrong
1170     */
1171    public Analyzer getAnalyzer(Locale locale) throws CmsSearchException {
1172
1173        Analyzer analyzer = null;
1174        String className = null;
1175
1176        CmsSearchAnalyzer analyzerConf = m_analyzers.get(locale);
1177        if (analyzerConf == null) {
1178            throw new CmsSearchException(Messages.get().container(Messages.ERR_ANALYZER_NOT_FOUND_1, locale));
1179        }
1180
1181        try {
1182            analyzer = getAnalyzer(analyzerConf.getClassName());
1183        } catch (Exception e) {
1184            throw new CmsSearchException(Messages.get().container(Messages.ERR_LOAD_ANALYZER_1, className), e);
1185        }
1186
1187        return analyzer;
1188    }
1189
1190    /**
1191     * Returns an unmodifiable view of the map that contains the {@link CmsSearchAnalyzer} list.<p>
1192     *
1193     * The keys in the map are {@link Locale} objects, and the values are {@link CmsSearchAnalyzer} objects.
1194     *
1195     * @return an unmodifiable view of the Analyzers Map
1196     */
1197    public Map<Locale, CmsSearchAnalyzer> getAnalyzers() {
1198
1199        return Collections.unmodifiableMap(m_analyzers);
1200    }
1201
1202    /**
1203     * Returns the search analyzer for the given locale.<p>
1204     *
1205     * @param locale the locale to get the analyzer for
1206     *
1207     * @return the search analyzer for the given locale
1208     */
1209    public CmsSearchAnalyzer getCmsSearchAnalyzer(Locale locale) {
1210
1211        return m_analyzers.get(locale);
1212    }
1213
1214    /**
1215     * Returns the name of the directory below WEB-INF/ where the search indexes are stored.<p>
1216     *
1217     * @return the name of the directory below WEB-INF/ where the search indexes are stored
1218     */
1219    public String getDirectory() {
1220
1221        return m_path;
1222    }
1223
1224    /**
1225     * Returns the configured Solr home directory <code>null</code> if not set.<p>
1226     *
1227     * @return the Solr home directory
1228     */
1229    public String getDirectorySolr() {
1230
1231        return m_solrConfig != null ? m_solrConfig.getHome() : null;
1232    }
1233
1234    /**
1235     * Returns the document factory configured under the provided name.
1236     * @param docTypeName the name of the document type.
1237     * @return the factory for the provided name.
1238     */
1239    public I_CmsDocumentFactory getDocumentFactoryForName(String docTypeName) {
1240
1241        Map<String, I_CmsDocumentFactory> factoryMap = m_documentTypes.get(docTypeName);
1242        if (factoryMap != null) {
1243            Iterator<I_CmsDocumentFactory> factoryIt = factoryMap.values().iterator();
1244            if (factoryIt.hasNext()) {
1245                return factoryMap.values().iterator().next();
1246            }
1247        }
1248        return null;
1249    }
1250
1251    /**
1252     * Returns a document type config.<p>
1253     *
1254     * @param name the name of the document type config
1255     * @return the document type config.
1256     */
1257    public CmsSearchDocumentType getDocumentTypeConfig(String name) {
1258
1259        // this is really used only for the search manager GUI,
1260        // so performance is not an issue and no lookup map is generated
1261        for (int i = 0; i < m_documentTypeConfigs.size(); i++) {
1262            CmsSearchDocumentType type = m_documentTypeConfigs.get(i);
1263            if (type.getName().equals(name)) {
1264                return type;
1265            }
1266        }
1267        return null;
1268    }
1269
1270    /**
1271     * Returns an unmodifiable view (read-only) of the DocumentTypeConfigs Map.<p>
1272     *
1273     * @return an unmodifiable view (read-only) of the DocumentTypeConfigs Map
1274     */
1275    public List<CmsSearchDocumentType> getDocumentTypeConfigs() {
1276
1277        return Collections.unmodifiableList(m_documentTypeConfigs);
1278    }
1279
1280    /**
1281     * Returns the document type keys used to specify the correct document factory.
1282     *
1283     * @see #getDocumentTypeKeys(String, String) for detailed information on the returned keys.
1284     *
1285     * @param resource the resource to generate the list of document type keys for.
1286     * @return the document type keys.
1287     */
1288    public List<String> getDocumentTypeKeys(CmsResource resource) {
1289
1290        // first get the MIME type of the resource
1291        String mimeType = OpenCms.getResourceManager().getMimeType(resource.getRootPath(), null, "unknown");
1292        String resourceType = null;
1293        try {
1294            resourceType = OpenCms.getResourceManager().getResourceType(resource.getTypeId()).getTypeName();
1295        } catch (CmsLoaderException e) {
1296            // ignore, unknown resource type, resource can not be indexed
1297            LOG.info(e.getLocalizedMessage(), e);
1298        }
1299        return getDocumentTypeKeys(resourceType, mimeType);
1300    }
1301
1302    /**
1303     * Returns the document type keys used to specify the correct document factory.
1304     * One resource typically has more than one key. The document factories are matched
1305     * in the provided order and the first matching factory is used.
1306     *
1307     * The keys for type name "typename" and mimetype "mimetype" would be a subset of:
1308     * <ul>
1309     *  <li><code>typename_mimetype</code></li>
1310     *  <li><code>typename</code></li>
1311     *  <li>if <code>typename</code> is a sub-type of <code>containerpage</code>
1312     *      <ul>
1313     *          <li><code>containerpage_mimetype</code></li>
1314     *          <li><code>containerpage</code></li>
1315     *      </ul>
1316     *  </li>
1317     *  <li>if <code>typename</code> is a sub-type of <code>xmlcontent</code>
1318     *      <ul>
1319     *          <li><code>xmlcontent_mimetype</code></li>
1320     *          <li><code>xmlcontent</code></li>
1321     *      </ul>
1322     *  </li>
1323     *  <li><code>__unconfigured___mimetype</code></li>
1324     *  <li><code>__unconfigured__</code></li>
1325     *  <li><code>__all___mimetype</code></li>
1326     *  <li><code>__all__</code></li>
1327     * <ul>
1328     * Note that all keys except the "__all__"-keys are only added as long as globally
1329     * there is no matching factory for the key.
1330     * This in particular means that a factory matching "typename" will never be used
1331     * if you have a factory for "typename__mimetype" - even if this is not configured
1332     * for the used index source. Eventually, the content will not be indexed in such cases.
1333     * @param resourceType the resource type to generate the list of document type keys for.
1334     * @param mimeType the mime type to generate the list of document type keys for.
1335     * @return the document type keys.
1336     */
1337    public List<String> getDocumentTypeKeys(String resourceType, String mimeType) {
1338
1339        List<String> result = new ArrayList<>(8);
1340        if (null != resourceType) {
1341            String currentKey = A_CmsVfsDocument.getDocumentKey(resourceType, mimeType);
1342            result.add(currentKey);
1343            if (!m_extractionKeys.contains(currentKey)) {
1344                currentKey = A_CmsVfsDocument.getDocumentKey(resourceType, null);
1345                result.add(currentKey);
1346                if (!m_extractionKeys.contains(currentKey)) {
1347                    boolean hasGlobalMatch = false;
1348                    try {
1349                        String containerpageTypeName = CmsResourceTypeXmlContainerPage.getStaticTypeName();
1350                        I_CmsResourceType type = OpenCms.getResourceManager().getResourceType(resourceType);
1351                        if (!resourceType.equals(containerpageTypeName)) {
1352                            if (type instanceof CmsResourceTypeXmlContainerPage) {
1353                                if (!resourceType.equals(CmsResourceTypeXmlContainerPage.getStaticTypeName())) {
1354                                    currentKey = A_CmsVfsDocument.getDocumentKey(containerpageTypeName, mimeType);
1355                                    result.add(currentKey);
1356                                    hasGlobalMatch = m_extractionKeys.contains(currentKey);
1357                                    if (!hasGlobalMatch) {
1358                                        currentKey = A_CmsVfsDocument.getDocumentKey(containerpageTypeName, null);
1359                                        result.add(currentKey);
1360                                        hasGlobalMatch = m_extractionKeys.contains(currentKey);
1361                                    }
1362                                }
1363                            }
1364                        }
1365                        String xmlcontentTypeName = CmsResourceTypeXmlContent.getStaticTypeName();
1366                        if (!resourceType.equals(containerpageTypeName)) {
1367                            if (!hasGlobalMatch && (type instanceof CmsResourceTypeXmlContent)) {
1368                                currentKey = A_CmsVfsDocument.getDocumentKey(xmlcontentTypeName, mimeType);
1369                                result.add(currentKey);
1370                                hasGlobalMatch = m_extractionKeys.contains(currentKey);
1371                                if (!hasGlobalMatch) {
1372                                    currentKey = A_CmsVfsDocument.getDocumentKey(xmlcontentTypeName, null);
1373                                    result.add(currentKey);
1374                                    hasGlobalMatch = m_extractionKeys.contains(currentKey);
1375                                }
1376                            }
1377                        }
1378                    } catch (Throwable t) {
1379                        LOG.warn("Could not read type for name \"" + resourceType + "\".", t);
1380                    }
1381                    if (!hasGlobalMatch) {
1382                        result.add(
1383                            A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_UNCONFIGURED_TYPES, mimeType));
1384                        result.add(
1385                            A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_UNCONFIGURED_TYPES, null));
1386                    }
1387                }
1388            }
1389            result.add(A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_TYPES, mimeType));
1390            result.add(A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_TYPES, null));
1391        }
1392        return result;
1393
1394    }
1395
1396    /**
1397     * Returns the map from document type keys to document factories with all entries for the provided document type names.
1398     * @param documentTypeNames list of document type names to generate the map for.
1399     * @return the map from document type keys to document factories.
1400     */
1401    public Map<String, I_CmsDocumentFactory> getDocumentTypeMapForTypeNames(List<String> documentTypeNames) {
1402
1403        Map<String, I_CmsDocumentFactory> result = new LinkedHashMap<>();
1404        if (null != documentTypeNames) {
1405            // Iterate the list in reverse order to prefer factories that are added by document types listed earlier.
1406            ListIterator<String> typesIterator = documentTypeNames.listIterator(documentTypeNames.size());
1407            while (typesIterator.hasPrevious()) {
1408                Map<String, I_CmsDocumentFactory> factories = m_documentTypes.get(typesIterator.previous());
1409                if (null != factories) {
1410                    result.putAll(factories);
1411                }
1412            }
1413        }
1414        return result;
1415    }
1416
1417    /**
1418     * Returns the maximum age a text extraction result is kept in the cache (in hours).<p>
1419     *
1420     * @return the maximum age a text extraction result is kept in the cache (in hours)
1421     */
1422    public float getExtractionCacheMaxAge() {
1423
1424        return m_extractionCacheMaxAge;
1425    }
1426
1427    /**
1428     * Returns the search field configuration with the given name.<p>
1429     *
1430     * In case no configuration is available with the given name, <code>null</code> is returned.<p>
1431     *
1432     * @param name the name to get the search field configuration for
1433     *
1434     * @return the search field configuration with the given name
1435     */
1436    public I_CmsSearchFieldConfiguration getFieldConfiguration(String name) {
1437
1438        return m_fieldConfigurations.get(name);
1439    }
1440
1441    /**
1442     * Returns the unmodifieable List of configured {@link I_CmsSearchFieldConfiguration} entries.<p>
1443     *
1444     * @return the unmodifieable List of configured {@link I_CmsSearchFieldConfiguration} entries
1445     */
1446    public List<I_CmsSearchFieldConfiguration> getFieldConfigurations() {
1447
1448        List<I_CmsSearchFieldConfiguration> result = new ArrayList<I_CmsSearchFieldConfiguration>(
1449            m_fieldConfigurations.values());
1450        Collections.sort(result);
1451        return Collections.unmodifiableList(result);
1452    }
1453
1454    /**
1455     * Returns the Lucene search field configurations only.<p>
1456     *
1457     * @return the Lucene search field configurations
1458     */
1459    public List<CmsLuceneFieldConfiguration> getFieldConfigurationsLucene() {
1460
1461        List<CmsLuceneFieldConfiguration> result = new ArrayList<CmsLuceneFieldConfiguration>();
1462        for (I_CmsSearchFieldConfiguration conf : m_fieldConfigurations.values()) {
1463            if (conf instanceof CmsLuceneFieldConfiguration) {
1464                result.add((CmsLuceneFieldConfiguration)conf);
1465            }
1466        }
1467        Collections.sort(result);
1468        return Collections.unmodifiableList(result);
1469    }
1470
1471    /**
1472     * Returns the Solr search field configurations only.<p>
1473     *
1474     * @return the Solr search field configurations
1475     */
1476    public List<CmsSolrFieldConfiguration> getFieldConfigurationsSolr() {
1477
1478        List<CmsSolrFieldConfiguration> result = new ArrayList<CmsSolrFieldConfiguration>();
1479        for (I_CmsSearchFieldConfiguration conf : m_fieldConfigurations.values()) {
1480            if (conf instanceof CmsSolrFieldConfiguration) {
1481                result.add((CmsSolrFieldConfiguration)conf);
1482            }
1483        }
1484        Collections.sort(result);
1485        return Collections.unmodifiableList(result);
1486    }
1487
1488    /**
1489     * Returns the force unlock mode during indexing.<p>
1490     *
1491     * @return the force unlock mode during indexing
1492     */
1493    public CmsSearchForceUnlockMode getForceunlock() {
1494
1495        return m_forceUnlockMode;
1496    }
1497
1498    /**
1499     * Returns the highlighter.<p>
1500     *
1501     * @return the highlighter
1502     */
1503    public I_CmsTermHighlighter getHighlighter() {
1504
1505        return m_highlighter;
1506    }
1507
1508    /**
1509     * Returns the Lucene search index configured with the given name.<p>
1510     * The index must exist, otherwise <code>null</code> is returned.
1511     *
1512     * @param indexName then name of the requested search index
1513     *
1514     * @return the Lucene search index configured with the given name
1515     */
1516    public I_CmsSearchIndex getIndex(String indexName) {
1517
1518        for (I_CmsSearchIndex index : m_indexes) {
1519            if (indexName.equalsIgnoreCase(index.getName())) {
1520                return index;
1521            }
1522        }
1523        return null;
1524    }
1525
1526    /**
1527     * Returns the seconds to wait for an index lock during an update operation.<p>
1528     *
1529     * @return the seconds to wait for an index lock during an update operation
1530     */
1531    public int getIndexLockMaxWaitSeconds() {
1532
1533        return m_indexLockMaxWaitSeconds;
1534    }
1535
1536    /**
1537     * Returns the names of all configured indexes.<p>
1538     *
1539     * @return list of names
1540     */
1541    public List<String> getIndexNames() {
1542
1543        List<String> indexNames = new ArrayList<String>();
1544        for (int i = 0, n = m_indexes.size(); i < n; i++) {
1545            indexNames.add((m_indexes.get(i)).getName());
1546        }
1547
1548        return indexNames;
1549    }
1550
1551    /**
1552     * Returns the Solr index configured with the given name.<p>
1553     * The index must exist, otherwise <code>null</code> is returned.
1554     *
1555     * @param indexName then name of the requested Solr index
1556     * @return the Solr index configured with the given name
1557     */
1558    public CmsSolrIndex getIndexSolr(String indexName) {
1559
1560        I_CmsSearchIndex index = getIndex(indexName);
1561        if (index instanceof CmsSolrIndex) {
1562            return (CmsSolrIndex)index;
1563        }
1564        return null;
1565    }
1566
1567    /**
1568     * Returns a search index source for a specified source name.<p>
1569     *
1570     * @param sourceName the name of the index source
1571     * @return a search index source
1572     */
1573    public CmsSearchIndexSource getIndexSource(String sourceName) {
1574
1575        return m_indexSources.get(sourceName);
1576    }
1577
1578    /**
1579     * Returns the max. excerpt length.<p>
1580     *
1581     * @return the max excerpt length
1582     */
1583    public int getMaxExcerptLength() {
1584
1585        return m_maxExcerptLength;
1586    }
1587
1588    /**
1589     * Returns the maximal time to wait for re-indexing after a content is edited (in milliseconds).<p>
1590     *
1591     * @return the maximal time to wait for re-indexing after a content is edited (in milliseconds)
1592     */
1593    public long getMaxIndexWaitTime() {
1594
1595        return m_maxIndexWaitTime;
1596    }
1597
1598    /**
1599     * Returns the maximum number of modifications before a commit in the search index is triggered.<p>
1600     *
1601     * @return the maximum number of modifications before a commit in the search index is triggered
1602     */
1603    public int getMaxModificationsBeforeCommit() {
1604
1605        return m_maxModificationsBeforeCommit;
1606    }
1607
1608    /**
1609     * Returns the update frequency of the offline indexer in milliseconds.<p>
1610     *
1611     * @return the update frequency of the offline indexer in milliseconds
1612     */
1613    public long getOfflineUpdateFrequency() {
1614
1615        return m_offlineUpdateFrequency;
1616    }
1617
1618    /**
1619     * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p>
1620     *
1621     * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances
1622     */
1623    public List<I_CmsSearchIndex> getSearchIndexes() {
1624
1625        return Collections.unmodifiableList(m_indexes);
1626    }
1627
1628    /**
1629     * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p>
1630     *
1631     * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances
1632     */
1633    public List<I_CmsSearchIndex> getSearchIndexesAll() {
1634
1635        return Collections.unmodifiableList(m_indexes);
1636    }
1637
1638    /**
1639     * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p>
1640     *
1641     * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances
1642     */
1643    public List<CmsSolrIndex> getSearchIndexesSolr() {
1644
1645        List<CmsSolrIndex> indexes = new ArrayList<CmsSolrIndex>();
1646        for (I_CmsSearchIndex index : m_indexes) {
1647            if (index instanceof CmsSolrIndex) {
1648                indexes.add((CmsSolrIndex)index);
1649            }
1650        }
1651        return Collections.unmodifiableList(indexes);
1652    }
1653
1654    /**
1655     * Returns an unmodifiable view (read-only) of the SearchIndexSources Map.<p>
1656     *
1657     * @return an unmodifiable view (read-only) of the SearchIndexSources Map
1658     */
1659    public Map<String, CmsSearchIndexSource> getSearchIndexSources() {
1660
1661        return Collections.unmodifiableMap(m_indexSources);
1662    }
1663
1664    /**
1665     * Return singleton instance of the OpenCms spellchecker.<p>
1666     *
1667     * @return instance of CmsSolrSpellchecker.
1668     */
1669    public CmsSolrSpellchecker getSolrDictionary() {
1670
1671        // get the core container that contains one core for each configured index
1672        if (m_coreContainer == null) {
1673            m_coreContainer = createCoreContainer();
1674        }
1675        return CmsSolrSpellchecker.getInstance(m_coreContainer);
1676    }
1677
1678    /**
1679     * Returns the Solr configuration.<p>
1680     *
1681     * @return the Solr configuration
1682     */
1683    public CmsSolrConfiguration getSolrServerConfiguration() {
1684
1685        return m_solrConfig;
1686    }
1687
1688    /**
1689     * Returns the timeout to abandon threads indexing a resource.<p>
1690     *
1691     * @return the timeout to abandon threads indexing a resource
1692     */
1693    public long getTimeout() {
1694
1695        return m_timeout;
1696    }
1697
1698    /**
1699     * Initializes the search manager.<p>
1700     *
1701     * @param cms the cms object
1702     *
1703     * @throws CmsRoleViolationException in case the given opencms object does not have <code>{@link CmsRole#WORKPLACE_MANAGER}</code> permissions
1704     */
1705    public void initialize(CmsObject cms) throws CmsRoleViolationException {
1706
1707        OpenCms.getRoleManager().checkRole(cms, CmsRole.WORKPLACE_MANAGER);
1708        try {
1709            // store the Admin cms to index Cms resources
1710            m_adminCms = OpenCms.initCmsObject(cms);
1711        } catch (CmsException e) {
1712            // this should never happen
1713            LOG.error(e.getLocalizedMessage(), e);
1714        }
1715        // make sure the site root is the root site
1716        m_adminCms.getRequestContext().setSiteRoot("/");
1717
1718        // create the extraction result cache
1719        m_extractionResultCache = new CmsExtractionResultCache(
1720            OpenCms.getSystemInfo().getAbsoluteRfsPathRelativeToWebInf(getDirectory()),
1721            "/extractCache");
1722        initializeFieldConfigurations();
1723        initializeIndexes();
1724        initOfflineIndexes();
1725
1726        // register this object as event listener
1727        OpenCms.addCmsEventListener(
1728            this,
1729            new int[] {
1730                I_CmsEventListener.EVENT_CLEAR_CACHES,
1731                I_CmsEventListener.EVENT_PUBLISH_PROJECT,
1732                I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES,
1733                I_CmsEventListener.EVENT_REINDEX_OFFLINE,
1734                I_CmsEventListener.EVENT_REINDEX_ONLINE});
1735    }
1736
1737    /**
1738     * Calls {@link I_CmsSearchFieldConfiguration#init()} for all registered field configurations.
1739     */
1740    public void initializeFieldConfigurations() {
1741
1742        for (I_CmsSearchFieldConfiguration config : m_fieldConfigurations.values()) {
1743            config.init();
1744        }
1745
1746    }
1747
1748    /**
1749     * Initializes all configured document types, index sources and search indexes.<p>
1750     *
1751     * This methods needs to be called if after a change in the index configuration has been made.
1752     */
1753    public void initializeIndexes() {
1754
1755        initAvailableDocumentTypes();
1756        initIndexSources();
1757        initSearchIndexes();
1758    }
1759
1760    /**
1761     * Initialize the offline index handler, require after an offline index has been added.<p>
1762     */
1763    public void initOfflineIndexes() {
1764
1765        // check which indexes are configured as offline indexes
1766        List<I_CmsSearchIndex> offlineIndexes = new ArrayList<I_CmsSearchIndex>();
1767        Iterator<I_CmsSearchIndex> i = m_indexes.iterator();
1768        while (i.hasNext()) {
1769            I_CmsSearchIndex index = i.next();
1770            if (I_CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) {
1771                // this is an offline index
1772                offlineIndexes.add(index);
1773            }
1774        }
1775        m_offlineIndexes = offlineIndexes;
1776        m_offlineHandler.initialize();
1777
1778    }
1779
1780    /**
1781     * Initializes the spell check index.<p>
1782     *
1783     * @param adminCms the ROOT_ADMIN cms context
1784     */
1785    public void initSpellcheckIndex(CmsObject adminCms) {
1786
1787        if (CmsSpellcheckDictionaryIndexer.updatingIndexNecessesary(adminCms)) {
1788            final CmsSolrSpellchecker spellchecker = OpenCms.getSearchManager().getSolrDictionary();
1789            if (spellchecker != null) {
1790
1791                Runnable initRunner = new Runnable() {
1792
1793                    public void run() {
1794
1795                        try {
1796                            spellchecker.parseAndAddDictionaries(adminCms);
1797                        } catch (CmsRoleViolationException e) {
1798                            LOG.error(e.getLocalizedMessage(), e);
1799                        }
1800                    }
1801                };
1802                new Thread(initRunner).start();
1803            }
1804        }
1805    }
1806
1807    /**
1808     * Returns if the offline indexing is paused.<p>
1809     *
1810     * @return <code>true</code> if the offline indexing is paused
1811     */
1812    public boolean isOfflineIndexingPaused() {
1813
1814        return m_offlineUpdateFrequency == Long.MAX_VALUE;
1815    }
1816
1817    /**
1818     * Updates the indexes from as a scheduled job.<p>
1819     *
1820     * @param cms the OpenCms user context to use when reading resources from the VFS
1821     * @param parameters the parameters for the scheduled job
1822     *
1823     * @throws Exception if something goes wrong
1824     *
1825     * @return the String to write in the scheduler log
1826     *
1827     * @see org.opencms.scheduler.I_CmsScheduledJob#launch(CmsObject, Map)
1828     */
1829    public String launch(CmsObject cms, Map<String, String> parameters) throws Exception {
1830
1831        CmsSearchManager manager = OpenCms.getSearchManager();
1832
1833        I_CmsReport report = null;
1834        boolean writeLog = Boolean.valueOf(parameters.get(JOB_PARAM_WRITELOG)).booleanValue();
1835
1836        if (writeLog) {
1837            report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class);
1838        }
1839
1840        List<String> updateList = null;
1841        String indexList = parameters.get(JOB_PARAM_INDEXLIST);
1842        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(indexList)) {
1843            // index list has been provided as job parameter
1844            updateList = new ArrayList<String>();
1845            String[] indexNames = CmsStringUtil.splitAsArray(indexList, '|');
1846            for (int i = 0; i < indexNames.length; i++) {
1847                // check if the index actually exists
1848                if (manager.getIndex(indexNames[i]) != null) {
1849                    updateList.add(indexNames[i]);
1850                } else {
1851                    if (LOG.isWarnEnabled()) {
1852                        LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexNames[i]));
1853                    }
1854                }
1855            }
1856        }
1857
1858        long startTime = System.currentTimeMillis();
1859
1860        if (updateList == null) {
1861            // all indexes need to be updated
1862            manager.rebuildAllIndexes(report);
1863        } else {
1864            // rebuild only the selected indexes
1865            manager.rebuildIndexes(updateList, report);
1866        }
1867
1868        long runTime = System.currentTimeMillis() - startTime;
1869
1870        String finishMessage = Messages.get().getBundle().key(
1871            Messages.LOG_REBUILD_INDEXES_FINISHED_1,
1872            CmsStringUtil.formatRuntime(runTime));
1873
1874        if (LOG.isInfoEnabled()) {
1875            LOG.info(finishMessage);
1876        }
1877        return finishMessage;
1878    }
1879
1880    /**
1881     * Pauses the offline indexing and returns a pause request id that has to be used for resuming offline indexing again.<p>
1882     * May take some time, because the indexes are updated first.<p>
1883     *
1884     *@return the pause request id. The id has to be given to the {@link #resumeOfflineIndexing(CmsUUID)} method to resume offline indexing.
1885     */
1886    public CmsUUID pauseOfflineIndexing() {
1887
1888        CmsUUID pauseId = new CmsUUID();
1889        synchronized (m_pauseRequests) {
1890            if (m_pauseRequests.isEmpty()) {
1891                LOG.info("Pausing offline indexing.");
1892                m_configuredOfflineIndexingFrequency = m_offlineUpdateFrequency;
1893                m_offlineUpdateFrequency = Long.MAX_VALUE;
1894                updateOfflineIndexes(0);
1895            }
1896            m_pauseRequests.add(pauseId);
1897            if (LOG.isDebugEnabled()) {
1898                LOG.debug("Added pause request with id " + pauseId);
1899            }
1900        }
1901        return pauseId;
1902    }
1903
1904    /**
1905     * Rebuilds (if required creates) all configured indexes.<p>
1906     *
1907     * @param report the report object to write messages (or <code>null</code>)
1908     *
1909     * @throws CmsException if something goes wrong
1910     */
1911    public void rebuildAllIndexes(I_CmsReport report) throws CmsException {
1912
1913        try {
1914            SEARCH_MANAGER_LOCK.lock();
1915
1916            CmsMessageContainer container = null;
1917            for (int i = 0, n = m_indexes.size(); i < n; i++) {
1918                // iterate all configured search indexes
1919                I_CmsSearchIndex searchIndex = m_indexes.get(i);
1920                try {
1921                    // update the index
1922                    updateIndex(searchIndex, report, null);
1923                } catch (CmsException e) {
1924                    container = new CmsMessageContainer(
1925                        Messages.get(),
1926                        Messages.ERR_INDEX_REBUILD_ALL_1,
1927                        new Object[] {searchIndex.getName()});
1928                    LOG.error(
1929                        Messages.get().getBundle().key(Messages.ERR_INDEX_REBUILD_ALL_1, searchIndex.getName()),
1930                        e);
1931                }
1932            }
1933            // clean up the extraction result cache
1934            cleanExtractionCache();
1935            if (container != null) {
1936                // throw stored exception
1937                throw new CmsSearchException(container);
1938            }
1939        } finally {
1940            SEARCH_MANAGER_LOCK.unlock();
1941        }
1942    }
1943
1944    /**
1945     * Rebuilds (if required creates) the index with the given name.<p>
1946     *
1947     * @param indexName the name of the index to rebuild
1948     * @param report the report object to write messages (or <code>null</code>)
1949     *
1950     * @throws CmsException if something goes wrong
1951     */
1952    public void rebuildIndex(String indexName, I_CmsReport report) throws CmsException {
1953
1954        try {
1955            SEARCH_MANAGER_LOCK.lock();
1956            // get the search index by name
1957            I_CmsSearchIndex index = getIndex(indexName);
1958            // update the index
1959            updateIndex(index, report, null);
1960            // clean up the extraction result cache
1961            cleanExtractionCache();
1962        } finally {
1963            SEARCH_MANAGER_LOCK.unlock();
1964        }
1965    }
1966
1967    /**
1968     * Rebuilds (if required creates) the List of indexes with the given name.<p>
1969     *
1970     * @param indexNames the names (String) of the index to rebuild
1971     * @param report the report object to write messages (or <code>null</code>)
1972     *
1973     * @throws CmsException if something goes wrong
1974     */
1975    public void rebuildIndexes(List<String> indexNames, I_CmsReport report) throws CmsException {
1976
1977        try {
1978            SEARCH_MANAGER_LOCK.lock();
1979            Iterator<String> i = indexNames.iterator();
1980            while (i.hasNext()) {
1981                String indexName = i.next();
1982                // get the search index by name
1983                I_CmsSearchIndex index = getIndex(indexName);
1984                if (index != null) {
1985                    // update the index
1986                    updateIndex(index, report, null);
1987                } else {
1988                    if (LOG.isWarnEnabled()) {
1989                        LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName));
1990                    }
1991                }
1992            }
1993            // clean up the extraction result cache
1994            cleanExtractionCache();
1995        } finally {
1996            SEARCH_MANAGER_LOCK.unlock();
1997        }
1998    }
1999
2000    /**
2001     * Registers a new Solr core for the given index.<p>
2002     *
2003     * @param index the index to register a new Solr core for
2004     *
2005     * @throws CmsConfigurationException if no Solr server is configured
2006     */
2007    @SuppressWarnings("resource")
2008    public void registerSolrIndex(CmsSolrIndex index) throws CmsConfigurationException {
2009
2010        if ((m_solrConfig == null) || !m_solrConfig.isEnabled()) {
2011            // No solr server configured
2012            throw new CmsConfigurationException(Messages.get().container(Messages.ERR_SOLR_NOT_ENABLED_0));
2013        }
2014
2015        if (index.getServerUrl() != null) { // Use the index-specific Solr-Server if present.
2016            index.setSolrServer(new Builder().withBaseSolrUrl(index.getServerUrl()).build());
2017        } else if (m_solrConfig.getServerUrl() != null) { // Use the globally configured external Solr-Server if present.
2018            // HTTP Server configured
2019            // TODO Implement multi core support for HTTP server
2020            // @see http://lucidworks.lucidimagination.com/display/solr/Configuring+solr.xml
2021            index.setSolrServer(new Builder().withBaseSolrUrl(m_solrConfig.getServerUrl()).build());
2022        } else { // Default to the embedded Solr Server
2023
2024            // get the core container that contains one core for each configured index
2025            if (m_coreContainer == null) {
2026                m_coreContainer = createCoreContainer();
2027            }
2028
2029            // unload the existing core if it exists to avoid problems with forced unlock.
2030            if (m_coreContainer.getAllCoreNames().contains(index.getCoreName())) {
2031                m_coreContainer.unload(index.getCoreName(), false, false, true);
2032            }
2033            // ensure that all locks on the index are gone
2034            ensureIndexIsUnlocked(index.getPath());
2035
2036            // load the core to the container
2037            File dataDir = new File(index.getPath());
2038            if (!dataDir.exists()) {
2039                dataDir.mkdirs();
2040                if (CmsLog.INIT.isInfoEnabled()) {
2041                    CmsLog.INIT.info(
2042                        Messages.get().getBundle().key(
2043                            Messages.INIT_SOLR_INDEX_DIR_CREATED_2,
2044                            index.getName(),
2045                            index.getPath()));
2046                }
2047            }
2048            File instanceDir = new File(
2049                m_solrConfig.getHome() + FileSystems.getDefault().getSeparator() + index.getName());
2050            if (!instanceDir.exists()) {
2051                instanceDir.mkdirs();
2052                if (CmsLog.INIT.isInfoEnabled()) {
2053                    CmsLog.INIT.info(
2054                        Messages.get().getBundle().key(
2055                            Messages.INIT_SOLR_INDEX_DIR_CREATED_2,
2056                            index.getName(),
2057                            index.getPath()));
2058                }
2059            }
2060
2061            // create the core
2062            // TODO: suboptimal - forces always the same schema
2063            SolrCore core = null;
2064            try {
2065                // creation includes registration.
2066                // TODO: this was the old code: core = m_coreContainer.create(descriptor, false);
2067                Map<String, String> properties = new HashMap<String, String>(3);
2068                properties.put(CoreDescriptor.CORE_DATADIR, dataDir.getAbsolutePath());
2069                properties.put(CoreDescriptor.CORE_CONFIGSET, "default");
2070                core = m_coreContainer.create(index.getCoreName(), instanceDir.toPath(), properties, false);
2071            } catch (NullPointerException e) {
2072                if (core != null) {
2073                    core.close();
2074                }
2075                throw new CmsConfigurationException(
2076                    Messages.get().container(
2077                        Messages.ERR_SOLR_SERVER_NOT_CREATED_3,
2078                        index.getName() + " (" + index.getCoreName() + ")",
2079                        index.getPath(),
2080                        m_solrConfig.getSolrConfigFile().getAbsolutePath()),
2081                    e);
2082            }
2083
2084            if (index.isNoSolrServerSet()) {
2085                index.setSolrServer(new EmbeddedSolrServer(m_coreContainer, index.getCoreName()));
2086            }
2087            if (CmsLog.INIT.isInfoEnabled()) {
2088                CmsLog.INIT.info(
2089                    Messages.get().getBundle().key(
2090                        Messages.INIT_SOLR_SERVER_CREATED_1,
2091                        index.getName() + " (" + index.getCoreName() + ")"));
2092            }
2093        }
2094    }
2095
2096    /**
2097     * Removes this field configuration from the OpenCms configuration (if it is not used any more).<p>
2098     *
2099     * @param fieldConfiguration the field configuration to remove from the configuration
2100     *
2101     * @return true if remove was successful, false if preconditions for removal are ok but the given
2102     *         field configuration was unknown to the manager.
2103     *
2104     * @throws CmsIllegalStateException if the given field configuration is still used by at least one
2105     *         <code>{@link I_CmsSearchIndex}</code>.
2106     *
2107     */
2108    public boolean removeSearchFieldConfiguration(I_CmsSearchFieldConfiguration fieldConfiguration)
2109    throws CmsIllegalStateException {
2110
2111        // never remove the standard field configuration
2112        if (fieldConfiguration.getName().equals(CmsSearchFieldConfiguration.STR_STANDARD)) {
2113            throw new CmsIllegalStateException(
2114                Messages.get().container(
2115                    Messages.ERR_INDEX_CONFIGURATION_DELETE_STANDARD_1,
2116                    fieldConfiguration.getName()));
2117        }
2118        // validation if removal will be granted
2119        Iterator<I_CmsSearchIndex> itIndexes = m_indexes.iterator();
2120        I_CmsSearchIndex idx;
2121        // the list for collecting indexes that use the given field configuration
2122        List<I_CmsSearchIndex> referrers = new ArrayList<I_CmsSearchIndex>();
2123        I_CmsSearchFieldConfiguration refFieldConfig;
2124        while (itIndexes.hasNext()) {
2125            idx = itIndexes.next();
2126            refFieldConfig = idx.getFieldConfiguration();
2127            if (refFieldConfig.equals(fieldConfiguration)) {
2128                referrers.add(idx);
2129            }
2130        }
2131        if (referrers.size() > 0) {
2132            throw new CmsIllegalStateException(
2133                Messages.get().container(
2134                    Messages.ERR_INDEX_CONFIGURATION_DELETE_2,
2135                    fieldConfiguration.getName(),
2136                    referrers.toString()));
2137        }
2138
2139        // remove operation (no exception)
2140        return m_fieldConfigurations.remove(fieldConfiguration.getName()) != null;
2141
2142    }
2143
2144    /**
2145     * Removes a search field from the field configuration.<p>
2146     *
2147     * @param fieldConfiguration the field configuration
2148     * @param field field to remove from the field configuration
2149     *
2150     * @return true if remove was successful, false if preconditions for removal are ok but the given
2151     *         field was unknown.
2152     */
2153    public boolean removeSearchFieldConfigurationField(
2154        I_CmsSearchFieldConfiguration fieldConfiguration,
2155        CmsSearchField field) {
2156
2157        if (LOG.isInfoEnabled()) {
2158            LOG.info(
2159                Messages.get().getBundle().key(
2160                    Messages.LOG_REMOVE_FIELDCONFIGURATION_FIELD_INDEX_2,
2161                    field.getName(),
2162                    fieldConfiguration.getName()));
2163        }
2164
2165        return fieldConfiguration.getFields().remove(field);
2166    }
2167
2168    /**
2169     * Removes a search field mapping from the given field.<p>
2170     *
2171     * @param field the field
2172     * @param mapping mapping to remove from the field
2173     *
2174     * @return true if remove was successful, false if preconditions for removal are ok but the given
2175     *         mapping was unknown.
2176     *
2177     * @throws CmsIllegalStateException if the given mapping is the last mapping inside the given field.
2178     */
2179    public boolean removeSearchFieldMapping(CmsLuceneField field, CmsSearchFieldMapping mapping)
2180    throws CmsIllegalStateException {
2181
2182        if (field.getMappings().size() < 2) {
2183            throw new CmsIllegalStateException(
2184                Messages.get().container(
2185                    Messages.ERR_FIELD_MAPPING_DELETE_2,
2186                    mapping.getType().toString(),
2187                    field.getName()));
2188        } else {
2189
2190            if (LOG.isInfoEnabled()) {
2191                LOG.info(
2192                    Messages.get().getBundle().key(
2193                        Messages.LOG_REMOVE_FIELD_MAPPING_INDEX_2,
2194                        mapping.toString(),
2195                        field.getName()));
2196            }
2197            return field.getMappings().remove(mapping);
2198        }
2199    }
2200
2201    /**
2202     * Removes a search index from the configuration.<p>
2203     *
2204     * @param searchIndex the search index to remove
2205     */
2206    public void removeSearchIndex(I_CmsSearchIndex searchIndex) {
2207
2208        // shut down index to remove potential config files of Solr indexes
2209        searchIndex.shutDown();
2210        if (searchIndex instanceof CmsSolrIndex) {
2211            CmsSolrIndex solrIndex = (CmsSolrIndex)searchIndex;
2212            m_coreContainer.unload(solrIndex.getCoreName(), true, true, true);
2213        }
2214        m_indexes.remove(searchIndex);
2215        initOfflineIndexes();
2216
2217        if (LOG.isInfoEnabled()) {
2218            LOG.info(
2219                Messages.get().getBundle().key(
2220                    Messages.LOG_REMOVE_SEARCH_INDEX_2,
2221                    searchIndex.getName(),
2222                    searchIndex.getProject()));
2223        }
2224    }
2225
2226    /**
2227     * Removes all indexes included in the given list (which must contain the name of an index to remove).<p>
2228     *
2229     * @param indexNames the names of the index to remove
2230     */
2231    public void removeSearchIndexes(List<String> indexNames) {
2232
2233        Iterator<String> i = indexNames.iterator();
2234        while (i.hasNext()) {
2235            String indexName = i.next();
2236            // get the search index by name
2237            I_CmsSearchIndex index = getIndex(indexName);
2238            if (index != null) {
2239                // remove the index
2240                removeSearchIndex(index);
2241            } else {
2242                if (LOG.isWarnEnabled()) {
2243                    LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName));
2244                }
2245            }
2246        }
2247    }
2248
2249    /**
2250     * Removes this indexsource from the OpenCms configuration (if it is not used any more).<p>
2251     *
2252     * @param indexsource the indexsource to remove from the configuration
2253     *
2254     * @return true if remove was successful, false if preconditions for removal are ok but the given
2255     *         searchindex was unknown to the manager.
2256     *
2257     * @throws CmsIllegalStateException if the given indexsource is still used by at least one
2258     *         <code>{@link I_CmsSearchIndex}</code>.
2259     *
2260     */
2261    public boolean removeSearchIndexSource(CmsSearchIndexSource indexsource) throws CmsIllegalStateException {
2262
2263        // validation if removal will be granted
2264        Iterator<I_CmsSearchIndex> itIndexes = m_indexes.iterator();
2265        I_CmsSearchIndex idx;
2266        // the list for collecting indexes that use the given index source
2267        List<I_CmsSearchIndex> referrers = new ArrayList<I_CmsSearchIndex>();
2268        // the current list of referred index sources of the iterated index
2269        List<CmsSearchIndexSource> refsources;
2270        while (itIndexes.hasNext()) {
2271            idx = itIndexes.next();
2272            refsources = idx.getSources();
2273            if (refsources != null) {
2274                if (refsources.contains(indexsource)) {
2275                    referrers.add(idx);
2276                }
2277            }
2278        }
2279        if (referrers.size() > 0) {
2280            throw new CmsIllegalStateException(
2281                Messages.get().container(
2282                    Messages.ERR_INDEX_SOURCE_DELETE_2,
2283                    indexsource.getName(),
2284                    referrers.toString()));
2285        }
2286
2287        // remove operation (no exception)
2288        return m_indexSources.remove(indexsource.getName()) != null;
2289
2290    }
2291
2292    /**
2293     * Resumes offline indexing if it was paused and no pause for another pauseId is still present.<p>
2294     * @param pauseId the id of the pause request, which now allows for resuming.
2295     */
2296    public void resumeOfflineIndexing(CmsUUID pauseId) {
2297
2298        synchronized (m_pauseRequests) {
2299            if (!m_pauseRequests.contains(pauseId)) {
2300                try {
2301                    throw new IllegalArgumentException();
2302                } catch (IllegalArgumentException e) {
2303                    LOG.warn("Cannot resume for pause request " + pauseId + ". The request id is unknown.", e);
2304                }
2305            } else {
2306                m_pauseRequests.remove(pauseId);
2307                if (LOG.isDebugEnabled()) {
2308                    LOG.debug(
2309                        "Removed pause request "
2310                            + pauseId
2311                            + " from pause requests. Remaining pauses are: "
2312                            + m_pauseRequests);
2313                }
2314                if (m_pauseRequests.isEmpty()) {
2315                    LOG.info("Resuming offline indexing.");
2316                    setOfflineUpdateFrequency(
2317                        m_configuredOfflineIndexingFrequency > 0
2318                        ? m_configuredOfflineIndexingFrequency
2319                        : DEFAULT_OFFLINE_UPDATE_FREQNENCY);
2320                }
2321            }
2322        }
2323    }
2324
2325    /**
2326     * Sets the name of the directory below WEB-INF/ where the search indexes are stored.<p>
2327     *
2328     * @param value the name of the directory below WEB-INF/ where the search indexes are stored
2329     */
2330    public void setDirectory(String value) {
2331
2332        m_path = value;
2333    }
2334
2335    /**
2336     * Sets the maximum age a text extraction result is kept in the cache (in hours).<p>
2337     *
2338     * @param extractionCacheMaxAge the maximum age for a text extraction result to set
2339     */
2340    public void setExtractionCacheMaxAge(float extractionCacheMaxAge) {
2341
2342        m_extractionCacheMaxAge = extractionCacheMaxAge;
2343    }
2344
2345    /**
2346     * Sets the maximum age a text extraction result is kept in the cache (in hours) as a String.<p>
2347     *
2348     * @param extractionCacheMaxAge the maximum age for a text extraction result to set
2349     */
2350    public void setExtractionCacheMaxAge(String extractionCacheMaxAge) {
2351
2352        try {
2353            setExtractionCacheMaxAge(Float.parseFloat(extractionCacheMaxAge));
2354        } catch (NumberFormatException e) {
2355            LOG.error(
2356                Messages.get().getBundle().key(
2357                    Messages.LOG_PARSE_EXTRACTION_CACHE_AGE_FAILED_2,
2358                    extractionCacheMaxAge,
2359                    new Float(DEFAULT_EXTRACTION_CACHE_MAX_AGE)),
2360                e);
2361            setExtractionCacheMaxAge(DEFAULT_EXTRACTION_CACHE_MAX_AGE);
2362        }
2363    }
2364
2365    /**
2366     * Sets the unlock mode during indexing.<p>
2367     *
2368     * @param value the value
2369     */
2370    public void setForceunlock(String value) {
2371
2372        m_forceUnlockMode = CmsSearchForceUnlockMode.valueOf(value);
2373    }
2374
2375    /**
2376     * Sets the highlighter.<p>
2377     *
2378     * A highlighter is a class implementing org.opencms.search.documents.I_TermHighlighter.<p>
2379     *
2380     * @param highlighter the package/class name of the highlighter
2381     */
2382    public void setHighlighter(String highlighter) {
2383
2384        try {
2385            m_highlighter = (I_CmsTermHighlighter)Class.forName(highlighter).newInstance();
2386        } catch (Exception e) {
2387            m_highlighter = null;
2388            LOG.error(e.getLocalizedMessage(), e);
2389        }
2390    }
2391
2392    /**
2393     * Sets the seconds to wait for an index lock during an update operation.<p>
2394     *
2395     * @param value the seconds to wait for an index lock during an update operation
2396     */
2397    public void setIndexLockMaxWaitSeconds(int value) {
2398
2399        m_indexLockMaxWaitSeconds = value;
2400    }
2401
2402    /**
2403     * Sets the max. excerpt length.<p>
2404     *
2405     * @param maxExcerptLength the max. excerpt length to set
2406     */
2407    public void setMaxExcerptLength(int maxExcerptLength) {
2408
2409        m_maxExcerptLength = maxExcerptLength;
2410    }
2411
2412    /**
2413     * Sets the max. excerpt length as a String.<p>
2414     *
2415     * @param maxExcerptLength the max. excerpt length to set
2416     */
2417    public void setMaxExcerptLength(String maxExcerptLength) {
2418
2419        try {
2420            setMaxExcerptLength(Integer.parseInt(maxExcerptLength));
2421        } catch (Exception e) {
2422            LOG.error(
2423                Messages.get().getBundle().key(
2424                    Messages.LOG_PARSE_EXCERPT_LENGTH_FAILED_2,
2425                    maxExcerptLength,
2426                    new Integer(DEFAULT_EXCERPT_LENGTH)),
2427                e);
2428            setMaxExcerptLength(DEFAULT_EXCERPT_LENGTH);
2429        }
2430    }
2431
2432    /**
2433     * Sets the maximal wait time for offline index updates after edit operations.<p>
2434     *
2435     * @param maxIndexWaitTime  the maximal wait time to set in milliseconds
2436     */
2437    public void setMaxIndexWaitTime(long maxIndexWaitTime) {
2438
2439        m_maxIndexWaitTime = maxIndexWaitTime;
2440    }
2441
2442    /**
2443     * Sets the maximal wait time for offline index updates after edit operations.<p>
2444     *
2445     * @param maxIndexWaitTime the maximal wait time to set in milliseconds
2446     */
2447    public void setMaxIndexWaitTime(String maxIndexWaitTime) {
2448
2449        try {
2450            setMaxIndexWaitTime(Long.parseLong(maxIndexWaitTime));
2451        } catch (Exception e) {
2452            LOG.error(
2453                Messages.get().getBundle().key(
2454                    Messages.LOG_PARSE_MAX_INDEX_WAITTIME_FAILED_2,
2455                    maxIndexWaitTime,
2456                    new Long(DEFAULT_MAX_INDEX_WAITTIME)),
2457                e);
2458            setMaxIndexWaitTime(DEFAULT_MAX_INDEX_WAITTIME);
2459        }
2460    }
2461
2462    /**
2463     * Sets the maximum number of modifications before a commit in the search index is triggered.<p>
2464     *
2465     * @param maxModificationsBeforeCommit the maximum number of modifications to set
2466     */
2467    public void setMaxModificationsBeforeCommit(int maxModificationsBeforeCommit) {
2468
2469        m_maxModificationsBeforeCommit = maxModificationsBeforeCommit;
2470    }
2471
2472    /**
2473     * Sets the maximum number of modifications before a commit in the search index is triggered as a string.<p>
2474     *
2475     * @param value the maximum number of modifications to set
2476     */
2477    public void setMaxModificationsBeforeCommit(String value) {
2478
2479        try {
2480            setMaxModificationsBeforeCommit(Integer.parseInt(value));
2481        } catch (Exception e) {
2482            LOG.error(
2483                Messages.get().getBundle().key(
2484                    Messages.LOG_PARSE_MAXCOMMIT_FAILED_2,
2485                    value,
2486                    new Integer(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT)),
2487                e);
2488            setMaxModificationsBeforeCommit(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT);
2489        }
2490    }
2491
2492    /**
2493     * Sets the update frequency of the offline indexer in milliseconds.<p>
2494     *
2495     * @param offlineUpdateFrequency the update frequency in milliseconds to set
2496     */
2497    public void setOfflineUpdateFrequency(long offlineUpdateFrequency) {
2498
2499        m_offlineUpdateFrequency = offlineUpdateFrequency;
2500        updateOfflineIndexes(0);
2501    }
2502
2503    /**
2504     * Sets the update frequency of the offline indexer in milliseconds.<p>
2505     *
2506     * @param offlineUpdateFrequency the update frequency in milliseconds to set
2507     */
2508    public void setOfflineUpdateFrequency(String offlineUpdateFrequency) {
2509
2510        try {
2511            setOfflineUpdateFrequency(Long.parseLong(offlineUpdateFrequency));
2512        } catch (Exception e) {
2513            LOG.error(
2514                Messages.get().getBundle().key(
2515                    Messages.LOG_PARSE_OFFLINE_UPDATE_FAILED_2,
2516                    offlineUpdateFrequency,
2517                    new Long(DEFAULT_OFFLINE_UPDATE_FREQNENCY)),
2518                e);
2519            setOfflineUpdateFrequency(DEFAULT_OFFLINE_UPDATE_FREQNENCY);
2520        }
2521    }
2522
2523    /**
2524     * Sets the Solr configuration.<p>
2525     *
2526     * @param config the Solr configuration
2527     */
2528    public void setSolrServerConfiguration(CmsSolrConfiguration config) {
2529
2530        m_solrConfig = config;
2531    }
2532
2533    /**
2534     * Sets the timeout to abandon threads indexing a resource.<p>
2535     *
2536     * @param value the timeout in milliseconds
2537     */
2538    public void setTimeout(long value) {
2539
2540        m_timeout = value;
2541    }
2542
2543    /**
2544     * Sets the timeout to abandon threads indexing a resource as a String.<p>
2545     *
2546     * @param value the timeout in milliseconds
2547     */
2548    public void setTimeout(String value) {
2549
2550        try {
2551            setTimeout(Long.parseLong(value));
2552        } catch (Exception e) {
2553            LOG.error(
2554                Messages.get().getBundle().key(Messages.LOG_PARSE_TIMEOUT_FAILED_2, value, new Long(DEFAULT_TIMEOUT)),
2555                e);
2556            setTimeout(DEFAULT_TIMEOUT);
2557        }
2558    }
2559
2560    /**
2561     * Shuts down the search manager.<p>
2562     *
2563     * This will cause all search indices to be shut down.<p>
2564     */
2565    public void shutDown() {
2566
2567        if (m_offlineIndexThread != null) {
2568            m_offlineIndexThread.shutDown();
2569        }
2570
2571        if (m_offlineHandler != null) {
2572            OpenCms.removeCmsEventListener(m_offlineHandler);
2573        }
2574
2575        Iterator<I_CmsSearchIndex> i = m_indexes.iterator();
2576        while (i.hasNext()) {
2577            I_CmsSearchIndex index = i.next();
2578            index.shutDown();
2579            index = null;
2580        }
2581        m_indexes.clear();
2582
2583        shutDownSolrContainer();
2584
2585        if (CmsLog.INIT.isInfoEnabled()) {
2586            CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SHUTDOWN_MANAGER_0));
2587        }
2588    }
2589
2590    /**
2591     * Updates all offline indexes.<p>
2592     *
2593     * Can be used to force an index update when it's not convenient to wait until the
2594     * offline update interval has eclipsed.<p>
2595     *
2596     * Since the offline indexes still need some time to update the new resources,
2597     * the method waits for at most the configurable <code>maxIndexWaitTime</code>
2598     * to ensure that updating is finished.
2599     *
2600     * @see #updateOfflineIndexes(long)
2601     *
2602     */
2603    public void updateOfflineIndexes() {
2604
2605        updateOfflineIndexes(getMaxIndexWaitTime());
2606    }
2607
2608    /**
2609     * Updates all offline indexes.<p>
2610     *
2611     * Can be used to force an index update when it's not convenient to wait until the
2612     * offline update interval has eclipsed.<p>
2613     *
2614     * Since the offline index will still need some time to update the new resources even if it runs directly,
2615     * a wait time of 2500 or so should be given in order to make sure the index finished updating.
2616     *
2617     * @param waitTime milliseconds to wait after the offline update index was notified of the changes
2618     */
2619    public void updateOfflineIndexes(long waitTime) {
2620
2621        if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) {
2622            // notify existing thread of update frequency change
2623            if (LOG.isDebugEnabled()) {
2624                LOG.debug(Messages.get().getBundle().key(Messages.LOG_OI_UPDATE_INTERRUPT_0));
2625            }
2626            m_offlineIndexThread.interrupt();
2627            if (waitTime > 0) {
2628                m_offlineIndexThread.getWaitHandle().enter(waitTime);
2629            }
2630        }
2631    }
2632
2633    /**
2634     * Collects the resources whose indexed document depends on one of the updated resources.<p>
2635     * We take transitive dependencies into account and handle cyclic dependencies correctly as well.
2636     *
2637     * @param adminCms an OpenCms user context with Admin permissions
2638     * @param updateResources the resources to be re-indexed
2639     *
2640     * @return the updated list of resource to re-index
2641     */
2642    protected List<CmsPublishedResource> addAdditionallyAffectedResources(
2643        CmsObject adminCms,
2644        List<CmsPublishedResource> updateResources) {
2645
2646        if (updateResources.size() > 0) {
2647            Set<CmsPublishedResource> updateResourceSet = new HashSet<>(updateResources);
2648            Collection<CmsPublishedResource> resourcesToCheck = updateResourceSet;
2649            Collection<CmsPublishedResource> additionalResources = Collections.emptySet();
2650            do {
2651                additionalResources = findRelatedContainerPages(adminCms, updateResourceSet, resourcesToCheck);
2652                additionalResources.addAll(
2653                    addIndexContentRelatedResources(adminCms, updateResourceSet, resourcesToCheck));
2654                updateResources.addAll(additionalResources);
2655                updateResourceSet.addAll(additionalResources);
2656                resourcesToCheck = additionalResources;
2657            } while (resourcesToCheck.size() > 0);
2658        }
2659        return updateResources;
2660    }
2661
2662    /**
2663     * Collects the resources whose indexed document depends on one of the updated resources.<p>
2664     *
2665     * @param adminCms an OpenCms user context with Admin permissions
2666     * @param updateResources the resources to be re-indexed
2667     * @param updateResourcesToCheck the resources to check additionally affected resources for, subset of updateResources
2668     *
2669     * @return the list of resources that need to be additionally re-index
2670     */
2671    protected Collection<CmsPublishedResource> addIndexContentRelatedResources(
2672        CmsObject adminCms,
2673        Collection<CmsPublishedResource> updateResources,
2674        Collection<CmsPublishedResource> updateResourcesToCheck) {
2675
2676        Collection<CmsPublishedResource> additionalUpdateResources = new HashSet<>();
2677        for (CmsPublishedResource checkedRes : updateResourcesToCheck) {
2678            try {
2679                CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(checkedRes.getStructureId());
2680                filter = filter.filterType(CmsRelationType.INDEX_CONTENT);
2681                List<CmsRelation> relations = adminCms.readRelations(filter);
2682                for (CmsRelation relation : relations) {
2683                    CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL);
2684                    CmsPublishedResource additionalPubRes = new CmsPublishedResource(res);
2685                    if (!updateResources.contains(additionalPubRes)) {
2686                        additionalUpdateResources.add(additionalPubRes);
2687                    }
2688                }
2689            } catch (CmsException e) {
2690                LOG.error(e.getLocalizedMessage(), e);
2691            }
2692        }
2693        return additionalUpdateResources;
2694    }
2695
2696    /**
2697     * Cleans up the extraction result cache.<p>
2698     */
2699    protected void cleanExtractionCache() {
2700
2701        // clean up the extraction result cache
2702        m_extractionResultCache.cleanCache(m_extractionCacheMaxAge);
2703    }
2704
2705    /**
2706     * Collects the related containerpages to the resources that have been published.<p>
2707     *
2708     * @param adminCms an OpenCms user context with Admin permissions
2709     * @param updateResources the resources to be re-indexed
2710     * @param updateResourcesToCheck the resources to check additionally affected resources for, subset of updateResources
2711     *
2712     * @return the list of resources that need to be additionally re-index
2713     */
2714    protected Collection<CmsPublishedResource> findRelatedContainerPages(
2715        CmsObject adminCms,
2716        Collection<CmsPublishedResource> updateResources,
2717        Collection<CmsPublishedResource> updateResourcesToCheck) {
2718
2719        CmsResourceManager resMan = OpenCms.getResourceManager();
2720        Collection<CmsPublishedResource> additionalUpdateResources = new HashSet<>();
2721
2722        Set<CmsResource> containerPages = new HashSet<CmsResource>();
2723        int containerPageTypeId = -1;
2724        try {
2725            containerPageTypeId = CmsResourceTypeXmlContainerPage.getContainerPageTypeId();
2726        } catch (CmsLoaderException e) {
2727            // will happen during setup, when container page type is not available yet
2728            LOG.info(e.getLocalizedMessage(), e);
2729        }
2730        if (containerPageTypeId != -1) {
2731            for (CmsPublishedResource pubRes : updateResourcesToCheck) {
2732                try {
2733                    if (resMan.getResourceType(pubRes.getType()) instanceof CmsResourceTypeXmlContent) {
2734                        if (!isGroup(pubRes.getType())) {
2735                            CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(
2736                                pubRes.getStructureId()).filterStrong();
2737                            List<CmsRelation> relations = adminCms.readRelations(filter);
2738                            for (CmsRelation relation : relations) {
2739                                CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL);
2740                                if (CmsResourceTypeXmlContainerPage.isContainerPage(res)) {
2741                                    containerPages.add(res);
2742                                    if (CmsDetailOnlyContainerUtil.isDetailContainersPage(
2743                                        adminCms,
2744                                        adminCms.getSitePath(res))) {
2745                                        addDetailContent(adminCms, containerPages, adminCms.getSitePath(res));
2746                                    }
2747                                }
2748                            }
2749                        }
2750                    }
2751                    if (containerPageTypeId == pubRes.getType()) {
2752                        addDetailContent(
2753                            adminCms,
2754                            containerPages,
2755                            adminCms.getRequestContext().removeSiteRoot(pubRes.getRootPath()));
2756                    }
2757                } catch (CmsException e) {
2758                    LOG.error(e.getLocalizedMessage(), e);
2759                }
2760            }
2761            // add all found container pages as published resource objects to the list
2762            for (CmsResource page : containerPages) {
2763                CmsPublishedResource pubCont = new CmsPublishedResource(page);
2764                if (!updateResources.contains(pubCont)) {
2765                    // ensure container page is added only once
2766                    additionalUpdateResources.add(pubCont);
2767                }
2768            }
2769        }
2770        return additionalUpdateResources;
2771    }
2772
2773    /**
2774     * Returns the set of names of all configured document types.<p>
2775     *
2776     * @return the set of names of all configured document types
2777     */
2778    protected List<String> getDocumentTypes() {
2779
2780        return Collections.unmodifiableList(new ArrayList<String>(m_documentTypes.keySet()));
2781    }
2782
2783    /**
2784     * Returns the a offline project used for offline indexing.<p>
2785     *
2786     * @return the offline project if available
2787     */
2788    protected CmsProject getOfflineIndexProject() {
2789
2790        CmsProject result = null;
2791        for (I_CmsSearchIndex index : m_offlineIndexes) {
2792            try {
2793                result = m_adminCms.readProject(index.getProject());
2794
2795                if (!result.isOnlineProject()) {
2796                    break;
2797                }
2798            } catch (Exception e) {
2799                // may be a missconfigured index, ignore
2800                LOG.error(e.getLocalizedMessage(), e);
2801            }
2802        }
2803        return result;
2804    }
2805
2806    /**
2807     * Returns a new thread manager for the indexing threads.<p>
2808     *
2809     * @return a new thread manager for the indexing threads
2810     */
2811    protected CmsIndexingThreadManager getThreadManager() {
2812
2813        return new CmsIndexingThreadManager(m_timeout, m_maxModificationsBeforeCommit);
2814    }
2815
2816    /**
2817     * Initializes the available Cms resource types to be indexed.<p>
2818     *
2819     * A map stores document factories keyed by a string representing
2820     * a colon separated list of Cms resource types and/or mimetypes.<p>
2821     *
2822     * The keys of this map are used to trigger a document factory to convert
2823     * a Cms resource into a Lucene index document.<p>
2824     *
2825     * A document factory is a class implementing the interface
2826     * {@link org.opencms.search.documents.I_CmsDocumentFactory}.<p>
2827     */
2828    protected void initAvailableDocumentTypes() {
2829
2830        CmsSearchDocumentType documenttype = null;
2831        String className = null;
2832        String name = null;
2833        I_CmsDocumentFactory documentFactory = null;
2834        List<String> resourceTypes = null;
2835        List<String> mimeTypes = null;
2836        Class<?> c = null;
2837
2838        m_documentTypes = new LinkedHashMap<String, Map<String, I_CmsDocumentFactory>>();
2839
2840        for (int i = 0, n = m_documentTypeConfigs.size(); i < n; i++) {
2841
2842            documenttype = m_documentTypeConfigs.get(i);
2843            name = documenttype.getName();
2844
2845            try {
2846                className = documenttype.getClassName();
2847                resourceTypes = documenttype.getResourceTypes();
2848                mimeTypes = documenttype.getMimeTypes();
2849
2850                if (name == null) {
2851                    throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_NAME_0));
2852                }
2853                if (className == null) {
2854                    throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_CLASS_DEF_0));
2855                }
2856                if (resourceTypes.size() == 0) {
2857                    throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_RESOURCETYPE_DEF_0));
2858                }
2859
2860                try {
2861                    c = Class.forName(className);
2862                    documentFactory = (I_CmsDocumentFactory)c.getConstructor(new Class[] {String.class}).newInstance(
2863                        new Object[] {name});
2864                } catch (ClassNotFoundException exc) {
2865                    throw new CmsIndexException(
2866                        Messages.get().container(Messages.ERR_DOCCLASS_NOT_FOUND_1, className),
2867                        exc);
2868                } catch (Exception exc) {
2869                    throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCCLASS_INIT_1, className), exc);
2870                }
2871
2872                if (documentFactory.isUsingCache()) {
2873                    // init cache if used by the factory
2874                    documentFactory.setCache(m_extractionResultCache);
2875                }
2876
2877                Map<String, I_CmsDocumentFactory> matchingTypes = new HashMap<>();
2878                for (Iterator<String> keyIt = documentFactory.getDocumentKeys(
2879                    resourceTypes,
2880                    mimeTypes).iterator(); keyIt.hasNext();) {
2881                    String key = keyIt.next();
2882                    matchingTypes.put(key, documentFactory);
2883                    m_extractionKeys.add(key);
2884                }
2885                m_documentTypes.put(name, matchingTypes);
2886
2887            } catch (CmsException e) {
2888                if (LOG.isWarnEnabled()) {
2889                    LOG.warn(Messages.get().getBundle().key(Messages.LOG_DOCTYPE_CONFIG_FAILED_1, name), e);
2890                }
2891            }
2892        }
2893    }
2894
2895    /**
2896     * Initializes the index sources.
2897     */
2898    protected void initIndexSources() {
2899
2900        for (CmsSearchIndexSource source : m_indexSources.values()) {
2901            source.init();
2902        }
2903    }
2904
2905    /**
2906     * Initializes the configured search indexes.<p>
2907     *
2908     * This initializes also the list of Cms resources types
2909     * to be indexed by an index source.<p>
2910     */
2911    protected void initSearchIndexes() {
2912
2913        I_CmsSearchIndex index = null;
2914        for (int i = 0, n = m_indexes.size(); i < n; i++) {
2915            index = m_indexes.get(i);
2916            // reset disabled flag
2917            index.setEnabled(true);
2918            // check if the index has been configured correctly
2919            if (index.checkConfiguration(m_adminCms)) {
2920                // the index is configured correctly
2921                try {
2922                    index.initialize();
2923                } catch (Exception e) {
2924                    if (CmsLog.INIT.isWarnEnabled()) {
2925                        // in this case the index will be disabled
2926                        CmsLog.INIT.warn(Messages.get().getBundle().key(Messages.INIT_SEARCH_INIT_FAILED_1, index), e);
2927                    }
2928                }
2929            }
2930            // output a log message if the index was successfully configured or not
2931            if (CmsLog.INIT.isInfoEnabled()) {
2932                if (index.isEnabled()) {
2933                    CmsLog.INIT.info(
2934                        Messages.get().getBundle().key(Messages.INIT_INDEX_CONFIGURED_2, index, index.getProject()));
2935                } else {
2936                    CmsLog.INIT.warn(
2937                        Messages.get().getBundle().key(
2938                            Messages.INIT_INDEX_NOT_CONFIGURED_2,
2939                            index,
2940                            index.getProject()));
2941                }
2942            }
2943        }
2944    }
2945
2946    /**
2947     * Checks, if the index should be rebuilt/updated at all by the search manager.
2948     * @param index the index to check.
2949     * @return a flag, indicating if the index should be rebuilt/updated at all.
2950     */
2951    protected boolean shouldUpdateAtAll(I_CmsSearchIndex index) {
2952
2953        if (I_CmsSearchIndex.REBUILD_MODE_NEVER.equals(index.getRebuildMode())) {
2954            LOG.debug(Messages.get().getBundle().key(Messages.LOG_SKIP_REBUILD_FOR_MODE_NEVER_1, index.getName()));
2955            return false;
2956        } else {
2957            return true;
2958        }
2959
2960    }
2961
2962    /**
2963     * Incrementally updates all indexes that have their rebuild mode set to <code>"auto"</code>
2964     * after resources have been published.<p>
2965     *
2966     * @param adminCms an OpenCms user context with Admin permissions
2967     * @param publishHistoryId the history ID of the published project
2968     * @param report the report to write the output to
2969     */
2970    protected void updateAllIndexes(CmsObject adminCms, CmsUUID publishHistoryId, I_CmsReport report) {
2971
2972        int oldPriority = Thread.currentThread().getPriority();
2973        try {
2974            SEARCH_MANAGER_LOCK.lock();
2975            Thread.currentThread().setPriority(Thread.MIN_PRIORITY);
2976            List<CmsPublishedResource> publishedResources;
2977            try {
2978                // read the list of all published resources
2979                publishedResources = adminCms.readPublishedResources(publishHistoryId);
2980            } catch (CmsException e) {
2981                LOG.error(
2982                    Messages.get().getBundle().key(Messages.LOG_READING_CHANGED_RESOURCES_FAILED_1, publishHistoryId),
2983                    e);
2984                return;
2985            }
2986            Set<CmsUUID> bothNewAndDeleted = getIdsOfPublishResourcesWhichAreBothNewAndDeleted(publishedResources);
2987            // When published resources with both states 'new' and 'deleted' exist in the same publish job history, the resource has been moved
2988
2989            List<CmsPublishedResource> updateResources = new ArrayList<CmsPublishedResource>();
2990            for (CmsPublishedResource res : publishedResources) {
2991                if (res.isFolder() || res.getState().isUnchanged()) {
2992                    // folders and unchanged resources don't need to be indexed after publish
2993                    continue;
2994                }
2995                if (res.getState().isDeleted() || res.getState().isNew() || res.getState().isChanged()) {
2996                    if (updateResources.contains(res)) {
2997                        // resource may have been added as a sibling of another resource
2998                        // in this case we make sure to use the value from the publish list because of the "deleted" flag
2999                        boolean hasMoved = bothNewAndDeleted.contains(res.getStructureId())
3000                            || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_DESTINATION)
3001                            || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_SOURCE);
3002                        // check it this is a moved resource with source / target info, in this case we need both entries
3003                        if (!hasMoved) {
3004                            // if the resource was moved, we must contain both entries
3005                            updateResources.remove(res);
3006                        }
3007                        // "equals()" implementation of published resource checks for id,
3008                        // so the removed value may have a different "deleted" or "modified" status value
3009                        updateResources.add(res);
3010                    } else {
3011                        // resource not yet contained in the list
3012                        updateResources.add(res);
3013                        // check for the siblings (not for deleted resources, these are already gone)
3014                        if (!res.getState().isDeleted() && (res.getSiblingCount() > 1)) {
3015                            // this resource has siblings
3016                            try {
3017                                // read siblings from the online project
3018                                List<CmsResource> siblings = adminCms.readSiblings(
3019                                    res.getRootPath(),
3020                                    CmsResourceFilter.ALL);
3021                                Iterator<CmsResource> itSib = siblings.iterator();
3022                                while (itSib.hasNext()) {
3023                                    // check all siblings
3024                                    CmsResource sibling = itSib.next();
3025                                    CmsPublishedResource sib = new CmsPublishedResource(sibling);
3026                                    if (!updateResources.contains(sib)) {
3027                                        // ensure sibling is added only once
3028                                        updateResources.add(sib);
3029                                    }
3030                                }
3031                            } catch (CmsException e) {
3032                                // ignore, just use the original resource
3033                                if (LOG.isWarnEnabled()) {
3034                                    LOG.warn(
3035                                        Messages.get().getBundle().key(
3036                                            Messages.LOG_UNABLE_TO_READ_SIBLINGS_1,
3037                                            res.getRootPath()),
3038                                        e);
3039                                }
3040                            }
3041                        }
3042                    }
3043                }
3044            }
3045
3046            addAdditionallyAffectedResources(adminCms, updateResources);
3047            updateAllIndexes(adminCms, updateResources, report);
3048        } finally {
3049            SEARCH_MANAGER_LOCK.unlock();
3050            Thread.currentThread().setPriority(oldPriority);
3051        }
3052    }
3053
3054    /**
3055     * Incrementally updates all indexes that have their rebuild mode set to <code>"auto"</code>.<p>
3056     *
3057     * @param adminCms an OpenCms user context with Admin permissions
3058     * @param updateResources the resources to update
3059     * @param report the report to write the output to
3060     */
3061    protected void updateAllIndexes(
3062        CmsObject adminCms,
3063        List<CmsPublishedResource> updateResources,
3064        I_CmsReport report) {
3065
3066        try {
3067            SEARCH_MANAGER_LOCK.lock();
3068            if (!updateResources.isEmpty()) {
3069                // sort the resource to update
3070                Collections.sort(updateResources);
3071                // only update the indexes if the list of remaining published resources is not empty
3072                Iterator<I_CmsSearchIndex> i = m_indexes.iterator();
3073                while (i.hasNext()) {
3074                    I_CmsSearchIndex index = i.next();
3075                    if (I_CmsSearchIndex.REBUILD_MODE_AUTO.equals(index.getRebuildMode())) {
3076                        // only update indexes which have the rebuild mode set to "auto"
3077                        try {
3078                            updateIndex(index, report, updateResources);
3079                        } catch (CmsException e) {
3080                            LOG.error(
3081                                Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()),
3082                                e);
3083                        }
3084                    }
3085                }
3086            }
3087            // clean up the extraction result cache
3088            cleanExtractionCache();
3089        } finally {
3090            SEARCH_MANAGER_LOCK.unlock();
3091        }
3092
3093    }
3094
3095    /**
3096     * Updates (if required creates) the index with the given name.<p>
3097     *
3098     * If the optional List of <code>{@link CmsPublishedResource}</code> instances is provided, the index will be
3099     * incrementally updated for these resources only. If this List is <code>null</code> or empty,
3100     * the index will be fully rebuild.<p>
3101     *
3102     * @param index the index to update or rebuild
3103     * @param report the report to write output messages to
3104     * @param resourcesToIndex an (optional) list of <code>{@link CmsPublishedResource}</code> objects to update in the index
3105     *
3106     * @throws CmsException if something goes wrong
3107     */
3108    protected void updateIndex(I_CmsSearchIndex index, I_CmsReport report, List<CmsPublishedResource> resourcesToIndex)
3109    throws CmsException {
3110
3111        if (shouldUpdateAtAll(index)) {
3112            try {
3113                SEARCH_MANAGER_LOCK.lock();
3114
3115                // copy the stored admin context for the indexing
3116                CmsObject cms = OpenCms.initCmsObject(m_adminCms);
3117                // make sure a report is available
3118                if (report == null) {
3119                    report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class);
3120                }
3121
3122                // check if the index has been configured correctly
3123                if (!index.checkConfiguration(cms)) {
3124                    // the index is disabled
3125                    return;
3126                }
3127
3128                // set site root and project for this index
3129                cms.getRequestContext().setSiteRoot("/");
3130                // switch to the index project
3131                cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject()));
3132
3133                if ((resourcesToIndex == null) || resourcesToIndex.isEmpty()) {
3134                    // rebuild the complete index
3135
3136                    updateIndexCompletely(cms, index, report);
3137                } else {
3138                    updateIndexIncremental(cms, index, report, resourcesToIndex);
3139                }
3140            } finally {
3141                SEARCH_MANAGER_LOCK.unlock();
3142            }
3143        }
3144    }
3145
3146    /**
3147     * The method updates all OpenCms documents that are indexed.
3148     * @param cms the OpenCms user context to use for accessing the VFS
3149     * @param index the index to update
3150     * @param report the report to write output messages to
3151     * @throws CmsIndexException thrown if indexing fails for some reason
3152     */
3153    @SuppressWarnings("null")
3154    protected void updateIndexCompletely(CmsObject cms, I_CmsSearchIndex index, I_CmsReport report)
3155    throws CmsIndexException {
3156
3157        // create a new thread manager for the indexing threads
3158        CmsIndexingThreadManager threadManager = getThreadManager();
3159
3160        boolean isOfflineIndex = false;
3161        if (I_CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) {
3162            // disable offline indexing while the complete index is rebuild
3163            isOfflineIndex = true;
3164            index.setRebuildMode(I_CmsSearchIndex.REBUILD_MODE_MANUAL);
3165            // re-initialize the offline indexes, this will disable this offline index
3166            initOfflineIndexes();
3167        }
3168
3169        I_CmsIndexWriter writer = null;
3170        try {
3171            // create a backup of the existing index
3172            CmsSearchIndex indexInternal = null;
3173            String backup = null;
3174            if (index instanceof CmsSearchIndex) {
3175                indexInternal = (CmsSearchIndex)index;
3176                backup = indexInternal.createIndexBackup();
3177                if (backup != null) {
3178                    indexInternal.indexSearcherOpen(backup);
3179                }
3180            }
3181
3182            // create a new index writer
3183            writer = index.getIndexWriter(report, true);
3184            if (writer instanceof I_CmsSolrIndexWriter) {
3185                try {
3186                    ((I_CmsSolrIndexWriter)writer).deleteAllDocuments();
3187                } catch (IOException e) {
3188                    LOG.error(e.getMessage(), e);
3189                }
3190            }
3191
3192            // output start information on the report
3193            report.println(
3194                Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_BEGIN_1, index.getName()),
3195                I_CmsReport.FORMAT_HEADLINE);
3196
3197            // iterate all configured index sources of this index
3198            Iterator<CmsSearchIndexSource> sources = index.getSources().iterator();
3199            while (sources.hasNext()) {
3200                // get the next index source
3201                CmsSearchIndexSource source = sources.next();
3202                // create the indexer
3203                I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index);
3204                // new index creation, use all resources from the index source
3205                indexer.rebuildIndex(writer, threadManager, source);
3206
3207                // wait for indexing threads to finish
3208                while (threadManager.isRunning()) {
3209                    try {
3210                        Thread.sleep(500);
3211                    } catch (InterruptedException e) {
3212                        // just continue with the loop after interruption
3213                        LOG.info(e.getLocalizedMessage(), e);
3214                    }
3215                }
3216
3217                // commit and optimize the index after each index source has been finished
3218                try {
3219                    writer.commit();
3220                } catch (IOException e) {
3221                    if (LOG.isWarnEnabled()) {
3222                        LOG.warn(
3223                            Messages.get().getBundle().key(
3224                                Messages.LOG_IO_INDEX_WRITER_COMMIT_2,
3225                                index.getName(),
3226                                index.getPath()),
3227                            e);
3228                    }
3229                }
3230                try {
3231                    writer.optimize();
3232                } catch (IOException e) {
3233                    if (LOG.isWarnEnabled()) {
3234                        LOG.warn(
3235                            Messages.get().getBundle().key(
3236                                Messages.LOG_IO_INDEX_WRITER_OPTIMIZE_2,
3237                                index.getName(),
3238                                index.getPath()),
3239                            e);
3240                    }
3241                }
3242            }
3243
3244            // we are sure here that indexInternal is not null
3245            if (backup != null) {
3246                // remove the backup after the files have been re-indexed
3247                indexInternal.indexSearcherClose();
3248                indexInternal.removeIndexBackup(backup);
3249            }
3250
3251            // output finish information on the report
3252            report.println(
3253                Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_END_1, index.getName()),
3254                I_CmsReport.FORMAT_HEADLINE);
3255
3256        } finally {
3257            if (writer != null) {
3258                try {
3259                    writer.close();
3260                } catch (IOException e) {
3261                    if (LOG.isWarnEnabled()) {
3262                        LOG.warn(
3263                            Messages.get().getBundle().key(
3264                                Messages.LOG_IO_INDEX_WRITER_CLOSE_2,
3265                                index.getPath(),
3266                                index.getName()),
3267                            e);
3268                    }
3269                }
3270            }
3271            if (isOfflineIndex) {
3272                // reset the mode of the offline index
3273                index.setRebuildMode(I_CmsSearchIndex.REBUILD_MODE_OFFLINE);
3274                // re-initialize the offline indexes, this will re-enable this index
3275                initOfflineIndexes();
3276            }
3277            // index has changed - initialize the index searcher instance
3278            index.onIndexChanged(true);
3279        }
3280
3281        // show information about indexing runtime
3282        threadManager.reportStatistics(report);
3283    }
3284
3285    /**
3286     * Incrementally updates the given index.<p>
3287     *
3288     * @param cms the OpenCms user context to use for accessing the VFS
3289     * @param index the index to update
3290     * @param report the report to write output messages to
3291     * @param resourcesToIndex a list of <code>{@link CmsPublishedResource}</code> objects to update in the index
3292     *
3293     * @throws CmsException if something goes wrong
3294     */
3295    protected void updateIndexIncremental(
3296        CmsObject cms,
3297        I_CmsSearchIndex index,
3298        I_CmsReport report,
3299        List<CmsPublishedResource> resourcesToIndex)
3300    throws CmsException {
3301
3302        try {
3303            SEARCH_MANAGER_LOCK.lock();
3304
3305            // update the existing index
3306            List<CmsSearchIndexUpdateData> updateCollections = new ArrayList<CmsSearchIndexUpdateData>();
3307
3308            boolean hasResourcesToDelete = false;
3309            boolean hasResourcesToUpdate = false;
3310
3311            // iterate all configured index sources of this index
3312            Iterator<CmsSearchIndexSource> sources = index.getSources().iterator();
3313            while (sources.hasNext()) {
3314                // get the next index source
3315                CmsSearchIndexSource source = sources.next();
3316                // create the indexer
3317                I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index);
3318                // collect the resources to update
3319                CmsSearchIndexUpdateData updateData = indexer.getUpdateData(source, resourcesToIndex);
3320                if (!updateData.isEmpty()) {
3321                    // add the update collection to the internal pipeline
3322                    updateCollections.add(updateData);
3323                    hasResourcesToDelete = hasResourcesToDelete | updateData.hasResourcesToDelete();
3324                    hasResourcesToUpdate = hasResourcesToUpdate | updateData.hasResourceToUpdate();
3325                }
3326            }
3327
3328            // only start index modification if required
3329            if (hasResourcesToDelete || hasResourcesToUpdate) {
3330                // output start information on the report
3331                report.println(
3332                    Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_BEGIN_1, index.getName()),
3333                    I_CmsReport.FORMAT_HEADLINE);
3334
3335                I_CmsIndexWriter writer = null;
3336                try {
3337                    // obtain an index writer that updates the current index
3338                    writer = index.getIndexWriter(report, false);
3339
3340                    if (hasResourcesToDelete) {
3341                        // delete the resource from the index
3342                        Iterator<CmsSearchIndexUpdateData> i = updateCollections.iterator();
3343                        while (i.hasNext()) {
3344                            CmsSearchIndexUpdateData updateCollection = i.next();
3345                            if (updateCollection.hasResourcesToDelete()) {
3346                                updateCollection.getIndexer().deleteResources(
3347                                    writer,
3348                                    updateCollection.getResourcesToDelete());
3349                            }
3350                        }
3351                    }
3352
3353                    if (hasResourcesToUpdate) {
3354                        // create a new thread manager
3355                        CmsIndexingThreadManager threadManager = getThreadManager();
3356
3357                        Iterator<CmsSearchIndexUpdateData> i = updateCollections.iterator();
3358                        while (i.hasNext()) {
3359                            CmsSearchIndexUpdateData updateCollection = i.next();
3360                            if (updateCollection.hasResourceToUpdate()) {
3361                                updateCollection.getIndexer().updateResources(
3362                                    writer,
3363                                    threadManager,
3364                                    updateCollection.getResourcesToUpdate());
3365                            }
3366                        }
3367
3368                        // wait for indexing threads to finish
3369                        while (threadManager.isRunning()) {
3370                            try {
3371                                Thread.sleep(500);
3372                            } catch (InterruptedException e) {
3373                                // just continue with the loop after interruption
3374                                LOG.info(e.getLocalizedMessage(), e);
3375                            }
3376                        }
3377                    }
3378                } finally {
3379                    // close the index writer
3380                    if (writer != null) {
3381                        try {
3382                            writer.commit();
3383                        } catch (IOException e) {
3384                            LOG.error(
3385                                Messages.get().getBundle().key(
3386                                    Messages.LOG_IO_INDEX_WRITER_COMMIT_2,
3387                                    index.getName(),
3388                                    index.getPath()),
3389                                e);
3390                        }
3391                    }
3392                    // index has changed - initialize the index searcher instance
3393                    index.onIndexChanged(false);
3394                }
3395
3396                // output finish information on the report
3397                report.println(
3398                    Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_END_1, index.getName()),
3399                    I_CmsReport.FORMAT_HEADLINE);
3400            }
3401        } finally {
3402            SEARCH_MANAGER_LOCK.unlock();
3403        }
3404    }
3405
3406    /**
3407     * Updates the offline search indexes for the given list of resources.<p>
3408     *
3409     * @param report the report to write the index information to
3410     * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index
3411     */
3412    protected void updateIndexOffline(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) {
3413
3414        CmsObject cms = m_adminCms;
3415        try {
3416            // copy the administration context for the indexing
3417            cms = OpenCms.initCmsObject(m_adminCms);
3418            // set site root and project for this index
3419            cms.getRequestContext().setSiteRoot("/");
3420        } catch (CmsException e) {
3421            LOG.error(e.getLocalizedMessage(), e);
3422        }
3423
3424        Iterator<I_CmsSearchIndex> j = m_offlineIndexes.iterator();
3425        while (j.hasNext()) {
3426            I_CmsSearchIndex index = j.next();
3427            if (index.getSources() != null) {
3428                try {
3429                    // switch to the index project
3430                    cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject()));
3431                    updateIndexIncremental(cms, index, report, resourcesToIndex);
3432                } catch (CmsException e) {
3433                    LOG.error(Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()), e);
3434                }
3435            }
3436        }
3437    }
3438
3439    /**
3440     * Checks if the given containerpage is used as a detail containers and adds the related detail content to the resource set.<p>
3441     *
3442     * @param adminCms the cms context
3443     * @param containerPages the containerpages
3444     * @param containerPage the container page site path
3445     */
3446    private void addDetailContent(CmsObject adminCms, Set<CmsResource> containerPages, String containerPage) {
3447
3448        if (CmsDetailOnlyContainerUtil.isDetailContainersPage(adminCms, containerPage)) {
3449
3450            try {
3451                CmsResource detailRes = adminCms.readResource(
3452                    CmsDetailOnlyContainerUtil.getDetailContentPath(containerPage),
3453                    CmsResourceFilter.IGNORE_EXPIRATION);
3454                containerPages.add(detailRes);
3455            } catch (Throwable e) {
3456                if (LOG.isWarnEnabled()) {
3457                    LOG.warn(e.getLocalizedMessage(), e);
3458                }
3459            }
3460        }
3461    }
3462
3463    /**
3464     * Creates the Solr core container.<p>
3465     *
3466     * @return the created core container
3467     */
3468    private CoreContainer createCoreContainer() {
3469
3470        CoreContainer container = null;
3471        try {
3472            // get the core container
3473            // still no core container: create it
3474            container = CoreContainer.createAndLoad(
3475                Paths.get(m_solrConfig.getHome()),
3476                m_solrConfig.getSolrFile().toPath());
3477            if (CmsLog.INIT.isInfoEnabled()) {
3478                CmsLog.INIT.info(
3479                    Messages.get().getBundle().key(
3480                        Messages.INIT_SOLR_CORE_CONTAINER_CREATED_2,
3481                        m_solrConfig.getHome(),
3482                        m_solrConfig.getSolrFile().getName()));
3483            }
3484        } catch (Exception e) {
3485            LOG.error(
3486                Messages.get().getBundle().key(
3487                    Messages.ERR_SOLR_CORE_CONTAINER_NOT_CREATED_1,
3488                    m_solrConfig.getSolrFile().getAbsolutePath()),
3489                e);
3490        }
3491        return container;
3492
3493    }
3494
3495    /**
3496     * Remove write.lock file in the data directory to ensure the index is unlocked.
3497     * @param dataDir the data directory of the Solr index that should be unlocked.
3498     */
3499    private void ensureIndexIsUnlocked(String dataDir) {
3500
3501        Collection<File> lockFiles = new ArrayList<File>(2);
3502        lockFiles.add(
3503            new File(
3504                CmsFileUtil.addTrailingSeparator(CmsFileUtil.addTrailingSeparator(dataDir) + "index") + "write.lock"));
3505        lockFiles.add(
3506            new File(
3507                CmsFileUtil.addTrailingSeparator(CmsFileUtil.addTrailingSeparator(dataDir) + "spellcheck")
3508                    + "write.lock"));
3509        for (File lockFile : lockFiles) {
3510            if (lockFile.exists()) {
3511                lockFile.delete();
3512                LOG.warn(
3513                    "Forcely unlocking index with data dir \""
3514                        + dataDir
3515                        + "\" by removing file \""
3516                        + lockFile.getAbsolutePath()
3517                        + "\".");
3518            }
3519        }
3520    }
3521
3522    /**
3523     * Returns the report in the given event data, if <code>null</code>
3524     * a new log report is used.<p>
3525     *
3526     * @param event the event to get the report for
3527     *
3528     * @return the report
3529     */
3530    private I_CmsReport getEventReport(CmsEvent event) {
3531
3532        I_CmsReport report = null;
3533        if (event.getData() != null) {
3534            report = (I_CmsReport)event.getData().get(I_CmsEventListener.KEY_REPORT);
3535        }
3536        if (report == null) {
3537            report = new CmsLogReport(Locale.ENGLISH, getClass());
3538        }
3539        return report;
3540    }
3541
3542    /**
3543     * Gets all structure ids for which published resources of both states 'new' and 'deleted' exist in the given list.<p>
3544     *
3545     * @param publishedResources a list of published resources
3546     *
3547     * @return the set of structure ids that satisfy the condition above
3548     */
3549    private Set<CmsUUID> getIdsOfPublishResourcesWhichAreBothNewAndDeleted(
3550        List<CmsPublishedResource> publishedResources) {
3551
3552        Set<CmsUUID> result = new HashSet<CmsUUID>();
3553        Set<CmsUUID> deletedSet = new HashSet<CmsUUID>();
3554        for (CmsPublishedResource pubRes : publishedResources) {
3555            if (pubRes.getState().isNew()) {
3556                result.add(pubRes.getStructureId());
3557            }
3558            if (pubRes.getState().isDeleted()) {
3559                deletedSet.add(pubRes.getStructureId());
3560            }
3561        }
3562        result.retainAll(deletedSet);
3563        return result;
3564    }
3565
3566    /**
3567     * Checks if the given type id belongs to a group type.
3568     *
3569     * @param type the type id to check
3570     * @return true if the type is a group type
3571     */
3572    private boolean isGroup(int type) {
3573
3574        for (String groupType : groupTypes) {
3575            if (OpenCms.getResourceManager().matchResourceType(groupType, type)) {
3576                return true;
3577            }
3578        }
3579        return false;
3580
3581    }
3582
3583    /**
3584     * Shuts down the Solr core container.<p>
3585     */
3586    private void shutDownSolrContainer() {
3587
3588        if (m_coreContainer != null) {
3589            for (SolrCore core : m_coreContainer.getCores()) {
3590                // do not unload spellcheck core because otherwise the core.properties file is removed
3591                // even when calling m_coreContainer.unload(core.getName(), false, false, false);
3592                if (!core.getName().equals(CmsSolrSpellchecker.SPELLCHECKER_INDEX_CORE)) {
3593                    m_coreContainer.unload(core.getName(), false, false, true);
3594                }
3595            }
3596            m_coreContainer.shutdown();
3597            if (CmsLog.INIT.isInfoEnabled()) {
3598                CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SOLR_SHUTDOWN_SUCCESS_0));
3599            }
3600            m_coreContainer = null;
3601        }
3602    }
3603
3604}