001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software GmbH & Co. KG, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.search; 029 030import org.opencms.ade.containerpage.CmsDetailOnlyContainerUtil; 031import org.opencms.configuration.CmsConfigurationException; 032import org.opencms.db.CmsDriverManager; 033import org.opencms.db.CmsPublishedResource; 034import org.opencms.db.CmsResourceState; 035import org.opencms.file.CmsObject; 036import org.opencms.file.CmsProject; 037import org.opencms.file.CmsResource; 038import org.opencms.file.CmsResourceFilter; 039import org.opencms.file.CmsUser; 040import org.opencms.file.types.CmsResourceTypeXmlContainerPage; 041import org.opencms.file.types.CmsResourceTypeXmlContent; 042import org.opencms.file.types.I_CmsResourceType; 043import org.opencms.i18n.CmsLocaleManager; 044import org.opencms.i18n.CmsMessageContainer; 045import org.opencms.loader.CmsLoaderException; 046import org.opencms.loader.CmsResourceManager; 047import org.opencms.main.CmsBroadcast.ContentMode; 048import org.opencms.main.CmsEvent; 049import org.opencms.main.CmsException; 050import org.opencms.main.CmsIllegalArgumentException; 051import org.opencms.main.CmsIllegalStateException; 052import org.opencms.main.CmsLog; 053import org.opencms.main.I_CmsEventListener; 054import org.opencms.main.OpenCms; 055import org.opencms.main.OpenCmsSolrHandler; 056import org.opencms.relations.CmsRelation; 057import org.opencms.relations.CmsRelationFilter; 058import org.opencms.relations.CmsRelationType; 059import org.opencms.report.CmsLogReport; 060import org.opencms.report.CmsShellLogReport; 061import org.opencms.report.I_CmsReport; 062import org.opencms.scheduler.I_CmsScheduledJob; 063import org.opencms.search.documents.A_CmsVfsDocument; 064import org.opencms.search.documents.CmsExtractionResultCache; 065import org.opencms.search.documents.I_CmsDocumentFactory; 066import org.opencms.search.documents.I_CmsTermHighlighter; 067import org.opencms.search.fields.CmsLuceneField; 068import org.opencms.search.fields.CmsLuceneFieldConfiguration; 069import org.opencms.search.fields.CmsSearchField; 070import org.opencms.search.fields.CmsSearchFieldConfiguration; 071import org.opencms.search.fields.CmsSearchFieldMapping; 072import org.opencms.search.fields.I_CmsSearchFieldConfiguration; 073import org.opencms.search.solr.CmsSolrConfiguration; 074import org.opencms.search.solr.CmsSolrFieldConfiguration; 075import org.opencms.search.solr.CmsSolrIndex; 076import org.opencms.search.solr.I_CmsSolrIndexWriter; 077import org.opencms.search.solr.spellchecking.CmsSolrSpellchecker; 078import org.opencms.search.solr.spellchecking.CmsSpellcheckDictionaryIndexer; 079import org.opencms.security.CmsRole; 080import org.opencms.security.CmsRoleViolationException; 081import org.opencms.util.A_CmsModeStringEnumeration; 082import org.opencms.util.CmsFileUtil; 083import org.opencms.util.CmsStringUtil; 084import org.opencms.util.CmsUUID; 085import org.opencms.util.CmsWaitHandle; 086 087import java.io.File; 088import java.io.IOException; 089import java.nio.file.FileSystems; 090import java.nio.file.Paths; 091import java.util.ArrayList; 092import java.util.Collection; 093import java.util.Collections; 094import java.util.HashMap; 095import java.util.HashSet; 096import java.util.Iterator; 097import java.util.LinkedHashMap; 098import java.util.List; 099import java.util.ListIterator; 100import java.util.Locale; 101import java.util.Map; 102import java.util.Set; 103import java.util.TreeMap; 104import java.util.concurrent.locks.ReentrantLock; 105import java.util.stream.Collectors; 106 107import org.apache.commons.logging.Log; 108import org.apache.lucene.analysis.Analyzer; 109import org.apache.lucene.analysis.CharArraySet; 110import org.apache.lucene.analysis.standard.StandardAnalyzer; 111import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; 112import org.apache.solr.client.solrj.impl.HttpSolrClient.Builder; 113import org.apache.solr.core.CoreContainer; 114import org.apache.solr.core.CoreDescriptor; 115import org.apache.solr.core.SolrCore; 116 117/** 118 * Implements the general management and configuration of the search and 119 * indexing facilities in OpenCms.<p> 120 * 121 * @since 6.0.0 122 */ 123public class CmsSearchManager implements I_CmsScheduledJob, I_CmsEventListener { 124 125 /** 126 * Enumeration class for force unlock types.<p> 127 */ 128 public static final class CmsSearchForceUnlockMode extends A_CmsModeStringEnumeration { 129 130 /** Force unlock type "always". */ 131 public static final CmsSearchForceUnlockMode ALWAYS = new CmsSearchForceUnlockMode("always"); 132 133 /** Force unlock type "never". */ 134 public static final CmsSearchForceUnlockMode NEVER = new CmsSearchForceUnlockMode("never"); 135 136 /** Force unlock type "only full". */ 137 public static final CmsSearchForceUnlockMode ONLYFULL = new CmsSearchForceUnlockMode("onlyfull"); 138 139 /** Serializable version id. */ 140 private static final long serialVersionUID = 74746076708908673L; 141 142 /** 143 * Creates a new force unlock type with the given name.<p> 144 * 145 * @param mode the mode id to use 146 */ 147 protected CmsSearchForceUnlockMode(String mode) { 148 149 super(mode); 150 } 151 152 /** 153 * Returns the lock type for the given type value.<p> 154 * 155 * @param type the type value to get the lock type for 156 * 157 * @return the lock type for the given type value 158 */ 159 public static CmsSearchForceUnlockMode valueOf(String type) { 160 161 if (type.equals(ALWAYS.toString())) { 162 return ALWAYS; 163 } else if (type.equals(NEVER.toString())) { 164 return NEVER; 165 } else { 166 return ONLYFULL; 167 } 168 } 169 } 170 171 /** 172 * Handles offline index generation.<p> 173 */ 174 protected class CmsSearchOfflineHandler implements I_CmsEventListener { 175 176 /** Indicates if the event handlers for the offline search have been already registered. */ 177 private boolean m_isEventRegistered; 178 179 /** The list of resources to index. */ 180 private List<CmsPublishedResource> m_resourcesToIndex; 181 182 /** 183 * Initializes the offline index handler.<p> 184 */ 185 protected CmsSearchOfflineHandler() { 186 187 m_resourcesToIndex = new ArrayList<CmsPublishedResource>(); 188 } 189 190 /** 191 * Implements the event listener of this class.<p> 192 * 193 * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent) 194 */ 195 @SuppressWarnings("unchecked") 196 public void cmsEvent(CmsEvent event) { 197 198 Object change = event.getData().get(I_CmsEventListener.KEY_CHANGE); 199 switch (event.getType()) { 200 case I_CmsEventListener.EVENT_PROPERTY_MODIFIED: 201 case I_CmsEventListener.EVENT_RESOURCE_CREATED: 202 case I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED: 203 case I_CmsEventListener.EVENT_RESOURCE_MODIFIED: 204 if ((change != null) && change.equals(Integer.valueOf(CmsDriverManager.NOTHING_CHANGED))) { 205 // skip lock & unlock 206 return; 207 } 208 // skip indexing if flag is set in event 209 Object skip = event.getData().get(I_CmsEventListener.KEY_SKIPINDEX); 210 if (skip != null) { 211 return; 212 } 213 214 // a resource has been modified - offline indexes require (re)indexing 215 List<CmsResource> resources = Collections.singletonList( 216 (CmsResource)event.getData().get(I_CmsEventListener.KEY_RESOURCE)); 217 reIndexResources(resources); 218 break; 219 case I_CmsEventListener.EVENT_RESOURCE_DELETED: 220 List<CmsResource> eventResources = (List<CmsResource>)event.getData().get( 221 I_CmsEventListener.KEY_RESOURCES); 222 List<CmsResource> resourcesToDelete = new ArrayList<CmsResource>(eventResources); 223 for (CmsResource res : resourcesToDelete) { 224 if (res.getState().isNew()) { 225 // if the resource is new and a delete action was performed 226 // --> set the state of the resource to deleted 227 res.setState(CmsResourceState.STATE_DELETED); 228 } 229 } 230 reIndexResources(resourcesToDelete); 231 break; 232 case I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED: 233 if (I_CmsEventListener.VALUE_CREATE_SIBLING.equals(change)) { 234 List<CmsResource> resList = (List<CmsResource>)event.getData().get( 235 I_CmsEventListener.KEY_RESOURCES); 236 if ((resList != null) && (resList.size() >= 3)) { 237 System.out.println("Sibling creation case, resource = " + resList.get(1).getRootPath()); 238 reIndexResources(Collections.singletonList(resList.get(1))); 239 240 } 241 } else { 242 reIndexResources((List<CmsResource>)event.getData().get(I_CmsEventListener.KEY_RESOURCES)); 243 } 244 break; 245 case I_CmsEventListener.EVENT_RESOURCE_MOVED: 246 case I_CmsEventListener.EVENT_RESOURCE_COPIED: 247 case I_CmsEventListener.EVENT_RESOURCES_MODIFIED: 248 249 // a list of resources has been modified - offline indexes require (re)indexing 250 reIndexResources((List<CmsResource>)event.getData().get(I_CmsEventListener.KEY_RESOURCES)); 251 break; 252 default: 253 // no operation 254 } 255 } 256 257 /** 258 * Adds a list of {@link CmsPublishedResource} objects to be indexed.<p> 259 * 260 * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to be indexed 261 */ 262 protected synchronized void addResourcesToIndex(List<CmsPublishedResource> resourcesToIndex) { 263 264 m_resourcesToIndex.addAll(resourcesToIndex); 265 } 266 267 /** 268 * Returns the list of {@link CmsPublishedResource} objects to index.<p> 269 * 270 * @return the resources to index 271 */ 272 protected List<CmsPublishedResource> getResourcesToIndex() { 273 274 List<CmsPublishedResource> result; 275 synchronized (this) { 276 result = m_resourcesToIndex; 277 m_resourcesToIndex = new ArrayList<CmsPublishedResource>(); 278 } 279 try { 280 CmsObject cms = m_adminCms; 281 CmsProject offline = getOfflineIndexProject(); 282 if (offline != null) { 283 // switch to the offline project if available 284 cms = OpenCms.initCmsObject(m_adminCms); 285 cms.getRequestContext().setCurrentProject(offline); 286 } 287 addAdditionallyAffectedResources(cms, result); 288 } catch (CmsException e) { 289 LOG.error(e.getLocalizedMessage(), e); 290 } 291 return result; 292 } 293 294 /** 295 * Initializes this offline search handler, registering the event handlers if required.<p> 296 */ 297 protected void initialize() { 298 299 if (m_offlineIndexes.size() > 0) { 300 // there is at least one offline index configured 301 if ((m_offlineIndexThread == null) || !m_offlineIndexThread.isAlive()) { 302 // create the offline indexing thread 303 m_offlineIndexThread = new CmsSearchOfflineIndexThread(this); 304 // start the offline index thread 305 m_offlineIndexThread.start(); 306 } 307 } else { 308 if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) { 309 // no offline indexes but thread still running, stop the thread 310 m_offlineIndexThread.shutDown(); 311 m_offlineIndexThread = null; 312 } 313 } 314 // do this only in case there are offline indexes configured 315 if (!m_isEventRegistered && (m_offlineIndexes.size() > 0)) { 316 m_isEventRegistered = true; 317 // register this object as event listener 318 OpenCms.addCmsEventListener( 319 this, 320 new int[] { 321 I_CmsEventListener.EVENT_PROPERTY_MODIFIED, 322 I_CmsEventListener.EVENT_RESOURCE_CREATED, 323 I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED, 324 I_CmsEventListener.EVENT_RESOURCE_MODIFIED, 325 I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED, 326 I_CmsEventListener.EVENT_RESOURCE_MOVED, 327 I_CmsEventListener.EVENT_RESOURCE_DELETED, 328 I_CmsEventListener.EVENT_RESOURCE_COPIED, 329 I_CmsEventListener.EVENT_RESOURCES_MODIFIED}); 330 } 331 } 332 333 /** 334 * Updates all offline indexes for the given list of {@link CmsResource} objects.<p> 335 * 336 * @param resources a list of {@link CmsResource} objects to update in the offline indexes 337 */ 338 protected synchronized void reIndexResources(List<CmsResource> resources) { 339 340 List<CmsPublishedResource> resourcesToIndex = new ArrayList<CmsPublishedResource>(resources.size()); 341 for (CmsResource res : resources) { 342 CmsPublishedResource pubRes = new CmsPublishedResource(res); 343 resourcesToIndex.add(pubRes); 344 } 345 if (resourcesToIndex.size() > 0) { 346 // add the resources found to the offline index thread 347 addResourcesToIndex(resourcesToIndex); 348 } 349 } 350 } 351 352 /** 353 * The offline indexer thread runs periodically and indexes all resources added by the event handler.<p> 354 */ 355 protected class CmsSearchOfflineIndexThread extends Thread { 356 357 /** The event handler that triggers this thread. */ 358 CmsSearchOfflineHandler m_handler; 359 360 /** Indicates if this thread is still alive. */ 361 boolean m_isAlive; 362 363 /** Indicates that an index update thread is currently running. */ 364 private boolean m_isUpdating; 365 366 /** If true a manual update (after file upload) was triggered. */ 367 private boolean m_updateTriggered; 368 369 /** The wait handle used for signalling when the worker thread has finished. */ 370 private CmsWaitHandle m_waitHandle = new CmsWaitHandle(); 371 372 /** 373 * Constructor.<p> 374 * 375 * @param handler the offline index event handler 376 */ 377 protected CmsSearchOfflineIndexThread(CmsSearchOfflineHandler handler) { 378 379 super("OpenCms: Offline Search Indexer"); 380 m_handler = handler; 381 } 382 383 /** 384 * Gets the wait handle used for signalling when the worker thread has finished. 385 * 386 * @return the wait handle 387 **/ 388 public CmsWaitHandle getWaitHandle() { 389 390 return m_waitHandle; 391 } 392 393 /** 394 * @see java.lang.Thread#interrupt() 395 */ 396 @Override 397 public void interrupt() { 398 399 super.interrupt(); 400 m_updateTriggered = true; 401 } 402 403 /** 404 * @see java.lang.Thread#run() 405 */ 406 @Override 407 public void run() { 408 409 // create a log report for the output 410 I_CmsReport report = new CmsLogReport(m_adminCms.getRequestContext().getLocale(), CmsSearchManager.class); 411 long offlineUpdateFrequency = getOfflineUpdateFrequency(); 412 m_updateTriggered = false; 413 try { 414 while (m_isAlive) { 415 if (!m_updateTriggered) { 416 try { 417 sleep(offlineUpdateFrequency); 418 } catch (InterruptedException e) { 419 // continue the thread after interruption 420 if (!m_isAlive) { 421 // the thread has been shut down while sleeping 422 continue; 423 } 424 if (offlineUpdateFrequency != getOfflineUpdateFrequency()) { 425 // offline update frequency change - clear interrupt status 426 offlineUpdateFrequency = getOfflineUpdateFrequency(); 427 } 428 LOG.info(e.getLocalizedMessage(), e); 429 } 430 } 431 if (m_isAlive) { 432 // set update trigger to false since we do the update now 433 m_updateTriggered = false; 434 // get list of resource to update 435 List<CmsPublishedResource> resourcesToIndex = getResourcesToIndex(); 436 if (resourcesToIndex.size() > 0) { 437 // only start indexing if there is at least one resource 438 startOfflineUpdateThread(report, resourcesToIndex); 439 } else { 440 getWaitHandle().release(); 441 } 442 // this is just called to clear the interrupt status of the thread 443 interrupted(); 444 } 445 } 446 } finally { 447 // make sure that live status is reset in case of Exceptions 448 m_isAlive = false; 449 } 450 451 } 452 453 /** 454 * @see java.lang.Thread#start() 455 */ 456 @Override 457 public synchronized void start() { 458 459 m_isAlive = true; 460 super.start(); 461 } 462 463 /** 464 * Obtains the list of resource to update in the offline index, 465 * then optimizes the list by removing duplicate entries.<p> 466 * 467 * @return the list of resource to update in the offline index 468 */ 469 protected List<CmsPublishedResource> getResourcesToIndex() { 470 471 List<CmsPublishedResource> resourcesToIndex = m_handler.getResourcesToIndex(); 472 List<CmsPublishedResource> result = new ArrayList<CmsPublishedResource>(resourcesToIndex.size()); 473 474 // Reverse to always keep the last list entries 475 Collections.reverse(resourcesToIndex); 476 for (CmsPublishedResource pubRes : resourcesToIndex) { 477 boolean addResource = true; 478 for (CmsPublishedResource resRes : result) { 479 if (pubRes.equals(resRes) 480 && (pubRes.getState() == resRes.getState()) 481 && (pubRes.getMovedState() == resRes.getMovedState()) 482 && pubRes.getRootPath().equals(resRes.getRootPath())) { 483 // resource already in the update list 484 addResource = false; 485 break; 486 } 487 } 488 if (addResource) { 489 result.add(pubRes); 490 } 491 492 } 493 Collections.reverse(result); 494 return changeStateOfMoveOriginsToDeleted(result); 495 } 496 497 /** 498 * Shuts down this offline index thread.<p> 499 */ 500 protected void shutDown() { 501 502 m_isAlive = false; 503 interrupt(); 504 if (m_isUpdating) { 505 long waitTime = getOfflineUpdateFrequency() / 2; 506 int waitSteps = 0; 507 do { 508 try { 509 // wait half the time of the offline index frequency for the thread to finish 510 Thread.sleep(waitTime); 511 } catch (InterruptedException e) { 512 // continue 513 LOG.info(e.getLocalizedMessage(), e); 514 } 515 waitSteps++; 516 // wait 5 times then stop waiting 517 } while ((waitSteps < 5) && m_isUpdating); 518 } 519 } 520 521 /** 522 * Updates the offline search indexes for the given list of resources.<p> 523 * 524 * @param report the report to write the index information to 525 * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index 526 */ 527 protected void startOfflineUpdateThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) { 528 529 CmsSearchOfflineIndexWorkThread thread = new CmsSearchOfflineIndexWorkThread(report, resourcesToIndex); 530 long startTime = System.currentTimeMillis(); 531 long waitTime = getOfflineUpdateFrequency() / 2; 532 if (LOG.isDebugEnabled()) { 533 LOG.debug( 534 Messages.get().getBundle().key( 535 Messages.LOG_OI_UPDATE_START_1, 536 Integer.valueOf(resourcesToIndex.size()))); 537 } 538 539 m_isUpdating = true; 540 thread.start(); 541 542 do { 543 try { 544 // wait half the time of the offline index frequency for the thread to finish 545 thread.join(waitTime); 546 } catch (InterruptedException e) { 547 // continue 548 LOG.info(e.getLocalizedMessage(), e); 549 } 550 if (thread.isAlive()) { 551 LOG.warn( 552 Messages.get().getBundle().key( 553 Messages.LOG_OI_UPDATE_LONG_2, 554 Integer.valueOf(resourcesToIndex.size()), 555 Long.valueOf(System.currentTimeMillis() - startTime))); 556 } 557 } while (thread.isAlive()); 558 m_isUpdating = false; 559 560 if (LOG.isDebugEnabled()) { 561 LOG.debug( 562 Messages.get().getBundle().key( 563 Messages.LOG_OI_UPDATE_FINISH_2, 564 Integer.valueOf(resourcesToIndex.size()), 565 Long.valueOf(System.currentTimeMillis() - startTime))); 566 } 567 } 568 569 /** 570 * Helper method which changes the states of resources which are to be indexed but have the wrong path to 'deleted'. 571 * This is needed to deal with moved resources, since the documents with the old paths must be removed from the index, 572 * 573 * @param resourcesToIndex the resources to index 574 * 575 * @return the resources to index, but resource states are set to 'deleted' for resources with outdated paths 576 */ 577 private List<CmsPublishedResource> changeStateOfMoveOriginsToDeleted( 578 List<CmsPublishedResource> resourcesToIndex) { 579 580 Map<CmsUUID, String> lastValidPaths = new HashMap<CmsUUID, String>(); 581 for (CmsPublishedResource resource : resourcesToIndex) { 582 if (resource.getState().isDeleted()) { 583 // we don't want the last path to be from a deleted resource 584 continue; 585 } 586 lastValidPaths.put(resource.getStructureId(), resource.getRootPath()); 587 } 588 List<CmsPublishedResource> result = new ArrayList<CmsPublishedResource>(); 589 for (CmsPublishedResource resource : resourcesToIndex) { 590 if (resource.getState().isDeleted()) { 591 result.add(resource); 592 continue; 593 } 594 String lastValidPath = lastValidPaths.get(resource.getStructureId()); 595 if (resource.getRootPath().equals(lastValidPath) || resource.getStructureId().isNullUUID()) { 596 result.add(resource); 597 } else { 598 result.add( 599 new CmsPublishedResource( 600 resource.getStructureId(), 601 resource.getResourceId(), 602 resource.getPublishTag(), 603 resource.getRootPath(), 604 resource.getType(), 605 resource.isFolder(), 606 CmsResource.STATE_DELETED, // make sure index entry with outdated path is deleted 607 resource.getSiblingCount())); 608 } 609 } 610 return result; 611 } 612 } 613 614 /** 615 * An offline index worker Thread runs each time for every offline index update action.<p> 616 * 617 * This was decoupled from the main {@link CmsSearchOfflineIndexThread} in order to avoid 618 * problems if a single operation "hangs" the Tread.<p> 619 */ 620 protected class CmsSearchOfflineIndexWorkThread extends Thread { 621 622 /** The report to write the index information to. */ 623 I_CmsReport m_report; 624 625 /** The list of {@link CmsPublishedResource} objects to index. */ 626 List<CmsPublishedResource> m_resourcesToIndex; 627 628 /** 629 * Updates the offline search indexes for the given list of resources.<p> 630 * 631 * @param report the report to write the index information to 632 * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index 633 */ 634 protected CmsSearchOfflineIndexWorkThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) { 635 636 super("OpenCms: Offline Search Index Worker"); 637 m_report = report; 638 m_resourcesToIndex = resourcesToIndex; 639 } 640 641 /** 642 * @see java.lang.Thread#run() 643 */ 644 @Override 645 public void run() { 646 647 updateIndexOffline(m_report, m_resourcesToIndex); 648 if (m_offlineIndexThread != null) { 649 m_offlineIndexThread.getWaitHandle().release(); 650 } 651 } 652 } 653 654 /** This needs to be a fair lock to preserve order of threads accessing the search manager. */ 655 private static final ReentrantLock SEARCH_MANAGER_LOCK = new ReentrantLock(true); 656 657 /** The default value used for generating search result excerpts (1024 chars). */ 658 public static final int DEFAULT_EXCERPT_LENGTH = 1024; 659 660 /** The default value used for keeping the extraction results in the cache (672 hours = 4 weeks). */ 661 public static final float DEFAULT_EXTRACTION_CACHE_MAX_AGE = 672.0f; 662 663 /** Default for the maximum number of modifications before a commit in the search index is triggered (500). */ 664 public static final int DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT = 500; 665 666 /** The default update frequency for offline indexes (15000 msec = 15 sec). */ 667 public static final int DEFAULT_OFFLINE_UPDATE_FREQNENCY = 15000; 668 669 /** The default maximal wait time for re-indexing after editing a content. */ 670 public static final int DEFAULT_MAX_INDEX_WAITTIME = 30000; 671 672 /** The default timeout value used for generating a document for the search index (60000 msec = 1 min). */ 673 public static final int DEFAULT_TIMEOUT = 60000; 674 675 /** Scheduler parameter: Update only a specified list of indexes. */ 676 public static final String JOB_PARAM_INDEXLIST = "indexList"; 677 678 /** Scheduler parameter: Write the output of the update to the logfile. */ 679 public static final String JOB_PARAM_WRITELOG = "writeLog"; 680 681 /** Prefix for Lucene default analyzers package (<code>org.apache.lucene.analysis.</code>). */ 682 public static final String LUCENE_ANALYZER = "org.apache.lucene.analysis.core."; 683 684 /** The log object for this class. */ 685 protected static final Log LOG = CmsLog.getLog(CmsSearchManager.class); 686 687 /** List of resource types which represent groups of elements. */ 688 private static final String[] groupTypes = { 689 CmsResourceTypeXmlContainerPage.MODEL_GROUP_TYPE_NAME, 690 CmsResourceTypeXmlContainerPage.GROUP_CONTAINER_TYPE_NAME, 691 CmsResourceTypeXmlContainerPage.INHERIT_CONTAINER_TYPE_NAME}; 692 693 /** The administrator OpenCms user context to access OpenCms VFS resources. */ 694 protected CmsObject m_adminCms; 695 696 /** The list of indexes that are configured for offline index mode. */ 697 protected List<I_CmsSearchIndex> m_offlineIndexes; 698 699 /** The thread used of offline indexing. */ 700 protected CmsSearchOfflineIndexThread m_offlineIndexThread; 701 702 /** Configured analyzers for languages using <analyzer>. */ 703 private HashMap<Locale, CmsSearchAnalyzer> m_analyzers; 704 705 /** Stores the offline update frequency while indexing is paused. */ 706 private long m_configuredOfflineIndexingFrequency; 707 708 /** The Solr core container. */ 709 private CoreContainer m_coreContainer; 710 711 /** A map of document factory configurations. */ 712 private List<CmsSearchDocumentType> m_documentTypeConfigs; 713 714 /** A map of document factories keyed first by their name and then by their extraction keys. */ 715 private Map<String, Map<String, I_CmsDocumentFactory>> m_documentTypes; 716 717 /** The set of all globally available extraction keys for document factories. */ 718 private Set<String> m_extractionKeys; 719 720 /** The max age for extraction results to remain in the cache. */ 721 private float m_extractionCacheMaxAge; 722 723 /** The cache for the extraction results. */ 724 private CmsExtractionResultCache m_extractionResultCache; 725 726 /** Contains the available field configurations. */ 727 private Map<String, I_CmsSearchFieldConfiguration> m_fieldConfigurations; 728 729 /** The force unlock type. */ 730 private CmsSearchForceUnlockMode m_forceUnlockMode; 731 732 /** The class used to highlight the search terms in the excerpt of a search result. */ 733 private I_CmsTermHighlighter m_highlighter; 734 735 /** A list of search indexes. */ 736 private List<I_CmsSearchIndex> m_indexes; 737 738 /** Seconds to wait for an index lock. */ 739 private int m_indexLockMaxWaitSeconds = 10; 740 741 /** Configured index sources. */ 742 private Map<String, CmsSearchIndexSource> m_indexSources; 743 744 /** The max. char. length of the excerpt in the search result. */ 745 private int m_maxExcerptLength; 746 747 /** The maximum number of modifications before a commit in the search index is triggered. */ 748 private int m_maxModificationsBeforeCommit; 749 750 /** The offline index search handler. */ 751 private CmsSearchOfflineHandler m_offlineHandler; 752 753 /** The update frequency of the offline indexer in milliseconds. */ 754 private long m_offlineUpdateFrequency; 755 756 /** The maximal time to wait for re-indexing after a content is edited (in milliseconds). */ 757 private long m_maxIndexWaitTime; 758 759 /** Path to index files below WEB-INF/. */ 760 private String m_path; 761 762 /** The Solr configuration. */ 763 private CmsSolrConfiguration m_solrConfig; 764 765 /** Timeout for abandoning indexing thread. */ 766 private long m_timeout; 767 768 /** Offline indexing pause requests */ 769 private final Set<CmsUUID> m_pauseRequests = new HashSet<>(); 770 771 /** 772 * Default constructor when called as cron job.<p> 773 */ 774 public CmsSearchManager() { 775 776 m_documentTypes = new HashMap<String, Map<String, I_CmsDocumentFactory>>(); 777 m_extractionKeys = new HashSet<String>(); 778 m_documentTypeConfigs = new ArrayList<CmsSearchDocumentType>(); 779 m_analyzers = new HashMap<Locale, CmsSearchAnalyzer>(); 780 m_indexes = new ArrayList<I_CmsSearchIndex>(); 781 m_indexSources = new TreeMap<String, CmsSearchIndexSource>(); 782 m_offlineHandler = new CmsSearchOfflineHandler(); 783 m_extractionCacheMaxAge = DEFAULT_EXTRACTION_CACHE_MAX_AGE; 784 m_maxExcerptLength = DEFAULT_EXCERPT_LENGTH; 785 m_offlineUpdateFrequency = DEFAULT_OFFLINE_UPDATE_FREQNENCY; 786 m_maxIndexWaitTime = DEFAULT_MAX_INDEX_WAITTIME; 787 m_maxModificationsBeforeCommit = DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT; 788 789 m_fieldConfigurations = new HashMap<String, I_CmsSearchFieldConfiguration>(); 790 // make sure we have a "standard" field configuration 791 addFieldConfiguration(CmsLuceneFieldConfiguration.DEFAULT_STANDARD); 792 793 if (CmsLog.INIT.isInfoEnabled()) { 794 CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_START_SEARCH_CONFIG_0)); 795 } 796 } 797 798 /** 799 * Returns an analyzer for the given class name.<p> 800 * 801 * @param className the class name of the analyzer 802 * 803 * @return the appropriate lucene analyzer 804 * 805 * @throws Exception if something goes wrong 806 */ 807 public static Analyzer getAnalyzer(String className) throws Exception { 808 809 Analyzer analyzer = null; 810 Class<?> analyzerClass; 811 try { 812 analyzerClass = Class.forName(className); 813 } catch (ClassNotFoundException e) { 814 // allow Lucene standard classes to be written in a short form 815 analyzerClass = Class.forName(LUCENE_ANALYZER + className); 816 } 817 818 // since Lucene 3.0 most analyzers need a "version" parameter and don't support an empty constructor 819 if (StandardAnalyzer.class.equals(analyzerClass)) { 820 // the Lucene standard analyzer is used - but without any stopwords. 821 analyzer = new StandardAnalyzer(new CharArraySet(0, false)); 822 } else { 823 analyzer = (Analyzer)analyzerClass.newInstance(); 824 } 825 return analyzer; 826 } 827 828 /** 829 * Returns the Solr index configured with the parameters name. 830 * The parameters must contain a key/value pair with an existing 831 * Solr index, otherwise <code>null</code> is returned.<p> 832 * 833 * @param cms the current context 834 * @param params the parameter map 835 * 836 * @return the best matching Solr index 837 */ 838 public static final CmsSolrIndex getIndexSolr(CmsObject cms, Map<String, String[]> params) { 839 840 String indexName = null; 841 CmsSolrIndex index = null; 842 // try to get the index name from the parameters: 'core' or 'index' 843 if (params != null) { 844 indexName = params.get(OpenCmsSolrHandler.PARAM_CORE) != null 845 ? params.get(OpenCmsSolrHandler.PARAM_CORE)[0] 846 : (params.get(OpenCmsSolrHandler.PARAM_INDEX) != null 847 ? params.get(OpenCmsSolrHandler.PARAM_INDEX)[0] 848 : null); 849 } 850 if (indexName == null) { 851 // if no parameter is specified try to use the default online/offline indexes by context 852 indexName = cms.getRequestContext().getCurrentProject().isOnlineProject() 853 ? CmsSolrIndex.DEFAULT_INDEX_NAME_ONLINE 854 : CmsSolrIndex.DEFAULT_INDEX_NAME_OFFLINE; 855 } 856 // try to get the index 857 index = OpenCms.getSearchManager().getIndexSolr(indexName); 858 if (index == null) { 859 // if there is exactly one index, a missing core / index parameter doesn't matter, since there is no choice. 860 List<CmsSolrIndex> solrs = OpenCms.getSearchManager().getAllSolrIndexes(); 861 if ((solrs != null) && !solrs.isEmpty() && (solrs.size() == 1)) { 862 index = solrs.get(0); 863 } 864 } 865 return index; 866 } 867 868 /** 869 * Returns <code>true</code> if the index for the given name is a Lucene index, <code>false</code> otherwise.<p> 870 * 871 * @param indexName the name of the index to check 872 * 873 * @return <code>true</code> if the index for the given name is a Lucene index 874 */ 875 public static boolean isLuceneIndex(String indexName) { 876 877 I_CmsSearchIndex i = OpenCms.getSearchManager().getIndex(indexName); 878 return (i instanceof CmsSearchIndex) && (!(i instanceof CmsSolrIndex)); 879 } 880 881 /** 882 * Adds an analyzer.<p> 883 * 884 * @param analyzer an analyzer 885 */ 886 public void addAnalyzer(CmsSearchAnalyzer analyzer) { 887 888 m_analyzers.put(analyzer.getLocale(), analyzer); 889 890 if (CmsLog.INIT.isInfoEnabled()) { 891 CmsLog.INIT.info( 892 Messages.get().getBundle().key( 893 Messages.INIT_ADD_ANALYZER_2, 894 analyzer.getLocale(), 895 analyzer.getClassName())); 896 } 897 } 898 899 /** 900 * Adds a document type.<p> 901 * 902 * @param documentType a document type 903 */ 904 public void addDocumentTypeConfig(CmsSearchDocumentType documentType) { 905 906 m_documentTypeConfigs.add(documentType); 907 908 if (CmsLog.INIT.isInfoEnabled()) { 909 CmsLog.INIT.info( 910 Messages.get().getBundle().key( 911 Messages.INIT_SEARCH_DOC_TYPES_2, 912 documentType.getName(), 913 documentType.getClassName())); 914 } 915 } 916 917 /** 918 * Adds a search field configuration to the search manager.<p> 919 * 920 * @param fieldConfiguration the search field configuration to add 921 */ 922 public void addFieldConfiguration(I_CmsSearchFieldConfiguration fieldConfiguration) { 923 924 m_fieldConfigurations.put(fieldConfiguration.getName(), fieldConfiguration); 925 } 926 927 /** 928 * Adds a search index to the configuration.<p> 929 * 930 * @param searchIndex the search index to add 931 */ 932 public void addSearchIndex(I_CmsSearchIndex searchIndex) { 933 934 if (!searchIndex.isInitialized()) { 935 if (OpenCms.getRunLevel() > OpenCms.RUNLEVEL_2_INITIALIZING) { 936 try { 937 searchIndex.initialize(); 938 } catch (CmsException e) { 939 // should never happen 940 LOG.error(e.getMessage(), e); 941 } 942 } 943 } 944 945 // name: not null or emtpy and unique 946 String name = searchIndex.getName(); 947 if (CmsStringUtil.isEmptyOrWhitespaceOnly(name)) { 948 throw new CmsIllegalArgumentException( 949 Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_MISSING_NAME_0)); 950 } 951 if (m_indexSources.keySet().contains(name)) { 952 throw new CmsIllegalArgumentException( 953 Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_INVALID_NAME_1, name)); 954 } 955 956 m_indexes.add(searchIndex); 957 if (m_adminCms != null) { 958 initOfflineIndexes(); 959 } 960 961 if (CmsLog.INIT.isInfoEnabled()) { 962 CmsLog.INIT.info( 963 Messages.get().getBundle().key( 964 Messages.INIT_ADD_SEARCH_INDEX_2, 965 searchIndex.getName(), 966 searchIndex.getProject())); 967 } 968 } 969 970 /** 971 * Adds a search index source configuration.<p> 972 * 973 * @param searchIndexSource a search index source configuration 974 */ 975 public void addSearchIndexSource(CmsSearchIndexSource searchIndexSource) { 976 977 m_indexSources.put(searchIndexSource.getName(), searchIndexSource); 978 979 if (CmsLog.INIT.isInfoEnabled()) { 980 CmsLog.INIT.info( 981 Messages.get().getBundle().key( 982 Messages.INIT_SEARCH_INDEX_SOURCE_2, 983 searchIndexSource.getName(), 984 searchIndexSource.getIndexerClassName())); 985 } 986 } 987 988 /** 989 * Implements the event listener of this class.<p> 990 * 991 * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent) 992 */ 993 public void cmsEvent(CmsEvent event) { 994 995 switch (event.getType()) { 996 case I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES: 997 List<String> indexNames = null; 998 if ((event.getData() != null) 999 && CmsStringUtil.isNotEmptyOrWhitespaceOnly( 1000 (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES))) { 1001 indexNames = CmsStringUtil.splitAsList( 1002 (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES), 1003 ",", 1004 true); 1005 } 1006 try { 1007 if (LOG.isDebugEnabled()) { 1008 LOG.debug( 1009 Messages.get().getBundle().key( 1010 Messages.LOG_EVENT_REBUILD_SEARCHINDEX_1, 1011 indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")), 1012 new Exception()); 1013 } 1014 if (indexNames == null) { 1015 rebuildAllIndexes(getEventReport(event)); 1016 } else { 1017 rebuildIndexes(indexNames, getEventReport(event)); 1018 } 1019 } catch (CmsException e) { 1020 if (LOG.isErrorEnabled()) { 1021 LOG.error( 1022 Messages.get().getBundle().key( 1023 Messages.ERR_EVENT_REBUILD_SEARCHINDEX_1, 1024 indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")), 1025 e); 1026 } 1027 } 1028 break; 1029 case I_CmsEventListener.EVENT_CLEAR_CACHES: 1030 if (LOG.isDebugEnabled()) { 1031 LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_CLEAR_CACHES_0), new Exception()); 1032 } 1033 break; 1034 case I_CmsEventListener.EVENT_PUBLISH_PROJECT: 1035 // event data contains a list of the published resources 1036 CmsUUID publishHistoryId = new CmsUUID((String)event.getData().get(I_CmsEventListener.KEY_PUBLISHID)); 1037 if (LOG.isDebugEnabled()) { 1038 LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_PUBLISH_PROJECT_1, publishHistoryId)); 1039 } 1040 updateAllIndexes(m_adminCms, publishHistoryId, getEventReport(event)); 1041 if (LOG.isDebugEnabled()) { 1042 LOG.debug( 1043 Messages.get().getBundle().key( 1044 Messages.LOG_EVENT_PUBLISH_PROJECT_FINISHED_1, 1045 publishHistoryId)); 1046 } 1047 break; 1048 case I_CmsEventListener.EVENT_REINDEX_OFFLINE: 1049 case I_CmsEventListener.EVENT_REINDEX_ONLINE: 1050 boolean isOnline = I_CmsEventListener.EVENT_REINDEX_ONLINE == event.getType(); 1051 Map<String, Object> eventData = event.getData(); 1052 CmsUUID userId = (CmsUUID)eventData.get(I_CmsEventListener.KEY_USER_ID); 1053 CmsUser user = null; 1054 if (userId != null) { 1055 try { 1056 user = m_adminCms.readUser(userId); 1057 } catch (Throwable t) { 1058 // should not normally happen 1059 LOG.debug(t.getMessage(), t); 1060 } 1061 } 1062 try { 1063 SEARCH_MANAGER_LOCK.lock(); 1064 if (LOG.isDebugEnabled()) { 1065 LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_REINDEX_STARTED_0)); 1066 } 1067 CmsObject cms = m_adminCms; 1068 if (!isOnline) { 1069 OpenCms.initCmsObject(m_adminCms); 1070 cms.getRequestContext().setCurrentProject( 1071 cms.readProject((CmsUUID)eventData.get(I_CmsEventListener.KEY_PROJECTID))); 1072 } 1073 @SuppressWarnings("unchecked") 1074 List<CmsResource> resources = (List<CmsResource>)eventData.get(I_CmsEventListener.KEY_RESOURCES); 1075 I_CmsReport report = (I_CmsReport)eventData.get(I_CmsEventListener.KEY_REPORT); 1076 List<CmsResource> resourcesToIndex = new ArrayList<>(); 1077 for (CmsResource res : resources) { 1078 if (res.isFile()) { 1079 resourcesToIndex.add(res); 1080 } else { 1081 try { 1082 resourcesToIndex.addAll( 1083 cms.readResources(res, CmsResourceFilter.IGNORE_EXPIRATION, true)); 1084 } catch (CmsException e) { 1085 LOG.error(e, e); 1086 } 1087 } 1088 } 1089 // we reindex and prevent using cached results 1090 cleanExtractionCache(); 1091 List<CmsPublishedResource> publishedResourcesToIndex = resourcesToIndex.stream().map( 1092 res -> new CmsPublishedResource(res)).collect(Collectors.toList()); 1093 if (Boolean.TRUE.equals(eventData.get(I_CmsEventListener.KEY_REINDEX_RELATED))) { 1094 addAdditionallyAffectedResources(cms, publishedResourcesToIndex); 1095 } 1096 if (isOnline) { 1097 updateAllIndexes( 1098 m_adminCms, 1099 publishedResourcesToIndex, 1100 new CmsShellLogReport(CmsLocaleManager.MASTER_LOCALE)); 1101 } else { 1102 updateIndexOffline(report, publishedResourcesToIndex); 1103 } 1104 cms = null; 1105 SEARCH_MANAGER_LOCK.unlock(); 1106 if (null != user) { 1107 Locale l = OpenCms.getWorkplaceManager().getWorkplaceLocale(user); 1108 OpenCms.getSessionManager().sendBroadcast( 1109 null, 1110 Messages.get().getBundle(l).key(Messages.GUI_REINDEXING_SUCCESS_0), 1111 user, 1112 ContentMode.html); 1113 } 1114 if (LOG.isDebugEnabled()) { 1115 LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_REINDEX_FINISHED_0)); 1116 } 1117 1118 } catch (Throwable e) { 1119 if (SEARCH_MANAGER_LOCK.isHeldByCurrentThread()) { 1120 SEARCH_MANAGER_LOCK.unlock(); 1121 } 1122 if (null != user) { 1123 Locale l = OpenCms.getWorkplaceManager().getWorkplaceLocale(user); 1124 OpenCms.getSessionManager().sendBroadcast( 1125 null, 1126 Messages.get().getBundle(l).key(Messages.GUI_REINDEXING_FAILED_0), 1127 user, 1128 ContentMode.html); 1129 } 1130 if (LOG.isDebugEnabled()) { 1131 LOG.error( 1132 Messages.get().getBundle().key(Messages.ERR_EVENT_REINDEX_FAILED_1, event.getData()), 1133 e); 1134 } else if (LOG.isErrorEnabled()) { 1135 LOG.error(Messages.get().getBundle().key(Messages.ERR_EVENT_REINDEX_FAILED_1, event.getData())); 1136 } 1137 } 1138 break; 1139 default: 1140 // no operation 1141 } 1142 } 1143 1144 /** 1145 * Returns all Solr index.<p> 1146 * 1147 * @return all Solr indexes 1148 */ 1149 public List<CmsSolrIndex> getAllSolrIndexes() { 1150 1151 List<CmsSolrIndex> result = new ArrayList<CmsSolrIndex>(); 1152 for (String indexName : getIndexNames()) { 1153 CmsSolrIndex index = getIndexSolr(indexName); 1154 if (index != null) { 1155 result.add(index); 1156 } 1157 } 1158 return result; 1159 } 1160 1161 /** 1162 * Returns an analyzer for the given language.<p> 1163 * 1164 * The analyzer is selected according to the analyzer configuration.<p> 1165 * 1166 * @param locale the locale to get the analyzer for 1167 * @return the appropriate lucene analyzer 1168 * 1169 * @throws CmsSearchException if something goes wrong 1170 */ 1171 public Analyzer getAnalyzer(Locale locale) throws CmsSearchException { 1172 1173 Analyzer analyzer = null; 1174 String className = null; 1175 1176 CmsSearchAnalyzer analyzerConf = m_analyzers.get(locale); 1177 if (analyzerConf == null) { 1178 throw new CmsSearchException(Messages.get().container(Messages.ERR_ANALYZER_NOT_FOUND_1, locale)); 1179 } 1180 1181 try { 1182 analyzer = getAnalyzer(analyzerConf.getClassName()); 1183 } catch (Exception e) { 1184 throw new CmsSearchException(Messages.get().container(Messages.ERR_LOAD_ANALYZER_1, className), e); 1185 } 1186 1187 return analyzer; 1188 } 1189 1190 /** 1191 * Returns an unmodifiable view of the map that contains the {@link CmsSearchAnalyzer} list.<p> 1192 * 1193 * The keys in the map are {@link Locale} objects, and the values are {@link CmsSearchAnalyzer} objects. 1194 * 1195 * @return an unmodifiable view of the Analyzers Map 1196 */ 1197 public Map<Locale, CmsSearchAnalyzer> getAnalyzers() { 1198 1199 return Collections.unmodifiableMap(m_analyzers); 1200 } 1201 1202 /** 1203 * Returns the search analyzer for the given locale.<p> 1204 * 1205 * @param locale the locale to get the analyzer for 1206 * 1207 * @return the search analyzer for the given locale 1208 */ 1209 public CmsSearchAnalyzer getCmsSearchAnalyzer(Locale locale) { 1210 1211 return m_analyzers.get(locale); 1212 } 1213 1214 /** 1215 * Returns the name of the directory below WEB-INF/ where the search indexes are stored.<p> 1216 * 1217 * @return the name of the directory below WEB-INF/ where the search indexes are stored 1218 */ 1219 public String getDirectory() { 1220 1221 return m_path; 1222 } 1223 1224 /** 1225 * Returns the configured Solr home directory <code>null</code> if not set.<p> 1226 * 1227 * @return the Solr home directory 1228 */ 1229 public String getDirectorySolr() { 1230 1231 return m_solrConfig != null ? m_solrConfig.getHome() : null; 1232 } 1233 1234 /** 1235 * Returns the document factory configured under the provided name. 1236 * @param docTypeName the name of the document type. 1237 * @return the factory for the provided name. 1238 */ 1239 public I_CmsDocumentFactory getDocumentFactoryForName(String docTypeName) { 1240 1241 Map<String, I_CmsDocumentFactory> factoryMap = m_documentTypes.get(docTypeName); 1242 if (factoryMap != null) { 1243 Iterator<I_CmsDocumentFactory> factoryIt = factoryMap.values().iterator(); 1244 if (factoryIt.hasNext()) { 1245 return factoryMap.values().iterator().next(); 1246 } 1247 } 1248 return null; 1249 } 1250 1251 /** 1252 * Returns a document type config.<p> 1253 * 1254 * @param name the name of the document type config 1255 * @return the document type config. 1256 */ 1257 public CmsSearchDocumentType getDocumentTypeConfig(String name) { 1258 1259 // this is really used only for the search manager GUI, 1260 // so performance is not an issue and no lookup map is generated 1261 for (int i = 0; i < m_documentTypeConfigs.size(); i++) { 1262 CmsSearchDocumentType type = m_documentTypeConfigs.get(i); 1263 if (type.getName().equals(name)) { 1264 return type; 1265 } 1266 } 1267 return null; 1268 } 1269 1270 /** 1271 * Returns an unmodifiable view (read-only) of the DocumentTypeConfigs Map.<p> 1272 * 1273 * @return an unmodifiable view (read-only) of the DocumentTypeConfigs Map 1274 */ 1275 public List<CmsSearchDocumentType> getDocumentTypeConfigs() { 1276 1277 return Collections.unmodifiableList(m_documentTypeConfigs); 1278 } 1279 1280 /** 1281 * Returns the document type keys used to specify the correct document factory. 1282 * 1283 * @see #getDocumentTypeKeys(String, String) for detailed information on the returned keys. 1284 * 1285 * @param resource the resource to generate the list of document type keys for. 1286 * @return the document type keys. 1287 */ 1288 public List<String> getDocumentTypeKeys(CmsResource resource) { 1289 1290 // first get the MIME type of the resource 1291 String mimeType = OpenCms.getResourceManager().getMimeType(resource.getRootPath(), null, "unknown"); 1292 String resourceType = null; 1293 try { 1294 resourceType = OpenCms.getResourceManager().getResourceType(resource.getTypeId()).getTypeName(); 1295 } catch (CmsLoaderException e) { 1296 // ignore, unknown resource type, resource can not be indexed 1297 LOG.info(e.getLocalizedMessage(), e); 1298 } 1299 return getDocumentTypeKeys(resourceType, mimeType); 1300 } 1301 1302 /** 1303 * Returns the document type keys used to specify the correct document factory. 1304 * One resource typically has more than one key. The document factories are matched 1305 * in the provided order and the first matching factory is used. 1306 * 1307 * The keys for type name "typename" and mimetype "mimetype" would be a subset of: 1308 * <ul> 1309 * <li><code>typename_mimetype</code></li> 1310 * <li><code>typename</code></li> 1311 * <li>if <code>typename</code> is a sub-type of <code>containerpage</code> 1312 * <ul> 1313 * <li><code>containerpage_mimetype</code></li> 1314 * <li><code>containerpage</code></li> 1315 * </ul> 1316 * </li> 1317 * <li>if <code>typename</code> is a sub-type of <code>xmlcontent</code> 1318 * <ul> 1319 * <li><code>xmlcontent_mimetype</code></li> 1320 * <li><code>xmlcontent</code></li> 1321 * </ul> 1322 * </li> 1323 * <li><code>__unconfigured___mimetype</code></li> 1324 * <li><code>__unconfigured__</code></li> 1325 * <li><code>__all___mimetype</code></li> 1326 * <li><code>__all__</code></li> 1327 * <ul> 1328 * Note that all keys except the "__all__"-keys are only added as long as globally 1329 * there is no matching factory for the key. 1330 * This in particular means that a factory matching "typename" will never be used 1331 * if you have a factory for "typename__mimetype" - even if this is not configured 1332 * for the used index source. Eventually, the content will not be indexed in such cases. 1333 * @param resourceType the resource type to generate the list of document type keys for. 1334 * @param mimeType the mime type to generate the list of document type keys for. 1335 * @return the document type keys. 1336 */ 1337 public List<String> getDocumentTypeKeys(String resourceType, String mimeType) { 1338 1339 List<String> result = new ArrayList<>(8); 1340 if (null != resourceType) { 1341 String currentKey = A_CmsVfsDocument.getDocumentKey(resourceType, mimeType); 1342 result.add(currentKey); 1343 if (!m_extractionKeys.contains(currentKey)) { 1344 currentKey = A_CmsVfsDocument.getDocumentKey(resourceType, null); 1345 result.add(currentKey); 1346 if (!m_extractionKeys.contains(currentKey)) { 1347 boolean hasGlobalMatch = false; 1348 try { 1349 String containerpageTypeName = CmsResourceTypeXmlContainerPage.getStaticTypeName(); 1350 I_CmsResourceType type = OpenCms.getResourceManager().getResourceType(resourceType); 1351 if (!resourceType.equals(containerpageTypeName)) { 1352 if (type instanceof CmsResourceTypeXmlContainerPage) { 1353 if (!resourceType.equals(CmsResourceTypeXmlContainerPage.getStaticTypeName())) { 1354 currentKey = A_CmsVfsDocument.getDocumentKey(containerpageTypeName, mimeType); 1355 result.add(currentKey); 1356 hasGlobalMatch = m_extractionKeys.contains(currentKey); 1357 if (!hasGlobalMatch) { 1358 currentKey = A_CmsVfsDocument.getDocumentKey(containerpageTypeName, null); 1359 result.add(currentKey); 1360 hasGlobalMatch = m_extractionKeys.contains(currentKey); 1361 } 1362 } 1363 } 1364 } 1365 String xmlcontentTypeName = CmsResourceTypeXmlContent.getStaticTypeName(); 1366 if (!resourceType.equals(containerpageTypeName)) { 1367 if (!hasGlobalMatch && (type instanceof CmsResourceTypeXmlContent)) { 1368 currentKey = A_CmsVfsDocument.getDocumentKey(xmlcontentTypeName, mimeType); 1369 result.add(currentKey); 1370 hasGlobalMatch = m_extractionKeys.contains(currentKey); 1371 if (!hasGlobalMatch) { 1372 currentKey = A_CmsVfsDocument.getDocumentKey(xmlcontentTypeName, null); 1373 result.add(currentKey); 1374 hasGlobalMatch = m_extractionKeys.contains(currentKey); 1375 } 1376 } 1377 } 1378 } catch (Throwable t) { 1379 LOG.warn("Could not read type for name \"" + resourceType + "\".", t); 1380 } 1381 if (!hasGlobalMatch) { 1382 result.add( 1383 A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_UNCONFIGURED_TYPES, mimeType)); 1384 result.add( 1385 A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_UNCONFIGURED_TYPES, null)); 1386 } 1387 } 1388 } 1389 result.add(A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_TYPES, mimeType)); 1390 result.add(A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_TYPES, null)); 1391 } 1392 return result; 1393 1394 } 1395 1396 /** 1397 * Returns the map from document type keys to document factories with all entries for the provided document type names. 1398 * @param documentTypeNames list of document type names to generate the map for. 1399 * @return the map from document type keys to document factories. 1400 */ 1401 public Map<String, I_CmsDocumentFactory> getDocumentTypeMapForTypeNames(List<String> documentTypeNames) { 1402 1403 Map<String, I_CmsDocumentFactory> result = new LinkedHashMap<>(); 1404 if (null != documentTypeNames) { 1405 // Iterate the list in reverse order to prefer factories that are added by document types listed earlier. 1406 ListIterator<String> typesIterator = documentTypeNames.listIterator(documentTypeNames.size()); 1407 while (typesIterator.hasPrevious()) { 1408 Map<String, I_CmsDocumentFactory> factories = m_documentTypes.get(typesIterator.previous()); 1409 if (null != factories) { 1410 result.putAll(factories); 1411 } 1412 } 1413 } 1414 return result; 1415 } 1416 1417 /** 1418 * Returns the maximum age a text extraction result is kept in the cache (in hours).<p> 1419 * 1420 * @return the maximum age a text extraction result is kept in the cache (in hours) 1421 */ 1422 public float getExtractionCacheMaxAge() { 1423 1424 return m_extractionCacheMaxAge; 1425 } 1426 1427 /** 1428 * Returns the search field configuration with the given name.<p> 1429 * 1430 * In case no configuration is available with the given name, <code>null</code> is returned.<p> 1431 * 1432 * @param name the name to get the search field configuration for 1433 * 1434 * @return the search field configuration with the given name 1435 */ 1436 public I_CmsSearchFieldConfiguration getFieldConfiguration(String name) { 1437 1438 return m_fieldConfigurations.get(name); 1439 } 1440 1441 /** 1442 * Returns the unmodifieable List of configured {@link I_CmsSearchFieldConfiguration} entries.<p> 1443 * 1444 * @return the unmodifieable List of configured {@link I_CmsSearchFieldConfiguration} entries 1445 */ 1446 public List<I_CmsSearchFieldConfiguration> getFieldConfigurations() { 1447 1448 List<I_CmsSearchFieldConfiguration> result = new ArrayList<I_CmsSearchFieldConfiguration>( 1449 m_fieldConfigurations.values()); 1450 Collections.sort(result); 1451 return Collections.unmodifiableList(result); 1452 } 1453 1454 /** 1455 * Returns the Lucene search field configurations only.<p> 1456 * 1457 * @return the Lucene search field configurations 1458 */ 1459 public List<CmsLuceneFieldConfiguration> getFieldConfigurationsLucene() { 1460 1461 List<CmsLuceneFieldConfiguration> result = new ArrayList<CmsLuceneFieldConfiguration>(); 1462 for (I_CmsSearchFieldConfiguration conf : m_fieldConfigurations.values()) { 1463 if (conf instanceof CmsLuceneFieldConfiguration) { 1464 result.add((CmsLuceneFieldConfiguration)conf); 1465 } 1466 } 1467 Collections.sort(result); 1468 return Collections.unmodifiableList(result); 1469 } 1470 1471 /** 1472 * Returns the Solr search field configurations only.<p> 1473 * 1474 * @return the Solr search field configurations 1475 */ 1476 public List<CmsSolrFieldConfiguration> getFieldConfigurationsSolr() { 1477 1478 List<CmsSolrFieldConfiguration> result = new ArrayList<CmsSolrFieldConfiguration>(); 1479 for (I_CmsSearchFieldConfiguration conf : m_fieldConfigurations.values()) { 1480 if (conf instanceof CmsSolrFieldConfiguration) { 1481 result.add((CmsSolrFieldConfiguration)conf); 1482 } 1483 } 1484 Collections.sort(result); 1485 return Collections.unmodifiableList(result); 1486 } 1487 1488 /** 1489 * Returns the force unlock mode during indexing.<p> 1490 * 1491 * @return the force unlock mode during indexing 1492 */ 1493 public CmsSearchForceUnlockMode getForceunlock() { 1494 1495 return m_forceUnlockMode; 1496 } 1497 1498 /** 1499 * Returns the highlighter.<p> 1500 * 1501 * @return the highlighter 1502 */ 1503 public I_CmsTermHighlighter getHighlighter() { 1504 1505 return m_highlighter; 1506 } 1507 1508 /** 1509 * Returns the Lucene search index configured with the given name.<p> 1510 * The index must exist, otherwise <code>null</code> is returned. 1511 * 1512 * @param indexName then name of the requested search index 1513 * 1514 * @return the Lucene search index configured with the given name 1515 */ 1516 public I_CmsSearchIndex getIndex(String indexName) { 1517 1518 for (I_CmsSearchIndex index : m_indexes) { 1519 if (indexName.equalsIgnoreCase(index.getName())) { 1520 return index; 1521 } 1522 } 1523 return null; 1524 } 1525 1526 /** 1527 * Returns the seconds to wait for an index lock during an update operation.<p> 1528 * 1529 * @return the seconds to wait for an index lock during an update operation 1530 */ 1531 public int getIndexLockMaxWaitSeconds() { 1532 1533 return m_indexLockMaxWaitSeconds; 1534 } 1535 1536 /** 1537 * Returns the names of all configured indexes.<p> 1538 * 1539 * @return list of names 1540 */ 1541 public List<String> getIndexNames() { 1542 1543 List<String> indexNames = new ArrayList<String>(); 1544 for (int i = 0, n = m_indexes.size(); i < n; i++) { 1545 indexNames.add((m_indexes.get(i)).getName()); 1546 } 1547 1548 return indexNames; 1549 } 1550 1551 /** 1552 * Returns the Solr index configured with the given name.<p> 1553 * The index must exist, otherwise <code>null</code> is returned. 1554 * 1555 * @param indexName then name of the requested Solr index 1556 * @return the Solr index configured with the given name 1557 */ 1558 public CmsSolrIndex getIndexSolr(String indexName) { 1559 1560 I_CmsSearchIndex index = getIndex(indexName); 1561 if (index instanceof CmsSolrIndex) { 1562 return (CmsSolrIndex)index; 1563 } 1564 return null; 1565 } 1566 1567 /** 1568 * Returns a search index source for a specified source name.<p> 1569 * 1570 * @param sourceName the name of the index source 1571 * @return a search index source 1572 */ 1573 public CmsSearchIndexSource getIndexSource(String sourceName) { 1574 1575 return m_indexSources.get(sourceName); 1576 } 1577 1578 /** 1579 * Returns the max. excerpt length.<p> 1580 * 1581 * @return the max excerpt length 1582 */ 1583 public int getMaxExcerptLength() { 1584 1585 return m_maxExcerptLength; 1586 } 1587 1588 /** 1589 * Returns the maximal time to wait for re-indexing after a content is edited (in milliseconds).<p> 1590 * 1591 * @return the maximal time to wait for re-indexing after a content is edited (in milliseconds) 1592 */ 1593 public long getMaxIndexWaitTime() { 1594 1595 return m_maxIndexWaitTime; 1596 } 1597 1598 /** 1599 * Returns the maximum number of modifications before a commit in the search index is triggered.<p> 1600 * 1601 * @return the maximum number of modifications before a commit in the search index is triggered 1602 */ 1603 public int getMaxModificationsBeforeCommit() { 1604 1605 return m_maxModificationsBeforeCommit; 1606 } 1607 1608 /** 1609 * Returns the update frequency of the offline indexer in milliseconds.<p> 1610 * 1611 * @return the update frequency of the offline indexer in milliseconds 1612 */ 1613 public long getOfflineUpdateFrequency() { 1614 1615 return m_offlineUpdateFrequency; 1616 } 1617 1618 /** 1619 * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p> 1620 * 1621 * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances 1622 */ 1623 public List<I_CmsSearchIndex> getSearchIndexes() { 1624 1625 return Collections.unmodifiableList(m_indexes); 1626 } 1627 1628 /** 1629 * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p> 1630 * 1631 * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances 1632 */ 1633 public List<I_CmsSearchIndex> getSearchIndexesAll() { 1634 1635 return Collections.unmodifiableList(m_indexes); 1636 } 1637 1638 /** 1639 * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p> 1640 * 1641 * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances 1642 */ 1643 public List<CmsSolrIndex> getSearchIndexesSolr() { 1644 1645 List<CmsSolrIndex> indexes = new ArrayList<CmsSolrIndex>(); 1646 for (I_CmsSearchIndex index : m_indexes) { 1647 if (index instanceof CmsSolrIndex) { 1648 indexes.add((CmsSolrIndex)index); 1649 } 1650 } 1651 return Collections.unmodifiableList(indexes); 1652 } 1653 1654 /** 1655 * Returns an unmodifiable view (read-only) of the SearchIndexSources Map.<p> 1656 * 1657 * @return an unmodifiable view (read-only) of the SearchIndexSources Map 1658 */ 1659 public Map<String, CmsSearchIndexSource> getSearchIndexSources() { 1660 1661 return Collections.unmodifiableMap(m_indexSources); 1662 } 1663 1664 /** 1665 * Return singleton instance of the OpenCms spellchecker.<p> 1666 * 1667 * @return instance of CmsSolrSpellchecker. 1668 */ 1669 public CmsSolrSpellchecker getSolrDictionary() { 1670 1671 // get the core container that contains one core for each configured index 1672 if (m_coreContainer == null) { 1673 m_coreContainer = createCoreContainer(); 1674 } 1675 return CmsSolrSpellchecker.getInstance(m_coreContainer); 1676 } 1677 1678 /** 1679 * Returns the Solr configuration.<p> 1680 * 1681 * @return the Solr configuration 1682 */ 1683 public CmsSolrConfiguration getSolrServerConfiguration() { 1684 1685 return m_solrConfig; 1686 } 1687 1688 /** 1689 * Returns the timeout to abandon threads indexing a resource.<p> 1690 * 1691 * @return the timeout to abandon threads indexing a resource 1692 */ 1693 public long getTimeout() { 1694 1695 return m_timeout; 1696 } 1697 1698 /** 1699 * Initializes the search manager.<p> 1700 * 1701 * @param cms the cms object 1702 * 1703 * @throws CmsRoleViolationException in case the given opencms object does not have <code>{@link CmsRole#WORKPLACE_MANAGER}</code> permissions 1704 */ 1705 public void initialize(CmsObject cms) throws CmsRoleViolationException { 1706 1707 OpenCms.getRoleManager().checkRole(cms, CmsRole.WORKPLACE_MANAGER); 1708 try { 1709 // store the Admin cms to index Cms resources 1710 m_adminCms = OpenCms.initCmsObject(cms); 1711 } catch (CmsException e) { 1712 // this should never happen 1713 LOG.error(e.getLocalizedMessage(), e); 1714 } 1715 // make sure the site root is the root site 1716 m_adminCms.getRequestContext().setSiteRoot("/"); 1717 1718 // create the extraction result cache 1719 m_extractionResultCache = new CmsExtractionResultCache( 1720 OpenCms.getSystemInfo().getAbsoluteRfsPathRelativeToWebInf(getDirectory()), 1721 "/extractCache"); 1722 initializeFieldConfigurations(); 1723 initializeIndexes(); 1724 initOfflineIndexes(); 1725 1726 // register this object as event listener 1727 OpenCms.addCmsEventListener( 1728 this, 1729 new int[] { 1730 I_CmsEventListener.EVENT_CLEAR_CACHES, 1731 I_CmsEventListener.EVENT_PUBLISH_PROJECT, 1732 I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES, 1733 I_CmsEventListener.EVENT_REINDEX_OFFLINE, 1734 I_CmsEventListener.EVENT_REINDEX_ONLINE}); 1735 } 1736 1737 /** 1738 * Calls {@link I_CmsSearchFieldConfiguration#init()} for all registered field configurations. 1739 */ 1740 public void initializeFieldConfigurations() { 1741 1742 for (I_CmsSearchFieldConfiguration config : m_fieldConfigurations.values()) { 1743 config.init(); 1744 } 1745 1746 } 1747 1748 /** 1749 * Initializes all configured document types, index sources and search indexes.<p> 1750 * 1751 * This method needs to be called if after a change in the index configuration has been made. 1752 */ 1753 public void initializeIndexes() { 1754 1755 initAvailableDocumentTypes(); 1756 initIndexSources(); 1757 initSearchIndexes(); 1758 } 1759 1760 /** 1761 * Initialize the offline index handler, require after an offline index has been added.<p> 1762 */ 1763 public void initOfflineIndexes() { 1764 1765 // check which indexes are configured as offline indexes 1766 List<I_CmsSearchIndex> offlineIndexes = new ArrayList<I_CmsSearchIndex>(); 1767 Iterator<I_CmsSearchIndex> i = m_indexes.iterator(); 1768 while (i.hasNext()) { 1769 I_CmsSearchIndex index = i.next(); 1770 if (I_CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) { 1771 // this is an offline index 1772 offlineIndexes.add(index); 1773 } 1774 } 1775 m_offlineIndexes = offlineIndexes; 1776 m_offlineHandler.initialize(); 1777 1778 } 1779 1780 /** 1781 * Initializes the spell check index.<p> 1782 * 1783 * @param adminCms the ROOT_ADMIN cms context 1784 */ 1785 public void initSpellcheckIndex(CmsObject adminCms) { 1786 1787 if (CmsSpellcheckDictionaryIndexer.updatingIndexNecessesary(adminCms)) { 1788 final CmsSolrSpellchecker spellchecker = OpenCms.getSearchManager().getSolrDictionary(); 1789 if (spellchecker != null) { 1790 1791 Runnable initRunner = new Runnable() { 1792 1793 public void run() { 1794 1795 try { 1796 spellchecker.parseAndAddDictionaries(adminCms); 1797 } catch (CmsRoleViolationException e) { 1798 LOG.error(e.getLocalizedMessage(), e); 1799 } 1800 } 1801 }; 1802 new Thread(initRunner).start(); 1803 } 1804 } 1805 } 1806 1807 /** 1808 * Returns if the offline indexing is paused.<p> 1809 * 1810 * @return <code>true</code> if the offline indexing is paused 1811 */ 1812 public boolean isOfflineIndexingPaused() { 1813 1814 return m_offlineUpdateFrequency == Long.MAX_VALUE; 1815 } 1816 1817 /** 1818 * Updates the indexes from as a scheduled job.<p> 1819 * 1820 * @param cms the OpenCms user context to use when reading resources from the VFS 1821 * @param parameters the parameters for the scheduled job 1822 * 1823 * @throws Exception if something goes wrong 1824 * 1825 * @return the String to write in the scheduler log 1826 * 1827 * @see org.opencms.scheduler.I_CmsScheduledJob#launch(CmsObject, Map) 1828 */ 1829 public String launch(CmsObject cms, Map<String, String> parameters) throws Exception { 1830 1831 CmsSearchManager manager = OpenCms.getSearchManager(); 1832 1833 I_CmsReport report = null; 1834 boolean writeLog = Boolean.valueOf(parameters.get(JOB_PARAM_WRITELOG)).booleanValue(); 1835 1836 if (writeLog) { 1837 report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class); 1838 } 1839 1840 List<String> updateList = null; 1841 String indexList = parameters.get(JOB_PARAM_INDEXLIST); 1842 if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(indexList)) { 1843 // index list has been provided as job parameter 1844 updateList = new ArrayList<String>(); 1845 String[] indexNames = CmsStringUtil.splitAsArray(indexList, '|'); 1846 for (int i = 0; i < indexNames.length; i++) { 1847 // check if the index actually exists 1848 if (manager.getIndex(indexNames[i]) != null) { 1849 updateList.add(indexNames[i]); 1850 } else { 1851 if (LOG.isWarnEnabled()) { 1852 LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexNames[i])); 1853 } 1854 } 1855 } 1856 } 1857 1858 long startTime = System.currentTimeMillis(); 1859 1860 if (updateList == null) { 1861 // all indexes need to be updated 1862 manager.rebuildAllIndexes(report); 1863 } else { 1864 // rebuild only the selected indexes 1865 manager.rebuildIndexes(updateList, report); 1866 } 1867 1868 long runTime = System.currentTimeMillis() - startTime; 1869 1870 String finishMessage = Messages.get().getBundle().key( 1871 Messages.LOG_REBUILD_INDEXES_FINISHED_1, 1872 CmsStringUtil.formatRuntime(runTime)); 1873 1874 if (LOG.isInfoEnabled()) { 1875 LOG.info(finishMessage); 1876 } 1877 return finishMessage; 1878 } 1879 1880 /** 1881 * Pauses the offline indexing and returns a pause request id that has to be used for resuming offline indexing again.<p> 1882 * May take some time, because the indexes are updated first.<p> 1883 * 1884 *@return the pause request id. The id has to be given to the {@link #resumeOfflineIndexing(CmsUUID)} method to resume offline indexing. 1885 */ 1886 public CmsUUID pauseOfflineIndexing() { 1887 1888 CmsUUID pauseId = new CmsUUID(); 1889 synchronized (m_pauseRequests) { 1890 if (m_pauseRequests.isEmpty()) { 1891 LOG.info("Pausing offline indexing."); 1892 m_configuredOfflineIndexingFrequency = m_offlineUpdateFrequency; 1893 m_offlineUpdateFrequency = Long.MAX_VALUE; 1894 updateOfflineIndexes(0); 1895 } 1896 m_pauseRequests.add(pauseId); 1897 if (LOG.isDebugEnabled()) { 1898 LOG.debug("Added pause request with id " + pauseId); 1899 } 1900 } 1901 return pauseId; 1902 } 1903 1904 /** 1905 * Rebuilds (if required creates) all configured indexes.<p> 1906 * 1907 * @param report the report object to write messages (or <code>null</code>) 1908 * 1909 * @throws CmsException if something goes wrong 1910 */ 1911 public void rebuildAllIndexes(I_CmsReport report) throws CmsException { 1912 1913 try { 1914 SEARCH_MANAGER_LOCK.lock(); 1915 1916 CmsMessageContainer container = null; 1917 for (int i = 0, n = m_indexes.size(); i < n; i++) { 1918 // iterate all configured search indexes 1919 I_CmsSearchIndex searchIndex = m_indexes.get(i); 1920 try { 1921 // update the index 1922 updateIndex(searchIndex, report, null); 1923 } catch (CmsException e) { 1924 container = new CmsMessageContainer( 1925 Messages.get(), 1926 Messages.ERR_INDEX_REBUILD_ALL_1, 1927 new Object[] {searchIndex.getName()}); 1928 LOG.error( 1929 Messages.get().getBundle().key(Messages.ERR_INDEX_REBUILD_ALL_1, searchIndex.getName()), 1930 e); 1931 } 1932 } 1933 // clean up the extraction result cache 1934 cleanExtractionCache(); 1935 if (container != null) { 1936 // throw stored exception 1937 throw new CmsSearchException(container); 1938 } 1939 } finally { 1940 SEARCH_MANAGER_LOCK.unlock(); 1941 } 1942 } 1943 1944 /** 1945 * Rebuilds (if required creates) the index with the given name.<p> 1946 * 1947 * @param indexName the name of the index to rebuild 1948 * @param report the report object to write messages (or <code>null</code>) 1949 * 1950 * @throws CmsException if something goes wrong 1951 */ 1952 public void rebuildIndex(String indexName, I_CmsReport report) throws CmsException { 1953 1954 try { 1955 SEARCH_MANAGER_LOCK.lock(); 1956 // get the search index by name 1957 I_CmsSearchIndex index = getIndex(indexName); 1958 // update the index 1959 updateIndex(index, report, null); 1960 // clean up the extraction result cache 1961 cleanExtractionCache(); 1962 } finally { 1963 SEARCH_MANAGER_LOCK.unlock(); 1964 } 1965 } 1966 1967 /** 1968 * Rebuilds (if required creates) the List of indexes with the given name.<p> 1969 * 1970 * @param indexNames the names (String) of the index to rebuild 1971 * @param report the report object to write messages (or <code>null</code>) 1972 * 1973 * @throws CmsException if something goes wrong 1974 */ 1975 public void rebuildIndexes(List<String> indexNames, I_CmsReport report) throws CmsException { 1976 1977 try { 1978 SEARCH_MANAGER_LOCK.lock(); 1979 Iterator<String> i = indexNames.iterator(); 1980 while (i.hasNext()) { 1981 String indexName = i.next(); 1982 // get the search index by name 1983 I_CmsSearchIndex index = getIndex(indexName); 1984 if (index != null) { 1985 // update the index 1986 updateIndex(index, report, null); 1987 } else { 1988 if (LOG.isWarnEnabled()) { 1989 LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName)); 1990 } 1991 } 1992 } 1993 // clean up the extraction result cache 1994 cleanExtractionCache(); 1995 } finally { 1996 SEARCH_MANAGER_LOCK.unlock(); 1997 } 1998 } 1999 2000 /** 2001 * Registers a new Solr core for the given index.<p> 2002 * 2003 * @param index the index to register a new Solr core for 2004 * 2005 * @throws CmsConfigurationException if no Solr server is configured 2006 */ 2007 @SuppressWarnings("resource") 2008 public void registerSolrIndex(CmsSolrIndex index) throws CmsConfigurationException { 2009 2010 if ((m_solrConfig == null) || !m_solrConfig.isEnabled()) { 2011 // No solr server configured 2012 throw new CmsConfigurationException(Messages.get().container(Messages.ERR_SOLR_NOT_ENABLED_0)); 2013 } 2014 2015 if (index.getServerUrl() != null) { // Use the index-specific Solr-Server if present. 2016 index.setSolrServer(new Builder().withBaseSolrUrl(index.getServerUrl()).build()); 2017 } else if (m_solrConfig.getServerUrl() != null) { // Use the globally configured external Solr-Server if present. 2018 // HTTP Server configured 2019 // TODO Implement multi core support for HTTP server 2020 // @see http://lucidworks.lucidimagination.com/display/solr/Configuring+solr.xml 2021 index.setSolrServer(new Builder().withBaseSolrUrl(m_solrConfig.getServerUrl()).build()); 2022 } else { // Default to the embedded Solr Server 2023 2024 // get the core container that contains one core for each configured index 2025 if (m_coreContainer == null) { 2026 m_coreContainer = createCoreContainer(); 2027 } 2028 2029 // unload the existing core if it exists to avoid problems with forced unlock. 2030 if (m_coreContainer.getAllCoreNames().contains(index.getCoreName())) { 2031 m_coreContainer.unload(index.getCoreName(), false, false, true); 2032 } 2033 // ensure that all locks on the index are gone 2034 ensureIndexIsUnlocked(index.getPath()); 2035 2036 // load the core to the container 2037 File dataDir = new File(index.getPath()); 2038 if (!dataDir.exists()) { 2039 dataDir.mkdirs(); 2040 if (CmsLog.INIT.isInfoEnabled()) { 2041 CmsLog.INIT.info( 2042 Messages.get().getBundle().key( 2043 Messages.INIT_SOLR_INDEX_DIR_CREATED_2, 2044 index.getName(), 2045 index.getPath())); 2046 } 2047 } 2048 File instanceDir = new File( 2049 m_solrConfig.getHome() + FileSystems.getDefault().getSeparator() + index.getName()); 2050 if (!instanceDir.exists()) { 2051 instanceDir.mkdirs(); 2052 if (CmsLog.INIT.isInfoEnabled()) { 2053 CmsLog.INIT.info( 2054 Messages.get().getBundle().key( 2055 Messages.INIT_SOLR_INDEX_DIR_CREATED_2, 2056 index.getName(), 2057 index.getPath())); 2058 } 2059 } 2060 2061 // create the core 2062 // TODO: suboptimal - forces always the same schema 2063 SolrCore core = null; 2064 try { 2065 // creation includes registration. 2066 // TODO: this was the old code: core = m_coreContainer.create(descriptor, false); 2067 Map<String, String> properties = new HashMap<String, String>(3); 2068 properties.put(CoreDescriptor.CORE_DATADIR, dataDir.getAbsolutePath()); 2069 properties.put(CoreDescriptor.CORE_CONFIGSET, "default"); 2070 core = m_coreContainer.create(index.getCoreName(), instanceDir.toPath(), properties, false); 2071 } catch (NullPointerException e) { 2072 if (core != null) { 2073 core.close(); 2074 } 2075 throw new CmsConfigurationException( 2076 Messages.get().container( 2077 Messages.ERR_SOLR_SERVER_NOT_CREATED_3, 2078 index.getName() + " (" + index.getCoreName() + ")", 2079 index.getPath(), 2080 m_solrConfig.getSolrConfigFile().getAbsolutePath()), 2081 e); 2082 } 2083 2084 if (index.isNoSolrServerSet()) { 2085 index.setSolrServer(new EmbeddedSolrServer(m_coreContainer, index.getCoreName())); 2086 } 2087 if (CmsLog.INIT.isInfoEnabled()) { 2088 CmsLog.INIT.info( 2089 Messages.get().getBundle().key( 2090 Messages.INIT_SOLR_SERVER_CREATED_1, 2091 index.getName() + " (" + index.getCoreName() + ")")); 2092 } 2093 } 2094 } 2095 2096 /** 2097 * Removes this field configuration from the OpenCms configuration (if it is not used any more).<p> 2098 * 2099 * @param fieldConfiguration the field configuration to remove from the configuration 2100 * 2101 * @return true if remove was successful, false if preconditions for removal are ok but the given 2102 * field configuration was unknown to the manager. 2103 * 2104 * @throws CmsIllegalStateException if the given field configuration is still used by at least one 2105 * <code>{@link I_CmsSearchIndex}</code>. 2106 * 2107 */ 2108 public boolean removeSearchFieldConfiguration(I_CmsSearchFieldConfiguration fieldConfiguration) 2109 throws CmsIllegalStateException { 2110 2111 // never remove the standard field configuration 2112 if (fieldConfiguration.getName().equals(CmsSearchFieldConfiguration.STR_STANDARD)) { 2113 throw new CmsIllegalStateException( 2114 Messages.get().container( 2115 Messages.ERR_INDEX_CONFIGURATION_DELETE_STANDARD_1, 2116 fieldConfiguration.getName())); 2117 } 2118 // validation if removal will be granted 2119 Iterator<I_CmsSearchIndex> itIndexes = m_indexes.iterator(); 2120 I_CmsSearchIndex idx; 2121 // the list for collecting indexes that use the given field configuration 2122 List<I_CmsSearchIndex> referrers = new ArrayList<I_CmsSearchIndex>(); 2123 I_CmsSearchFieldConfiguration refFieldConfig; 2124 while (itIndexes.hasNext()) { 2125 idx = itIndexes.next(); 2126 refFieldConfig = idx.getFieldConfiguration(); 2127 if (refFieldConfig.equals(fieldConfiguration)) { 2128 referrers.add(idx); 2129 } 2130 } 2131 if (referrers.size() > 0) { 2132 throw new CmsIllegalStateException( 2133 Messages.get().container( 2134 Messages.ERR_INDEX_CONFIGURATION_DELETE_2, 2135 fieldConfiguration.getName(), 2136 referrers.toString())); 2137 } 2138 2139 // remove operation (no exception) 2140 return m_fieldConfigurations.remove(fieldConfiguration.getName()) != null; 2141 2142 } 2143 2144 /** 2145 * Removes a search field from the field configuration.<p> 2146 * 2147 * @param fieldConfiguration the field configuration 2148 * @param field field to remove from the field configuration 2149 * 2150 * @return true if remove was successful, false if preconditions for removal are ok but the given 2151 * field was unknown. 2152 */ 2153 public boolean removeSearchFieldConfigurationField( 2154 I_CmsSearchFieldConfiguration fieldConfiguration, 2155 CmsSearchField field) { 2156 2157 if (LOG.isInfoEnabled()) { 2158 LOG.info( 2159 Messages.get().getBundle().key( 2160 Messages.LOG_REMOVE_FIELDCONFIGURATION_FIELD_INDEX_2, 2161 field.getName(), 2162 fieldConfiguration.getName())); 2163 } 2164 2165 return fieldConfiguration.getFields().remove(field); 2166 } 2167 2168 /** 2169 * Removes a search field mapping from the given field.<p> 2170 * 2171 * @param field the field 2172 * @param mapping mapping to remove from the field 2173 * 2174 * @return true if remove was successful, false if preconditions for removal are ok but the given 2175 * mapping was unknown. 2176 * 2177 * @throws CmsIllegalStateException if the given mapping is the last mapping inside the given field. 2178 */ 2179 public boolean removeSearchFieldMapping(CmsLuceneField field, CmsSearchFieldMapping mapping) 2180 throws CmsIllegalStateException { 2181 2182 if (field.getMappings().size() < 2) { 2183 throw new CmsIllegalStateException( 2184 Messages.get().container( 2185 Messages.ERR_FIELD_MAPPING_DELETE_2, 2186 mapping.getType().toString(), 2187 field.getName())); 2188 } else { 2189 2190 if (LOG.isInfoEnabled()) { 2191 LOG.info( 2192 Messages.get().getBundle().key( 2193 Messages.LOG_REMOVE_FIELD_MAPPING_INDEX_2, 2194 mapping.toString(), 2195 field.getName())); 2196 } 2197 return field.getMappings().remove(mapping); 2198 } 2199 } 2200 2201 /** 2202 * Removes a search index from the configuration.<p> 2203 * 2204 * @param searchIndex the search index to remove 2205 */ 2206 public void removeSearchIndex(I_CmsSearchIndex searchIndex) { 2207 2208 // shut down index to remove potential config files of Solr indexes 2209 searchIndex.shutDown(); 2210 if (searchIndex instanceof CmsSolrIndex) { 2211 CmsSolrIndex solrIndex = (CmsSolrIndex)searchIndex; 2212 m_coreContainer.unload(solrIndex.getCoreName(), true, true, true); 2213 } 2214 m_indexes.remove(searchIndex); 2215 initOfflineIndexes(); 2216 2217 if (LOG.isInfoEnabled()) { 2218 LOG.info( 2219 Messages.get().getBundle().key( 2220 Messages.LOG_REMOVE_SEARCH_INDEX_2, 2221 searchIndex.getName(), 2222 searchIndex.getProject())); 2223 } 2224 } 2225 2226 /** 2227 * Removes all indexes included in the given list (which must contain the name of an index to remove).<p> 2228 * 2229 * @param indexNames the names of the index to remove 2230 */ 2231 public void removeSearchIndexes(List<String> indexNames) { 2232 2233 Iterator<String> i = indexNames.iterator(); 2234 while (i.hasNext()) { 2235 String indexName = i.next(); 2236 // get the search index by name 2237 I_CmsSearchIndex index = getIndex(indexName); 2238 if (index != null) { 2239 // remove the index 2240 removeSearchIndex(index); 2241 } else { 2242 if (LOG.isWarnEnabled()) { 2243 LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName)); 2244 } 2245 } 2246 } 2247 } 2248 2249 /** 2250 * Removes this indexsource from the OpenCms configuration (if it is not used any more).<p> 2251 * 2252 * @param indexsource the indexsource to remove from the configuration 2253 * 2254 * @return true if remove was successful, false if preconditions for removal are ok but the given 2255 * searchindex was unknown to the manager. 2256 * 2257 * @throws CmsIllegalStateException if the given indexsource is still used by at least one 2258 * <code>{@link I_CmsSearchIndex}</code>. 2259 * 2260 */ 2261 public boolean removeSearchIndexSource(CmsSearchIndexSource indexsource) throws CmsIllegalStateException { 2262 2263 // validation if removal will be granted 2264 Iterator<I_CmsSearchIndex> itIndexes = m_indexes.iterator(); 2265 I_CmsSearchIndex idx; 2266 // the list for collecting indexes that use the given index source 2267 List<I_CmsSearchIndex> referrers = new ArrayList<I_CmsSearchIndex>(); 2268 // the current list of referred index sources of the iterated index 2269 List<CmsSearchIndexSource> refsources; 2270 while (itIndexes.hasNext()) { 2271 idx = itIndexes.next(); 2272 refsources = idx.getSources(); 2273 if (refsources != null) { 2274 if (refsources.contains(indexsource)) { 2275 referrers.add(idx); 2276 } 2277 } 2278 } 2279 if (referrers.size() > 0) { 2280 throw new CmsIllegalStateException( 2281 Messages.get().container( 2282 Messages.ERR_INDEX_SOURCE_DELETE_2, 2283 indexsource.getName(), 2284 referrers.toString())); 2285 } 2286 2287 // remove operation (no exception) 2288 return m_indexSources.remove(indexsource.getName()) != null; 2289 2290 } 2291 2292 /** 2293 * Resumes offline indexing if it was paused and no pause for another pauseId is still present.<p> 2294 * @param pauseId the id of the pause request, which now allows for resuming. 2295 */ 2296 public void resumeOfflineIndexing(CmsUUID pauseId) { 2297 2298 synchronized (m_pauseRequests) { 2299 if (!m_pauseRequests.contains(pauseId)) { 2300 try { 2301 throw new IllegalArgumentException(); 2302 } catch (IllegalArgumentException e) { 2303 LOG.warn("Cannot resume for pause request " + pauseId + ". The request id is unknown.", e); 2304 } 2305 } else { 2306 m_pauseRequests.remove(pauseId); 2307 if (LOG.isDebugEnabled()) { 2308 LOG.debug( 2309 "Removed pause request " 2310 + pauseId 2311 + " from pause requests. Remaining pauses are: " 2312 + m_pauseRequests); 2313 } 2314 if (m_pauseRequests.isEmpty()) { 2315 LOG.info("Resuming offline indexing."); 2316 setOfflineUpdateFrequency( 2317 m_configuredOfflineIndexingFrequency > 0 2318 ? m_configuredOfflineIndexingFrequency 2319 : DEFAULT_OFFLINE_UPDATE_FREQNENCY); 2320 } 2321 } 2322 } 2323 } 2324 2325 /** 2326 * Sets the name of the directory below WEB-INF/ where the search indexes are stored.<p> 2327 * 2328 * @param value the name of the directory below WEB-INF/ where the search indexes are stored 2329 */ 2330 public void setDirectory(String value) { 2331 2332 m_path = value; 2333 } 2334 2335 /** 2336 * Sets the maximum age a text extraction result is kept in the cache (in hours).<p> 2337 * 2338 * @param extractionCacheMaxAge the maximum age for a text extraction result to set 2339 */ 2340 public void setExtractionCacheMaxAge(float extractionCacheMaxAge) { 2341 2342 m_extractionCacheMaxAge = extractionCacheMaxAge; 2343 } 2344 2345 /** 2346 * Sets the maximum age a text extraction result is kept in the cache (in hours) as a String.<p> 2347 * 2348 * @param extractionCacheMaxAge the maximum age for a text extraction result to set 2349 */ 2350 public void setExtractionCacheMaxAge(String extractionCacheMaxAge) { 2351 2352 try { 2353 setExtractionCacheMaxAge(Float.parseFloat(extractionCacheMaxAge)); 2354 } catch (NumberFormatException e) { 2355 LOG.error( 2356 Messages.get().getBundle().key( 2357 Messages.LOG_PARSE_EXTRACTION_CACHE_AGE_FAILED_2, 2358 extractionCacheMaxAge, 2359 Float.valueOf(DEFAULT_EXTRACTION_CACHE_MAX_AGE)), 2360 e); 2361 setExtractionCacheMaxAge(DEFAULT_EXTRACTION_CACHE_MAX_AGE); 2362 } 2363 } 2364 2365 /** 2366 * Sets the unlock mode during indexing.<p> 2367 * 2368 * @param value the value 2369 */ 2370 public void setForceunlock(String value) { 2371 2372 m_forceUnlockMode = CmsSearchForceUnlockMode.valueOf(value); 2373 } 2374 2375 /** 2376 * Sets the highlighter.<p> 2377 * 2378 * A highlighter is a class implementing org.opencms.search.documents.I_TermHighlighter.<p> 2379 * 2380 * @param highlighter the package/class name of the highlighter 2381 */ 2382 public void setHighlighter(String highlighter) { 2383 2384 try { 2385 m_highlighter = (I_CmsTermHighlighter)Class.forName(highlighter).newInstance(); 2386 } catch (Exception e) { 2387 m_highlighter = null; 2388 LOG.error(e.getLocalizedMessage(), e); 2389 } 2390 } 2391 2392 /** 2393 * Sets the seconds to wait for an index lock during an update operation.<p> 2394 * 2395 * @param value the seconds to wait for an index lock during an update operation 2396 */ 2397 public void setIndexLockMaxWaitSeconds(int value) { 2398 2399 m_indexLockMaxWaitSeconds = value; 2400 } 2401 2402 /** 2403 * Sets the max. excerpt length.<p> 2404 * 2405 * @param maxExcerptLength the max. excerpt length to set 2406 */ 2407 public void setMaxExcerptLength(int maxExcerptLength) { 2408 2409 m_maxExcerptLength = maxExcerptLength; 2410 } 2411 2412 /** 2413 * Sets the max. excerpt length as a String.<p> 2414 * 2415 * @param maxExcerptLength the max. excerpt length to set 2416 */ 2417 public void setMaxExcerptLength(String maxExcerptLength) { 2418 2419 try { 2420 setMaxExcerptLength(Integer.parseInt(maxExcerptLength)); 2421 } catch (Exception e) { 2422 LOG.error( 2423 Messages.get().getBundle().key( 2424 Messages.LOG_PARSE_EXCERPT_LENGTH_FAILED_2, 2425 maxExcerptLength, 2426 Integer.valueOf(DEFAULT_EXCERPT_LENGTH)), 2427 e); 2428 setMaxExcerptLength(DEFAULT_EXCERPT_LENGTH); 2429 } 2430 } 2431 2432 /** 2433 * Sets the maximal wait time for offline index updates after edit operations.<p> 2434 * 2435 * @param maxIndexWaitTime the maximal wait time to set in milliseconds 2436 */ 2437 public void setMaxIndexWaitTime(long maxIndexWaitTime) { 2438 2439 m_maxIndexWaitTime = maxIndexWaitTime; 2440 } 2441 2442 /** 2443 * Sets the maximal wait time for offline index updates after edit operations.<p> 2444 * 2445 * @param maxIndexWaitTime the maximal wait time to set in milliseconds 2446 */ 2447 public void setMaxIndexWaitTime(String maxIndexWaitTime) { 2448 2449 try { 2450 setMaxIndexWaitTime(Long.parseLong(maxIndexWaitTime)); 2451 } catch (Exception e) { 2452 LOG.error( 2453 Messages.get().getBundle().key( 2454 Messages.LOG_PARSE_MAX_INDEX_WAITTIME_FAILED_2, 2455 maxIndexWaitTime, 2456 Long.valueOf(DEFAULT_MAX_INDEX_WAITTIME)), 2457 e); 2458 setMaxIndexWaitTime(DEFAULT_MAX_INDEX_WAITTIME); 2459 } 2460 } 2461 2462 /** 2463 * Sets the maximum number of modifications before a commit in the search index is triggered.<p> 2464 * 2465 * @param maxModificationsBeforeCommit the maximum number of modifications to set 2466 */ 2467 public void setMaxModificationsBeforeCommit(int maxModificationsBeforeCommit) { 2468 2469 m_maxModificationsBeforeCommit = maxModificationsBeforeCommit; 2470 } 2471 2472 /** 2473 * Sets the maximum number of modifications before a commit in the search index is triggered as a string.<p> 2474 * 2475 * @param value the maximum number of modifications to set 2476 */ 2477 public void setMaxModificationsBeforeCommit(String value) { 2478 2479 try { 2480 setMaxModificationsBeforeCommit(Integer.parseInt(value)); 2481 } catch (Exception e) { 2482 LOG.error( 2483 Messages.get().getBundle().key( 2484 Messages.LOG_PARSE_MAXCOMMIT_FAILED_2, 2485 value, 2486 Integer.valueOf(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT)), 2487 e); 2488 setMaxModificationsBeforeCommit(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT); 2489 } 2490 } 2491 2492 /** 2493 * Sets the update frequency of the offline indexer in milliseconds.<p> 2494 * 2495 * @param offlineUpdateFrequency the update frequency in milliseconds to set 2496 */ 2497 public void setOfflineUpdateFrequency(long offlineUpdateFrequency) { 2498 2499 m_offlineUpdateFrequency = offlineUpdateFrequency; 2500 updateOfflineIndexes(0); 2501 } 2502 2503 /** 2504 * Sets the update frequency of the offline indexer in milliseconds.<p> 2505 * 2506 * @param offlineUpdateFrequency the update frequency in milliseconds to set 2507 */ 2508 public void setOfflineUpdateFrequency(String offlineUpdateFrequency) { 2509 2510 try { 2511 setOfflineUpdateFrequency(Long.parseLong(offlineUpdateFrequency)); 2512 } catch (Exception e) { 2513 LOG.error( 2514 Messages.get().getBundle().key( 2515 Messages.LOG_PARSE_OFFLINE_UPDATE_FAILED_2, 2516 offlineUpdateFrequency, 2517 Long.valueOf(DEFAULT_OFFLINE_UPDATE_FREQNENCY)), 2518 e); 2519 setOfflineUpdateFrequency(DEFAULT_OFFLINE_UPDATE_FREQNENCY); 2520 } 2521 } 2522 2523 /** 2524 * Sets the Solr configuration.<p> 2525 * 2526 * @param config the Solr configuration 2527 */ 2528 public void setSolrServerConfiguration(CmsSolrConfiguration config) { 2529 2530 m_solrConfig = config; 2531 } 2532 2533 /** 2534 * Sets the timeout to abandon threads indexing a resource.<p> 2535 * 2536 * @param value the timeout in milliseconds 2537 */ 2538 public void setTimeout(long value) { 2539 2540 m_timeout = value; 2541 } 2542 2543 /** 2544 * Sets the timeout to abandon threads indexing a resource as a String.<p> 2545 * 2546 * @param value the timeout in milliseconds 2547 */ 2548 public void setTimeout(String value) { 2549 2550 try { 2551 setTimeout(Long.parseLong(value)); 2552 } catch (Exception e) { 2553 LOG.error( 2554 Messages.get().getBundle().key( 2555 Messages.LOG_PARSE_TIMEOUT_FAILED_2, 2556 value, 2557 Long.valueOf(DEFAULT_TIMEOUT)), 2558 e); 2559 setTimeout(DEFAULT_TIMEOUT); 2560 } 2561 } 2562 2563 /** 2564 * Shuts down the search manager.<p> 2565 * 2566 * This will cause all search indices to be shut down.<p> 2567 */ 2568 public void shutDown() { 2569 2570 if (m_offlineIndexThread != null) { 2571 m_offlineIndexThread.shutDown(); 2572 } 2573 2574 if (m_offlineHandler != null) { 2575 OpenCms.removeCmsEventListener(m_offlineHandler); 2576 } 2577 2578 Iterator<I_CmsSearchIndex> i = m_indexes.iterator(); 2579 while (i.hasNext()) { 2580 I_CmsSearchIndex index = i.next(); 2581 index.shutDown(); 2582 index = null; 2583 } 2584 m_indexes.clear(); 2585 2586 shutDownSolrContainer(); 2587 2588 if (CmsLog.INIT.isInfoEnabled()) { 2589 CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SHUTDOWN_MANAGER_0)); 2590 } 2591 } 2592 2593 /** 2594 * Updates all offline indexes.<p> 2595 * 2596 * Can be used to force an index update when it's not convenient to wait until the 2597 * offline update interval has eclipsed.<p> 2598 * 2599 * Since the offline indexes still need some time to update the new resources, 2600 * the method waits for at most the configurable <code>maxIndexWaitTime</code> 2601 * to ensure that updating is finished. 2602 * 2603 * @see #updateOfflineIndexes(long) 2604 * 2605 */ 2606 public void updateOfflineIndexes() { 2607 2608 updateOfflineIndexes(getMaxIndexWaitTime()); 2609 } 2610 2611 /** 2612 * Updates all offline indexes.<p> 2613 * 2614 * Can be used to force an index update when it's not convenient to wait until the 2615 * offline update interval has eclipsed.<p> 2616 * 2617 * Since the offline index will still need some time to update the new resources even if it runs directly, 2618 * a wait time of 2500 or so should be given in order to make sure the index finished updating. 2619 * 2620 * @param waitTime milliseconds to wait after the offline update index was notified of the changes 2621 */ 2622 public void updateOfflineIndexes(long waitTime) { 2623 2624 if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) { 2625 // notify existing thread of update frequency change 2626 if (LOG.isDebugEnabled()) { 2627 LOG.debug(Messages.get().getBundle().key(Messages.LOG_OI_UPDATE_INTERRUPT_0)); 2628 } 2629 m_offlineIndexThread.interrupt(); 2630 if (waitTime > 0) { 2631 m_offlineIndexThread.getWaitHandle().enter(waitTime); 2632 } 2633 } 2634 } 2635 2636 /** 2637 * Collects the resources whose indexed document depends on one of the updated resources.<p> 2638 * We take transitive dependencies into account and handle cyclic dependencies correctly as well. 2639 * 2640 * @param adminCms an OpenCms user context with Admin permissions 2641 * @param updateResources the resources to be re-indexed 2642 * 2643 * @return the updated list of resource to re-index 2644 */ 2645 protected List<CmsPublishedResource> addAdditionallyAffectedResources( 2646 CmsObject adminCms, 2647 List<CmsPublishedResource> updateResources) { 2648 2649 if (updateResources.size() > 0) { 2650 Set<CmsPublishedResource> updateResourceSet = new HashSet<>(updateResources); 2651 Collection<CmsPublishedResource> resourcesToCheck = updateResourceSet; 2652 Collection<CmsPublishedResource> additionalResources = Collections.emptySet(); 2653 do { 2654 additionalResources = findRelatedContainerPages(adminCms, updateResourceSet, resourcesToCheck); 2655 additionalResources.addAll( 2656 addIndexContentRelatedResources(adminCms, updateResourceSet, resourcesToCheck)); 2657 updateResources.addAll(additionalResources); 2658 updateResourceSet.addAll(additionalResources); 2659 resourcesToCheck = additionalResources; 2660 } while (resourcesToCheck.size() > 0); 2661 } 2662 return updateResources; 2663 } 2664 2665 /** 2666 * Collects the resources whose indexed document depends on one of the updated resources.<p> 2667 * 2668 * @param adminCms an OpenCms user context with Admin permissions 2669 * @param updateResources the resources to be re-indexed 2670 * @param updateResourcesToCheck the resources to check additionally affected resources for, subset of updateResources 2671 * 2672 * @return the list of resources that need to be additionally re-index 2673 */ 2674 protected Collection<CmsPublishedResource> addIndexContentRelatedResources( 2675 CmsObject adminCms, 2676 Collection<CmsPublishedResource> updateResources, 2677 Collection<CmsPublishedResource> updateResourcesToCheck) { 2678 2679 Collection<CmsPublishedResource> additionalUpdateResources = new HashSet<>(); 2680 for (CmsPublishedResource checkedRes : updateResourcesToCheck) { 2681 try { 2682 CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(checkedRes.getStructureId()); 2683 filter = filter.filterType(CmsRelationType.INDEX_CONTENT); 2684 List<CmsRelation> relations = adminCms.readRelations(filter); 2685 for (CmsRelation relation : relations) { 2686 CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL); 2687 CmsPublishedResource additionalPubRes = new CmsPublishedResource(res); 2688 if (!updateResources.contains(additionalPubRes)) { 2689 additionalUpdateResources.add(additionalPubRes); 2690 } 2691 } 2692 } catch (CmsException e) { 2693 LOG.error(e.getLocalizedMessage(), e); 2694 } 2695 } 2696 return additionalUpdateResources; 2697 } 2698 2699 /** 2700 * Cleans up the extraction result cache.<p> 2701 */ 2702 protected void cleanExtractionCache() { 2703 2704 // clean up the extraction result cache 2705 m_extractionResultCache.cleanCache(m_extractionCacheMaxAge); 2706 } 2707 2708 /** 2709 * Collects the related containerpages to the resources that have been published.<p> 2710 * 2711 * @param adminCms an OpenCms user context with Admin permissions 2712 * @param updateResources the resources to be re-indexed 2713 * @param updateResourcesToCheck the resources to check additionally affected resources for, subset of updateResources 2714 * 2715 * @return the list of resources that need to be additionally re-index 2716 */ 2717 protected Collection<CmsPublishedResource> findRelatedContainerPages( 2718 CmsObject adminCms, 2719 Collection<CmsPublishedResource> updateResources, 2720 Collection<CmsPublishedResource> updateResourcesToCheck) { 2721 2722 CmsResourceManager resMan = OpenCms.getResourceManager(); 2723 Collection<CmsPublishedResource> additionalUpdateResources = new HashSet<>(); 2724 2725 Set<CmsResource> containerPages = new HashSet<CmsResource>(); 2726 int containerPageTypeId = -1; 2727 try { 2728 containerPageTypeId = CmsResourceTypeXmlContainerPage.getContainerPageTypeId(); 2729 } catch (CmsLoaderException e) { 2730 // will happen during setup, when container page type is not available yet 2731 LOG.info(e.getLocalizedMessage(), e); 2732 } 2733 if (containerPageTypeId != -1) { 2734 for (CmsPublishedResource pubRes : updateResourcesToCheck) { 2735 try { 2736 if (resMan.getResourceType(pubRes.getType()) instanceof CmsResourceTypeXmlContent) { 2737 if (!isGroup(pubRes.getType())) { 2738 CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId( 2739 pubRes.getStructureId()).filterStrong(); 2740 List<CmsRelation> relations = adminCms.readRelations(filter); 2741 for (CmsRelation relation : relations) { 2742 CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL); 2743 if (CmsResourceTypeXmlContainerPage.isContainerPage(res)) { 2744 containerPages.add(res); 2745 if (CmsDetailOnlyContainerUtil.isDetailContainersPage( 2746 adminCms, 2747 adminCms.getSitePath(res))) { 2748 addDetailContent(adminCms, containerPages, adminCms.getSitePath(res)); 2749 } 2750 } 2751 } 2752 } 2753 } 2754 if (containerPageTypeId == pubRes.getType()) { 2755 addDetailContent( 2756 adminCms, 2757 containerPages, 2758 adminCms.getRequestContext().removeSiteRoot(pubRes.getRootPath())); 2759 } 2760 } catch (CmsException e) { 2761 LOG.error(e.getLocalizedMessage(), e); 2762 } 2763 } 2764 // add all found container pages as published resource objects to the list 2765 for (CmsResource page : containerPages) { 2766 CmsPublishedResource pubCont = new CmsPublishedResource(page); 2767 if (!updateResources.contains(pubCont)) { 2768 // ensure container page is added only once 2769 additionalUpdateResources.add(pubCont); 2770 } 2771 } 2772 } 2773 return additionalUpdateResources; 2774 } 2775 2776 /** 2777 * Returns the set of names of all configured document types.<p> 2778 * 2779 * @return the set of names of all configured document types 2780 */ 2781 protected List<String> getDocumentTypes() { 2782 2783 return Collections.unmodifiableList(new ArrayList<String>(m_documentTypes.keySet())); 2784 } 2785 2786 /** 2787 * Returns the a offline project used for offline indexing.<p> 2788 * 2789 * @return the offline project if available 2790 */ 2791 protected CmsProject getOfflineIndexProject() { 2792 2793 CmsProject result = null; 2794 for (I_CmsSearchIndex index : m_offlineIndexes) { 2795 try { 2796 result = m_adminCms.readProject(index.getProject()); 2797 2798 if (!result.isOnlineProject()) { 2799 break; 2800 } 2801 } catch (Exception e) { 2802 // may be a missconfigured index, ignore 2803 LOG.error(e.getLocalizedMessage(), e); 2804 } 2805 } 2806 return result; 2807 } 2808 2809 /** 2810 * Returns a new thread manager for the indexing threads.<p> 2811 * 2812 * @return a new thread manager for the indexing threads 2813 */ 2814 protected CmsIndexingThreadManager getThreadManager() { 2815 2816 return new CmsIndexingThreadManager(m_timeout, m_maxModificationsBeforeCommit); 2817 } 2818 2819 /** 2820 * Initializes the available Cms resource types to be indexed.<p> 2821 * 2822 * A map stores document factories keyed by a string representing 2823 * a colon separated list of Cms resource types and/or mimetypes.<p> 2824 * 2825 * The keys of this map are used to trigger a document factory to convert 2826 * a Cms resource into a Lucene index document.<p> 2827 * 2828 * A document factory is a class implementing the interface 2829 * {@link org.opencms.search.documents.I_CmsDocumentFactory}.<p> 2830 */ 2831 protected void initAvailableDocumentTypes() { 2832 2833 CmsSearchDocumentType documenttype = null; 2834 String className = null; 2835 String name = null; 2836 I_CmsDocumentFactory documentFactory = null; 2837 List<String> resourceTypes = null; 2838 List<String> mimeTypes = null; 2839 Class<?> c = null; 2840 2841 m_documentTypes = new LinkedHashMap<String, Map<String, I_CmsDocumentFactory>>(); 2842 2843 for (int i = 0, n = m_documentTypeConfigs.size(); i < n; i++) { 2844 2845 documenttype = m_documentTypeConfigs.get(i); 2846 name = documenttype.getName(); 2847 2848 try { 2849 className = documenttype.getClassName(); 2850 resourceTypes = documenttype.getResourceTypes(); 2851 mimeTypes = documenttype.getMimeTypes(); 2852 2853 if (name == null) { 2854 throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_NAME_0)); 2855 } 2856 if (className == null) { 2857 throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_CLASS_DEF_0)); 2858 } 2859 if (resourceTypes.size() == 0) { 2860 throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_RESOURCETYPE_DEF_0)); 2861 } 2862 2863 try { 2864 c = Class.forName(className); 2865 documentFactory = (I_CmsDocumentFactory)c.getConstructor(new Class[] {String.class}).newInstance( 2866 new Object[] {name}); 2867 } catch (ClassNotFoundException exc) { 2868 throw new CmsIndexException( 2869 Messages.get().container(Messages.ERR_DOCCLASS_NOT_FOUND_1, className), 2870 exc); 2871 } catch (Exception exc) { 2872 throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCCLASS_INIT_1, className), exc); 2873 } 2874 2875 if (documentFactory.isUsingCache()) { 2876 // init cache if used by the factory 2877 documentFactory.setCache(m_extractionResultCache); 2878 } 2879 2880 Map<String, I_CmsDocumentFactory> matchingTypes = new HashMap<>(); 2881 for (Iterator<String> keyIt = documentFactory.getDocumentKeys( 2882 resourceTypes, 2883 mimeTypes).iterator(); keyIt.hasNext();) { 2884 String key = keyIt.next(); 2885 matchingTypes.put(key, documentFactory); 2886 m_extractionKeys.add(key); 2887 } 2888 m_documentTypes.put(name, matchingTypes); 2889 2890 } catch (CmsException e) { 2891 if (LOG.isWarnEnabled()) { 2892 LOG.warn(Messages.get().getBundle().key(Messages.LOG_DOCTYPE_CONFIG_FAILED_1, name), e); 2893 } 2894 } 2895 } 2896 } 2897 2898 /** 2899 * Initializes the index sources. 2900 */ 2901 protected void initIndexSources() { 2902 2903 for (CmsSearchIndexSource source : m_indexSources.values()) { 2904 source.init(); 2905 } 2906 } 2907 2908 /** 2909 * Initializes the configured search indexes.<p> 2910 * 2911 * This initializes also the list of Cms resources types 2912 * to be indexed by an index source.<p> 2913 */ 2914 protected void initSearchIndexes() { 2915 2916 I_CmsSearchIndex index = null; 2917 for (int i = 0, n = m_indexes.size(); i < n; i++) { 2918 index = m_indexes.get(i); 2919 // reset disabled flag 2920 index.setEnabled(true); 2921 // check if the index has been configured correctly 2922 if (index.checkConfiguration(m_adminCms)) { 2923 // the index is configured correctly 2924 try { 2925 index.initialize(); 2926 } catch (Exception e) { 2927 if (CmsLog.INIT.isWarnEnabled()) { 2928 // in this case the index will be disabled 2929 CmsLog.INIT.warn(Messages.get().getBundle().key(Messages.INIT_SEARCH_INIT_FAILED_1, index), e); 2930 } 2931 } 2932 } 2933 // output a log message if the index was successfully configured or not 2934 if (CmsLog.INIT.isInfoEnabled()) { 2935 if (index.isEnabled()) { 2936 CmsLog.INIT.info( 2937 Messages.get().getBundle().key(Messages.INIT_INDEX_CONFIGURED_2, index, index.getProject())); 2938 } else { 2939 CmsLog.INIT.warn( 2940 Messages.get().getBundle().key( 2941 Messages.INIT_INDEX_NOT_CONFIGURED_2, 2942 index, 2943 index.getProject())); 2944 } 2945 } 2946 } 2947 } 2948 2949 /** 2950 * Checks, if the index should be rebuilt/updated at all by the search manager. 2951 * @param index the index to check. 2952 * @return a flag, indicating if the index should be rebuilt/updated at all. 2953 */ 2954 protected boolean shouldUpdateAtAll(I_CmsSearchIndex index) { 2955 2956 if (I_CmsSearchIndex.REBUILD_MODE_NEVER.equals(index.getRebuildMode())) { 2957 LOG.debug(Messages.get().getBundle().key(Messages.LOG_SKIP_REBUILD_FOR_MODE_NEVER_1, index.getName())); 2958 return false; 2959 } else { 2960 return true; 2961 } 2962 2963 } 2964 2965 /** 2966 * Incrementally updates all indexes that have their rebuild mode set to <code>"auto"</code> 2967 * after resources have been published.<p> 2968 * 2969 * @param adminCms an OpenCms user context with Admin permissions 2970 * @param publishHistoryId the history ID of the published project 2971 * @param report the report to write the output to 2972 */ 2973 protected void updateAllIndexes(CmsObject adminCms, CmsUUID publishHistoryId, I_CmsReport report) { 2974 2975 int oldPriority = Thread.currentThread().getPriority(); 2976 try { 2977 SEARCH_MANAGER_LOCK.lock(); 2978 Thread.currentThread().setPriority(Thread.MIN_PRIORITY); 2979 List<CmsPublishedResource> publishedResources; 2980 try { 2981 // read the list of all published resources 2982 publishedResources = adminCms.readPublishedResources(publishHistoryId); 2983 } catch (CmsException e) { 2984 LOG.error( 2985 Messages.get().getBundle().key(Messages.LOG_READING_CHANGED_RESOURCES_FAILED_1, publishHistoryId), 2986 e); 2987 return; 2988 } 2989 Set<CmsUUID> bothNewAndDeleted = getIdsOfPublishResourcesWhichAreBothNewAndDeleted(publishedResources); 2990 // When published resources with both states 'new' and 'deleted' exist in the same publish job history, the resource has been moved 2991 2992 List<CmsPublishedResource> updateResources = new ArrayList<CmsPublishedResource>(); 2993 for (CmsPublishedResource res : publishedResources) { 2994 if (res.getState().isUnchanged()) { 2995 // unchanged resources don't need to be indexed after publish 2996 continue; 2997 } 2998 if (res.getState().isDeleted() || res.getState().isNew() || res.getState().isChanged()) { 2999 if (updateResources.contains(res)) { 3000 // resource may have been added as a sibling of another resource 3001 // in this case we make sure to use the value from the publish list because of the "deleted" flag 3002 boolean hasMoved = bothNewAndDeleted.contains(res.getStructureId()) 3003 || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_DESTINATION) 3004 || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_SOURCE); 3005 // check it this is a moved resource with source / target info, in this case we need both entries 3006 if (!hasMoved) { 3007 // if the resource was moved, we must contain both entries 3008 updateResources.remove(res); 3009 } 3010 // "equals()" implementation of published resource checks for id, 3011 // so the removed value may have a different "deleted" or "modified" status value 3012 updateResources.add(res); 3013 } else { 3014 // resource not yet contained in the list 3015 updateResources.add(res); 3016 // check for the siblings (not for deleted resources, these are already gone) 3017 if (!res.getState().isDeleted() && (res.getSiblingCount() > 1)) { 3018 // this resource has siblings 3019 try { 3020 // read siblings from the online project 3021 List<CmsResource> siblings = adminCms.readSiblings( 3022 res.getRootPath(), 3023 CmsResourceFilter.ALL); 3024 Iterator<CmsResource> itSib = siblings.iterator(); 3025 while (itSib.hasNext()) { 3026 // check all siblings 3027 CmsResource sibling = itSib.next(); 3028 CmsPublishedResource sib = new CmsPublishedResource(sibling); 3029 if (!updateResources.contains(sib)) { 3030 // ensure sibling is added only once 3031 updateResources.add(sib); 3032 } 3033 } 3034 } catch (CmsException e) { 3035 // ignore, just use the original resource 3036 if (LOG.isWarnEnabled()) { 3037 LOG.warn( 3038 Messages.get().getBundle().key( 3039 Messages.LOG_UNABLE_TO_READ_SIBLINGS_1, 3040 res.getRootPath()), 3041 e); 3042 } 3043 } 3044 } 3045 } 3046 } 3047 } 3048 3049 addAdditionallyAffectedResources(adminCms, updateResources); 3050 updateAllIndexes(adminCms, updateResources, report); 3051 } finally { 3052 SEARCH_MANAGER_LOCK.unlock(); 3053 Thread.currentThread().setPriority(oldPriority); 3054 } 3055 } 3056 3057 /** 3058 * Incrementally updates all indexes that have their rebuild mode set to <code>"auto"</code>.<p> 3059 * 3060 * @param adminCms an OpenCms user context with Admin permissions 3061 * @param updateResources the resources to update 3062 * @param report the report to write the output to 3063 */ 3064 protected void updateAllIndexes( 3065 CmsObject adminCms, 3066 List<CmsPublishedResource> updateResources, 3067 I_CmsReport report) { 3068 3069 try { 3070 SEARCH_MANAGER_LOCK.lock(); 3071 if (!updateResources.isEmpty()) { 3072 // sort the resource to update 3073 Collections.sort(updateResources); 3074 // only update the indexes if the list of remaining published resources is not empty 3075 Iterator<I_CmsSearchIndex> i = m_indexes.iterator(); 3076 while (i.hasNext()) { 3077 I_CmsSearchIndex index = i.next(); 3078 if (I_CmsSearchIndex.REBUILD_MODE_AUTO.equals(index.getRebuildMode())) { 3079 // only update indexes which have the rebuild mode set to "auto" 3080 try { 3081 updateIndex(index, report, updateResources); 3082 } catch (CmsException e) { 3083 LOG.error( 3084 Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()), 3085 e); 3086 } 3087 } 3088 } 3089 } 3090 // clean up the extraction result cache 3091 cleanExtractionCache(); 3092 } finally { 3093 SEARCH_MANAGER_LOCK.unlock(); 3094 } 3095 3096 } 3097 3098 /** 3099 * Updates (if required creates) the index with the given name.<p> 3100 * 3101 * If the optional List of <code>{@link CmsPublishedResource}</code> instances is provided, the index will be 3102 * incrementally updated for these resources only. If this List is <code>null</code> or empty, 3103 * the index will be fully rebuild.<p> 3104 * 3105 * @param index the index to update or rebuild 3106 * @param report the report to write output messages to 3107 * @param resourcesToIndex an (optional) list of <code>{@link CmsPublishedResource}</code> objects to update in the index 3108 * 3109 * @throws CmsException if something goes wrong 3110 */ 3111 protected void updateIndex(I_CmsSearchIndex index, I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) 3112 throws CmsException { 3113 3114 if (shouldUpdateAtAll(index)) { 3115 try { 3116 SEARCH_MANAGER_LOCK.lock(); 3117 3118 // copy the stored admin context for the indexing 3119 CmsObject cms = OpenCms.initCmsObject(m_adminCms); 3120 // make sure a report is available 3121 if (report == null) { 3122 report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class); 3123 } 3124 3125 // check if the index has been configured correctly 3126 if (!index.checkConfiguration(cms)) { 3127 // the index is disabled 3128 return; 3129 } 3130 3131 // set site root and project for this index 3132 cms.getRequestContext().setSiteRoot("/"); 3133 // switch to the index project 3134 cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject())); 3135 3136 if ((resourcesToIndex == null) || resourcesToIndex.isEmpty()) { 3137 // rebuild the complete index 3138 3139 updateIndexCompletely(cms, index, report); 3140 } else { 3141 updateIndexIncremental(cms, index, report, resourcesToIndex); 3142 } 3143 } finally { 3144 SEARCH_MANAGER_LOCK.unlock(); 3145 } 3146 } 3147 } 3148 3149 /** 3150 * The method updates all OpenCms documents that are indexed. 3151 * @param cms the OpenCms user context to use for accessing the VFS 3152 * @param index the index to update 3153 * @param report the report to write output messages to 3154 * @throws CmsIndexException thrown if indexing fails for some reason 3155 */ 3156 @SuppressWarnings("null") 3157 protected void updateIndexCompletely(CmsObject cms, I_CmsSearchIndex index, I_CmsReport report) 3158 throws CmsIndexException { 3159 3160 // create a new thread manager for the indexing threads 3161 CmsIndexingThreadManager threadManager = getThreadManager(); 3162 3163 boolean isOfflineIndex = false; 3164 if (I_CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) { 3165 // disable offline indexing while the complete index is rebuild 3166 isOfflineIndex = true; 3167 index.setRebuildMode(I_CmsSearchIndex.REBUILD_MODE_MANUAL); 3168 // re-initialize the offline indexes, this will disable this offline index 3169 initOfflineIndexes(); 3170 } 3171 3172 I_CmsIndexWriter writer = null; 3173 try { 3174 // create a backup of the existing index 3175 CmsSearchIndex indexInternal = null; 3176 String backup = null; 3177 if (index instanceof CmsSearchIndex) { 3178 indexInternal = (CmsSearchIndex)index; 3179 backup = indexInternal.createIndexBackup(); 3180 if (backup != null) { 3181 indexInternal.indexSearcherOpen(backup); 3182 } 3183 } 3184 3185 // create a new index writer 3186 writer = index.getIndexWriter(report, true); 3187 if (writer instanceof I_CmsSolrIndexWriter) { 3188 try { 3189 ((I_CmsSolrIndexWriter)writer).deleteAllDocuments(); 3190 } catch (IOException e) { 3191 LOG.error(e.getMessage(), e); 3192 } 3193 } 3194 3195 // output start information on the report 3196 report.println( 3197 Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_BEGIN_1, index.getName()), 3198 I_CmsReport.FORMAT_HEADLINE); 3199 3200 // iterate all configured index sources of this index 3201 Iterator<CmsSearchIndexSource> sources = index.getSources().iterator(); 3202 while (sources.hasNext()) { 3203 // get the next index source 3204 CmsSearchIndexSource source = sources.next(); 3205 // create the indexer 3206 I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index); 3207 // new index creation, use all resources from the index source 3208 indexer.rebuildIndex(writer, threadManager, source); 3209 3210 // wait for indexing threads to finish 3211 while (threadManager.isRunning()) { 3212 try { 3213 Thread.sleep(500); 3214 } catch (InterruptedException e) { 3215 // just continue with the loop after interruption 3216 LOG.info(e.getLocalizedMessage(), e); 3217 } 3218 } 3219 3220 // commit and optimize the index after each index source has been finished 3221 try { 3222 writer.commit(); 3223 } catch (IOException e) { 3224 if (LOG.isWarnEnabled()) { 3225 LOG.warn( 3226 Messages.get().getBundle().key( 3227 Messages.LOG_IO_INDEX_WRITER_COMMIT_2, 3228 index.getName(), 3229 index.getPath()), 3230 e); 3231 } 3232 } 3233 try { 3234 writer.optimize(); 3235 } catch (IOException e) { 3236 if (LOG.isWarnEnabled()) { 3237 LOG.warn( 3238 Messages.get().getBundle().key( 3239 Messages.LOG_IO_INDEX_WRITER_OPTIMIZE_2, 3240 index.getName(), 3241 index.getPath()), 3242 e); 3243 } 3244 } 3245 } 3246 3247 // we are sure here that indexInternal is not null 3248 if (backup != null) { 3249 // remove the backup after the files have been re-indexed 3250 indexInternal.indexSearcherClose(); 3251 indexInternal.removeIndexBackup(backup); 3252 } 3253 3254 // output finish information on the report 3255 report.println( 3256 Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_END_1, index.getName()), 3257 I_CmsReport.FORMAT_HEADLINE); 3258 3259 } finally { 3260 if (writer != null) { 3261 try { 3262 writer.close(); 3263 } catch (IOException e) { 3264 if (LOG.isWarnEnabled()) { 3265 LOG.warn( 3266 Messages.get().getBundle().key( 3267 Messages.LOG_IO_INDEX_WRITER_CLOSE_2, 3268 index.getPath(), 3269 index.getName()), 3270 e); 3271 } 3272 } 3273 } 3274 if (isOfflineIndex) { 3275 // reset the mode of the offline index 3276 index.setRebuildMode(I_CmsSearchIndex.REBUILD_MODE_OFFLINE); 3277 // re-initialize the offline indexes, this will re-enable this index 3278 initOfflineIndexes(); 3279 } 3280 // index has changed - initialize the index searcher instance 3281 index.onIndexChanged(true); 3282 } 3283 3284 // show information about indexing runtime 3285 threadManager.reportStatistics(report); 3286 } 3287 3288 /** 3289 * Incrementally updates the given index.<p> 3290 * 3291 * @param cms the OpenCms user context to use for accessing the VFS 3292 * @param index the index to update 3293 * @param report the report to write output messages to 3294 * @param resourcesToIndex a list of <code>{@link CmsPublishedResource}</code> objects to update in the index 3295 * 3296 * @throws CmsException if something goes wrong 3297 */ 3298 protected void updateIndexIncremental( 3299 CmsObject cms, 3300 I_CmsSearchIndex index, 3301 I_CmsReport report, 3302 List<CmsPublishedResource> resourcesToIndex) 3303 throws CmsException { 3304 3305 try { 3306 SEARCH_MANAGER_LOCK.lock(); 3307 3308 // update the existing index 3309 List<CmsSearchIndexUpdateData> updateCollections = new ArrayList<CmsSearchIndexUpdateData>(); 3310 3311 boolean hasResourcesToDelete = false; 3312 boolean hasResourcesToUpdate = false; 3313 3314 // iterate all configured index sources of this index 3315 Iterator<CmsSearchIndexSource> sources = index.getSources().iterator(); 3316 while (sources.hasNext()) { 3317 // get the next index source 3318 CmsSearchIndexSource source = sources.next(); 3319 // create the indexer 3320 I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index); 3321 // collect the resources to update 3322 CmsSearchIndexUpdateData updateData = indexer.getUpdateData(source, resourcesToIndex); 3323 if (!updateData.isEmpty()) { 3324 // add the update collection to the internal pipeline 3325 updateCollections.add(updateData); 3326 hasResourcesToDelete = hasResourcesToDelete | updateData.hasResourcesToDelete(); 3327 hasResourcesToUpdate = hasResourcesToUpdate | updateData.hasResourceToUpdate(); 3328 } 3329 } 3330 3331 // only start index modification if required 3332 if (hasResourcesToDelete || hasResourcesToUpdate) { 3333 // output start information on the report 3334 report.println( 3335 Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_BEGIN_1, index.getName()), 3336 I_CmsReport.FORMAT_HEADLINE); 3337 3338 I_CmsIndexWriter writer = null; 3339 try { 3340 // obtain an index writer that updates the current index 3341 writer = index.getIndexWriter(report, false); 3342 3343 if (hasResourcesToDelete) { 3344 // delete the resource from the index 3345 Iterator<CmsSearchIndexUpdateData> i = updateCollections.iterator(); 3346 while (i.hasNext()) { 3347 CmsSearchIndexUpdateData updateCollection = i.next(); 3348 if (updateCollection.hasResourcesToDelete()) { 3349 updateCollection.getIndexer().deleteResources( 3350 writer, 3351 updateCollection.getResourcesToDelete()); 3352 } 3353 } 3354 } 3355 3356 if (hasResourcesToUpdate) { 3357 // create a new thread manager 3358 CmsIndexingThreadManager threadManager = getThreadManager(); 3359 3360 Iterator<CmsSearchIndexUpdateData> i = updateCollections.iterator(); 3361 while (i.hasNext()) { 3362 CmsSearchIndexUpdateData updateCollection = i.next(); 3363 if (updateCollection.hasResourceToUpdate()) { 3364 updateCollection.getIndexer().updateResources( 3365 writer, 3366 threadManager, 3367 updateCollection.getResourcesToUpdate()); 3368 } 3369 } 3370 3371 // wait for indexing threads to finish 3372 while (threadManager.isRunning()) { 3373 try { 3374 Thread.sleep(500); 3375 } catch (InterruptedException e) { 3376 // just continue with the loop after interruption 3377 LOG.info(e.getLocalizedMessage(), e); 3378 } 3379 } 3380 } 3381 } finally { 3382 // close the index writer 3383 if (writer != null) { 3384 try { 3385 writer.commit(); 3386 } catch (IOException e) { 3387 LOG.error( 3388 Messages.get().getBundle().key( 3389 Messages.LOG_IO_INDEX_WRITER_COMMIT_2, 3390 index.getName(), 3391 index.getPath()), 3392 e); 3393 } 3394 } 3395 // index has changed - initialize the index searcher instance 3396 index.onIndexChanged(false); 3397 } 3398 3399 // output finish information on the report 3400 report.println( 3401 Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_END_1, index.getName()), 3402 I_CmsReport.FORMAT_HEADLINE); 3403 } 3404 } finally { 3405 SEARCH_MANAGER_LOCK.unlock(); 3406 } 3407 } 3408 3409 /** 3410 * Updates the offline search indexes for the given list of resources.<p> 3411 * 3412 * @param report the report to write the index information to 3413 * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index 3414 */ 3415 protected void updateIndexOffline(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) { 3416 3417 CmsObject cms = m_adminCms; 3418 try { 3419 // copy the administration context for the indexing 3420 cms = OpenCms.initCmsObject(m_adminCms); 3421 // set site root and project for this index 3422 cms.getRequestContext().setSiteRoot("/"); 3423 } catch (CmsException e) { 3424 LOG.error(e.getLocalizedMessage(), e); 3425 } 3426 3427 Iterator<I_CmsSearchIndex> j = m_offlineIndexes.iterator(); 3428 while (j.hasNext()) { 3429 I_CmsSearchIndex index = j.next(); 3430 if (index.getSources() != null) { 3431 try { 3432 // switch to the index project 3433 cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject())); 3434 updateIndexIncremental(cms, index, report, resourcesToIndex); 3435 } catch (CmsException e) { 3436 LOG.error(Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()), e); 3437 } 3438 } 3439 } 3440 } 3441 3442 /** 3443 * Checks if the given containerpage is used as a detail containers and adds the related detail content to the resource set.<p> 3444 * 3445 * @param adminCms the cms context 3446 * @param containerPages the containerpages 3447 * @param containerPage the container page site path 3448 */ 3449 private void addDetailContent(CmsObject adminCms, Set<CmsResource> containerPages, String containerPage) { 3450 3451 if (CmsDetailOnlyContainerUtil.isDetailContainersPage(adminCms, containerPage)) { 3452 3453 try { 3454 CmsResource detailRes = adminCms.readResource( 3455 CmsDetailOnlyContainerUtil.getDetailContentPath(containerPage), 3456 CmsResourceFilter.IGNORE_EXPIRATION); 3457 containerPages.add(detailRes); 3458 } catch (Throwable e) { 3459 if (LOG.isWarnEnabled()) { 3460 LOG.warn(e.getLocalizedMessage(), e); 3461 } 3462 } 3463 } 3464 } 3465 3466 /** 3467 * Creates the Solr core container.<p> 3468 * 3469 * @return the created core container 3470 */ 3471 private CoreContainer createCoreContainer() { 3472 3473 CoreContainer container = null; 3474 try { 3475 // get the core container 3476 // still no core container: create it 3477 container = CoreContainer.createAndLoad( 3478 Paths.get(m_solrConfig.getHome()), 3479 m_solrConfig.getSolrFile().toPath()); 3480 if (CmsLog.INIT.isInfoEnabled()) { 3481 CmsLog.INIT.info( 3482 Messages.get().getBundle().key( 3483 Messages.INIT_SOLR_CORE_CONTAINER_CREATED_2, 3484 m_solrConfig.getHome(), 3485 m_solrConfig.getSolrFile().getName())); 3486 } 3487 } catch (Exception e) { 3488 LOG.error( 3489 Messages.get().getBundle().key( 3490 Messages.ERR_SOLR_CORE_CONTAINER_NOT_CREATED_1, 3491 m_solrConfig.getSolrFile().getAbsolutePath()), 3492 e); 3493 } 3494 return container; 3495 3496 } 3497 3498 /** 3499 * Remove write.lock file in the data directory to ensure the index is unlocked. 3500 * @param dataDir the data directory of the Solr index that should be unlocked. 3501 */ 3502 private void ensureIndexIsUnlocked(String dataDir) { 3503 3504 Collection<File> lockFiles = new ArrayList<File>(2); 3505 lockFiles.add( 3506 new File( 3507 CmsFileUtil.addTrailingSeparator(CmsFileUtil.addTrailingSeparator(dataDir) + "index") + "write.lock")); 3508 lockFiles.add( 3509 new File( 3510 CmsFileUtil.addTrailingSeparator(CmsFileUtil.addTrailingSeparator(dataDir) + "spellcheck") 3511 + "write.lock")); 3512 for (File lockFile : lockFiles) { 3513 if (lockFile.exists()) { 3514 lockFile.delete(); 3515 LOG.warn( 3516 "Forcely unlocking index with data dir \"" 3517 + dataDir 3518 + "\" by removing file \"" 3519 + lockFile.getAbsolutePath() 3520 + "\"."); 3521 } 3522 } 3523 } 3524 3525 /** 3526 * Returns the report in the given event data, if <code>null</code> 3527 * a new log report is used.<p> 3528 * 3529 * @param event the event to get the report for 3530 * 3531 * @return the report 3532 */ 3533 private I_CmsReport getEventReport(CmsEvent event) { 3534 3535 I_CmsReport report = null; 3536 if (event.getData() != null) { 3537 report = (I_CmsReport)event.getData().get(I_CmsEventListener.KEY_REPORT); 3538 } 3539 if (report == null) { 3540 report = new CmsLogReport(Locale.ENGLISH, getClass()); 3541 } 3542 return report; 3543 } 3544 3545 /** 3546 * Gets all structure ids for which published resources of both states 'new' and 'deleted' exist in the given list.<p> 3547 * 3548 * @param publishedResources a list of published resources 3549 * 3550 * @return the set of structure ids that satisfy the condition above 3551 */ 3552 private Set<CmsUUID> getIdsOfPublishResourcesWhichAreBothNewAndDeleted( 3553 List<CmsPublishedResource> publishedResources) { 3554 3555 Set<CmsUUID> result = new HashSet<CmsUUID>(); 3556 Set<CmsUUID> deletedSet = new HashSet<CmsUUID>(); 3557 for (CmsPublishedResource pubRes : publishedResources) { 3558 if (pubRes.getState().isNew()) { 3559 result.add(pubRes.getStructureId()); 3560 } 3561 if (pubRes.getState().isDeleted()) { 3562 deletedSet.add(pubRes.getStructureId()); 3563 } 3564 } 3565 result.retainAll(deletedSet); 3566 return result; 3567 } 3568 3569 /** 3570 * Checks if the given type id belongs to a group type. 3571 * 3572 * @param type the type id to check 3573 * @return true if the type is a group type 3574 */ 3575 private boolean isGroup(int type) { 3576 3577 for (String groupType : groupTypes) { 3578 if (OpenCms.getResourceManager().matchResourceType(groupType, type)) { 3579 return true; 3580 } 3581 } 3582 return false; 3583 3584 } 3585 3586 /** 3587 * Shuts down the Solr core container.<p> 3588 */ 3589 private void shutDownSolrContainer() { 3590 3591 if (m_coreContainer != null) { 3592 for (SolrCore core : m_coreContainer.getCores()) { 3593 // do not unload spellcheck core because otherwise the core.properties file is removed 3594 // even when calling m_coreContainer.unload(core.getName(), false, false, false); 3595 if (!core.getName().equals(CmsSolrSpellchecker.SPELLCHECKER_INDEX_CORE)) { 3596 m_coreContainer.unload(core.getName(), false, false, true); 3597 } 3598 } 3599 m_coreContainer.shutdown(); 3600 if (CmsLog.INIT.isInfoEnabled()) { 3601 CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SOLR_SHUTDOWN_SUCCESS_0)); 3602 } 3603 m_coreContainer = null; 3604 } 3605 } 3606 3607}