001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software GmbH & Co. KG, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.search; 029 030import org.opencms.ade.containerpage.CmsDetailOnlyContainerUtil; 031import org.opencms.configuration.CmsConfigurationException; 032import org.opencms.db.CmsDriverManager; 033import org.opencms.db.CmsPublishedResource; 034import org.opencms.db.CmsResourceState; 035import org.opencms.file.CmsObject; 036import org.opencms.file.CmsProject; 037import org.opencms.file.CmsResource; 038import org.opencms.file.CmsResourceFilter; 039import org.opencms.file.CmsUser; 040import org.opencms.file.types.CmsResourceTypeXmlContainerPage; 041import org.opencms.file.types.CmsResourceTypeXmlContent; 042import org.opencms.file.types.I_CmsResourceType; 043import org.opencms.i18n.CmsLocaleManager; 044import org.opencms.i18n.CmsMessageContainer; 045import org.opencms.loader.CmsLoaderException; 046import org.opencms.loader.CmsResourceManager; 047import org.opencms.main.CmsBroadcast.ContentMode; 048import org.opencms.main.CmsEvent; 049import org.opencms.main.CmsException; 050import org.opencms.main.CmsIllegalArgumentException; 051import org.opencms.main.CmsIllegalStateException; 052import org.opencms.main.CmsLog; 053import org.opencms.main.I_CmsEventListener; 054import org.opencms.main.OpenCms; 055import org.opencms.main.OpenCmsSolrHandler; 056import org.opencms.relations.CmsRelation; 057import org.opencms.relations.CmsRelationFilter; 058import org.opencms.relations.CmsRelationType; 059import org.opencms.report.CmsLogReport; 060import org.opencms.report.CmsShellLogReport; 061import org.opencms.report.I_CmsReport; 062import org.opencms.scheduler.I_CmsScheduledJob; 063import org.opencms.search.documents.A_CmsVfsDocument; 064import org.opencms.search.documents.CmsExtractionResultCache; 065import org.opencms.search.documents.I_CmsDocumentFactory; 066import org.opencms.search.documents.I_CmsTermHighlighter; 067import org.opencms.search.fields.CmsLuceneField; 068import org.opencms.search.fields.CmsLuceneFieldConfiguration; 069import org.opencms.search.fields.CmsSearchField; 070import org.opencms.search.fields.CmsSearchFieldConfiguration; 071import org.opencms.search.fields.CmsSearchFieldMapping; 072import org.opencms.search.fields.I_CmsSearchFieldConfiguration; 073import org.opencms.search.solr.CmsSolrConfiguration; 074import org.opencms.search.solr.CmsSolrFieldConfiguration; 075import org.opencms.search.solr.CmsSolrIndex; 076import org.opencms.search.solr.I_CmsSolrIndexWriter; 077import org.opencms.search.solr.spellchecking.CmsSolrSpellchecker; 078import org.opencms.search.solr.spellchecking.CmsSpellcheckDictionaryIndexer; 079import org.opencms.security.CmsRole; 080import org.opencms.security.CmsRoleViolationException; 081import org.opencms.util.A_CmsModeStringEnumeration; 082import org.opencms.util.CmsFileUtil; 083import org.opencms.util.CmsStringUtil; 084import org.opencms.util.CmsUUID; 085import org.opencms.util.CmsWaitHandle; 086 087import java.io.File; 088import java.io.IOException; 089import java.nio.file.FileSystems; 090import java.nio.file.Paths; 091import java.util.ArrayList; 092import java.util.Collection; 093import java.util.Collections; 094import java.util.HashMap; 095import java.util.HashSet; 096import java.util.Iterator; 097import java.util.LinkedHashMap; 098import java.util.List; 099import java.util.ListIterator; 100import java.util.Locale; 101import java.util.Map; 102import java.util.Set; 103import java.util.TreeMap; 104import java.util.concurrent.locks.ReentrantLock; 105import java.util.stream.Collectors; 106 107import org.apache.commons.logging.Log; 108import org.apache.lucene.analysis.Analyzer; 109import org.apache.lucene.analysis.CharArraySet; 110import org.apache.lucene.analysis.standard.StandardAnalyzer; 111import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; 112import org.apache.solr.client.solrj.impl.HttpSolrClient.Builder; 113import org.apache.solr.core.CoreContainer; 114import org.apache.solr.core.CoreDescriptor; 115import org.apache.solr.core.SolrCore; 116 117/** 118 * Implements the general management and configuration of the search and 119 * indexing facilities in OpenCms.<p> 120 * 121 * @since 6.0.0 122 */ 123public class CmsSearchManager implements I_CmsScheduledJob, I_CmsEventListener { 124 125 /** 126 * Enumeration class for force unlock types.<p> 127 */ 128 public static final class CmsSearchForceUnlockMode extends A_CmsModeStringEnumeration { 129 130 /** Force unlock type "always". */ 131 public static final CmsSearchForceUnlockMode ALWAYS = new CmsSearchForceUnlockMode("always"); 132 133 /** Force unlock type "never". */ 134 public static final CmsSearchForceUnlockMode NEVER = new CmsSearchForceUnlockMode("never"); 135 136 /** Force unlock type "only full". */ 137 public static final CmsSearchForceUnlockMode ONLYFULL = new CmsSearchForceUnlockMode("onlyfull"); 138 139 /** Serializable version id. */ 140 private static final long serialVersionUID = 74746076708908673L; 141 142 /** 143 * Creates a new force unlock type with the given name.<p> 144 * 145 * @param mode the mode id to use 146 */ 147 protected CmsSearchForceUnlockMode(String mode) { 148 149 super(mode); 150 } 151 152 /** 153 * Returns the lock type for the given type value.<p> 154 * 155 * @param type the type value to get the lock type for 156 * 157 * @return the lock type for the given type value 158 */ 159 public static CmsSearchForceUnlockMode valueOf(String type) { 160 161 if (type.equals(ALWAYS.toString())) { 162 return ALWAYS; 163 } else if (type.equals(NEVER.toString())) { 164 return NEVER; 165 } else { 166 return ONLYFULL; 167 } 168 } 169 } 170 171 /** 172 * Handles offline index generation.<p> 173 */ 174 protected class CmsSearchOfflineHandler implements I_CmsEventListener { 175 176 /** Indicates if the event handlers for the offline search have been already registered. */ 177 private boolean m_isEventRegistered; 178 179 /** The list of resources to index. */ 180 private List<CmsPublishedResource> m_resourcesToIndex; 181 182 /** 183 * Initializes the offline index handler.<p> 184 */ 185 protected CmsSearchOfflineHandler() { 186 187 m_resourcesToIndex = new ArrayList<CmsPublishedResource>(); 188 } 189 190 /** 191 * Implements the event listener of this class.<p> 192 * 193 * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent) 194 */ 195 @SuppressWarnings("unchecked") 196 public void cmsEvent(CmsEvent event) { 197 198 Object change = event.getData().get(I_CmsEventListener.KEY_CHANGE); 199 switch (event.getType()) { 200 case I_CmsEventListener.EVENT_PROPERTY_MODIFIED: 201 case I_CmsEventListener.EVENT_RESOURCE_CREATED: 202 case I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED: 203 case I_CmsEventListener.EVENT_RESOURCE_MODIFIED: 204 if ((change != null) && change.equals(new Integer(CmsDriverManager.NOTHING_CHANGED))) { 205 // skip lock & unlock 206 return; 207 } 208 // skip indexing if flag is set in event 209 Object skip = event.getData().get(I_CmsEventListener.KEY_SKIPINDEX); 210 if (skip != null) { 211 return; 212 } 213 214 // a resource has been modified - offline indexes require (re)indexing 215 List<CmsResource> resources = Collections.singletonList( 216 (CmsResource)event.getData().get(I_CmsEventListener.KEY_RESOURCE)); 217 reIndexResources(resources); 218 break; 219 case I_CmsEventListener.EVENT_RESOURCE_DELETED: 220 List<CmsResource> eventResources = (List<CmsResource>)event.getData().get( 221 I_CmsEventListener.KEY_RESOURCES); 222 List<CmsResource> resourcesToDelete = new ArrayList<CmsResource>(eventResources); 223 for (CmsResource res : resourcesToDelete) { 224 if (res.getState().isNew()) { 225 // if the resource is new and a delete action was performed 226 // --> set the state of the resource to deleted 227 res.setState(CmsResourceState.STATE_DELETED); 228 } 229 } 230 reIndexResources(resourcesToDelete); 231 break; 232 case I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED: 233 if (I_CmsEventListener.VALUE_CREATE_SIBLING.equals(change)) { 234 List<CmsResource> resList = (List<CmsResource>)event.getData().get( 235 I_CmsEventListener.KEY_RESOURCES); 236 if ((resList != null) && (resList.size() >= 3)) { 237 System.out.println("Sibling creation case, resource = " + resList.get(1).getRootPath()); 238 reIndexResources(Collections.singletonList(resList.get(1))); 239 240 } 241 } else { 242 reIndexResources((List<CmsResource>)event.getData().get(I_CmsEventListener.KEY_RESOURCES)); 243 } 244 break; 245 case I_CmsEventListener.EVENT_RESOURCE_MOVED: 246 case I_CmsEventListener.EVENT_RESOURCE_COPIED: 247 case I_CmsEventListener.EVENT_RESOURCES_MODIFIED: 248 249 // a list of resources has been modified - offline indexes require (re)indexing 250 reIndexResources((List<CmsResource>)event.getData().get(I_CmsEventListener.KEY_RESOURCES)); 251 break; 252 default: 253 // no operation 254 } 255 } 256 257 /** 258 * Adds a list of {@link CmsPublishedResource} objects to be indexed.<p> 259 * 260 * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to be indexed 261 */ 262 protected synchronized void addResourcesToIndex(List<CmsPublishedResource> resourcesToIndex) { 263 264 m_resourcesToIndex.addAll(resourcesToIndex); 265 } 266 267 /** 268 * Returns the list of {@link CmsPublishedResource} objects to index.<p> 269 * 270 * @return the resources to index 271 */ 272 protected List<CmsPublishedResource> getResourcesToIndex() { 273 274 List<CmsPublishedResource> result; 275 synchronized (this) { 276 result = m_resourcesToIndex; 277 m_resourcesToIndex = new ArrayList<CmsPublishedResource>(); 278 } 279 try { 280 CmsObject cms = m_adminCms; 281 CmsProject offline = getOfflineIndexProject(); 282 if (offline != null) { 283 // switch to the offline project if available 284 cms = OpenCms.initCmsObject(m_adminCms); 285 cms.getRequestContext().setCurrentProject(offline); 286 } 287 addAdditionallyAffectedResources(cms, result); 288 } catch (CmsException e) { 289 LOG.error(e.getLocalizedMessage(), e); 290 } 291 return result; 292 } 293 294 /** 295 * Initializes this offline search handler, registering the event handlers if required.<p> 296 */ 297 protected void initialize() { 298 299 if (m_offlineIndexes.size() > 0) { 300 // there is at least one offline index configured 301 if ((m_offlineIndexThread == null) || !m_offlineIndexThread.isAlive()) { 302 // create the offline indexing thread 303 m_offlineIndexThread = new CmsSearchOfflineIndexThread(this); 304 // start the offline index thread 305 m_offlineIndexThread.start(); 306 } 307 } else { 308 if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) { 309 // no offline indexes but thread still running, stop the thread 310 m_offlineIndexThread.shutDown(); 311 m_offlineIndexThread = null; 312 } 313 } 314 // do this only in case there are offline indexes configured 315 if (!m_isEventRegistered && (m_offlineIndexes.size() > 0)) { 316 m_isEventRegistered = true; 317 // register this object as event listener 318 OpenCms.addCmsEventListener( 319 this, 320 new int[] { 321 I_CmsEventListener.EVENT_PROPERTY_MODIFIED, 322 I_CmsEventListener.EVENT_RESOURCE_CREATED, 323 I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED, 324 I_CmsEventListener.EVENT_RESOURCE_MODIFIED, 325 I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED, 326 I_CmsEventListener.EVENT_RESOURCE_MOVED, 327 I_CmsEventListener.EVENT_RESOURCE_DELETED, 328 I_CmsEventListener.EVENT_RESOURCE_COPIED, 329 I_CmsEventListener.EVENT_RESOURCES_MODIFIED}); 330 } 331 } 332 333 /** 334 * Updates all offline indexes for the given list of {@link CmsResource} objects.<p> 335 * 336 * @param resources a list of {@link CmsResource} objects to update in the offline indexes 337 */ 338 protected synchronized void reIndexResources(List<CmsResource> resources) { 339 340 List<CmsPublishedResource> resourcesToIndex = new ArrayList<CmsPublishedResource>(resources.size()); 341 for (CmsResource res : resources) { 342 CmsPublishedResource pubRes = new CmsPublishedResource(res); 343 resourcesToIndex.add(pubRes); 344 } 345 if (resourcesToIndex.size() > 0) { 346 // add the resources found to the offline index thread 347 addResourcesToIndex(resourcesToIndex); 348 } 349 } 350 } 351 352 /** 353 * The offline indexer thread runs periodically and indexes all resources added by the event handler.<p> 354 */ 355 protected class CmsSearchOfflineIndexThread extends Thread { 356 357 /** The event handler that triggers this thread. */ 358 CmsSearchOfflineHandler m_handler; 359 360 /** Indicates if this thread is still alive. */ 361 boolean m_isAlive; 362 363 /** Indicates that an index update thread is currently running. */ 364 private boolean m_isUpdating; 365 366 /** If true a manual update (after file upload) was triggered. */ 367 private boolean m_updateTriggered; 368 369 /** The wait handle used for signalling when the worker thread has finished. */ 370 private CmsWaitHandle m_waitHandle = new CmsWaitHandle(); 371 372 /** 373 * Constructor.<p> 374 * 375 * @param handler the offline index event handler 376 */ 377 protected CmsSearchOfflineIndexThread(CmsSearchOfflineHandler handler) { 378 379 super("OpenCms: Offline Search Indexer"); 380 m_handler = handler; 381 } 382 383 /** 384 * Gets the wait handle used for signalling when the worker thread has finished. 385 * 386 * @return the wait handle 387 **/ 388 public CmsWaitHandle getWaitHandle() { 389 390 return m_waitHandle; 391 } 392 393 /** 394 * @see java.lang.Thread#interrupt() 395 */ 396 @Override 397 public void interrupt() { 398 399 super.interrupt(); 400 m_updateTriggered = true; 401 } 402 403 /** 404 * @see java.lang.Thread#run() 405 */ 406 @Override 407 public void run() { 408 409 // create a log report for the output 410 I_CmsReport report = new CmsLogReport(m_adminCms.getRequestContext().getLocale(), CmsSearchManager.class); 411 long offlineUpdateFrequency = getOfflineUpdateFrequency(); 412 m_updateTriggered = false; 413 try { 414 while (m_isAlive) { 415 if (!m_updateTriggered) { 416 try { 417 sleep(offlineUpdateFrequency); 418 } catch (InterruptedException e) { 419 // continue the thread after interruption 420 if (!m_isAlive) { 421 // the thread has been shut down while sleeping 422 continue; 423 } 424 if (offlineUpdateFrequency != getOfflineUpdateFrequency()) { 425 // offline update frequency change - clear interrupt status 426 offlineUpdateFrequency = getOfflineUpdateFrequency(); 427 } 428 LOG.info(e.getLocalizedMessage(), e); 429 } 430 } 431 if (m_isAlive) { 432 // set update trigger to false since we do the update now 433 m_updateTriggered = false; 434 // get list of resource to update 435 List<CmsPublishedResource> resourcesToIndex = getResourcesToIndex(); 436 if (resourcesToIndex.size() > 0) { 437 // only start indexing if there is at least one resource 438 startOfflineUpdateThread(report, resourcesToIndex); 439 } else { 440 getWaitHandle().release(); 441 } 442 // this is just called to clear the interrupt status of the thread 443 interrupted(); 444 } 445 } 446 } finally { 447 // make sure that live status is reset in case of Exceptions 448 m_isAlive = false; 449 } 450 451 } 452 453 /** 454 * @see java.lang.Thread#start() 455 */ 456 @Override 457 public synchronized void start() { 458 459 m_isAlive = true; 460 super.start(); 461 } 462 463 /** 464 * Obtains the list of resource to update in the offline index, 465 * then optimizes the list by removing duplicate entries.<p> 466 * 467 * @return the list of resource to update in the offline index 468 */ 469 protected List<CmsPublishedResource> getResourcesToIndex() { 470 471 List<CmsPublishedResource> resourcesToIndex = m_handler.getResourcesToIndex(); 472 List<CmsPublishedResource> result = new ArrayList<CmsPublishedResource>(resourcesToIndex.size()); 473 474 // Reverse to always keep the last list entries 475 Collections.reverse(resourcesToIndex); 476 for (CmsPublishedResource pubRes : resourcesToIndex) { 477 boolean addResource = true; 478 for (CmsPublishedResource resRes : result) { 479 if (pubRes.equals(resRes) 480 && (pubRes.getState() == resRes.getState()) 481 && (pubRes.getMovedState() == resRes.getMovedState()) 482 && pubRes.getRootPath().equals(resRes.getRootPath())) { 483 // resource already in the update list 484 addResource = false; 485 break; 486 } 487 } 488 if (addResource) { 489 result.add(pubRes); 490 } 491 492 } 493 Collections.reverse(result); 494 return changeStateOfMoveOriginsToDeleted(result); 495 } 496 497 /** 498 * Shuts down this offline index thread.<p> 499 */ 500 protected void shutDown() { 501 502 m_isAlive = false; 503 interrupt(); 504 if (m_isUpdating) { 505 long waitTime = getOfflineUpdateFrequency() / 2; 506 int waitSteps = 0; 507 do { 508 try { 509 // wait half the time of the offline index frequency for the thread to finish 510 Thread.sleep(waitTime); 511 } catch (InterruptedException e) { 512 // continue 513 LOG.info(e.getLocalizedMessage(), e); 514 } 515 waitSteps++; 516 // wait 5 times then stop waiting 517 } while ((waitSteps < 5) && m_isUpdating); 518 } 519 } 520 521 /** 522 * Updates the offline search indexes for the given list of resources.<p> 523 * 524 * @param report the report to write the index information to 525 * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index 526 */ 527 protected void startOfflineUpdateThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) { 528 529 CmsSearchOfflineIndexWorkThread thread = new CmsSearchOfflineIndexWorkThread(report, resourcesToIndex); 530 long startTime = System.currentTimeMillis(); 531 long waitTime = getOfflineUpdateFrequency() / 2; 532 if (LOG.isDebugEnabled()) { 533 LOG.debug( 534 Messages.get().getBundle().key( 535 Messages.LOG_OI_UPDATE_START_1, 536 Integer.valueOf(resourcesToIndex.size()))); 537 } 538 539 m_isUpdating = true; 540 thread.start(); 541 542 do { 543 try { 544 // wait half the time of the offline index frequency for the thread to finish 545 thread.join(waitTime); 546 } catch (InterruptedException e) { 547 // continue 548 LOG.info(e.getLocalizedMessage(), e); 549 } 550 if (thread.isAlive()) { 551 LOG.warn( 552 Messages.get().getBundle().key( 553 Messages.LOG_OI_UPDATE_LONG_2, 554 Integer.valueOf(resourcesToIndex.size()), 555 Long.valueOf(System.currentTimeMillis() - startTime))); 556 } 557 } while (thread.isAlive()); 558 m_isUpdating = false; 559 560 if (LOG.isDebugEnabled()) { 561 LOG.debug( 562 Messages.get().getBundle().key( 563 Messages.LOG_OI_UPDATE_FINISH_2, 564 Integer.valueOf(resourcesToIndex.size()), 565 Long.valueOf(System.currentTimeMillis() - startTime))); 566 } 567 } 568 569 /** 570 * Helper method which changes the states of resources which are to be indexed but have the wrong path to 'deleted'. 571 * This is needed to deal with moved resources, since the documents with the old paths must be removed from the index, 572 * 573 * @param resourcesToIndex the resources to index 574 * 575 * @return the resources to index, but resource states are set to 'deleted' for resources with outdated paths 576 */ 577 private List<CmsPublishedResource> changeStateOfMoveOriginsToDeleted( 578 List<CmsPublishedResource> resourcesToIndex) { 579 580 Map<CmsUUID, String> lastValidPaths = new HashMap<CmsUUID, String>(); 581 for (CmsPublishedResource resource : resourcesToIndex) { 582 if (resource.getState().isDeleted()) { 583 // we don't want the last path to be from a deleted resource 584 continue; 585 } 586 lastValidPaths.put(resource.getStructureId(), resource.getRootPath()); 587 } 588 List<CmsPublishedResource> result = new ArrayList<CmsPublishedResource>(); 589 for (CmsPublishedResource resource : resourcesToIndex) { 590 if (resource.getState().isDeleted()) { 591 result.add(resource); 592 continue; 593 } 594 String lastValidPath = lastValidPaths.get(resource.getStructureId()); 595 if (resource.getRootPath().equals(lastValidPath) || resource.getStructureId().isNullUUID()) { 596 result.add(resource); 597 } else { 598 result.add( 599 new CmsPublishedResource( 600 resource.getStructureId(), 601 resource.getResourceId(), 602 resource.getPublishTag(), 603 resource.getRootPath(), 604 resource.getType(), 605 resource.isFolder(), 606 CmsResource.STATE_DELETED, // make sure index entry with outdated path is deleted 607 resource.getSiblingCount())); 608 } 609 } 610 return result; 611 } 612 } 613 614 /** 615 * An offline index worker Thread runs each time for every offline index update action.<p> 616 * 617 * This was decoupled from the main {@link CmsSearchOfflineIndexThread} in order to avoid 618 * problems if a single operation "hangs" the Tread.<p> 619 */ 620 protected class CmsSearchOfflineIndexWorkThread extends Thread { 621 622 /** The report to write the index information to. */ 623 I_CmsReport m_report; 624 625 /** The list of {@link CmsPublishedResource} objects to index. */ 626 List<CmsPublishedResource> m_resourcesToIndex; 627 628 /** 629 * Updates the offline search indexes for the given list of resources.<p> 630 * 631 * @param report the report to write the index information to 632 * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index 633 */ 634 protected CmsSearchOfflineIndexWorkThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) { 635 636 super("OpenCms: Offline Search Index Worker"); 637 m_report = report; 638 m_resourcesToIndex = resourcesToIndex; 639 } 640 641 /** 642 * @see java.lang.Thread#run() 643 */ 644 @Override 645 public void run() { 646 647 updateIndexOffline(m_report, m_resourcesToIndex); 648 if (m_offlineIndexThread != null) { 649 m_offlineIndexThread.getWaitHandle().release(); 650 } 651 } 652 } 653 654 /** This needs to be a fair lock to preserve order of threads accessing the search manager. */ 655 private static final ReentrantLock SEARCH_MANAGER_LOCK = new ReentrantLock(true); 656 657 /** The default value used for generating search result excerpts (1024 chars). */ 658 public static final int DEFAULT_EXCERPT_LENGTH = 1024; 659 660 /** The default value used for keeping the extraction results in the cache (672 hours = 4 weeks). */ 661 public static final float DEFAULT_EXTRACTION_CACHE_MAX_AGE = 672.0f; 662 663 /** Default for the maximum number of modifications before a commit in the search index is triggered (500). */ 664 public static final int DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT = 500; 665 666 /** The default update frequency for offline indexes (15000 msec = 15 sec). */ 667 public static final int DEFAULT_OFFLINE_UPDATE_FREQNENCY = 15000; 668 669 /** The default maximal wait time for re-indexing after editing a content. */ 670 public static final int DEFAULT_MAX_INDEX_WAITTIME = 30000; 671 672 /** The default timeout value used for generating a document for the search index (60000 msec = 1 min). */ 673 public static final int DEFAULT_TIMEOUT = 60000; 674 675 /** Scheduler parameter: Update only a specified list of indexes. */ 676 public static final String JOB_PARAM_INDEXLIST = "indexList"; 677 678 /** Scheduler parameter: Write the output of the update to the logfile. */ 679 public static final String JOB_PARAM_WRITELOG = "writeLog"; 680 681 /** Prefix for Lucene default analyzers package (<code>org.apache.lucene.analysis.</code>). */ 682 public static final String LUCENE_ANALYZER = "org.apache.lucene.analysis.core."; 683 684 /** The log object for this class. */ 685 protected static final Log LOG = CmsLog.getLog(CmsSearchManager.class); 686 687 /** List of resource types which represent groups of elements. */ 688 private static final String[] groupTypes = { 689 CmsResourceTypeXmlContainerPage.MODEL_GROUP_TYPE_NAME, 690 CmsResourceTypeXmlContainerPage.GROUP_CONTAINER_TYPE_NAME, 691 CmsResourceTypeXmlContainerPage.INHERIT_CONTAINER_TYPE_NAME}; 692 693 /** The administrator OpenCms user context to access OpenCms VFS resources. */ 694 protected CmsObject m_adminCms; 695 696 /** The list of indexes that are configured for offline index mode. */ 697 protected List<I_CmsSearchIndex> m_offlineIndexes; 698 699 /** The thread used of offline indexing. */ 700 protected CmsSearchOfflineIndexThread m_offlineIndexThread; 701 702 /** Configured analyzers for languages using <analyzer>. */ 703 private HashMap<Locale, CmsSearchAnalyzer> m_analyzers; 704 705 /** Stores the offline update frequency while indexing is paused. */ 706 private long m_configuredOfflineIndexingFrequency; 707 708 /** The Solr core container. */ 709 private CoreContainer m_coreContainer; 710 711 /** A map of document factory configurations. */ 712 private List<CmsSearchDocumentType> m_documentTypeConfigs; 713 714 /** A map of document factories keyed first by their name and then by their extraction keys. */ 715 private Map<String, Map<String, I_CmsDocumentFactory>> m_documentTypes; 716 717 /** The set of all globally available extraction keys for document factories. */ 718 private Set<String> m_extractionKeys; 719 720 /** The max age for extraction results to remain in the cache. */ 721 private float m_extractionCacheMaxAge; 722 723 /** The cache for the extraction results. */ 724 private CmsExtractionResultCache m_extractionResultCache; 725 726 /** Contains the available field configurations. */ 727 private Map<String, I_CmsSearchFieldConfiguration> m_fieldConfigurations; 728 729 /** The force unlock type. */ 730 private CmsSearchForceUnlockMode m_forceUnlockMode; 731 732 /** The class used to highlight the search terms in the excerpt of a search result. */ 733 private I_CmsTermHighlighter m_highlighter; 734 735 /** A list of search indexes. */ 736 private List<I_CmsSearchIndex> m_indexes; 737 738 /** Seconds to wait for an index lock. */ 739 private int m_indexLockMaxWaitSeconds = 10; 740 741 /** Configured index sources. */ 742 private Map<String, CmsSearchIndexSource> m_indexSources; 743 744 /** The max. char. length of the excerpt in the search result. */ 745 private int m_maxExcerptLength; 746 747 /** The maximum number of modifications before a commit in the search index is triggered. */ 748 private int m_maxModificationsBeforeCommit; 749 750 /** The offline index search handler. */ 751 private CmsSearchOfflineHandler m_offlineHandler; 752 753 /** The update frequency of the offline indexer in milliseconds. */ 754 private long m_offlineUpdateFrequency; 755 756 /** The maximal time to wait for re-indexing after a content is edited (in milliseconds). */ 757 private long m_maxIndexWaitTime; 758 759 /** Path to index files below WEB-INF/. */ 760 private String m_path; 761 762 /** The Solr configuration. */ 763 private CmsSolrConfiguration m_solrConfig; 764 765 /** Timeout for abandoning indexing thread. */ 766 private long m_timeout; 767 768 /** 769 * Default constructor when called as cron job.<p> 770 */ 771 public CmsSearchManager() { 772 773 m_documentTypes = new HashMap<String, Map<String, I_CmsDocumentFactory>>(); 774 m_extractionKeys = new HashSet<String>(); 775 m_documentTypeConfigs = new ArrayList<CmsSearchDocumentType>(); 776 m_analyzers = new HashMap<Locale, CmsSearchAnalyzer>(); 777 m_indexes = new ArrayList<I_CmsSearchIndex>(); 778 m_indexSources = new TreeMap<String, CmsSearchIndexSource>(); 779 m_offlineHandler = new CmsSearchOfflineHandler(); 780 m_extractionCacheMaxAge = DEFAULT_EXTRACTION_CACHE_MAX_AGE; 781 m_maxExcerptLength = DEFAULT_EXCERPT_LENGTH; 782 m_offlineUpdateFrequency = DEFAULT_OFFLINE_UPDATE_FREQNENCY; 783 m_maxIndexWaitTime = DEFAULT_MAX_INDEX_WAITTIME; 784 m_maxModificationsBeforeCommit = DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT; 785 786 m_fieldConfigurations = new HashMap<String, I_CmsSearchFieldConfiguration>(); 787 // make sure we have a "standard" field configuration 788 addFieldConfiguration(CmsLuceneFieldConfiguration.DEFAULT_STANDARD); 789 790 if (CmsLog.INIT.isInfoEnabled()) { 791 CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_START_SEARCH_CONFIG_0)); 792 } 793 } 794 795 /** 796 * Returns an analyzer for the given class name.<p> 797 * 798 * @param className the class name of the analyzer 799 * 800 * @return the appropriate lucene analyzer 801 * 802 * @throws Exception if something goes wrong 803 */ 804 public static Analyzer getAnalyzer(String className) throws Exception { 805 806 Analyzer analyzer = null; 807 Class<?> analyzerClass; 808 try { 809 analyzerClass = Class.forName(className); 810 } catch (ClassNotFoundException e) { 811 // allow Lucene standard classes to be written in a short form 812 analyzerClass = Class.forName(LUCENE_ANALYZER + className); 813 } 814 815 // since Lucene 3.0 most analyzers need a "version" parameter and don't support an empty constructor 816 if (StandardAnalyzer.class.equals(analyzerClass)) { 817 // the Lucene standard analyzer is used - but without any stopwords. 818 analyzer = new StandardAnalyzer(new CharArraySet(0, false)); 819 } else { 820 analyzer = (Analyzer)analyzerClass.newInstance(); 821 } 822 return analyzer; 823 } 824 825 /** 826 * Returns the Solr index configured with the parameters name. 827 * The parameters must contain a key/value pair with an existing 828 * Solr index, otherwise <code>null</code> is returned.<p> 829 * 830 * @param cms the current context 831 * @param params the parameter map 832 * 833 * @return the best matching Solr index 834 */ 835 public static final CmsSolrIndex getIndexSolr(CmsObject cms, Map<String, String[]> params) { 836 837 String indexName = null; 838 CmsSolrIndex index = null; 839 // try to get the index name from the parameters: 'core' or 'index' 840 if (params != null) { 841 indexName = params.get(OpenCmsSolrHandler.PARAM_CORE) != null 842 ? params.get(OpenCmsSolrHandler.PARAM_CORE)[0] 843 : (params.get(OpenCmsSolrHandler.PARAM_INDEX) != null 844 ? params.get(OpenCmsSolrHandler.PARAM_INDEX)[0] 845 : null); 846 } 847 if (indexName == null) { 848 // if no parameter is specified try to use the default online/offline indexes by context 849 indexName = cms.getRequestContext().getCurrentProject().isOnlineProject() 850 ? CmsSolrIndex.DEFAULT_INDEX_NAME_ONLINE 851 : CmsSolrIndex.DEFAULT_INDEX_NAME_OFFLINE; 852 } 853 // try to get the index 854 index = OpenCms.getSearchManager().getIndexSolr(indexName); 855 if (index == null) { 856 // if there is exactly one index, a missing core / index parameter doesn't matter, since there is no choice. 857 List<CmsSolrIndex> solrs = OpenCms.getSearchManager().getAllSolrIndexes(); 858 if ((solrs != null) && !solrs.isEmpty() && (solrs.size() == 1)) { 859 index = solrs.get(0); 860 } 861 } 862 return index; 863 } 864 865 /** 866 * Returns <code>true</code> if the index for the given name is a Lucene index, <code>false</code> otherwise.<p> 867 * 868 * @param indexName the name of the index to check 869 * 870 * @return <code>true</code> if the index for the given name is a Lucene index 871 */ 872 public static boolean isLuceneIndex(String indexName) { 873 874 I_CmsSearchIndex i = OpenCms.getSearchManager().getIndex(indexName); 875 return (i instanceof CmsSearchIndex) && (!(i instanceof CmsSolrIndex)); 876 } 877 878 /** 879 * Adds an analyzer.<p> 880 * 881 * @param analyzer an analyzer 882 */ 883 public void addAnalyzer(CmsSearchAnalyzer analyzer) { 884 885 m_analyzers.put(analyzer.getLocale(), analyzer); 886 887 if (CmsLog.INIT.isInfoEnabled()) { 888 CmsLog.INIT.info( 889 Messages.get().getBundle().key( 890 Messages.INIT_ADD_ANALYZER_2, 891 analyzer.getLocale(), 892 analyzer.getClassName())); 893 } 894 } 895 896 /** 897 * Adds a document type.<p> 898 * 899 * @param documentType a document type 900 */ 901 public void addDocumentTypeConfig(CmsSearchDocumentType documentType) { 902 903 m_documentTypeConfigs.add(documentType); 904 905 if (CmsLog.INIT.isInfoEnabled()) { 906 CmsLog.INIT.info( 907 Messages.get().getBundle().key( 908 Messages.INIT_SEARCH_DOC_TYPES_2, 909 documentType.getName(), 910 documentType.getClassName())); 911 } 912 } 913 914 /** 915 * Adds a search field configuration to the search manager.<p> 916 * 917 * @param fieldConfiguration the search field configuration to add 918 */ 919 public void addFieldConfiguration(I_CmsSearchFieldConfiguration fieldConfiguration) { 920 921 m_fieldConfigurations.put(fieldConfiguration.getName(), fieldConfiguration); 922 } 923 924 /** 925 * Adds a search index to the configuration.<p> 926 * 927 * @param searchIndex the search index to add 928 */ 929 public void addSearchIndex(I_CmsSearchIndex searchIndex) { 930 931 if (!searchIndex.isInitialized()) { 932 if (OpenCms.getRunLevel() > OpenCms.RUNLEVEL_2_INITIALIZING) { 933 try { 934 searchIndex.initialize(); 935 } catch (CmsException e) { 936 // should never happen 937 LOG.error(e.getMessage(), e); 938 } 939 } 940 } 941 942 // name: not null or emtpy and unique 943 String name = searchIndex.getName(); 944 if (CmsStringUtil.isEmptyOrWhitespaceOnly(name)) { 945 throw new CmsIllegalArgumentException( 946 Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_MISSING_NAME_0)); 947 } 948 if (m_indexSources.keySet().contains(name)) { 949 throw new CmsIllegalArgumentException( 950 Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_INVALID_NAME_1, name)); 951 } 952 953 m_indexes.add(searchIndex); 954 if (m_adminCms != null) { 955 initOfflineIndexes(); 956 } 957 958 if (CmsLog.INIT.isInfoEnabled()) { 959 CmsLog.INIT.info( 960 Messages.get().getBundle().key( 961 Messages.INIT_ADD_SEARCH_INDEX_2, 962 searchIndex.getName(), 963 searchIndex.getProject())); 964 } 965 } 966 967 /** 968 * Adds a search index source configuration.<p> 969 * 970 * @param searchIndexSource a search index source configuration 971 */ 972 public void addSearchIndexSource(CmsSearchIndexSource searchIndexSource) { 973 974 m_indexSources.put(searchIndexSource.getName(), searchIndexSource); 975 976 if (CmsLog.INIT.isInfoEnabled()) { 977 CmsLog.INIT.info( 978 Messages.get().getBundle().key( 979 Messages.INIT_SEARCH_INDEX_SOURCE_2, 980 searchIndexSource.getName(), 981 searchIndexSource.getIndexerClassName())); 982 } 983 } 984 985 /** 986 * Implements the event listener of this class.<p> 987 * 988 * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent) 989 */ 990 public void cmsEvent(CmsEvent event) { 991 992 switch (event.getType()) { 993 case I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES: 994 List<String> indexNames = null; 995 if ((event.getData() != null) 996 && CmsStringUtil.isNotEmptyOrWhitespaceOnly( 997 (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES))) { 998 indexNames = CmsStringUtil.splitAsList( 999 (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES), 1000 ",", 1001 true); 1002 } 1003 try { 1004 if (LOG.isDebugEnabled()) { 1005 LOG.debug( 1006 Messages.get().getBundle().key( 1007 Messages.LOG_EVENT_REBUILD_SEARCHINDEX_1, 1008 indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")), 1009 new Exception()); 1010 } 1011 if (indexNames == null) { 1012 rebuildAllIndexes(getEventReport(event)); 1013 } else { 1014 rebuildIndexes(indexNames, getEventReport(event)); 1015 } 1016 } catch (CmsException e) { 1017 if (LOG.isErrorEnabled()) { 1018 LOG.error( 1019 Messages.get().getBundle().key( 1020 Messages.ERR_EVENT_REBUILD_SEARCHINDEX_1, 1021 indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")), 1022 e); 1023 } 1024 } 1025 break; 1026 case I_CmsEventListener.EVENT_CLEAR_CACHES: 1027 if (LOG.isDebugEnabled()) { 1028 LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_CLEAR_CACHES_0), new Exception()); 1029 } 1030 break; 1031 case I_CmsEventListener.EVENT_PUBLISH_PROJECT: 1032 // event data contains a list of the published resources 1033 CmsUUID publishHistoryId = new CmsUUID((String)event.getData().get(I_CmsEventListener.KEY_PUBLISHID)); 1034 if (LOG.isDebugEnabled()) { 1035 LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_PUBLISH_PROJECT_1, publishHistoryId)); 1036 } 1037 updateAllIndexes(m_adminCms, publishHistoryId, getEventReport(event)); 1038 if (LOG.isDebugEnabled()) { 1039 LOG.debug( 1040 Messages.get().getBundle().key( 1041 Messages.LOG_EVENT_PUBLISH_PROJECT_FINISHED_1, 1042 publishHistoryId)); 1043 } 1044 break; 1045 case I_CmsEventListener.EVENT_REINDEX_OFFLINE: 1046 case I_CmsEventListener.EVENT_REINDEX_ONLINE: 1047 boolean isOnline = I_CmsEventListener.EVENT_REINDEX_ONLINE == event.getType(); 1048 Map<String, Object> eventData = event.getData(); 1049 CmsUUID userId = (CmsUUID)eventData.get(I_CmsEventListener.KEY_USER_ID); 1050 CmsUser user = null; 1051 if (userId != null) { 1052 try { 1053 user = m_adminCms.readUser(userId); 1054 } catch (Throwable t) { 1055 // should not normally happen 1056 LOG.debug(t.getMessage(), t); 1057 } 1058 } 1059 try { 1060 SEARCH_MANAGER_LOCK.lock(); 1061 if (LOG.isDebugEnabled()) { 1062 LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_REINDEX_STARTED_0)); 1063 } 1064 CmsObject cms = m_adminCms; 1065 if (!isOnline) { 1066 OpenCms.initCmsObject(m_adminCms); 1067 cms.getRequestContext().setCurrentProject( 1068 cms.readProject((CmsUUID)eventData.get(I_CmsEventListener.KEY_PROJECTID))); 1069 } 1070 @SuppressWarnings("unchecked") 1071 List<CmsResource> resources = (List<CmsResource>)eventData.get(I_CmsEventListener.KEY_RESOURCES); 1072 I_CmsReport report = (I_CmsReport)eventData.get(I_CmsEventListener.KEY_REPORT); 1073 List<CmsResource> resourcesToIndex = new ArrayList<>(); 1074 for (CmsResource res : resources) { 1075 if (res.isFile()) { 1076 resourcesToIndex.add(res); 1077 } else { 1078 try { 1079 resourcesToIndex.addAll( 1080 cms.readResources(res, CmsResourceFilter.IGNORE_EXPIRATION, true)); 1081 } catch (CmsException e) { 1082 LOG.error(e, e); 1083 } 1084 } 1085 } 1086 // we reindex and prevent using cached results 1087 cleanExtractionCache(); 1088 List<CmsPublishedResource> publishedResourcesToIndex = resourcesToIndex.stream().map( 1089 res -> new CmsPublishedResource(res)).collect(Collectors.toList()); 1090 if (Boolean.TRUE.equals(eventData.get(I_CmsEventListener.KEY_REINDEX_RELATED))) { 1091 addAdditionallyAffectedResources(cms, publishedResourcesToIndex); 1092 } 1093 if (isOnline) { 1094 updateAllIndexes( 1095 m_adminCms, 1096 publishedResourcesToIndex, 1097 new CmsShellLogReport(CmsLocaleManager.MASTER_LOCALE)); 1098 } else { 1099 updateIndexOffline(report, publishedResourcesToIndex); 1100 } 1101 cms = null; 1102 SEARCH_MANAGER_LOCK.unlock(); 1103 if (null != user) { 1104 Locale l = OpenCms.getWorkplaceManager().getWorkplaceLocale(user); 1105 OpenCms.getSessionManager().sendBroadcast( 1106 null, 1107 Messages.get().getBundle(l).key(Messages.GUI_REINDEXING_SUCCESS_0), 1108 user, 1109 ContentMode.html); 1110 } 1111 if (LOG.isDebugEnabled()) { 1112 LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_REINDEX_FINISHED_0)); 1113 } 1114 1115 } catch (Throwable e) { 1116 if (SEARCH_MANAGER_LOCK.isHeldByCurrentThread()) { 1117 SEARCH_MANAGER_LOCK.unlock(); 1118 } 1119 if (null != user) { 1120 Locale l = OpenCms.getWorkplaceManager().getWorkplaceLocale(user); 1121 OpenCms.getSessionManager().sendBroadcast( 1122 null, 1123 Messages.get().getBundle(l).key(Messages.GUI_REINDEXING_FAILED_0), 1124 user, 1125 ContentMode.html); 1126 } 1127 if (LOG.isDebugEnabled()) { 1128 LOG.error( 1129 Messages.get().getBundle().key(Messages.ERR_EVENT_REINDEX_FAILED_1, event.getData()), 1130 e); 1131 } else if (LOG.isErrorEnabled()) { 1132 LOG.error(Messages.get().getBundle().key(Messages.ERR_EVENT_REINDEX_FAILED_1, event.getData())); 1133 } 1134 } 1135 break; 1136 default: 1137 // no operation 1138 } 1139 } 1140 1141 /** 1142 * Returns all Solr index.<p> 1143 * 1144 * @return all Solr indexes 1145 */ 1146 public List<CmsSolrIndex> getAllSolrIndexes() { 1147 1148 List<CmsSolrIndex> result = new ArrayList<CmsSolrIndex>(); 1149 for (String indexName : getIndexNames()) { 1150 CmsSolrIndex index = getIndexSolr(indexName); 1151 if (index != null) { 1152 result.add(index); 1153 } 1154 } 1155 return result; 1156 } 1157 1158 /** 1159 * Returns an analyzer for the given language.<p> 1160 * 1161 * The analyzer is selected according to the analyzer configuration.<p> 1162 * 1163 * @param locale the locale to get the analyzer for 1164 * @return the appropriate lucene analyzer 1165 * 1166 * @throws CmsSearchException if something goes wrong 1167 */ 1168 public Analyzer getAnalyzer(Locale locale) throws CmsSearchException { 1169 1170 Analyzer analyzer = null; 1171 String className = null; 1172 1173 CmsSearchAnalyzer analyzerConf = m_analyzers.get(locale); 1174 if (analyzerConf == null) { 1175 throw new CmsSearchException(Messages.get().container(Messages.ERR_ANALYZER_NOT_FOUND_1, locale)); 1176 } 1177 1178 try { 1179 analyzer = getAnalyzer(analyzerConf.getClassName()); 1180 } catch (Exception e) { 1181 throw new CmsSearchException(Messages.get().container(Messages.ERR_LOAD_ANALYZER_1, className), e); 1182 } 1183 1184 return analyzer; 1185 } 1186 1187 /** 1188 * Returns an unmodifiable view of the map that contains the {@link CmsSearchAnalyzer} list.<p> 1189 * 1190 * The keys in the map are {@link Locale} objects, and the values are {@link CmsSearchAnalyzer} objects. 1191 * 1192 * @return an unmodifiable view of the Analyzers Map 1193 */ 1194 public Map<Locale, CmsSearchAnalyzer> getAnalyzers() { 1195 1196 return Collections.unmodifiableMap(m_analyzers); 1197 } 1198 1199 /** 1200 * Returns the search analyzer for the given locale.<p> 1201 * 1202 * @param locale the locale to get the analyzer for 1203 * 1204 * @return the search analyzer for the given locale 1205 */ 1206 public CmsSearchAnalyzer getCmsSearchAnalyzer(Locale locale) { 1207 1208 return m_analyzers.get(locale); 1209 } 1210 1211 /** 1212 * Returns the name of the directory below WEB-INF/ where the search indexes are stored.<p> 1213 * 1214 * @return the name of the directory below WEB-INF/ where the search indexes are stored 1215 */ 1216 public String getDirectory() { 1217 1218 return m_path; 1219 } 1220 1221 /** 1222 * Returns the configured Solr home directory <code>null</code> if not set.<p> 1223 * 1224 * @return the Solr home directory 1225 */ 1226 public String getDirectorySolr() { 1227 1228 return m_solrConfig != null ? m_solrConfig.getHome() : null; 1229 } 1230 1231 /** 1232 * Returns the document factory configured under the provided name. 1233 * @param docTypeName the name of the document type. 1234 * @return the factory for the provided name. 1235 */ 1236 public I_CmsDocumentFactory getDocumentFactoryForName(String docTypeName) { 1237 1238 Map<String, I_CmsDocumentFactory> factoryMap = m_documentTypes.get(docTypeName); 1239 if (factoryMap != null) { 1240 Iterator<I_CmsDocumentFactory> factoryIt = factoryMap.values().iterator(); 1241 if (factoryIt.hasNext()) { 1242 return factoryMap.values().iterator().next(); 1243 } 1244 } 1245 return null; 1246 } 1247 1248 /** 1249 * Returns a document type config.<p> 1250 * 1251 * @param name the name of the document type config 1252 * @return the document type config. 1253 */ 1254 public CmsSearchDocumentType getDocumentTypeConfig(String name) { 1255 1256 // this is really used only for the search manager GUI, 1257 // so performance is not an issue and no lookup map is generated 1258 for (int i = 0; i < m_documentTypeConfigs.size(); i++) { 1259 CmsSearchDocumentType type = m_documentTypeConfigs.get(i); 1260 if (type.getName().equals(name)) { 1261 return type; 1262 } 1263 } 1264 return null; 1265 } 1266 1267 /** 1268 * Returns an unmodifiable view (read-only) of the DocumentTypeConfigs Map.<p> 1269 * 1270 * @return an unmodifiable view (read-only) of the DocumentTypeConfigs Map 1271 */ 1272 public List<CmsSearchDocumentType> getDocumentTypeConfigs() { 1273 1274 return Collections.unmodifiableList(m_documentTypeConfigs); 1275 } 1276 1277 /** 1278 * Returns the document type keys used to specify the correct document factory. 1279 * 1280 * @see #getDocumentTypeKeys(String, String) for detailed information on the returned keys. 1281 * 1282 * @param resource the resource to generate the list of document type keys for. 1283 * @return the document type keys. 1284 */ 1285 public List<String> getDocumentTypeKeys(CmsResource resource) { 1286 1287 // first get the MIME type of the resource 1288 String mimeType = OpenCms.getResourceManager().getMimeType(resource.getRootPath(), null, "unknown"); 1289 String resourceType = null; 1290 try { 1291 resourceType = OpenCms.getResourceManager().getResourceType(resource.getTypeId()).getTypeName(); 1292 } catch (CmsLoaderException e) { 1293 // ignore, unknown resource type, resource can not be indexed 1294 LOG.info(e.getLocalizedMessage(), e); 1295 } 1296 return getDocumentTypeKeys(resourceType, mimeType); 1297 } 1298 1299 /** 1300 * Returns the document type keys used to specify the correct document factory. 1301 * One resource typically has more than one key. The document factories are matched 1302 * in the provided order and the first matching factory is used. 1303 * 1304 * The keys for type name "typename" and mimetype "mimetype" would be a subset of: 1305 * <ul> 1306 * <li><code>typename_mimetype</code></li> 1307 * <li><code>typename</code></li> 1308 * <li>if <code>typename</code> is a sub-type of <code>containerpage</code> 1309 * <ul> 1310 * <li><code>containerpage_mimetype</code></li> 1311 * <li><code>containerpage</code></li> 1312 * </ul> 1313 * </li> 1314 * <li>if <code>typename</code> is a sub-type of <code>xmlcontent</code> 1315 * <ul> 1316 * <li><code>xmlcontent_mimetype</code></li> 1317 * <li><code>xmlcontent</code></li> 1318 * </ul> 1319 * </li> 1320 * <li><code>__unconfigured___mimetype</code></li> 1321 * <li><code>__unconfigured__</code></li> 1322 * <li><code>__all___mimetype</code></li> 1323 * <li><code>__all__</code></li> 1324 * <ul> 1325 * Note that all keys except the "__all__"-keys are only added as long as globally 1326 * there is no matching factory for the key. 1327 * This in particular means that a factory matching "typename" will never be used 1328 * if you have a factory for "typename__mimetype" - even if this is not configured 1329 * for the used index source. Eventually, the content will not be indexed in such cases. 1330 * @param resourceType the resource type to generate the list of document type keys for. 1331 * @param mimeType the mime type to generate the list of document type keys for. 1332 * @return the document type keys. 1333 */ 1334 public List<String> getDocumentTypeKeys(String resourceType, String mimeType) { 1335 1336 List<String> result = new ArrayList<>(8); 1337 if (null != resourceType) { 1338 String currentKey = A_CmsVfsDocument.getDocumentKey(resourceType, mimeType); 1339 result.add(currentKey); 1340 if (!m_extractionKeys.contains(currentKey)) { 1341 currentKey = A_CmsVfsDocument.getDocumentKey(resourceType, null); 1342 result.add(currentKey); 1343 if (!m_extractionKeys.contains(currentKey)) { 1344 boolean hasGlobalMatch = false; 1345 try { 1346 String containerpageTypeName = CmsResourceTypeXmlContainerPage.getStaticTypeName(); 1347 I_CmsResourceType type = OpenCms.getResourceManager().getResourceType(resourceType); 1348 if (!resourceType.equals(containerpageTypeName)) { 1349 if (type instanceof CmsResourceTypeXmlContainerPage) { 1350 if (!resourceType.equals(CmsResourceTypeXmlContainerPage.getStaticTypeName())) { 1351 currentKey = A_CmsVfsDocument.getDocumentKey(containerpageTypeName, mimeType); 1352 result.add(currentKey); 1353 hasGlobalMatch = m_extractionKeys.contains(currentKey); 1354 if (!hasGlobalMatch) { 1355 currentKey = A_CmsVfsDocument.getDocumentKey(containerpageTypeName, null); 1356 result.add(currentKey); 1357 hasGlobalMatch = m_extractionKeys.contains(currentKey); 1358 } 1359 } 1360 } 1361 } 1362 String xmlcontentTypeName = CmsResourceTypeXmlContent.getStaticTypeName(); 1363 if (!resourceType.equals(containerpageTypeName)) { 1364 if (!hasGlobalMatch && (type instanceof CmsResourceTypeXmlContent)) { 1365 currentKey = A_CmsVfsDocument.getDocumentKey(xmlcontentTypeName, mimeType); 1366 result.add(currentKey); 1367 hasGlobalMatch = m_extractionKeys.contains(currentKey); 1368 if (!hasGlobalMatch) { 1369 currentKey = A_CmsVfsDocument.getDocumentKey(xmlcontentTypeName, null); 1370 result.add(currentKey); 1371 hasGlobalMatch = m_extractionKeys.contains(currentKey); 1372 } 1373 } 1374 } 1375 } catch (Throwable t) { 1376 LOG.warn("Could not read type for name \"" + resourceType + "\".", t); 1377 } 1378 if (!hasGlobalMatch) { 1379 result.add( 1380 A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_UNCONFIGURED_TYPES, mimeType)); 1381 result.add( 1382 A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_UNCONFIGURED_TYPES, null)); 1383 } 1384 } 1385 } 1386 result.add(A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_TYPES, mimeType)); 1387 result.add(A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_TYPES, null)); 1388 } 1389 return result; 1390 1391 } 1392 1393 /** 1394 * Returns the map from document type keys to document factories with all entries for the provided document type names. 1395 * @param documentTypeNames list of document type names to generate the map for. 1396 * @return the map from document type keys to document factories. 1397 */ 1398 public Map<String, I_CmsDocumentFactory> getDocumentTypeMapForTypeNames(List<String> documentTypeNames) { 1399 1400 Map<String, I_CmsDocumentFactory> result = new LinkedHashMap<>(); 1401 if (null != documentTypeNames) { 1402 // Iterate the list in reverse order to prefer factories that are added by document types listed earlier. 1403 ListIterator<String> typesIterator = documentTypeNames.listIterator(documentTypeNames.size()); 1404 while (typesIterator.hasPrevious()) { 1405 Map<String, I_CmsDocumentFactory> factories = m_documentTypes.get(typesIterator.previous()); 1406 if (null != factories) { 1407 result.putAll(factories); 1408 } 1409 } 1410 } 1411 return result; 1412 } 1413 1414 /** 1415 * Returns the maximum age a text extraction result is kept in the cache (in hours).<p> 1416 * 1417 * @return the maximum age a text extraction result is kept in the cache (in hours) 1418 */ 1419 public float getExtractionCacheMaxAge() { 1420 1421 return m_extractionCacheMaxAge; 1422 } 1423 1424 /** 1425 * Returns the search field configuration with the given name.<p> 1426 * 1427 * In case no configuration is available with the given name, <code>null</code> is returned.<p> 1428 * 1429 * @param name the name to get the search field configuration for 1430 * 1431 * @return the search field configuration with the given name 1432 */ 1433 public I_CmsSearchFieldConfiguration getFieldConfiguration(String name) { 1434 1435 return m_fieldConfigurations.get(name); 1436 } 1437 1438 /** 1439 * Returns the unmodifieable List of configured {@link I_CmsSearchFieldConfiguration} entries.<p> 1440 * 1441 * @return the unmodifieable List of configured {@link I_CmsSearchFieldConfiguration} entries 1442 */ 1443 public List<I_CmsSearchFieldConfiguration> getFieldConfigurations() { 1444 1445 List<I_CmsSearchFieldConfiguration> result = new ArrayList<I_CmsSearchFieldConfiguration>( 1446 m_fieldConfigurations.values()); 1447 Collections.sort(result); 1448 return Collections.unmodifiableList(result); 1449 } 1450 1451 /** 1452 * Returns the Lucene search field configurations only.<p> 1453 * 1454 * @return the Lucene search field configurations 1455 */ 1456 public List<CmsLuceneFieldConfiguration> getFieldConfigurationsLucene() { 1457 1458 List<CmsLuceneFieldConfiguration> result = new ArrayList<CmsLuceneFieldConfiguration>(); 1459 for (I_CmsSearchFieldConfiguration conf : m_fieldConfigurations.values()) { 1460 if (conf instanceof CmsLuceneFieldConfiguration) { 1461 result.add((CmsLuceneFieldConfiguration)conf); 1462 } 1463 } 1464 Collections.sort(result); 1465 return Collections.unmodifiableList(result); 1466 } 1467 1468 /** 1469 * Returns the Solr search field configurations only.<p> 1470 * 1471 * @return the Solr search field configurations 1472 */ 1473 public List<CmsSolrFieldConfiguration> getFieldConfigurationsSolr() { 1474 1475 List<CmsSolrFieldConfiguration> result = new ArrayList<CmsSolrFieldConfiguration>(); 1476 for (I_CmsSearchFieldConfiguration conf : m_fieldConfigurations.values()) { 1477 if (conf instanceof CmsSolrFieldConfiguration) { 1478 result.add((CmsSolrFieldConfiguration)conf); 1479 } 1480 } 1481 Collections.sort(result); 1482 return Collections.unmodifiableList(result); 1483 } 1484 1485 /** 1486 * Returns the force unlock mode during indexing.<p> 1487 * 1488 * @return the force unlock mode during indexing 1489 */ 1490 public CmsSearchForceUnlockMode getForceunlock() { 1491 1492 return m_forceUnlockMode; 1493 } 1494 1495 /** 1496 * Returns the highlighter.<p> 1497 * 1498 * @return the highlighter 1499 */ 1500 public I_CmsTermHighlighter getHighlighter() { 1501 1502 return m_highlighter; 1503 } 1504 1505 /** 1506 * Returns the Lucene search index configured with the given name.<p> 1507 * The index must exist, otherwise <code>null</code> is returned. 1508 * 1509 * @param indexName then name of the requested search index 1510 * 1511 * @return the Lucene search index configured with the given name 1512 */ 1513 public I_CmsSearchIndex getIndex(String indexName) { 1514 1515 for (I_CmsSearchIndex index : m_indexes) { 1516 if (indexName.equalsIgnoreCase(index.getName())) { 1517 return index; 1518 } 1519 } 1520 return null; 1521 } 1522 1523 /** 1524 * Returns the seconds to wait for an index lock during an update operation.<p> 1525 * 1526 * @return the seconds to wait for an index lock during an update operation 1527 */ 1528 public int getIndexLockMaxWaitSeconds() { 1529 1530 return m_indexLockMaxWaitSeconds; 1531 } 1532 1533 /** 1534 * Returns the names of all configured indexes.<p> 1535 * 1536 * @return list of names 1537 */ 1538 public List<String> getIndexNames() { 1539 1540 List<String> indexNames = new ArrayList<String>(); 1541 for (int i = 0, n = m_indexes.size(); i < n; i++) { 1542 indexNames.add((m_indexes.get(i)).getName()); 1543 } 1544 1545 return indexNames; 1546 } 1547 1548 /** 1549 * Returns the Solr index configured with the given name.<p> 1550 * The index must exist, otherwise <code>null</code> is returned. 1551 * 1552 * @param indexName then name of the requested Solr index 1553 * @return the Solr index configured with the given name 1554 */ 1555 public CmsSolrIndex getIndexSolr(String indexName) { 1556 1557 I_CmsSearchIndex index = getIndex(indexName); 1558 if (index instanceof CmsSolrIndex) { 1559 return (CmsSolrIndex)index; 1560 } 1561 return null; 1562 } 1563 1564 /** 1565 * Returns a search index source for a specified source name.<p> 1566 * 1567 * @param sourceName the name of the index source 1568 * @return a search index source 1569 */ 1570 public CmsSearchIndexSource getIndexSource(String sourceName) { 1571 1572 return m_indexSources.get(sourceName); 1573 } 1574 1575 /** 1576 * Returns the max. excerpt length.<p> 1577 * 1578 * @return the max excerpt length 1579 */ 1580 public int getMaxExcerptLength() { 1581 1582 return m_maxExcerptLength; 1583 } 1584 1585 /** 1586 * Returns the maximal time to wait for re-indexing after a content is edited (in milliseconds).<p> 1587 * 1588 * @return the maximal time to wait for re-indexing after a content is edited (in milliseconds) 1589 */ 1590 public long getMaxIndexWaitTime() { 1591 1592 return m_maxIndexWaitTime; 1593 } 1594 1595 /** 1596 * Returns the maximum number of modifications before a commit in the search index is triggered.<p> 1597 * 1598 * @return the maximum number of modifications before a commit in the search index is triggered 1599 */ 1600 public int getMaxModificationsBeforeCommit() { 1601 1602 return m_maxModificationsBeforeCommit; 1603 } 1604 1605 /** 1606 * Returns the update frequency of the offline indexer in milliseconds.<p> 1607 * 1608 * @return the update frequency of the offline indexer in milliseconds 1609 */ 1610 public long getOfflineUpdateFrequency() { 1611 1612 return m_offlineUpdateFrequency; 1613 } 1614 1615 /** 1616 * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p> 1617 * 1618 * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances 1619 */ 1620 public List<I_CmsSearchIndex> getSearchIndexes() { 1621 1622 return Collections.unmodifiableList(m_indexes); 1623 } 1624 1625 /** 1626 * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p> 1627 * 1628 * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances 1629 */ 1630 public List<I_CmsSearchIndex> getSearchIndexesAll() { 1631 1632 return Collections.unmodifiableList(m_indexes); 1633 } 1634 1635 /** 1636 * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p> 1637 * 1638 * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances 1639 */ 1640 public List<CmsSolrIndex> getSearchIndexesSolr() { 1641 1642 List<CmsSolrIndex> indexes = new ArrayList<CmsSolrIndex>(); 1643 for (I_CmsSearchIndex index : m_indexes) { 1644 if (index instanceof CmsSolrIndex) { 1645 indexes.add((CmsSolrIndex)index); 1646 } 1647 } 1648 return Collections.unmodifiableList(indexes); 1649 } 1650 1651 /** 1652 * Returns an unmodifiable view (read-only) of the SearchIndexSources Map.<p> 1653 * 1654 * @return an unmodifiable view (read-only) of the SearchIndexSources Map 1655 */ 1656 public Map<String, CmsSearchIndexSource> getSearchIndexSources() { 1657 1658 return Collections.unmodifiableMap(m_indexSources); 1659 } 1660 1661 /** 1662 * Return singleton instance of the OpenCms spellchecker.<p> 1663 * 1664 * @return instance of CmsSolrSpellchecker. 1665 */ 1666 public CmsSolrSpellchecker getSolrDictionary() { 1667 1668 // get the core container that contains one core for each configured index 1669 if (m_coreContainer == null) { 1670 m_coreContainer = createCoreContainer(); 1671 } 1672 return CmsSolrSpellchecker.getInstance(m_coreContainer); 1673 } 1674 1675 /** 1676 * Returns the Solr configuration.<p> 1677 * 1678 * @return the Solr configuration 1679 */ 1680 public CmsSolrConfiguration getSolrServerConfiguration() { 1681 1682 return m_solrConfig; 1683 } 1684 1685 /** 1686 * Returns the timeout to abandon threads indexing a resource.<p> 1687 * 1688 * @return the timeout to abandon threads indexing a resource 1689 */ 1690 public long getTimeout() { 1691 1692 return m_timeout; 1693 } 1694 1695 /** 1696 * Initializes the search manager.<p> 1697 * 1698 * @param cms the cms object 1699 * 1700 * @throws CmsRoleViolationException in case the given opencms object does not have <code>{@link CmsRole#WORKPLACE_MANAGER}</code> permissions 1701 */ 1702 public void initialize(CmsObject cms) throws CmsRoleViolationException { 1703 1704 OpenCms.getRoleManager().checkRole(cms, CmsRole.WORKPLACE_MANAGER); 1705 try { 1706 // store the Admin cms to index Cms resources 1707 m_adminCms = OpenCms.initCmsObject(cms); 1708 } catch (CmsException e) { 1709 // this should never happen 1710 LOG.error(e.getLocalizedMessage(), e); 1711 } 1712 // make sure the site root is the root site 1713 m_adminCms.getRequestContext().setSiteRoot("/"); 1714 1715 // create the extraction result cache 1716 m_extractionResultCache = new CmsExtractionResultCache( 1717 OpenCms.getSystemInfo().getAbsoluteRfsPathRelativeToWebInf(getDirectory()), 1718 "/extractCache"); 1719 initializeFieldConfigurations(); 1720 initializeIndexes(); 1721 initOfflineIndexes(); 1722 1723 // register this object as event listener 1724 OpenCms.addCmsEventListener( 1725 this, 1726 new int[] { 1727 I_CmsEventListener.EVENT_CLEAR_CACHES, 1728 I_CmsEventListener.EVENT_PUBLISH_PROJECT, 1729 I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES, 1730 I_CmsEventListener.EVENT_REINDEX_OFFLINE, 1731 I_CmsEventListener.EVENT_REINDEX_ONLINE}); 1732 } 1733 1734 /** 1735 * Calls {@link I_CmsSearchFieldConfiguration#init()} for all registered field configurations. 1736 */ 1737 public void initializeFieldConfigurations() { 1738 1739 for (I_CmsSearchFieldConfiguration config : m_fieldConfigurations.values()) { 1740 config.init(); 1741 } 1742 1743 } 1744 1745 /** 1746 * Initializes all configured document types, index sources and search indexes.<p> 1747 * 1748 * This methods needs to be called if after a change in the index configuration has been made. 1749 */ 1750 public void initializeIndexes() { 1751 1752 initAvailableDocumentTypes(); 1753 initIndexSources(); 1754 initSearchIndexes(); 1755 } 1756 1757 /** 1758 * Initialize the offline index handler, require after an offline index has been added.<p> 1759 */ 1760 public void initOfflineIndexes() { 1761 1762 // check which indexes are configured as offline indexes 1763 List<I_CmsSearchIndex> offlineIndexes = new ArrayList<I_CmsSearchIndex>(); 1764 Iterator<I_CmsSearchIndex> i = m_indexes.iterator(); 1765 while (i.hasNext()) { 1766 I_CmsSearchIndex index = i.next(); 1767 if (I_CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) { 1768 // this is an offline index 1769 offlineIndexes.add(index); 1770 } 1771 } 1772 m_offlineIndexes = offlineIndexes; 1773 m_offlineHandler.initialize(); 1774 1775 } 1776 1777 /** 1778 * Initializes the spell check index.<p> 1779 * 1780 * @param adminCms the ROOT_ADMIN cms context 1781 */ 1782 public void initSpellcheckIndex(CmsObject adminCms) { 1783 1784 if (CmsSpellcheckDictionaryIndexer.updatingIndexNecessesary(adminCms)) { 1785 final CmsSolrSpellchecker spellchecker = OpenCms.getSearchManager().getSolrDictionary(); 1786 if (spellchecker != null) { 1787 1788 Runnable initRunner = new Runnable() { 1789 1790 public void run() { 1791 1792 try { 1793 spellchecker.parseAndAddDictionaries(adminCms); 1794 } catch (CmsRoleViolationException e) { 1795 LOG.error(e.getLocalizedMessage(), e); 1796 } 1797 } 1798 }; 1799 new Thread(initRunner).start(); 1800 } 1801 } 1802 } 1803 1804 /** 1805 * Returns if the offline indexing is paused.<p> 1806 * 1807 * @return <code>true</code> if the offline indexing is paused 1808 */ 1809 public boolean isOfflineIndexingPaused() { 1810 1811 return m_offlineUpdateFrequency == Long.MAX_VALUE; 1812 } 1813 1814 /** 1815 * Updates the indexes from as a scheduled job.<p> 1816 * 1817 * @param cms the OpenCms user context to use when reading resources from the VFS 1818 * @param parameters the parameters for the scheduled job 1819 * 1820 * @throws Exception if something goes wrong 1821 * 1822 * @return the String to write in the scheduler log 1823 * 1824 * @see org.opencms.scheduler.I_CmsScheduledJob#launch(CmsObject, Map) 1825 */ 1826 public String launch(CmsObject cms, Map<String, String> parameters) throws Exception { 1827 1828 CmsSearchManager manager = OpenCms.getSearchManager(); 1829 1830 I_CmsReport report = null; 1831 boolean writeLog = Boolean.valueOf(parameters.get(JOB_PARAM_WRITELOG)).booleanValue(); 1832 1833 if (writeLog) { 1834 report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class); 1835 } 1836 1837 List<String> updateList = null; 1838 String indexList = parameters.get(JOB_PARAM_INDEXLIST); 1839 if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(indexList)) { 1840 // index list has been provided as job parameter 1841 updateList = new ArrayList<String>(); 1842 String[] indexNames = CmsStringUtil.splitAsArray(indexList, '|'); 1843 for (int i = 0; i < indexNames.length; i++) { 1844 // check if the index actually exists 1845 if (manager.getIndex(indexNames[i]) != null) { 1846 updateList.add(indexNames[i]); 1847 } else { 1848 if (LOG.isWarnEnabled()) { 1849 LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexNames[i])); 1850 } 1851 } 1852 } 1853 } 1854 1855 long startTime = System.currentTimeMillis(); 1856 1857 if (updateList == null) { 1858 // all indexes need to be updated 1859 manager.rebuildAllIndexes(report); 1860 } else { 1861 // rebuild only the selected indexes 1862 manager.rebuildIndexes(updateList, report); 1863 } 1864 1865 long runTime = System.currentTimeMillis() - startTime; 1866 1867 String finishMessage = Messages.get().getBundle().key( 1868 Messages.LOG_REBUILD_INDEXES_FINISHED_1, 1869 CmsStringUtil.formatRuntime(runTime)); 1870 1871 if (LOG.isInfoEnabled()) { 1872 LOG.info(finishMessage); 1873 } 1874 return finishMessage; 1875 } 1876 1877 /** 1878 * Pauses the offline indexing.<p> 1879 * May take some time, because the indexes are updated first.<p> 1880 */ 1881 public void pauseOfflineIndexing() { 1882 1883 if (m_offlineUpdateFrequency != Long.MAX_VALUE) { 1884 m_configuredOfflineIndexingFrequency = m_offlineUpdateFrequency; 1885 m_offlineUpdateFrequency = Long.MAX_VALUE; 1886 updateOfflineIndexes(0); 1887 } 1888 } 1889 1890 /** 1891 * Rebuilds (if required creates) all configured indexes.<p> 1892 * 1893 * @param report the report object to write messages (or <code>null</code>) 1894 * 1895 * @throws CmsException if something goes wrong 1896 */ 1897 public void rebuildAllIndexes(I_CmsReport report) throws CmsException { 1898 1899 try { 1900 SEARCH_MANAGER_LOCK.lock(); 1901 1902 CmsMessageContainer container = null; 1903 for (int i = 0, n = m_indexes.size(); i < n; i++) { 1904 // iterate all configured search indexes 1905 I_CmsSearchIndex searchIndex = m_indexes.get(i); 1906 try { 1907 // update the index 1908 updateIndex(searchIndex, report, null); 1909 } catch (CmsException e) { 1910 container = new CmsMessageContainer( 1911 Messages.get(), 1912 Messages.ERR_INDEX_REBUILD_ALL_1, 1913 new Object[] {searchIndex.getName()}); 1914 LOG.error( 1915 Messages.get().getBundle().key(Messages.ERR_INDEX_REBUILD_ALL_1, searchIndex.getName()), 1916 e); 1917 } 1918 } 1919 // clean up the extraction result cache 1920 cleanExtractionCache(); 1921 if (container != null) { 1922 // throw stored exception 1923 throw new CmsSearchException(container); 1924 } 1925 } finally { 1926 SEARCH_MANAGER_LOCK.unlock(); 1927 } 1928 } 1929 1930 /** 1931 * Rebuilds (if required creates) the index with the given name.<p> 1932 * 1933 * @param indexName the name of the index to rebuild 1934 * @param report the report object to write messages (or <code>null</code>) 1935 * 1936 * @throws CmsException if something goes wrong 1937 */ 1938 public void rebuildIndex(String indexName, I_CmsReport report) throws CmsException { 1939 1940 try { 1941 SEARCH_MANAGER_LOCK.lock(); 1942 // get the search index by name 1943 I_CmsSearchIndex index = getIndex(indexName); 1944 // update the index 1945 updateIndex(index, report, null); 1946 // clean up the extraction result cache 1947 cleanExtractionCache(); 1948 } finally { 1949 SEARCH_MANAGER_LOCK.unlock(); 1950 } 1951 } 1952 1953 /** 1954 * Rebuilds (if required creates) the List of indexes with the given name.<p> 1955 * 1956 * @param indexNames the names (String) of the index to rebuild 1957 * @param report the report object to write messages (or <code>null</code>) 1958 * 1959 * @throws CmsException if something goes wrong 1960 */ 1961 public void rebuildIndexes(List<String> indexNames, I_CmsReport report) throws CmsException { 1962 1963 try { 1964 SEARCH_MANAGER_LOCK.lock(); 1965 Iterator<String> i = indexNames.iterator(); 1966 while (i.hasNext()) { 1967 String indexName = i.next(); 1968 // get the search index by name 1969 I_CmsSearchIndex index = getIndex(indexName); 1970 if (index != null) { 1971 // update the index 1972 updateIndex(index, report, null); 1973 } else { 1974 if (LOG.isWarnEnabled()) { 1975 LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName)); 1976 } 1977 } 1978 } 1979 // clean up the extraction result cache 1980 cleanExtractionCache(); 1981 } finally { 1982 SEARCH_MANAGER_LOCK.unlock(); 1983 } 1984 } 1985 1986 /** 1987 * Registers a new Solr core for the given index.<p> 1988 * 1989 * @param index the index to register a new Solr core for 1990 * 1991 * @throws CmsConfigurationException if no Solr server is configured 1992 */ 1993 @SuppressWarnings("resource") 1994 public void registerSolrIndex(CmsSolrIndex index) throws CmsConfigurationException { 1995 1996 if ((m_solrConfig == null) || !m_solrConfig.isEnabled()) { 1997 // No solr server configured 1998 throw new CmsConfigurationException(Messages.get().container(Messages.ERR_SOLR_NOT_ENABLED_0)); 1999 } 2000 2001 if (index.getServerUrl() != null) { // Use the index-specific Solr-Server if present. 2002 index.setSolrServer(new Builder().withBaseSolrUrl(index.getServerUrl()).build()); 2003 } else if (m_solrConfig.getServerUrl() != null) { // Use the globally configured external Solr-Server if present. 2004 // HTTP Server configured 2005 // TODO Implement multi core support for HTTP server 2006 // @see http://lucidworks.lucidimagination.com/display/solr/Configuring+solr.xml 2007 index.setSolrServer(new Builder().withBaseSolrUrl(m_solrConfig.getServerUrl()).build()); 2008 } else { // Default to the embedded Solr Server 2009 2010 // get the core container that contains one core for each configured index 2011 if (m_coreContainer == null) { 2012 m_coreContainer = createCoreContainer(); 2013 } 2014 2015 // unload the existing core if it exists to avoid problems with forced unlock. 2016 if (m_coreContainer.getAllCoreNames().contains(index.getCoreName())) { 2017 m_coreContainer.unload(index.getCoreName(), false, false, true); 2018 } 2019 // ensure that all locks on the index are gone 2020 ensureIndexIsUnlocked(index.getPath()); 2021 2022 // load the core to the container 2023 File dataDir = new File(index.getPath()); 2024 if (!dataDir.exists()) { 2025 dataDir.mkdirs(); 2026 if (CmsLog.INIT.isInfoEnabled()) { 2027 CmsLog.INIT.info( 2028 Messages.get().getBundle().key( 2029 Messages.INIT_SOLR_INDEX_DIR_CREATED_2, 2030 index.getName(), 2031 index.getPath())); 2032 } 2033 } 2034 File instanceDir = new File( 2035 m_solrConfig.getHome() + FileSystems.getDefault().getSeparator() + index.getName()); 2036 if (!instanceDir.exists()) { 2037 instanceDir.mkdirs(); 2038 if (CmsLog.INIT.isInfoEnabled()) { 2039 CmsLog.INIT.info( 2040 Messages.get().getBundle().key( 2041 Messages.INIT_SOLR_INDEX_DIR_CREATED_2, 2042 index.getName(), 2043 index.getPath())); 2044 } 2045 } 2046 2047 // create the core 2048 // TODO: suboptimal - forces always the same schema 2049 SolrCore core = null; 2050 try { 2051 // creation includes registration. 2052 // TODO: this was the old code: core = m_coreContainer.create(descriptor, false); 2053 Map<String, String> properties = new HashMap<String, String>(3); 2054 properties.put(CoreDescriptor.CORE_DATADIR, dataDir.getAbsolutePath()); 2055 properties.put(CoreDescriptor.CORE_CONFIGSET, "default"); 2056 core = m_coreContainer.create(index.getCoreName(), instanceDir.toPath(), properties, false); 2057 } catch (NullPointerException e) { 2058 if (core != null) { 2059 core.close(); 2060 } 2061 throw new CmsConfigurationException( 2062 Messages.get().container( 2063 Messages.ERR_SOLR_SERVER_NOT_CREATED_3, 2064 index.getName() + " (" + index.getCoreName() + ")", 2065 index.getPath(), 2066 m_solrConfig.getSolrConfigFile().getAbsolutePath()), 2067 e); 2068 } 2069 2070 if (index.isNoSolrServerSet()) { 2071 index.setSolrServer(new EmbeddedSolrServer(m_coreContainer, index.getCoreName())); 2072 } 2073 if (CmsLog.INIT.isInfoEnabled()) { 2074 CmsLog.INIT.info( 2075 Messages.get().getBundle().key( 2076 Messages.INIT_SOLR_SERVER_CREATED_1, 2077 index.getName() + " (" + index.getCoreName() + ")")); 2078 } 2079 } 2080 } 2081 2082 /** 2083 * Removes this field configuration from the OpenCms configuration (if it is not used any more).<p> 2084 * 2085 * @param fieldConfiguration the field configuration to remove from the configuration 2086 * 2087 * @return true if remove was successful, false if preconditions for removal are ok but the given 2088 * field configuration was unknown to the manager. 2089 * 2090 * @throws CmsIllegalStateException if the given field configuration is still used by at least one 2091 * <code>{@link I_CmsSearchIndex}</code>. 2092 * 2093 */ 2094 public boolean removeSearchFieldConfiguration(I_CmsSearchFieldConfiguration fieldConfiguration) 2095 throws CmsIllegalStateException { 2096 2097 // never remove the standard field configuration 2098 if (fieldConfiguration.getName().equals(CmsSearchFieldConfiguration.STR_STANDARD)) { 2099 throw new CmsIllegalStateException( 2100 Messages.get().container( 2101 Messages.ERR_INDEX_CONFIGURATION_DELETE_STANDARD_1, 2102 fieldConfiguration.getName())); 2103 } 2104 // validation if removal will be granted 2105 Iterator<I_CmsSearchIndex> itIndexes = m_indexes.iterator(); 2106 I_CmsSearchIndex idx; 2107 // the list for collecting indexes that use the given field configuration 2108 List<I_CmsSearchIndex> referrers = new ArrayList<I_CmsSearchIndex>(); 2109 I_CmsSearchFieldConfiguration refFieldConfig; 2110 while (itIndexes.hasNext()) { 2111 idx = itIndexes.next(); 2112 refFieldConfig = idx.getFieldConfiguration(); 2113 if (refFieldConfig.equals(fieldConfiguration)) { 2114 referrers.add(idx); 2115 } 2116 } 2117 if (referrers.size() > 0) { 2118 throw new CmsIllegalStateException( 2119 Messages.get().container( 2120 Messages.ERR_INDEX_CONFIGURATION_DELETE_2, 2121 fieldConfiguration.getName(), 2122 referrers.toString())); 2123 } 2124 2125 // remove operation (no exception) 2126 return m_fieldConfigurations.remove(fieldConfiguration.getName()) != null; 2127 2128 } 2129 2130 /** 2131 * Removes a search field from the field configuration.<p> 2132 * 2133 * @param fieldConfiguration the field configuration 2134 * @param field field to remove from the field configuration 2135 * 2136 * @return true if remove was successful, false if preconditions for removal are ok but the given 2137 * field was unknown. 2138 */ 2139 public boolean removeSearchFieldConfigurationField( 2140 I_CmsSearchFieldConfiguration fieldConfiguration, 2141 CmsSearchField field) { 2142 2143 if (LOG.isInfoEnabled()) { 2144 LOG.info( 2145 Messages.get().getBundle().key( 2146 Messages.LOG_REMOVE_FIELDCONFIGURATION_FIELD_INDEX_2, 2147 field.getName(), 2148 fieldConfiguration.getName())); 2149 } 2150 2151 return fieldConfiguration.getFields().remove(field); 2152 } 2153 2154 /** 2155 * Removes a search field mapping from the given field.<p> 2156 * 2157 * @param field the field 2158 * @param mapping mapping to remove from the field 2159 * 2160 * @return true if remove was successful, false if preconditions for removal are ok but the given 2161 * mapping was unknown. 2162 * 2163 * @throws CmsIllegalStateException if the given mapping is the last mapping inside the given field. 2164 */ 2165 public boolean removeSearchFieldMapping(CmsLuceneField field, CmsSearchFieldMapping mapping) 2166 throws CmsIllegalStateException { 2167 2168 if (field.getMappings().size() < 2) { 2169 throw new CmsIllegalStateException( 2170 Messages.get().container( 2171 Messages.ERR_FIELD_MAPPING_DELETE_2, 2172 mapping.getType().toString(), 2173 field.getName())); 2174 } else { 2175 2176 if (LOG.isInfoEnabled()) { 2177 LOG.info( 2178 Messages.get().getBundle().key( 2179 Messages.LOG_REMOVE_FIELD_MAPPING_INDEX_2, 2180 mapping.toString(), 2181 field.getName())); 2182 } 2183 return field.getMappings().remove(mapping); 2184 } 2185 } 2186 2187 /** 2188 * Removes a search index from the configuration.<p> 2189 * 2190 * @param searchIndex the search index to remove 2191 */ 2192 public void removeSearchIndex(I_CmsSearchIndex searchIndex) { 2193 2194 // shut down index to remove potential config files of Solr indexes 2195 searchIndex.shutDown(); 2196 if (searchIndex instanceof CmsSolrIndex) { 2197 CmsSolrIndex solrIndex = (CmsSolrIndex)searchIndex; 2198 m_coreContainer.unload(solrIndex.getCoreName(), true, true, true); 2199 } 2200 m_indexes.remove(searchIndex); 2201 initOfflineIndexes(); 2202 2203 if (LOG.isInfoEnabled()) { 2204 LOG.info( 2205 Messages.get().getBundle().key( 2206 Messages.LOG_REMOVE_SEARCH_INDEX_2, 2207 searchIndex.getName(), 2208 searchIndex.getProject())); 2209 } 2210 } 2211 2212 /** 2213 * Removes all indexes included in the given list (which must contain the name of an index to remove).<p> 2214 * 2215 * @param indexNames the names of the index to remove 2216 */ 2217 public void removeSearchIndexes(List<String> indexNames) { 2218 2219 Iterator<String> i = indexNames.iterator(); 2220 while (i.hasNext()) { 2221 String indexName = i.next(); 2222 // get the search index by name 2223 I_CmsSearchIndex index = getIndex(indexName); 2224 if (index != null) { 2225 // remove the index 2226 removeSearchIndex(index); 2227 } else { 2228 if (LOG.isWarnEnabled()) { 2229 LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName)); 2230 } 2231 } 2232 } 2233 } 2234 2235 /** 2236 * Removes this indexsource from the OpenCms configuration (if it is not used any more).<p> 2237 * 2238 * @param indexsource the indexsource to remove from the configuration 2239 * 2240 * @return true if remove was successful, false if preconditions for removal are ok but the given 2241 * searchindex was unknown to the manager. 2242 * 2243 * @throws CmsIllegalStateException if the given indexsource is still used by at least one 2244 * <code>{@link I_CmsSearchIndex}</code>. 2245 * 2246 */ 2247 public boolean removeSearchIndexSource(CmsSearchIndexSource indexsource) throws CmsIllegalStateException { 2248 2249 // validation if removal will be granted 2250 Iterator<I_CmsSearchIndex> itIndexes = m_indexes.iterator(); 2251 I_CmsSearchIndex idx; 2252 // the list for collecting indexes that use the given index source 2253 List<I_CmsSearchIndex> referrers = new ArrayList<I_CmsSearchIndex>(); 2254 // the current list of referred index sources of the iterated index 2255 List<CmsSearchIndexSource> refsources; 2256 while (itIndexes.hasNext()) { 2257 idx = itIndexes.next(); 2258 refsources = idx.getSources(); 2259 if (refsources != null) { 2260 if (refsources.contains(indexsource)) { 2261 referrers.add(idx); 2262 } 2263 } 2264 } 2265 if (referrers.size() > 0) { 2266 throw new CmsIllegalStateException( 2267 Messages.get().container( 2268 Messages.ERR_INDEX_SOURCE_DELETE_2, 2269 indexsource.getName(), 2270 referrers.toString())); 2271 } 2272 2273 // remove operation (no exception) 2274 return m_indexSources.remove(indexsource.getName()) != null; 2275 2276 } 2277 2278 /** 2279 * Resumes offline indexing if it was paused.<p> 2280 */ 2281 public void resumeOfflineIndexing() { 2282 2283 if (m_offlineUpdateFrequency == Long.MAX_VALUE) { 2284 setOfflineUpdateFrequency( 2285 m_configuredOfflineIndexingFrequency > 0 2286 ? m_configuredOfflineIndexingFrequency 2287 : DEFAULT_OFFLINE_UPDATE_FREQNENCY); 2288 } 2289 } 2290 2291 /** 2292 * Sets the name of the directory below WEB-INF/ where the search indexes are stored.<p> 2293 * 2294 * @param value the name of the directory below WEB-INF/ where the search indexes are stored 2295 */ 2296 public void setDirectory(String value) { 2297 2298 m_path = value; 2299 } 2300 2301 /** 2302 * Sets the maximum age a text extraction result is kept in the cache (in hours).<p> 2303 * 2304 * @param extractionCacheMaxAge the maximum age for a text extraction result to set 2305 */ 2306 public void setExtractionCacheMaxAge(float extractionCacheMaxAge) { 2307 2308 m_extractionCacheMaxAge = extractionCacheMaxAge; 2309 } 2310 2311 /** 2312 * Sets the maximum age a text extraction result is kept in the cache (in hours) as a String.<p> 2313 * 2314 * @param extractionCacheMaxAge the maximum age for a text extraction result to set 2315 */ 2316 public void setExtractionCacheMaxAge(String extractionCacheMaxAge) { 2317 2318 try { 2319 setExtractionCacheMaxAge(Float.parseFloat(extractionCacheMaxAge)); 2320 } catch (NumberFormatException e) { 2321 LOG.error( 2322 Messages.get().getBundle().key( 2323 Messages.LOG_PARSE_EXTRACTION_CACHE_AGE_FAILED_2, 2324 extractionCacheMaxAge, 2325 new Float(DEFAULT_EXTRACTION_CACHE_MAX_AGE)), 2326 e); 2327 setExtractionCacheMaxAge(DEFAULT_EXTRACTION_CACHE_MAX_AGE); 2328 } 2329 } 2330 2331 /** 2332 * Sets the unlock mode during indexing.<p> 2333 * 2334 * @param value the value 2335 */ 2336 public void setForceunlock(String value) { 2337 2338 m_forceUnlockMode = CmsSearchForceUnlockMode.valueOf(value); 2339 } 2340 2341 /** 2342 * Sets the highlighter.<p> 2343 * 2344 * A highlighter is a class implementing org.opencms.search.documents.I_TermHighlighter.<p> 2345 * 2346 * @param highlighter the package/class name of the highlighter 2347 */ 2348 public void setHighlighter(String highlighter) { 2349 2350 try { 2351 m_highlighter = (I_CmsTermHighlighter)Class.forName(highlighter).newInstance(); 2352 } catch (Exception e) { 2353 m_highlighter = null; 2354 LOG.error(e.getLocalizedMessage(), e); 2355 } 2356 } 2357 2358 /** 2359 * Sets the seconds to wait for an index lock during an update operation.<p> 2360 * 2361 * @param value the seconds to wait for an index lock during an update operation 2362 */ 2363 public void setIndexLockMaxWaitSeconds(int value) { 2364 2365 m_indexLockMaxWaitSeconds = value; 2366 } 2367 2368 /** 2369 * Sets the max. excerpt length.<p> 2370 * 2371 * @param maxExcerptLength the max. excerpt length to set 2372 */ 2373 public void setMaxExcerptLength(int maxExcerptLength) { 2374 2375 m_maxExcerptLength = maxExcerptLength; 2376 } 2377 2378 /** 2379 * Sets the max. excerpt length as a String.<p> 2380 * 2381 * @param maxExcerptLength the max. excerpt length to set 2382 */ 2383 public void setMaxExcerptLength(String maxExcerptLength) { 2384 2385 try { 2386 setMaxExcerptLength(Integer.parseInt(maxExcerptLength)); 2387 } catch (Exception e) { 2388 LOG.error( 2389 Messages.get().getBundle().key( 2390 Messages.LOG_PARSE_EXCERPT_LENGTH_FAILED_2, 2391 maxExcerptLength, 2392 new Integer(DEFAULT_EXCERPT_LENGTH)), 2393 e); 2394 setMaxExcerptLength(DEFAULT_EXCERPT_LENGTH); 2395 } 2396 } 2397 2398 /** 2399 * Sets the maximal wait time for offline index updates after edit operations.<p> 2400 * 2401 * @param maxIndexWaitTime the maximal wait time to set in milliseconds 2402 */ 2403 public void setMaxIndexWaitTime(long maxIndexWaitTime) { 2404 2405 m_maxIndexWaitTime = maxIndexWaitTime; 2406 } 2407 2408 /** 2409 * Sets the maximal wait time for offline index updates after edit operations.<p> 2410 * 2411 * @param maxIndexWaitTime the maximal wait time to set in milliseconds 2412 */ 2413 public void setMaxIndexWaitTime(String maxIndexWaitTime) { 2414 2415 try { 2416 setMaxIndexWaitTime(Long.parseLong(maxIndexWaitTime)); 2417 } catch (Exception e) { 2418 LOG.error( 2419 Messages.get().getBundle().key( 2420 Messages.LOG_PARSE_MAX_INDEX_WAITTIME_FAILED_2, 2421 maxIndexWaitTime, 2422 new Long(DEFAULT_MAX_INDEX_WAITTIME)), 2423 e); 2424 setMaxIndexWaitTime(DEFAULT_MAX_INDEX_WAITTIME); 2425 } 2426 } 2427 2428 /** 2429 * Sets the maximum number of modifications before a commit in the search index is triggered.<p> 2430 * 2431 * @param maxModificationsBeforeCommit the maximum number of modifications to set 2432 */ 2433 public void setMaxModificationsBeforeCommit(int maxModificationsBeforeCommit) { 2434 2435 m_maxModificationsBeforeCommit = maxModificationsBeforeCommit; 2436 } 2437 2438 /** 2439 * Sets the maximum number of modifications before a commit in the search index is triggered as a string.<p> 2440 * 2441 * @param value the maximum number of modifications to set 2442 */ 2443 public void setMaxModificationsBeforeCommit(String value) { 2444 2445 try { 2446 setMaxModificationsBeforeCommit(Integer.parseInt(value)); 2447 } catch (Exception e) { 2448 LOG.error( 2449 Messages.get().getBundle().key( 2450 Messages.LOG_PARSE_MAXCOMMIT_FAILED_2, 2451 value, 2452 new Integer(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT)), 2453 e); 2454 setMaxModificationsBeforeCommit(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT); 2455 } 2456 } 2457 2458 /** 2459 * Sets the update frequency of the offline indexer in milliseconds.<p> 2460 * 2461 * @param offlineUpdateFrequency the update frequency in milliseconds to set 2462 */ 2463 public void setOfflineUpdateFrequency(long offlineUpdateFrequency) { 2464 2465 m_offlineUpdateFrequency = offlineUpdateFrequency; 2466 updateOfflineIndexes(0); 2467 } 2468 2469 /** 2470 * Sets the update frequency of the offline indexer in milliseconds.<p> 2471 * 2472 * @param offlineUpdateFrequency the update frequency in milliseconds to set 2473 */ 2474 public void setOfflineUpdateFrequency(String offlineUpdateFrequency) { 2475 2476 try { 2477 setOfflineUpdateFrequency(Long.parseLong(offlineUpdateFrequency)); 2478 } catch (Exception e) { 2479 LOG.error( 2480 Messages.get().getBundle().key( 2481 Messages.LOG_PARSE_OFFLINE_UPDATE_FAILED_2, 2482 offlineUpdateFrequency, 2483 new Long(DEFAULT_OFFLINE_UPDATE_FREQNENCY)), 2484 e); 2485 setOfflineUpdateFrequency(DEFAULT_OFFLINE_UPDATE_FREQNENCY); 2486 } 2487 } 2488 2489 /** 2490 * Sets the Solr configuration.<p> 2491 * 2492 * @param config the Solr configuration 2493 */ 2494 public void setSolrServerConfiguration(CmsSolrConfiguration config) { 2495 2496 m_solrConfig = config; 2497 } 2498 2499 /** 2500 * Sets the timeout to abandon threads indexing a resource.<p> 2501 * 2502 * @param value the timeout in milliseconds 2503 */ 2504 public void setTimeout(long value) { 2505 2506 m_timeout = value; 2507 } 2508 2509 /** 2510 * Sets the timeout to abandon threads indexing a resource as a String.<p> 2511 * 2512 * @param value the timeout in milliseconds 2513 */ 2514 public void setTimeout(String value) { 2515 2516 try { 2517 setTimeout(Long.parseLong(value)); 2518 } catch (Exception e) { 2519 LOG.error( 2520 Messages.get().getBundle().key(Messages.LOG_PARSE_TIMEOUT_FAILED_2, value, new Long(DEFAULT_TIMEOUT)), 2521 e); 2522 setTimeout(DEFAULT_TIMEOUT); 2523 } 2524 } 2525 2526 /** 2527 * Shuts down the search manager.<p> 2528 * 2529 * This will cause all search indices to be shut down.<p> 2530 */ 2531 public void shutDown() { 2532 2533 if (m_offlineIndexThread != null) { 2534 m_offlineIndexThread.shutDown(); 2535 } 2536 2537 if (m_offlineHandler != null) { 2538 OpenCms.removeCmsEventListener(m_offlineHandler); 2539 } 2540 2541 Iterator<I_CmsSearchIndex> i = m_indexes.iterator(); 2542 while (i.hasNext()) { 2543 I_CmsSearchIndex index = i.next(); 2544 index.shutDown(); 2545 index = null; 2546 } 2547 m_indexes.clear(); 2548 2549 shutDownSolrContainer(); 2550 2551 if (CmsLog.INIT.isInfoEnabled()) { 2552 CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SHUTDOWN_MANAGER_0)); 2553 } 2554 } 2555 2556 /** 2557 * Updates all offline indexes.<p> 2558 * 2559 * Can be used to force an index update when it's not convenient to wait until the 2560 * offline update interval has eclipsed.<p> 2561 * 2562 * Since the offline indexes still need some time to update the new resources, 2563 * the method waits for at most the configurable <code>maxIndexWaitTime</code> 2564 * to ensure that updating is finished. 2565 * 2566 * @see #updateOfflineIndexes(long) 2567 * 2568 */ 2569 public void updateOfflineIndexes() { 2570 2571 updateOfflineIndexes(getMaxIndexWaitTime()); 2572 } 2573 2574 /** 2575 * Updates all offline indexes.<p> 2576 * 2577 * Can be used to force an index update when it's not convenient to wait until the 2578 * offline update interval has eclipsed.<p> 2579 * 2580 * Since the offline index will still need some time to update the new resources even if it runs directly, 2581 * a wait time of 2500 or so should be given in order to make sure the index finished updating. 2582 * 2583 * @param waitTime milliseconds to wait after the offline update index was notified of the changes 2584 */ 2585 public void updateOfflineIndexes(long waitTime) { 2586 2587 if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) { 2588 // notify existing thread of update frequency change 2589 if (LOG.isDebugEnabled()) { 2590 LOG.debug(Messages.get().getBundle().key(Messages.LOG_OI_UPDATE_INTERRUPT_0)); 2591 } 2592 m_offlineIndexThread.interrupt(); 2593 if (waitTime > 0) { 2594 m_offlineIndexThread.getWaitHandle().enter(waitTime); 2595 } 2596 } 2597 } 2598 2599 /** 2600 * Collects the resources whose indexed document depends on one of the updated resources.<p> 2601 * We take transitive dependencies into account and handle cyclic dependencies correctly as well. 2602 * 2603 * @param adminCms an OpenCms user context with Admin permissions 2604 * @param updateResources the resources to be re-indexed 2605 * 2606 * @return the updated list of resource to re-index 2607 */ 2608 protected List<CmsPublishedResource> addAdditionallyAffectedResources( 2609 CmsObject adminCms, 2610 List<CmsPublishedResource> updateResources) { 2611 2612 if (updateResources.size() > 0) { 2613 Set<CmsPublishedResource> updateResourceSet = new HashSet<>(updateResources); 2614 Collection<CmsPublishedResource> resourcesToCheck = updateResourceSet; 2615 Collection<CmsPublishedResource> additionalResources = Collections.emptySet(); 2616 do { 2617 additionalResources = findRelatedContainerPages(adminCms, updateResourceSet, resourcesToCheck); 2618 additionalResources.addAll( 2619 addIndexContentRelatedResources(adminCms, updateResourceSet, resourcesToCheck)); 2620 updateResources.addAll(additionalResources); 2621 updateResourceSet.addAll(additionalResources); 2622 resourcesToCheck = additionalResources; 2623 } while (resourcesToCheck.size() > 0); 2624 } 2625 return updateResources; 2626 } 2627 2628 /** 2629 * Collects the resources whose indexed document depends on one of the updated resources.<p> 2630 * 2631 * @param adminCms an OpenCms user context with Admin permissions 2632 * @param updateResources the resources to be re-indexed 2633 * @param updateResourcesToCheck the resources to check additionally affected resources for, subset of updateResources 2634 * 2635 * @return the list of resources that need to be additionally re-index 2636 */ 2637 protected Collection<CmsPublishedResource> addIndexContentRelatedResources( 2638 CmsObject adminCms, 2639 Collection<CmsPublishedResource> updateResources, 2640 Collection<CmsPublishedResource> updateResourcesToCheck) { 2641 2642 Collection<CmsPublishedResource> additionalUpdateResources = new HashSet<>(); 2643 for (CmsPublishedResource checkedRes : updateResourcesToCheck) { 2644 try { 2645 CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(checkedRes.getStructureId()); 2646 filter = filter.filterType(CmsRelationType.INDEX_CONTENT); 2647 List<CmsRelation> relations = adminCms.readRelations(filter); 2648 for (CmsRelation relation : relations) { 2649 CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL); 2650 CmsPublishedResource additionalPubRes = new CmsPublishedResource(res); 2651 if (!updateResources.contains(additionalPubRes)) { 2652 additionalUpdateResources.add(additionalPubRes); 2653 } 2654 } 2655 } catch (CmsException e) { 2656 LOG.error(e.getLocalizedMessage(), e); 2657 } 2658 } 2659 return additionalUpdateResources; 2660 } 2661 2662 /** 2663 * Cleans up the extraction result cache.<p> 2664 */ 2665 protected void cleanExtractionCache() { 2666 2667 // clean up the extraction result cache 2668 m_extractionResultCache.cleanCache(m_extractionCacheMaxAge); 2669 } 2670 2671 /** 2672 * Collects the related containerpages to the resources that have been published.<p> 2673 * 2674 * @param adminCms an OpenCms user context with Admin permissions 2675 * @param updateResources the resources to be re-indexed 2676 * @param updateResourcesToCheck the resources to check additionally affected resources for, subset of updateResources 2677 * 2678 * @return the list of resources that need to be additionally re-index 2679 */ 2680 protected Collection<CmsPublishedResource> findRelatedContainerPages( 2681 CmsObject adminCms, 2682 Collection<CmsPublishedResource> updateResources, 2683 Collection<CmsPublishedResource> updateResourcesToCheck) { 2684 2685 CmsResourceManager resMan = OpenCms.getResourceManager(); 2686 Collection<CmsPublishedResource> additionalUpdateResources = new HashSet<>(); 2687 2688 Set<CmsResource> containerPages = new HashSet<CmsResource>(); 2689 int containerPageTypeId = -1; 2690 try { 2691 containerPageTypeId = CmsResourceTypeXmlContainerPage.getContainerPageTypeId(); 2692 } catch (CmsLoaderException e) { 2693 // will happen during setup, when container page type is not available yet 2694 LOG.info(e.getLocalizedMessage(), e); 2695 } 2696 if (containerPageTypeId != -1) { 2697 for (CmsPublishedResource pubRes : updateResourcesToCheck) { 2698 try { 2699 if (resMan.getResourceType(pubRes.getType()) instanceof CmsResourceTypeXmlContent) { 2700 if (!isGroup(pubRes.getType())) { 2701 CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId( 2702 pubRes.getStructureId()).filterStrong(); 2703 List<CmsRelation> relations = adminCms.readRelations(filter); 2704 for (CmsRelation relation : relations) { 2705 CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL); 2706 if (CmsResourceTypeXmlContainerPage.isContainerPage(res)) { 2707 containerPages.add(res); 2708 if (CmsDetailOnlyContainerUtil.isDetailContainersPage( 2709 adminCms, 2710 adminCms.getSitePath(res))) { 2711 addDetailContent(adminCms, containerPages, adminCms.getSitePath(res)); 2712 } 2713 } 2714 } 2715 } 2716 } 2717 if (containerPageTypeId == pubRes.getType()) { 2718 addDetailContent( 2719 adminCms, 2720 containerPages, 2721 adminCms.getRequestContext().removeSiteRoot(pubRes.getRootPath())); 2722 } 2723 } catch (CmsException e) { 2724 LOG.error(e.getLocalizedMessage(), e); 2725 } 2726 } 2727 // add all found container pages as published resource objects to the list 2728 for (CmsResource page : containerPages) { 2729 CmsPublishedResource pubCont = new CmsPublishedResource(page); 2730 if (!updateResources.contains(pubCont)) { 2731 // ensure container page is added only once 2732 additionalUpdateResources.add(pubCont); 2733 } 2734 } 2735 } 2736 return additionalUpdateResources; 2737 } 2738 2739 /** 2740 * Returns the set of names of all configured document types.<p> 2741 * 2742 * @return the set of names of all configured document types 2743 */ 2744 protected List<String> getDocumentTypes() { 2745 2746 return Collections.unmodifiableList(new ArrayList<String>(m_documentTypes.keySet())); 2747 } 2748 2749 /** 2750 * Returns the a offline project used for offline indexing.<p> 2751 * 2752 * @return the offline project if available 2753 */ 2754 protected CmsProject getOfflineIndexProject() { 2755 2756 CmsProject result = null; 2757 for (I_CmsSearchIndex index : m_offlineIndexes) { 2758 try { 2759 result = m_adminCms.readProject(index.getProject()); 2760 2761 if (!result.isOnlineProject()) { 2762 break; 2763 } 2764 } catch (Exception e) { 2765 // may be a missconfigured index, ignore 2766 LOG.error(e.getLocalizedMessage(), e); 2767 } 2768 } 2769 return result; 2770 } 2771 2772 /** 2773 * Returns a new thread manager for the indexing threads.<p> 2774 * 2775 * @return a new thread manager for the indexing threads 2776 */ 2777 protected CmsIndexingThreadManager getThreadManager() { 2778 2779 return new CmsIndexingThreadManager(m_timeout, m_maxModificationsBeforeCommit); 2780 } 2781 2782 /** 2783 * Initializes the available Cms resource types to be indexed.<p> 2784 * 2785 * A map stores document factories keyed by a string representing 2786 * a colon separated list of Cms resource types and/or mimetypes.<p> 2787 * 2788 * The keys of this map are used to trigger a document factory to convert 2789 * a Cms resource into a Lucene index document.<p> 2790 * 2791 * A document factory is a class implementing the interface 2792 * {@link org.opencms.search.documents.I_CmsDocumentFactory}.<p> 2793 */ 2794 protected void initAvailableDocumentTypes() { 2795 2796 CmsSearchDocumentType documenttype = null; 2797 String className = null; 2798 String name = null; 2799 I_CmsDocumentFactory documentFactory = null; 2800 List<String> resourceTypes = null; 2801 List<String> mimeTypes = null; 2802 Class<?> c = null; 2803 2804 m_documentTypes = new LinkedHashMap<String, Map<String, I_CmsDocumentFactory>>(); 2805 2806 for (int i = 0, n = m_documentTypeConfigs.size(); i < n; i++) { 2807 2808 documenttype = m_documentTypeConfigs.get(i); 2809 name = documenttype.getName(); 2810 2811 try { 2812 className = documenttype.getClassName(); 2813 resourceTypes = documenttype.getResourceTypes(); 2814 mimeTypes = documenttype.getMimeTypes(); 2815 2816 if (name == null) { 2817 throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_NAME_0)); 2818 } 2819 if (className == null) { 2820 throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_CLASS_DEF_0)); 2821 } 2822 if (resourceTypes.size() == 0) { 2823 throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_RESOURCETYPE_DEF_0)); 2824 } 2825 2826 try { 2827 c = Class.forName(className); 2828 documentFactory = (I_CmsDocumentFactory)c.getConstructor(new Class[] {String.class}).newInstance( 2829 new Object[] {name}); 2830 } catch (ClassNotFoundException exc) { 2831 throw new CmsIndexException( 2832 Messages.get().container(Messages.ERR_DOCCLASS_NOT_FOUND_1, className), 2833 exc); 2834 } catch (Exception exc) { 2835 throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCCLASS_INIT_1, className), exc); 2836 } 2837 2838 if (documentFactory.isUsingCache()) { 2839 // init cache if used by the factory 2840 documentFactory.setCache(m_extractionResultCache); 2841 } 2842 2843 Map<String, I_CmsDocumentFactory> matchingTypes = new HashMap<>(); 2844 for (Iterator<String> keyIt = documentFactory.getDocumentKeys( 2845 resourceTypes, 2846 mimeTypes).iterator(); keyIt.hasNext();) { 2847 String key = keyIt.next(); 2848 matchingTypes.put(key, documentFactory); 2849 m_extractionKeys.add(key); 2850 } 2851 m_documentTypes.put(name, matchingTypes); 2852 2853 } catch (CmsException e) { 2854 if (LOG.isWarnEnabled()) { 2855 LOG.warn(Messages.get().getBundle().key(Messages.LOG_DOCTYPE_CONFIG_FAILED_1, name), e); 2856 } 2857 } 2858 } 2859 } 2860 2861 /** 2862 * Initializes the index sources. 2863 */ 2864 protected void initIndexSources() { 2865 2866 for (CmsSearchIndexSource source : m_indexSources.values()) { 2867 source.init(); 2868 } 2869 } 2870 2871 /** 2872 * Initializes the configured search indexes.<p> 2873 * 2874 * This initializes also the list of Cms resources types 2875 * to be indexed by an index source.<p> 2876 */ 2877 protected void initSearchIndexes() { 2878 2879 I_CmsSearchIndex index = null; 2880 for (int i = 0, n = m_indexes.size(); i < n; i++) { 2881 index = m_indexes.get(i); 2882 // reset disabled flag 2883 index.setEnabled(true); 2884 // check if the index has been configured correctly 2885 if (index.checkConfiguration(m_adminCms)) { 2886 // the index is configured correctly 2887 try { 2888 index.initialize(); 2889 } catch (Exception e) { 2890 if (CmsLog.INIT.isWarnEnabled()) { 2891 // in this case the index will be disabled 2892 CmsLog.INIT.warn(Messages.get().getBundle().key(Messages.INIT_SEARCH_INIT_FAILED_1, index), e); 2893 } 2894 } 2895 } 2896 // output a log message if the index was successfully configured or not 2897 if (CmsLog.INIT.isInfoEnabled()) { 2898 if (index.isEnabled()) { 2899 CmsLog.INIT.info( 2900 Messages.get().getBundle().key(Messages.INIT_INDEX_CONFIGURED_2, index, index.getProject())); 2901 } else { 2902 CmsLog.INIT.warn( 2903 Messages.get().getBundle().key( 2904 Messages.INIT_INDEX_NOT_CONFIGURED_2, 2905 index, 2906 index.getProject())); 2907 } 2908 } 2909 } 2910 } 2911 2912 /** 2913 * Checks, if the index should be rebuilt/updated at all by the search manager. 2914 * @param index the index to check. 2915 * @return a flag, indicating if the index should be rebuilt/updated at all. 2916 */ 2917 protected boolean shouldUpdateAtAll(I_CmsSearchIndex index) { 2918 2919 if (I_CmsSearchIndex.REBUILD_MODE_NEVER.equals(index.getRebuildMode())) { 2920 LOG.debug(Messages.get().getBundle().key(Messages.LOG_SKIP_REBUILD_FOR_MODE_NEVER_1, index.getName())); 2921 return false; 2922 } else { 2923 return true; 2924 } 2925 2926 } 2927 2928 /** 2929 * Incrementally updates all indexes that have their rebuild mode set to <code>"auto"</code> 2930 * after resources have been published.<p> 2931 * 2932 * @param adminCms an OpenCms user context with Admin permissions 2933 * @param publishHistoryId the history ID of the published project 2934 * @param report the report to write the output to 2935 */ 2936 protected void updateAllIndexes(CmsObject adminCms, CmsUUID publishHistoryId, I_CmsReport report) { 2937 2938 int oldPriority = Thread.currentThread().getPriority(); 2939 try { 2940 SEARCH_MANAGER_LOCK.lock(); 2941 Thread.currentThread().setPriority(Thread.MIN_PRIORITY); 2942 List<CmsPublishedResource> publishedResources; 2943 try { 2944 // read the list of all published resources 2945 publishedResources = adminCms.readPublishedResources(publishHistoryId); 2946 } catch (CmsException e) { 2947 LOG.error( 2948 Messages.get().getBundle().key(Messages.LOG_READING_CHANGED_RESOURCES_FAILED_1, publishHistoryId), 2949 e); 2950 return; 2951 } 2952 Set<CmsUUID> bothNewAndDeleted = getIdsOfPublishResourcesWhichAreBothNewAndDeleted(publishedResources); 2953 // When published resources with both states 'new' and 'deleted' exist in the same publish job history, the resource has been moved 2954 2955 List<CmsPublishedResource> updateResources = new ArrayList<CmsPublishedResource>(); 2956 for (CmsPublishedResource res : publishedResources) { 2957 if (res.isFolder() || res.getState().isUnchanged()) { 2958 // folders and unchanged resources don't need to be indexed after publish 2959 continue; 2960 } 2961 if (res.getState().isDeleted() || res.getState().isNew() || res.getState().isChanged()) { 2962 if (updateResources.contains(res)) { 2963 // resource may have been added as a sibling of another resource 2964 // in this case we make sure to use the value from the publish list because of the "deleted" flag 2965 boolean hasMoved = bothNewAndDeleted.contains(res.getStructureId()) 2966 || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_DESTINATION) 2967 || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_SOURCE); 2968 // check it this is a moved resource with source / target info, in this case we need both entries 2969 if (!hasMoved) { 2970 // if the resource was moved, we must contain both entries 2971 updateResources.remove(res); 2972 } 2973 // "equals()" implementation of published resource checks for id, 2974 // so the removed value may have a different "deleted" or "modified" status value 2975 updateResources.add(res); 2976 } else { 2977 // resource not yet contained in the list 2978 updateResources.add(res); 2979 // check for the siblings (not for deleted resources, these are already gone) 2980 if (!res.getState().isDeleted() && (res.getSiblingCount() > 1)) { 2981 // this resource has siblings 2982 try { 2983 // read siblings from the online project 2984 List<CmsResource> siblings = adminCms.readSiblings( 2985 res.getRootPath(), 2986 CmsResourceFilter.ALL); 2987 Iterator<CmsResource> itSib = siblings.iterator(); 2988 while (itSib.hasNext()) { 2989 // check all siblings 2990 CmsResource sibling = itSib.next(); 2991 CmsPublishedResource sib = new CmsPublishedResource(sibling); 2992 if (!updateResources.contains(sib)) { 2993 // ensure sibling is added only once 2994 updateResources.add(sib); 2995 } 2996 } 2997 } catch (CmsException e) { 2998 // ignore, just use the original resource 2999 if (LOG.isWarnEnabled()) { 3000 LOG.warn( 3001 Messages.get().getBundle().key( 3002 Messages.LOG_UNABLE_TO_READ_SIBLINGS_1, 3003 res.getRootPath()), 3004 e); 3005 } 3006 } 3007 } 3008 } 3009 } 3010 } 3011 3012 addAdditionallyAffectedResources(adminCms, updateResources); 3013 updateAllIndexes(adminCms, updateResources, report); 3014 } finally { 3015 SEARCH_MANAGER_LOCK.unlock(); 3016 Thread.currentThread().setPriority(oldPriority); 3017 } 3018 } 3019 3020 /** 3021 * Incrementally updates all indexes that have their rebuild mode set to <code>"auto"</code>.<p> 3022 * 3023 * @param adminCms an OpenCms user context with Admin permissions 3024 * @param updateResources the resources to update 3025 * @param report the report to write the output to 3026 */ 3027 protected void updateAllIndexes( 3028 CmsObject adminCms, 3029 List<CmsPublishedResource> updateResources, 3030 I_CmsReport report) { 3031 3032 try { 3033 SEARCH_MANAGER_LOCK.lock(); 3034 if (!updateResources.isEmpty()) { 3035 // sort the resource to update 3036 Collections.sort(updateResources); 3037 // only update the indexes if the list of remaining published resources is not empty 3038 Iterator<I_CmsSearchIndex> i = m_indexes.iterator(); 3039 while (i.hasNext()) { 3040 I_CmsSearchIndex index = i.next(); 3041 if (I_CmsSearchIndex.REBUILD_MODE_AUTO.equals(index.getRebuildMode())) { 3042 // only update indexes which have the rebuild mode set to "auto" 3043 try { 3044 updateIndex(index, report, updateResources); 3045 } catch (CmsException e) { 3046 LOG.error( 3047 Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()), 3048 e); 3049 } 3050 } 3051 } 3052 } 3053 // clean up the extraction result cache 3054 cleanExtractionCache(); 3055 } finally { 3056 SEARCH_MANAGER_LOCK.unlock(); 3057 } 3058 3059 } 3060 3061 /** 3062 * Updates (if required creates) the index with the given name.<p> 3063 * 3064 * If the optional List of <code>{@link CmsPublishedResource}</code> instances is provided, the index will be 3065 * incrementally updated for these resources only. If this List is <code>null</code> or empty, 3066 * the index will be fully rebuild.<p> 3067 * 3068 * @param index the index to update or rebuild 3069 * @param report the report to write output messages to 3070 * @param resourcesToIndex an (optional) list of <code>{@link CmsPublishedResource}</code> objects to update in the index 3071 * 3072 * @throws CmsException if something goes wrong 3073 */ 3074 protected void updateIndex(I_CmsSearchIndex index, I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) 3075 throws CmsException { 3076 3077 if (shouldUpdateAtAll(index)) { 3078 try { 3079 SEARCH_MANAGER_LOCK.lock(); 3080 3081 // copy the stored admin context for the indexing 3082 CmsObject cms = OpenCms.initCmsObject(m_adminCms); 3083 // make sure a report is available 3084 if (report == null) { 3085 report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class); 3086 } 3087 3088 // check if the index has been configured correctly 3089 if (!index.checkConfiguration(cms)) { 3090 // the index is disabled 3091 return; 3092 } 3093 3094 // set site root and project for this index 3095 cms.getRequestContext().setSiteRoot("/"); 3096 // switch to the index project 3097 cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject())); 3098 3099 if ((resourcesToIndex == null) || resourcesToIndex.isEmpty()) { 3100 // rebuild the complete index 3101 3102 updateIndexCompletely(cms, index, report); 3103 } else { 3104 updateIndexIncremental(cms, index, report, resourcesToIndex); 3105 } 3106 } finally { 3107 SEARCH_MANAGER_LOCK.unlock(); 3108 } 3109 } 3110 } 3111 3112 /** 3113 * The method updates all OpenCms documents that are indexed. 3114 * @param cms the OpenCms user context to use for accessing the VFS 3115 * @param index the index to update 3116 * @param report the report to write output messages to 3117 * @throws CmsIndexException thrown if indexing fails for some reason 3118 */ 3119 @SuppressWarnings("null") 3120 protected void updateIndexCompletely(CmsObject cms, I_CmsSearchIndex index, I_CmsReport report) 3121 throws CmsIndexException { 3122 3123 // create a new thread manager for the indexing threads 3124 CmsIndexingThreadManager threadManager = getThreadManager(); 3125 3126 boolean isOfflineIndex = false; 3127 if (I_CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) { 3128 // disable offline indexing while the complete index is rebuild 3129 isOfflineIndex = true; 3130 index.setRebuildMode(I_CmsSearchIndex.REBUILD_MODE_MANUAL); 3131 // re-initialize the offline indexes, this will disable this offline index 3132 initOfflineIndexes(); 3133 } 3134 3135 I_CmsIndexWriter writer = null; 3136 try { 3137 // create a backup of the existing index 3138 CmsSearchIndex indexInternal = null; 3139 String backup = null; 3140 if (index instanceof CmsSearchIndex) { 3141 indexInternal = (CmsSearchIndex)index; 3142 backup = indexInternal.createIndexBackup(); 3143 if (backup != null) { 3144 indexInternal.indexSearcherOpen(backup); 3145 } 3146 } 3147 3148 // create a new index writer 3149 writer = index.getIndexWriter(report, true); 3150 if (writer instanceof I_CmsSolrIndexWriter) { 3151 try { 3152 ((I_CmsSolrIndexWriter)writer).deleteAllDocuments(); 3153 } catch (IOException e) { 3154 LOG.error(e.getMessage(), e); 3155 } 3156 } 3157 3158 // output start information on the report 3159 report.println( 3160 Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_BEGIN_1, index.getName()), 3161 I_CmsReport.FORMAT_HEADLINE); 3162 3163 // iterate all configured index sources of this index 3164 Iterator<CmsSearchIndexSource> sources = index.getSources().iterator(); 3165 while (sources.hasNext()) { 3166 // get the next index source 3167 CmsSearchIndexSource source = sources.next(); 3168 // create the indexer 3169 I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index); 3170 // new index creation, use all resources from the index source 3171 indexer.rebuildIndex(writer, threadManager, source); 3172 3173 // wait for indexing threads to finish 3174 while (threadManager.isRunning()) { 3175 try { 3176 Thread.sleep(500); 3177 } catch (InterruptedException e) { 3178 // just continue with the loop after interruption 3179 LOG.info(e.getLocalizedMessage(), e); 3180 } 3181 } 3182 3183 // commit and optimize the index after each index source has been finished 3184 try { 3185 writer.commit(); 3186 } catch (IOException e) { 3187 if (LOG.isWarnEnabled()) { 3188 LOG.warn( 3189 Messages.get().getBundle().key( 3190 Messages.LOG_IO_INDEX_WRITER_COMMIT_2, 3191 index.getName(), 3192 index.getPath()), 3193 e); 3194 } 3195 } 3196 try { 3197 writer.optimize(); 3198 } catch (IOException e) { 3199 if (LOG.isWarnEnabled()) { 3200 LOG.warn( 3201 Messages.get().getBundle().key( 3202 Messages.LOG_IO_INDEX_WRITER_OPTIMIZE_2, 3203 index.getName(), 3204 index.getPath()), 3205 e); 3206 } 3207 } 3208 } 3209 3210 // we are sure here that indexInternal is not null 3211 if (backup != null) { 3212 // remove the backup after the files have been re-indexed 3213 indexInternal.indexSearcherClose(); 3214 indexInternal.removeIndexBackup(backup); 3215 } 3216 3217 // output finish information on the report 3218 report.println( 3219 Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_END_1, index.getName()), 3220 I_CmsReport.FORMAT_HEADLINE); 3221 3222 } finally { 3223 if (writer != null) { 3224 try { 3225 writer.close(); 3226 } catch (IOException e) { 3227 if (LOG.isWarnEnabled()) { 3228 LOG.warn( 3229 Messages.get().getBundle().key( 3230 Messages.LOG_IO_INDEX_WRITER_CLOSE_2, 3231 index.getPath(), 3232 index.getName()), 3233 e); 3234 } 3235 } 3236 } 3237 if (isOfflineIndex) { 3238 // reset the mode of the offline index 3239 index.setRebuildMode(I_CmsSearchIndex.REBUILD_MODE_OFFLINE); 3240 // re-initialize the offline indexes, this will re-enable this index 3241 initOfflineIndexes(); 3242 } 3243 // index has changed - initialize the index searcher instance 3244 index.onIndexChanged(true); 3245 } 3246 3247 // show information about indexing runtime 3248 threadManager.reportStatistics(report); 3249 } 3250 3251 /** 3252 * Incrementally updates the given index.<p> 3253 * 3254 * @param cms the OpenCms user context to use for accessing the VFS 3255 * @param index the index to update 3256 * @param report the report to write output messages to 3257 * @param resourcesToIndex a list of <code>{@link CmsPublishedResource}</code> objects to update in the index 3258 * 3259 * @throws CmsException if something goes wrong 3260 */ 3261 protected void updateIndexIncremental( 3262 CmsObject cms, 3263 I_CmsSearchIndex index, 3264 I_CmsReport report, 3265 List<CmsPublishedResource> resourcesToIndex) 3266 throws CmsException { 3267 3268 try { 3269 SEARCH_MANAGER_LOCK.lock(); 3270 3271 // update the existing index 3272 List<CmsSearchIndexUpdateData> updateCollections = new ArrayList<CmsSearchIndexUpdateData>(); 3273 3274 boolean hasResourcesToDelete = false; 3275 boolean hasResourcesToUpdate = false; 3276 3277 // iterate all configured index sources of this index 3278 Iterator<CmsSearchIndexSource> sources = index.getSources().iterator(); 3279 while (sources.hasNext()) { 3280 // get the next index source 3281 CmsSearchIndexSource source = sources.next(); 3282 // create the indexer 3283 I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index); 3284 // collect the resources to update 3285 CmsSearchIndexUpdateData updateData = indexer.getUpdateData(source, resourcesToIndex); 3286 if (!updateData.isEmpty()) { 3287 // add the update collection to the internal pipeline 3288 updateCollections.add(updateData); 3289 hasResourcesToDelete = hasResourcesToDelete | updateData.hasResourcesToDelete(); 3290 hasResourcesToUpdate = hasResourcesToUpdate | updateData.hasResourceToUpdate(); 3291 } 3292 } 3293 3294 // only start index modification if required 3295 if (hasResourcesToDelete || hasResourcesToUpdate) { 3296 // output start information on the report 3297 report.println( 3298 Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_BEGIN_1, index.getName()), 3299 I_CmsReport.FORMAT_HEADLINE); 3300 3301 I_CmsIndexWriter writer = null; 3302 try { 3303 // obtain an index writer that updates the current index 3304 writer = index.getIndexWriter(report, false); 3305 3306 if (hasResourcesToDelete) { 3307 // delete the resource from the index 3308 Iterator<CmsSearchIndexUpdateData> i = updateCollections.iterator(); 3309 while (i.hasNext()) { 3310 CmsSearchIndexUpdateData updateCollection = i.next(); 3311 if (updateCollection.hasResourcesToDelete()) { 3312 updateCollection.getIndexer().deleteResources( 3313 writer, 3314 updateCollection.getResourcesToDelete()); 3315 } 3316 } 3317 } 3318 3319 if (hasResourcesToUpdate) { 3320 // create a new thread manager 3321 CmsIndexingThreadManager threadManager = getThreadManager(); 3322 3323 Iterator<CmsSearchIndexUpdateData> i = updateCollections.iterator(); 3324 while (i.hasNext()) { 3325 CmsSearchIndexUpdateData updateCollection = i.next(); 3326 if (updateCollection.hasResourceToUpdate()) { 3327 updateCollection.getIndexer().updateResources( 3328 writer, 3329 threadManager, 3330 updateCollection.getResourcesToUpdate()); 3331 } 3332 } 3333 3334 // wait for indexing threads to finish 3335 while (threadManager.isRunning()) { 3336 try { 3337 Thread.sleep(500); 3338 } catch (InterruptedException e) { 3339 // just continue with the loop after interruption 3340 LOG.info(e.getLocalizedMessage(), e); 3341 } 3342 } 3343 } 3344 } finally { 3345 // close the index writer 3346 if (writer != null) { 3347 try { 3348 writer.commit(); 3349 } catch (IOException e) { 3350 LOG.error( 3351 Messages.get().getBundle().key( 3352 Messages.LOG_IO_INDEX_WRITER_COMMIT_2, 3353 index.getName(), 3354 index.getPath()), 3355 e); 3356 } 3357 } 3358 // index has changed - initialize the index searcher instance 3359 index.onIndexChanged(false); 3360 } 3361 3362 // output finish information on the report 3363 report.println( 3364 Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_END_1, index.getName()), 3365 I_CmsReport.FORMAT_HEADLINE); 3366 } 3367 } finally { 3368 SEARCH_MANAGER_LOCK.unlock(); 3369 } 3370 } 3371 3372 /** 3373 * Updates the offline search indexes for the given list of resources.<p> 3374 * 3375 * @param report the report to write the index information to 3376 * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index 3377 */ 3378 protected void updateIndexOffline(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) { 3379 3380 CmsObject cms = m_adminCms; 3381 try { 3382 // copy the administration context for the indexing 3383 cms = OpenCms.initCmsObject(m_adminCms); 3384 // set site root and project for this index 3385 cms.getRequestContext().setSiteRoot("/"); 3386 } catch (CmsException e) { 3387 LOG.error(e.getLocalizedMessage(), e); 3388 } 3389 3390 Iterator<I_CmsSearchIndex> j = m_offlineIndexes.iterator(); 3391 while (j.hasNext()) { 3392 I_CmsSearchIndex index = j.next(); 3393 if (index.getSources() != null) { 3394 try { 3395 // switch to the index project 3396 cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject())); 3397 updateIndexIncremental(cms, index, report, resourcesToIndex); 3398 } catch (CmsException e) { 3399 LOG.error(Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()), e); 3400 } 3401 } 3402 } 3403 } 3404 3405 /** 3406 * Checks if the given containerpage is used as a detail containers and adds the related detail content to the resource set.<p> 3407 * 3408 * @param adminCms the cms context 3409 * @param containerPages the containerpages 3410 * @param containerPage the container page site path 3411 */ 3412 private void addDetailContent(CmsObject adminCms, Set<CmsResource> containerPages, String containerPage) { 3413 3414 if (CmsDetailOnlyContainerUtil.isDetailContainersPage(adminCms, containerPage)) { 3415 3416 try { 3417 CmsResource detailRes = adminCms.readResource( 3418 CmsDetailOnlyContainerUtil.getDetailContentPath(containerPage), 3419 CmsResourceFilter.IGNORE_EXPIRATION); 3420 containerPages.add(detailRes); 3421 } catch (Throwable e) { 3422 if (LOG.isWarnEnabled()) { 3423 LOG.warn(e.getLocalizedMessage(), e); 3424 } 3425 } 3426 } 3427 } 3428 3429 /** 3430 * Creates the Solr core container.<p> 3431 * 3432 * @return the created core container 3433 */ 3434 private CoreContainer createCoreContainer() { 3435 3436 CoreContainer container = null; 3437 try { 3438 // get the core container 3439 // still no core container: create it 3440 container = CoreContainer.createAndLoad( 3441 Paths.get(m_solrConfig.getHome()), 3442 m_solrConfig.getSolrFile().toPath()); 3443 if (CmsLog.INIT.isInfoEnabled()) { 3444 CmsLog.INIT.info( 3445 Messages.get().getBundle().key( 3446 Messages.INIT_SOLR_CORE_CONTAINER_CREATED_2, 3447 m_solrConfig.getHome(), 3448 m_solrConfig.getSolrFile().getName())); 3449 } 3450 } catch (Exception e) { 3451 LOG.error( 3452 Messages.get().getBundle().key( 3453 Messages.ERR_SOLR_CORE_CONTAINER_NOT_CREATED_1, 3454 m_solrConfig.getSolrFile().getAbsolutePath()), 3455 e); 3456 } 3457 return container; 3458 3459 } 3460 3461 /** 3462 * Remove write.lock file in the data directory to ensure the index is unlocked. 3463 * @param dataDir the data directory of the Solr index that should be unlocked. 3464 */ 3465 private void ensureIndexIsUnlocked(String dataDir) { 3466 3467 Collection<File> lockFiles = new ArrayList<File>(2); 3468 lockFiles.add( 3469 new File( 3470 CmsFileUtil.addTrailingSeparator(CmsFileUtil.addTrailingSeparator(dataDir) + "index") + "write.lock")); 3471 lockFiles.add( 3472 new File( 3473 CmsFileUtil.addTrailingSeparator(CmsFileUtil.addTrailingSeparator(dataDir) + "spellcheck") 3474 + "write.lock")); 3475 for (File lockFile : lockFiles) { 3476 if (lockFile.exists()) { 3477 lockFile.delete(); 3478 LOG.warn( 3479 "Forcely unlocking index with data dir \"" 3480 + dataDir 3481 + "\" by removing file \"" 3482 + lockFile.getAbsolutePath() 3483 + "\"."); 3484 } 3485 } 3486 } 3487 3488 /** 3489 * Returns the report in the given event data, if <code>null</code> 3490 * a new log report is used.<p> 3491 * 3492 * @param event the event to get the report for 3493 * 3494 * @return the report 3495 */ 3496 private I_CmsReport getEventReport(CmsEvent event) { 3497 3498 I_CmsReport report = null; 3499 if (event.getData() != null) { 3500 report = (I_CmsReport)event.getData().get(I_CmsEventListener.KEY_REPORT); 3501 } 3502 if (report == null) { 3503 report = new CmsLogReport(Locale.ENGLISH, getClass()); 3504 } 3505 return report; 3506 } 3507 3508 /** 3509 * Gets all structure ids for which published resources of both states 'new' and 'deleted' exist in the given list.<p> 3510 * 3511 * @param publishedResources a list of published resources 3512 * 3513 * @return the set of structure ids that satisfy the condition above 3514 */ 3515 private Set<CmsUUID> getIdsOfPublishResourcesWhichAreBothNewAndDeleted( 3516 List<CmsPublishedResource> publishedResources) { 3517 3518 Set<CmsUUID> result = new HashSet<CmsUUID>(); 3519 Set<CmsUUID> deletedSet = new HashSet<CmsUUID>(); 3520 for (CmsPublishedResource pubRes : publishedResources) { 3521 if (pubRes.getState().isNew()) { 3522 result.add(pubRes.getStructureId()); 3523 } 3524 if (pubRes.getState().isDeleted()) { 3525 deletedSet.add(pubRes.getStructureId()); 3526 } 3527 } 3528 result.retainAll(deletedSet); 3529 return result; 3530 } 3531 3532 /** 3533 * Checks if the given type id belongs to a group type. 3534 * 3535 * @param type the type id to check 3536 * @return true if the type is a group type 3537 */ 3538 private boolean isGroup(int type) { 3539 3540 for (String groupType : groupTypes) { 3541 if (OpenCms.getResourceManager().matchResourceType(groupType, type)) { 3542 return true; 3543 } 3544 } 3545 return false; 3546 3547 } 3548 3549 /** 3550 * Shuts down the Solr core container.<p> 3551 */ 3552 private void shutDownSolrContainer() { 3553 3554 if (m_coreContainer != null) { 3555 for (SolrCore core : m_coreContainer.getCores()) { 3556 // do not unload spellcheck core because otherwise the core.properties file is removed 3557 // even when calling m_coreContainer.unload(core.getName(), false, false, false); 3558 if (!core.getName().equals(CmsSolrSpellchecker.SPELLCHECKER_INDEX_CORE)) { 3559 m_coreContainer.unload(core.getName(), false, false, true); 3560 } 3561 } 3562 m_coreContainer.shutdown(); 3563 if (CmsLog.INIT.isInfoEnabled()) { 3564 CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SOLR_SHUTDOWN_SUCCESS_0)); 3565 } 3566 m_coreContainer = null; 3567 } 3568 } 3569 3570}