001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH & Co. KG (https://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software GmbH & Co. KG, please see the 018 * company website: https://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: https://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.search; 029 030import org.opencms.ade.containerpage.CmsDetailOnlyContainerUtil; 031import org.opencms.configuration.CmsConfigurationException; 032import org.opencms.db.CmsDriverManager; 033import org.opencms.db.CmsModificationContext; 034import org.opencms.db.CmsPublishedResource; 035import org.opencms.db.CmsResourceState; 036import org.opencms.file.CmsObject; 037import org.opencms.file.CmsProject; 038import org.opencms.file.CmsResource; 039import org.opencms.file.CmsResourceFilter; 040import org.opencms.file.CmsUser; 041import org.opencms.file.types.CmsResourceTypeXmlContainerPage; 042import org.opencms.file.types.CmsResourceTypeXmlContent; 043import org.opencms.file.types.I_CmsResourceType; 044import org.opencms.i18n.CmsLocaleManager; 045import org.opencms.i18n.CmsMessageContainer; 046import org.opencms.loader.CmsLoaderException; 047import org.opencms.loader.CmsResourceManager; 048import org.opencms.main.CmsBroadcast.ContentMode; 049import org.opencms.main.CmsEvent; 050import org.opencms.main.CmsException; 051import org.opencms.main.CmsIllegalArgumentException; 052import org.opencms.main.CmsIllegalStateException; 053import org.opencms.main.CmsLog; 054import org.opencms.main.I_CmsEventListener; 055import org.opencms.main.OpenCms; 056import org.opencms.main.OpenCmsSolrHandler; 057import org.opencms.relations.CmsRelation; 058import org.opencms.relations.CmsRelationFilter; 059import org.opencms.relations.CmsRelationType; 060import org.opencms.report.CmsLogReport; 061import org.opencms.report.CmsShellLogReport; 062import org.opencms.report.I_CmsReport; 063import org.opencms.scheduler.I_CmsScheduledJob; 064import org.opencms.search.documents.A_CmsVfsDocument; 065import org.opencms.search.documents.CmsExtractionResultCache; 066import org.opencms.search.documents.I_CmsDocumentFactory; 067import org.opencms.search.documents.I_CmsTermHighlighter; 068import org.opencms.search.fields.CmsLuceneField; 069import org.opencms.search.fields.CmsLuceneFieldConfiguration; 070import org.opencms.search.fields.CmsSearchField; 071import org.opencms.search.fields.CmsSearchFieldConfiguration; 072import org.opencms.search.fields.CmsSearchFieldMapping; 073import org.opencms.search.fields.I_CmsSearchFieldConfiguration; 074import org.opencms.search.solr.CmsSolrConfiguration; 075import org.opencms.search.solr.CmsSolrFieldConfiguration; 076import org.opencms.search.solr.CmsSolrIndex; 077import org.opencms.search.solr.I_CmsSolrIndexWriter; 078import org.opencms.search.solr.spellchecking.CmsSolrSpellchecker; 079import org.opencms.search.solr.spellchecking.CmsSpellcheckDictionaryIndexer; 080import org.opencms.security.CmsRole; 081import org.opencms.security.CmsRoleViolationException; 082import org.opencms.util.A_CmsModeStringEnumeration; 083import org.opencms.util.CmsFileUtil; 084import org.opencms.util.CmsPriorityLock; 085import org.opencms.util.CmsStringUtil; 086import org.opencms.util.CmsUUID; 087import org.opencms.util.CmsWaitHandle; 088 089import java.io.File; 090import java.io.IOException; 091import java.nio.file.FileSystems; 092import java.nio.file.Paths; 093import java.util.ArrayList; 094import java.util.Collection; 095import java.util.Collections; 096import java.util.HashMap; 097import java.util.HashSet; 098import java.util.Iterator; 099import java.util.LinkedHashMap; 100import java.util.List; 101import java.util.ListIterator; 102import java.util.Locale; 103import java.util.Map; 104import java.util.Set; 105import java.util.TreeMap; 106import java.util.concurrent.LinkedBlockingQueue; 107import java.util.concurrent.ScheduledFuture; 108import java.util.concurrent.ScheduledThreadPoolExecutor; 109import java.util.concurrent.ThreadPoolExecutor; 110import java.util.concurrent.TimeUnit; 111import java.util.stream.Collectors; 112 113import org.apache.commons.logging.Log; 114import org.apache.lucene.analysis.Analyzer; 115import org.apache.lucene.analysis.CharArraySet; 116import org.apache.lucene.analysis.standard.StandardAnalyzer; 117import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; 118import org.apache.solr.client.solrj.impl.HttpSolrClient.Builder; 119import org.apache.solr.core.CoreContainer; 120import org.apache.solr.core.CoreDescriptor; 121import org.apache.solr.core.SolrCore; 122 123import com.google.common.util.concurrent.ThreadFactoryBuilder; 124 125/** 126 * Implements the general management and configuration of the search and 127 * indexing facilities in OpenCms.<p> 128 * 129 * @since 6.0.0 130 */ 131public class CmsSearchManager implements I_CmsScheduledJob, I_CmsEventListener { 132 133 /** 134 * Enumeration class for force unlock types.<p> 135 */ 136 public static final class CmsSearchForceUnlockMode extends A_CmsModeStringEnumeration { 137 138 /** Force unlock type "always". */ 139 public static final CmsSearchForceUnlockMode ALWAYS = new CmsSearchForceUnlockMode("always"); 140 141 /** Force unlock type "never". */ 142 public static final CmsSearchForceUnlockMode NEVER = new CmsSearchForceUnlockMode("never"); 143 144 /** Force unlock type "only full". */ 145 public static final CmsSearchForceUnlockMode ONLYFULL = new CmsSearchForceUnlockMode("onlyfull"); 146 147 /** Serializable version id. */ 148 private static final long serialVersionUID = 74746076708908673L; 149 150 /** 151 * Creates a new force unlock type with the given name.<p> 152 * 153 * @param mode the mode id to use 154 */ 155 protected CmsSearchForceUnlockMode(String mode) { 156 157 super(mode); 158 } 159 160 /** 161 * Returns the lock type for the given type value.<p> 162 * 163 * @param type the type value to get the lock type for 164 * 165 * @return the lock type for the given type value 166 */ 167 public static CmsSearchForceUnlockMode valueOf(String type) { 168 169 if (type.equals(ALWAYS.toString())) { 170 return ALWAYS; 171 } else if (type.equals(NEVER.toString())) { 172 return NEVER; 173 } else { 174 return ONLYFULL; 175 } 176 } 177 } 178 179 /** 180 * Handles offline index generation.<p> 181 */ 182 protected class CmsSearchOfflineHandler implements I_CmsEventListener { 183 184 /** Indicates if the event handlers for the offline search have been already registered. */ 185 private boolean m_isEventRegistered; 186 187 /** The list of resources to index. */ 188 private List<CmsPublishedResource> m_resourcesToIndex; 189 190 /** 191 * Initializes the offline index handler.<p> 192 */ 193 protected CmsSearchOfflineHandler() { 194 195 m_resourcesToIndex = new ArrayList<CmsPublishedResource>(); 196 } 197 198 /** 199 * Implements the event listener of this class.<p> 200 * 201 * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent) 202 */ 203 @SuppressWarnings("unchecked") 204 public void cmsEvent(CmsEvent event) { 205 206 Object change = event.getData().get(I_CmsEventListener.KEY_CHANGE); 207 switch (event.getType()) { 208 case I_CmsEventListener.EVENT_PROPERTY_MODIFIED: 209 case I_CmsEventListener.EVENT_RESOURCE_CREATED: 210 case I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED: 211 case I_CmsEventListener.EVENT_RESOURCE_MODIFIED: 212 if ((change != null) && change.equals(Integer.valueOf(CmsDriverManager.NOTHING_CHANGED))) { 213 // skip lock & unlock 214 return; 215 } 216 // skip indexing if flag is set in event 217 Object skip = event.getData().get(I_CmsEventListener.KEY_SKIPINDEX); 218 if (skip != null) { 219 return; 220 } 221 222 // a resource has been modified - offline indexes require (re)indexing 223 List<CmsResource> resources = Collections.singletonList( 224 (CmsResource)event.getData().get(I_CmsEventListener.KEY_RESOURCE)); 225 reIndexResources(resources); 226 break; 227 case I_CmsEventListener.EVENT_RESOURCE_DELETED: 228 List<CmsResource> eventResources = (List<CmsResource>)event.getData().get( 229 I_CmsEventListener.KEY_RESOURCES); 230 List<CmsResource> resourcesToDelete = new ArrayList<CmsResource>(eventResources); 231 for (CmsResource res : resourcesToDelete) { 232 if (res.getState().isNew()) { 233 // if the resource is new and a delete action was performed 234 // --> set the state of the resource to deleted 235 res.setState(CmsResourceState.STATE_DELETED); 236 } 237 } 238 reIndexResources(resourcesToDelete); 239 break; 240 case I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED: 241 if (I_CmsEventListener.VALUE_CREATE_SIBLING.equals(change)) { 242 List<CmsResource> resList = (List<CmsResource>)event.getData().get( 243 I_CmsEventListener.KEY_RESOURCES); 244 if ((resList != null) && (resList.size() >= 3)) { 245 System.out.println("Sibling creation case, resource = " + resList.get(1).getRootPath()); 246 reIndexResources(Collections.singletonList(resList.get(1))); 247 248 } 249 } else { 250 reIndexResources((List<CmsResource>)event.getData().get(I_CmsEventListener.KEY_RESOURCES)); 251 } 252 break; 253 case I_CmsEventListener.EVENT_RESOURCE_MOVED: 254 case I_CmsEventListener.EVENT_RESOURCE_COPIED: 255 case I_CmsEventListener.EVENT_RESOURCES_MODIFIED: 256 257 // a list of resources has been modified - offline indexes require (re)indexing 258 reIndexResources((List<CmsResource>)event.getData().get(I_CmsEventListener.KEY_RESOURCES)); 259 break; 260 default: 261 // no operation 262 } 263 } 264 265 /** 266 * Adds a list of {@link CmsPublishedResource} objects to be indexed.<p> 267 * 268 * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to be indexed 269 */ 270 protected synchronized void addResourcesToIndex(List<CmsPublishedResource> resourcesToIndex) { 271 272 m_resourcesToIndex.addAll(resourcesToIndex); 273 } 274 275 /** 276 * Returns the list of {@link CmsPublishedResource} objects to index.<p> 277 * 278 * @return the resources to index 279 */ 280 protected List<CmsPublishedResource> getResourcesToIndex() { 281 282 List<CmsPublishedResource> result; 283 synchronized (this) { 284 result = m_resourcesToIndex; 285 m_resourcesToIndex = new ArrayList<CmsPublishedResource>(); 286 } 287 try { 288 CmsObject cms = m_adminCms; 289 CmsProject offline = getOfflineIndexProject(); 290 if (offline != null) { 291 // switch to the offline project if available 292 cms = OpenCms.initCmsObject(m_adminCms); 293 cms.getRequestContext().setCurrentProject(offline); 294 } 295 addAdditionallyAffectedResources(cms, result); 296 } catch (CmsException e) { 297 LOG.error(e.getLocalizedMessage(), e); 298 } 299 return result; 300 } 301 302 /** 303 * Initializes this offline search handler, registering the event handlers if required.<p> 304 */ 305 protected void initialize() { 306 307 if (m_offlineIndexes.size() > 0) { 308 // there is at least one offline index configured 309 if ((m_offlineIndexThread == null) || !m_offlineIndexThread.isAlive()) { 310 // create the offline indexing thread 311 m_offlineIndexThread = new CmsSearchOfflineIndexThread(this); 312 // start the offline index thread 313 m_offlineIndexThread.start(); 314 } 315 } else { 316 if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) { 317 // no offline indexes but thread still running, stop the thread 318 m_offlineIndexThread.shutDown(); 319 m_offlineIndexThread = null; 320 } 321 } 322 // do this only in case there are offline indexes configured 323 if (!m_isEventRegistered && (m_offlineIndexes.size() > 0)) { 324 m_isEventRegistered = true; 325 // register this object as event listener 326 OpenCms.addCmsEventListener( 327 this, 328 new int[] { 329 I_CmsEventListener.EVENT_PROPERTY_MODIFIED, 330 I_CmsEventListener.EVENT_RESOURCE_CREATED, 331 I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED, 332 I_CmsEventListener.EVENT_RESOURCE_MODIFIED, 333 I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED, 334 I_CmsEventListener.EVENT_RESOURCE_MOVED, 335 I_CmsEventListener.EVENT_RESOURCE_DELETED, 336 I_CmsEventListener.EVENT_RESOURCE_COPIED, 337 I_CmsEventListener.EVENT_RESOURCES_MODIFIED}); 338 } 339 } 340 341 /** 342 * Updates all offline indexes for the given list of {@link CmsResource} objects.<p> 343 * 344 * @param resources a list of {@link CmsResource} objects to update in the offline indexes 345 */ 346 protected synchronized void reIndexResources(List<CmsResource> resources) { 347 348 List<CmsPublishedResource> resourcesToIndex = new ArrayList<CmsPublishedResource>(resources.size()); 349 for (CmsResource res : resources) { 350 CmsPublishedResource pubRes = new CmsPublishedResource(res); 351 resourcesToIndex.add(pubRes); 352 } 353 if (resourcesToIndex.size() > 0) { 354 // add the resources found to the offline index thread 355 addResourcesToIndex(resourcesToIndex); 356 } 357 } 358 } 359 360 /** 361 * The offline indexer thread runs periodically and indexes all resources added by the event handler.<p> 362 */ 363 protected class CmsSearchOfflineIndexThread extends Thread { 364 365 /** The event handler that triggers this thread. */ 366 CmsSearchOfflineHandler m_handler; 367 368 /** Indicates if this thread is still alive. */ 369 boolean m_isAlive; 370 371 /** Indicates that an index update thread is currently running. */ 372 private boolean m_isUpdating; 373 374 /** If true a manual update (after file upload) was triggered. */ 375 private boolean m_updateTriggered; 376 377 /** The wait handle used for signalling when the worker thread has finished. */ 378 private CmsWaitHandle m_waitHandle = new CmsWaitHandle(); 379 380 /** 381 * Constructor.<p> 382 * 383 * @param handler the offline index event handler 384 */ 385 protected CmsSearchOfflineIndexThread(CmsSearchOfflineHandler handler) { 386 387 super("OpenCms: Offline Search Indexer"); 388 m_handler = handler; 389 } 390 391 /** 392 * Gets the wait handle used for signalling when the worker thread has finished. 393 * 394 * @return the wait handle 395 **/ 396 public CmsWaitHandle getWaitHandle() { 397 398 return m_waitHandle; 399 } 400 401 /** 402 * @see java.lang.Thread#interrupt() 403 */ 404 @Override 405 public void interrupt() { 406 407 super.interrupt(); 408 m_updateTriggered = true; 409 } 410 411 /** 412 * @see java.lang.Thread#run() 413 */ 414 @Override 415 public void run() { 416 417 // create a log report for the output 418 I_CmsReport report = new CmsLogReport(m_adminCms.getRequestContext().getLocale(), CmsSearchManager.class); 419 long offlineUpdateFrequency = getOfflineUpdateFrequency(); 420 m_updateTriggered = false; 421 try { 422 while (m_isAlive) { 423 if (!m_updateTriggered) { 424 try { 425 sleep(offlineUpdateFrequency); 426 } catch (InterruptedException e) { 427 // continue the thread after interruption 428 if (!m_isAlive) { 429 // the thread has been shut down while sleeping 430 continue; 431 } 432 if (offlineUpdateFrequency != getOfflineUpdateFrequency()) { 433 // offline update frequency change - clear interrupt status 434 offlineUpdateFrequency = getOfflineUpdateFrequency(); 435 } 436 LOG.info(e.getLocalizedMessage(), e); 437 } 438 } 439 if (m_isAlive) { 440 // set update trigger to false since we do the update now 441 m_updateTriggered = false; 442 // get list of resource to update 443 List<CmsPublishedResource> resourcesToIndex = getResourcesToIndex(); 444 if (resourcesToIndex.size() > 0) { 445 // only start indexing if there is at least one resource 446 startOfflineUpdateThread(report, resourcesToIndex); 447 } else { 448 getWaitHandle().release(); 449 } 450 // this is just called to clear the interrupt status of the thread 451 interrupted(); 452 } 453 } 454 } finally { 455 // make sure that live status is reset in case of Exceptions 456 m_isAlive = false; 457 } 458 459 } 460 461 /** 462 * @see java.lang.Thread#start() 463 */ 464 @Override 465 public synchronized void start() { 466 467 m_isAlive = true; 468 super.start(); 469 } 470 471 /** 472 * Obtains the list of resource to update in the offline index, 473 * then optimizes the list by removing duplicate entries.<p> 474 * 475 * @return the list of resource to update in the offline index 476 */ 477 protected List<CmsPublishedResource> getResourcesToIndex() { 478 479 List<CmsPublishedResource> resourcesToIndex = m_handler.getResourcesToIndex(); 480 List<CmsPublishedResource> result = new ArrayList<CmsPublishedResource>(resourcesToIndex.size()); 481 482 // Reverse to always keep the last list entries 483 Collections.reverse(resourcesToIndex); 484 for (CmsPublishedResource pubRes : resourcesToIndex) { 485 boolean addResource = true; 486 for (CmsPublishedResource resRes : result) { 487 if (pubRes.equals(resRes) 488 && (pubRes.getState() == resRes.getState()) 489 && (pubRes.getMovedState() == resRes.getMovedState()) 490 && pubRes.getRootPath().equals(resRes.getRootPath())) { 491 // resource already in the update list 492 addResource = false; 493 break; 494 } 495 } 496 if (addResource) { 497 result.add(pubRes); 498 } 499 500 } 501 Collections.reverse(result); 502 return changeStateOfMoveOriginsToDeleted(result); 503 } 504 505 /** 506 * Shuts down this offline index thread.<p> 507 */ 508 protected void shutDown() { 509 510 m_isAlive = false; 511 interrupt(); 512 if (m_isUpdating) { 513 long waitTime = getOfflineUpdateFrequency() / 2; 514 int waitSteps = 0; 515 do { 516 try { 517 // wait half the time of the offline index frequency for the thread to finish 518 Thread.sleep(waitTime); 519 } catch (InterruptedException e) { 520 // continue 521 LOG.info(e.getLocalizedMessage(), e); 522 } 523 waitSteps++; 524 // wait 5 times then stop waiting 525 } while ((waitSteps < 5) && m_isUpdating); 526 } 527 } 528 529 /** 530 * Updates the offline search indexes for the given list of resources.<p> 531 * 532 * @param report the report to write the index information to 533 * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index 534 */ 535 protected void startOfflineUpdateThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) { 536 537 CmsSearchOfflineIndexWorkThread thread = new CmsSearchOfflineIndexWorkThread(report, resourcesToIndex); 538 long startTime = System.currentTimeMillis(); 539 long waitTime = getOfflineUpdateFrequency() / 2; 540 if (LOG.isDebugEnabled()) { 541 LOG.debug( 542 Messages.get().getBundle().key( 543 Messages.LOG_OI_UPDATE_START_1, 544 Integer.valueOf(resourcesToIndex.size()))); 545 } 546 547 m_isUpdating = true; 548 thread.start(); 549 550 do { 551 try { 552 // wait half the time of the offline index frequency for the thread to finish 553 thread.join(waitTime); 554 } catch (InterruptedException e) { 555 // continue 556 LOG.info(e.getLocalizedMessage(), e); 557 } 558 if (thread.isAlive()) { 559 LOG.warn( 560 Messages.get().getBundle().key( 561 Messages.LOG_OI_UPDATE_LONG_2, 562 Integer.valueOf(resourcesToIndex.size()), 563 Long.valueOf(System.currentTimeMillis() - startTime))); 564 } 565 } while (thread.isAlive()); 566 m_isUpdating = false; 567 568 if (LOG.isDebugEnabled()) { 569 LOG.debug( 570 Messages.get().getBundle().key( 571 Messages.LOG_OI_UPDATE_FINISH_2, 572 Integer.valueOf(resourcesToIndex.size()), 573 Long.valueOf(System.currentTimeMillis() - startTime))); 574 } 575 } 576 577 /** 578 * Helper method which changes the states of resources which are to be indexed but have the wrong path to 'deleted'. 579 * This is needed to deal with moved resources, since the documents with the old paths must be removed from the index, 580 * 581 * @param resourcesToIndex the resources to index 582 * 583 * @return the resources to index, but resource states are set to 'deleted' for resources with outdated paths 584 */ 585 private List<CmsPublishedResource> changeStateOfMoveOriginsToDeleted( 586 List<CmsPublishedResource> resourcesToIndex) { 587 588 Map<CmsUUID, String> lastValidPaths = new HashMap<CmsUUID, String>(); 589 for (CmsPublishedResource resource : resourcesToIndex) { 590 if (resource.getState().isDeleted()) { 591 // we don't want the last path to be from a deleted resource 592 continue; 593 } 594 lastValidPaths.put(resource.getStructureId(), resource.getRootPath()); 595 } 596 List<CmsPublishedResource> result = new ArrayList<CmsPublishedResource>(); 597 for (CmsPublishedResource resource : resourcesToIndex) { 598 if (resource.getState().isDeleted()) { 599 result.add(resource); 600 continue; 601 } 602 String lastValidPath = lastValidPaths.get(resource.getStructureId()); 603 if (resource.getRootPath().equals(lastValidPath) || resource.getStructureId().isNullUUID()) { 604 result.add(resource); 605 } else { 606 result.add( 607 new CmsPublishedResource( 608 resource.getStructureId(), 609 resource.getResourceId(), 610 resource.getPublishTag(), 611 resource.getRootPath(), 612 resource.getType(), 613 resource.isFolder(), 614 CmsResource.STATE_DELETED, // make sure index entry with outdated path is deleted 615 resource.getSiblingCount())); 616 } 617 } 618 return result; 619 } 620 } 621 622 /** 623 * An offline index worker Thread runs each time for every offline index update action.<p> 624 * 625 * This was decoupled from the main {@link CmsSearchOfflineIndexThread} in order to avoid 626 * problems if a single operation "hangs" the Tread.<p> 627 */ 628 protected class CmsSearchOfflineIndexWorkThread extends Thread { 629 630 /** The report to write the index information to. */ 631 I_CmsReport m_report; 632 633 /** The list of {@link CmsPublishedResource} objects to index. */ 634 List<CmsPublishedResource> m_resourcesToIndex; 635 636 /** 637 * Updates the offline search indexes for the given list of resources.<p> 638 * 639 * @param report the report to write the index information to 640 * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index 641 */ 642 protected CmsSearchOfflineIndexWorkThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) { 643 644 super("OpenCms: Offline Search Index Worker"); 645 m_report = report; 646 m_resourcesToIndex = resourcesToIndex; 647 } 648 649 /** 650 * @see java.lang.Thread#run() 651 */ 652 @Override 653 public void run() { 654 655 updateIndexOffline(m_report, m_resourcesToIndex); 656 if (m_offlineIndexThread != null) { 657 m_offlineIndexThread.getWaitHandle().release(); 658 } 659 } 660 661 } 662 663 /** 664 * Helper class for batching resources arising from multiple independent 'instant publish' operations for indexing. 665 * <p>This is to reduce overhead for indexing, while still limiting the indexing batch size to not block 'interactive' publishing too much. 666 * <p>The batching is time-based, i.e. file changes in a given time span (currently 2 seconds) are collected and then indexed together. 667 * <p>However, changes that include publish resources with state 'deleted' (actual deletions or move operations) are never batched together with others, to avoid complications. 668 */ 669 protected class InstantPublishIndexingQueue { 670 671 /** Current map of batched resources to publish, grouped by id. */ 672 private Map<CmsUUID, List<CmsPublishedResource>> m_currentBatch = new HashMap<>(); 673 674 /** The task for flushing the queue. */ 675 private ScheduledFuture<?> m_flushTask; 676 677 /** The executor used to do the actual indexing. */ 678 private ThreadPoolExecutor m_executor; 679 680 private ScheduledThreadPoolExecutor m_flushExecutor; 681 682 private LinkedBlockingQueue<Runnable> m_workQueue = new LinkedBlockingQueue<>(); 683 684 /** 685 * Creates a new instance. 686 */ 687 public InstantPublishIndexingQueue() { 688 689 m_executor = new ThreadPoolExecutor( 690 0, 691 1, 692 10, 693 TimeUnit.SECONDS, 694 m_workQueue, 695 new ThreadFactoryBuilder().setNameFormat("instant-publish-indexer-%d").build()); 696 m_flushExecutor = new ScheduledThreadPoolExecutor( 697 1, 698 new ThreadFactoryBuilder().setNameFormat("instant-publish-flush-%d").build()); 699 } 700 701 /** 702 * Adds the resources from a publish job to the queue. 703 * 704 * @param publishJobResources the publish job resources 705 */ 706 public synchronized void addPublishJob(List<CmsPublishedResource> publishJobResources) { 707 708 boolean needToFlush = false; 709 Map<CmsUUID, List<CmsPublishedResource>> publishMap = new HashMap<>(); 710 for (CmsPublishedResource resource : publishJobResources) { 711 publishMap.computeIfAbsent(resource.getStructureId(), id -> new ArrayList<>()).add(resource); 712 } 713 for (CmsUUID id : publishMap.keySet()) { 714 if (isMove(m_currentBatch.get(id))) { 715 needToFlush = true; 716 } 717 } 718 if (needToFlush) { 719 if (m_flushTask != null) { 720 m_flushTask.cancel(false); 721 m_flushTask = null; 722 } 723 flush(); 724 } 725 m_currentBatch.putAll(publishMap); 726 if (m_flushTask == null) { 727 m_flushTask = m_flushExecutor.schedule( 728 this::flush, 729 CmsModificationContext.getOnlineFolderOptions().getIndexingInterval(), 730 TimeUnit.MILLISECONDS); 731 } 732 733 } 734 735 /** 736 * Checks if there is currently any work left to do for the instant publish indexing queue. 737 */ 738 public synchronized boolean hasWorkToDo() { 739 740 return (m_currentBatch.size() > 0) || (m_executor.getActiveCount() > 0) || !m_workQueue.isEmpty(); 741 } 742 743 /** 744 * Shuts down the queue. 745 */ 746 public void shutdown() { 747 748 // Tasks running in the flush executor produce tasks for the indexing executor, so we shut down the former before the latter to avoid skipping indexing during shutdown 749 m_flushExecutor.shutdown(); 750 try { 751 m_flushExecutor.awaitTermination(30, TimeUnit.SECONDS); 752 } catch (InterruptedException e) { 753 LOG.error(e.getLocalizedMessage(), e); 754 } 755 m_executor.shutdown(); 756 try { 757 m_executor.awaitTermination(30, TimeUnit.SECONDS); 758 } catch (InterruptedException e) { 759 LOG.error(e.getLocalizedMessage(), e); 760 } 761 } 762 763 /** 764 * Flushes the currently collected batch of published resources and submits them for indexing. 765 */ 766 protected synchronized void flush() { 767 768 m_flushTask = null; 769 770 List<CmsPublishedResource> resources = new ArrayList<>(); 771 for (List<CmsPublishedResource> entriesForId : m_currentBatch.values()) { 772 resources.addAll(entriesForId); 773 } 774 m_currentBatch.clear(); 775 if (resources.size() > 0) { 776 m_executor.submit(() -> tryIndex(resources)); 777 } 778 } 779 780 /** 781 * Indexes the given list of published resources. 782 * 783 * @param resourceList the resources to index 784 */ 785 protected void tryIndex(List<CmsPublishedResource> resourceList) { 786 787 try { 788 List<CmsPublishedResource> resourcesToIndex = computeUpdateResources(m_adminCms, resourceList); 789 long start = System.currentTimeMillis(); 790 ONLINE_LOCK.lock(false); 791 try { 792 updateAllIndexes(m_adminCms, resourcesToIndex, null); 793 } finally { 794 ONLINE_LOCK.unlock(); 795 long end = System.currentTimeMillis(); 796 LOG.info( 797 "Instant publish indexing of a batch of size " 798 + resourcesToIndex.size() 799 + " took " 800 + (end - start) 801 + "ms"); 802 } 803 } catch (Exception e) { 804 LOG.error(e.getLocalizedMessage(), e); 805 } 806 } 807 808 private boolean isMove(Collection<CmsPublishedResource> publishedResources) { 809 810 return (publishedResources != null) 811 && (publishedResources.size() == 2) 812 && publishedResources.stream().map(res -> res.getState()).collect(Collectors.toSet()).equals( 813 Set.of(CmsResource.STATE_DELETED, CmsResource.STATE_NEW)); 814 } 815 816 } 817 818 /** This needs to be a fair lock to preserve order of threads accessing the search manager. */ 819 private static final CmsPriorityLock OFFLINE_LOCK = new CmsPriorityLock(); 820 821 /** This needs to be a fair lock to preserve order of threads accessing the search manager. */ 822 private static final CmsPriorityLock ONLINE_LOCK = new CmsPriorityLock(); 823 824 /** The default value used for generating search result excerpts (1024 chars). */ 825 public static final int DEFAULT_EXCERPT_LENGTH = 1024; 826 827 /** The default value used for keeping the extraction results in the cache (672 hours = 4 weeks). */ 828 public static final float DEFAULT_EXTRACTION_CACHE_MAX_AGE = 672.0f; 829 830 /** Default for the maximum number of modifications before a commit in the search index is triggered (500). */ 831 public static final int DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT = 500; 832 833 /** The default update frequency for offline indexes (15000 msec = 15 sec). */ 834 public static final int DEFAULT_OFFLINE_UPDATE_FREQNENCY = 15000; 835 836 /** The default maximal wait time for re-indexing after editing a content. */ 837 public static final int DEFAULT_MAX_INDEX_WAITTIME = 30000; 838 839 /** The default timeout value used for generating a document for the search index (60000 msec = 1 min). */ 840 public static final int DEFAULT_TIMEOUT = 60000; 841 842 /** Scheduler parameter: Update only a specified list of indexes. */ 843 public static final String JOB_PARAM_INDEXLIST = "indexList"; 844 845 /** Scheduler parameter: Write the output of the update to the logfile. */ 846 public static final String JOB_PARAM_WRITELOG = "writeLog"; 847 848 /** Prefix for Lucene default analyzers package (<code>org.apache.lucene.analysis.</code>). */ 849 public static final String LUCENE_ANALYZER = "org.apache.lucene.analysis.core."; 850 851 /** The log object for this class. */ 852 protected static final Log LOG = CmsLog.getLog(CmsSearchManager.class); 853 854 /** List of resource types which represent groups of elements. */ 855 private static final String[] groupTypes = { 856 CmsResourceTypeXmlContainerPage.MODEL_GROUP_TYPE_NAME, 857 CmsResourceTypeXmlContainerPage.GROUP_CONTAINER_TYPE_NAME, 858 CmsResourceTypeXmlContainerPage.INHERIT_CONTAINER_TYPE_NAME}; 859 860 /** The indexing queue for the 'instant publish' feature. */ 861 private InstantPublishIndexingQueue m_instantPublishIndexQueue = new InstantPublishIndexingQueue(); 862 863 /** The administrator OpenCms user context to access OpenCms VFS resources. */ 864 protected CmsObject m_adminCms; 865 866 /** The list of indexes that are configured for offline index mode. */ 867 protected List<I_CmsSearchIndex> m_offlineIndexes; 868 869 /** The thread used of offline indexing. */ 870 protected CmsSearchOfflineIndexThread m_offlineIndexThread; 871 872 /** Configured analyzers for languages using <analyzer>. */ 873 private HashMap<Locale, CmsSearchAnalyzer> m_analyzers; 874 875 /** Stores the offline update frequency while indexing is paused. */ 876 private long m_configuredOfflineIndexingFrequency; 877 878 /** The Solr core container. */ 879 private CoreContainer m_coreContainer; 880 881 /** A map of document factory configurations. */ 882 private List<CmsSearchDocumentType> m_documentTypeConfigs; 883 884 /** A map of document factories keyed first by their name and then by their extraction keys. */ 885 private Map<String, Map<String, I_CmsDocumentFactory>> m_documentTypes; 886 887 /** The set of all globally available extraction keys for document factories. */ 888 private Set<String> m_extractionKeys; 889 890 /** The max age for extraction results to remain in the cache. */ 891 private float m_extractionCacheMaxAge; 892 893 /** The cache for the extraction results. */ 894 private CmsExtractionResultCache m_extractionResultCache; 895 896 /** Contains the available field configurations. */ 897 private Map<String, I_CmsSearchFieldConfiguration> m_fieldConfigurations; 898 899 /** The force unlock type. */ 900 private CmsSearchForceUnlockMode m_forceUnlockMode; 901 902 /** The class used to highlight the search terms in the excerpt of a search result. */ 903 private I_CmsTermHighlighter m_highlighter; 904 905 /** A list of search indexes. */ 906 private List<I_CmsSearchIndex> m_indexes; 907 908 /** Seconds to wait for an index lock. */ 909 private int m_indexLockMaxWaitSeconds = 10; 910 911 /** Configured index sources. */ 912 private Map<String, CmsSearchIndexSource> m_indexSources; 913 914 /** The max. char. length of the excerpt in the search result. */ 915 private int m_maxExcerptLength; 916 917 /** The maximum number of modifications before a commit in the search index is triggered. */ 918 private int m_maxModificationsBeforeCommit; 919 920 /** The offline index search handler. */ 921 private CmsSearchOfflineHandler m_offlineHandler; 922 923 /** The update frequency of the offline indexer in milliseconds. */ 924 private long m_offlineUpdateFrequency; 925 926 /** The maximal time to wait for re-indexing after a content is edited (in milliseconds). */ 927 private long m_maxIndexWaitTime; 928 929 /** Path to index files below WEB-INF/. */ 930 private String m_path; 931 932 /** The Solr configuration. */ 933 private CmsSolrConfiguration m_solrConfig; 934 935 /** Timeout for abandoning indexing thread. */ 936 private long m_timeout; 937 938 /** Offline indexing pause requests */ 939 private final Set<CmsUUID> m_pauseRequests = new HashSet<>(); 940 941 /** 942 * Default constructor when called as cron job.<p> 943 */ 944 public CmsSearchManager() { 945 946 m_documentTypes = new HashMap<String, Map<String, I_CmsDocumentFactory>>(); 947 m_extractionKeys = new HashSet<String>(); 948 m_documentTypeConfigs = new ArrayList<CmsSearchDocumentType>(); 949 m_analyzers = new HashMap<Locale, CmsSearchAnalyzer>(); 950 m_indexes = new ArrayList<I_CmsSearchIndex>(); 951 m_indexSources = new TreeMap<String, CmsSearchIndexSource>(); 952 m_offlineHandler = new CmsSearchOfflineHandler(); 953 m_extractionCacheMaxAge = DEFAULT_EXTRACTION_CACHE_MAX_AGE; 954 m_maxExcerptLength = DEFAULT_EXCERPT_LENGTH; 955 m_offlineUpdateFrequency = DEFAULT_OFFLINE_UPDATE_FREQNENCY; 956 m_maxIndexWaitTime = DEFAULT_MAX_INDEX_WAITTIME; 957 m_maxModificationsBeforeCommit = DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT; 958 959 m_fieldConfigurations = new HashMap<String, I_CmsSearchFieldConfiguration>(); 960 // make sure we have a "standard" field configuration 961 addFieldConfiguration(CmsLuceneFieldConfiguration.DEFAULT_STANDARD); 962 963 if (CmsLog.INIT.isInfoEnabled()) { 964 CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_START_SEARCH_CONFIG_0)); 965 } 966 } 967 968 /** 969 * Returns an analyzer for the given class name.<p> 970 * 971 * @param className the class name of the analyzer 972 * 973 * @return the appropriate lucene analyzer 974 * 975 * @throws Exception if something goes wrong 976 */ 977 public static Analyzer getAnalyzer(String className) throws Exception { 978 979 Analyzer analyzer = null; 980 Class<?> analyzerClass; 981 try { 982 analyzerClass = Class.forName(className); 983 } catch (ClassNotFoundException e) { 984 // allow Lucene standard classes to be written in a short form 985 analyzerClass = Class.forName(LUCENE_ANALYZER + className); 986 } 987 988 // since Lucene 3.0 most analyzers need a "version" parameter and don't support an empty constructor 989 if (StandardAnalyzer.class.equals(analyzerClass)) { 990 // the Lucene standard analyzer is used - but without any stopwords. 991 analyzer = new StandardAnalyzer(new CharArraySet(0, false)); 992 } else { 993 analyzer = (Analyzer)analyzerClass.newInstance(); 994 } 995 return analyzer; 996 } 997 998 /** 999 * Returns the Solr index configured with the parameters name. 1000 * The parameters must contain a key/value pair with an existing 1001 * Solr index, otherwise <code>null</code> is returned.<p> 1002 * 1003 * @param cms the current context 1004 * @param params the parameter map 1005 * 1006 * @return the best matching Solr index 1007 */ 1008 public static final CmsSolrIndex getIndexSolr(CmsObject cms, Map<String, String[]> params) { 1009 1010 String indexName = null; 1011 CmsSolrIndex index = null; 1012 // try to get the index name from the parameters: 'core' or 'index' 1013 if (params != null) { 1014 indexName = params.get(OpenCmsSolrHandler.PARAM_CORE) != null 1015 ? params.get(OpenCmsSolrHandler.PARAM_CORE)[0] 1016 : (params.get(OpenCmsSolrHandler.PARAM_INDEX) != null 1017 ? params.get(OpenCmsSolrHandler.PARAM_INDEX)[0] 1018 : null); 1019 } 1020 if (indexName == null) { 1021 // if no parameter is specified try to use the default online/offline indexes by context 1022 indexName = cms.getRequestContext().getCurrentProject().isOnlineProject() 1023 ? CmsSolrIndex.DEFAULT_INDEX_NAME_ONLINE 1024 : CmsSolrIndex.DEFAULT_INDEX_NAME_OFFLINE; 1025 } 1026 // try to get the index 1027 index = OpenCms.getSearchManager().getIndexSolr(indexName); 1028 if (index == null) { 1029 // if there is exactly one index, a missing core / index parameter doesn't matter, since there is no choice. 1030 List<CmsSolrIndex> solrs = OpenCms.getSearchManager().getAllSolrIndexes(); 1031 if ((solrs != null) && !solrs.isEmpty() && (solrs.size() == 1)) { 1032 index = solrs.get(0); 1033 } 1034 } 1035 return index; 1036 } 1037 1038 /** 1039 * Returns <code>true</code> if the index for the given name is a Lucene index, <code>false</code> otherwise.<p> 1040 * 1041 * @param indexName the name of the index to check 1042 * 1043 * @return <code>true</code> if the index for the given name is a Lucene index 1044 */ 1045 public static boolean isLuceneIndex(String indexName) { 1046 1047 I_CmsSearchIndex i = OpenCms.getSearchManager().getIndex(indexName); 1048 return (i instanceof CmsSearchIndex) && (!(i instanceof CmsSolrIndex)); 1049 } 1050 1051 /** 1052 * Adds an analyzer.<p> 1053 * 1054 * @param analyzer an analyzer 1055 */ 1056 public void addAnalyzer(CmsSearchAnalyzer analyzer) { 1057 1058 m_analyzers.put(analyzer.getLocale(), analyzer); 1059 1060 if (CmsLog.INIT.isInfoEnabled()) { 1061 CmsLog.INIT.info( 1062 Messages.get().getBundle().key( 1063 Messages.INIT_ADD_ANALYZER_2, 1064 analyzer.getLocale(), 1065 analyzer.getClassName())); 1066 } 1067 } 1068 1069 /** 1070 * Adds a document type.<p> 1071 * 1072 * @param documentType a document type 1073 */ 1074 public void addDocumentTypeConfig(CmsSearchDocumentType documentType) { 1075 1076 m_documentTypeConfigs.add(documentType); 1077 1078 if (CmsLog.INIT.isInfoEnabled()) { 1079 CmsLog.INIT.info( 1080 Messages.get().getBundle().key( 1081 Messages.INIT_SEARCH_DOC_TYPES_2, 1082 documentType.getName(), 1083 documentType.getClassName())); 1084 } 1085 } 1086 1087 /** 1088 * Adds a search field configuration to the search manager.<p> 1089 * 1090 * @param fieldConfiguration the search field configuration to add 1091 */ 1092 public void addFieldConfiguration(I_CmsSearchFieldConfiguration fieldConfiguration) { 1093 1094 m_fieldConfigurations.put(fieldConfiguration.getName(), fieldConfiguration); 1095 } 1096 1097 /** 1098 * Adds a search index to the configuration.<p> 1099 * 1100 * @param searchIndex the search index to add 1101 */ 1102 public void addSearchIndex(I_CmsSearchIndex searchIndex) { 1103 1104 if (!searchIndex.isInitialized()) { 1105 if (OpenCms.getRunLevel() > OpenCms.RUNLEVEL_2_INITIALIZING) { 1106 try { 1107 searchIndex.initialize(); 1108 } catch (CmsException e) { 1109 // should never happen 1110 LOG.error(e.getMessage(), e); 1111 } 1112 } 1113 } 1114 1115 // name: not null or emtpy and unique 1116 String name = searchIndex.getName(); 1117 if (CmsStringUtil.isEmptyOrWhitespaceOnly(name)) { 1118 throw new CmsIllegalArgumentException( 1119 Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_MISSING_NAME_0)); 1120 } 1121 if (m_indexSources.keySet().contains(name)) { 1122 throw new CmsIllegalArgumentException( 1123 Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_INVALID_NAME_1, name)); 1124 } 1125 1126 m_indexes.add(searchIndex); 1127 if (m_adminCms != null) { 1128 initOfflineIndexes(); 1129 } 1130 1131 if (CmsLog.INIT.isInfoEnabled()) { 1132 CmsLog.INIT.info( 1133 Messages.get().getBundle().key( 1134 Messages.INIT_ADD_SEARCH_INDEX_2, 1135 searchIndex.getName(), 1136 searchIndex.getProject())); 1137 } 1138 } 1139 1140 /** 1141 * Adds a search index source configuration.<p> 1142 * 1143 * @param searchIndexSource a search index source configuration 1144 */ 1145 public void addSearchIndexSource(CmsSearchIndexSource searchIndexSource) { 1146 1147 m_indexSources.put(searchIndexSource.getName(), searchIndexSource); 1148 1149 if (CmsLog.INIT.isInfoEnabled()) { 1150 CmsLog.INIT.info( 1151 Messages.get().getBundle().key( 1152 Messages.INIT_SEARCH_INDEX_SOURCE_2, 1153 searchIndexSource.getName(), 1154 searchIndexSource.getIndexerClassName())); 1155 } 1156 } 1157 1158 /** 1159 * Implements the event listener of this class.<p> 1160 * 1161 * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent) 1162 */ 1163 public void cmsEvent(CmsEvent event) { 1164 1165 switch (event.getType()) { 1166 case I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES: 1167 List<String> indexNames = null; 1168 if ((event.getData() != null) 1169 && CmsStringUtil.isNotEmptyOrWhitespaceOnly( 1170 (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES))) { 1171 indexNames = CmsStringUtil.splitAsList( 1172 (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES), 1173 ",", 1174 true); 1175 } 1176 try { 1177 if (LOG.isDebugEnabled()) { 1178 LOG.debug( 1179 Messages.get().getBundle().key( 1180 Messages.LOG_EVENT_REBUILD_SEARCHINDEX_1, 1181 indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")), 1182 new Exception()); 1183 } 1184 if (indexNames == null) { 1185 rebuildAllIndexes(getEventReport(event)); 1186 } else { 1187 rebuildIndexes(indexNames, getEventReport(event)); 1188 } 1189 } catch (CmsException e) { 1190 if (LOG.isErrorEnabled()) { 1191 LOG.error( 1192 Messages.get().getBundle().key( 1193 Messages.ERR_EVENT_REBUILD_SEARCHINDEX_1, 1194 indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")), 1195 e); 1196 } 1197 } 1198 break; 1199 case I_CmsEventListener.EVENT_CLEAR_CACHES: 1200 if (LOG.isDebugEnabled()) { 1201 LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_CLEAR_CACHES_0), new Exception()); 1202 } 1203 break; 1204 case I_CmsEventListener.EVENT_PUBLISH_PROJECT: 1205 // event data contains a list of the published resources 1206 CmsUUID publishHistoryId = new CmsUUID((String)event.getData().get(I_CmsEventListener.KEY_PUBLISHID)); 1207 if (LOG.isDebugEnabled()) { 1208 LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_PUBLISH_PROJECT_1, publishHistoryId)); 1209 } 1210 boolean instantPublish = Boolean.TRUE.equals( 1211 event.getData().get(I_CmsEventListener.KEY_INSTANT_PUBLISH)); 1212 if (instantPublish) { 1213 String publishIdStr = (String)event.getData().get(I_CmsEventListener.KEY_PUBLISHID); 1214 if (CmsUUID.isValidUUID(publishIdStr)) { 1215 CmsUUID publishId = new CmsUUID(publishIdStr); 1216 List<CmsPublishedResource> publishedResources; 1217 try { 1218 publishedResources = m_adminCms.readPublishedResources(publishId); 1219 m_instantPublishIndexQueue.addPublishJob(publishedResources); 1220 } catch (CmsException e) { 1221 LOG.error(e.getLocalizedMessage(), e); 1222 } 1223 } 1224 } else { 1225 updateAllIndexes(m_adminCms, publishHistoryId, getEventReport(event)); 1226 if (LOG.isDebugEnabled()) { 1227 LOG.debug( 1228 Messages.get().getBundle().key( 1229 Messages.LOG_EVENT_PUBLISH_PROJECT_FINISHED_1, 1230 publishHistoryId)); 1231 } 1232 } 1233 break; 1234 case I_CmsEventListener.EVENT_REINDEX_OFFLINE: 1235 case I_CmsEventListener.EVENT_REINDEX_ONLINE: 1236 boolean isOnline = I_CmsEventListener.EVENT_REINDEX_ONLINE == event.getType(); 1237 CmsPriorityLock lock = isOnline ? ONLINE_LOCK : OFFLINE_LOCK; 1238 Map<String, Object> eventData = event.getData(); 1239 CmsUUID userId = (CmsUUID)eventData.get(I_CmsEventListener.KEY_USER_ID); 1240 CmsUser user = null; 1241 if (userId != null) { 1242 try { 1243 user = m_adminCms.readUser(userId); 1244 } catch (Throwable t) { 1245 // should not normally happen 1246 LOG.debug(t.getMessage(), t); 1247 } 1248 } 1249 lock.lock(true); 1250 try { 1251 1252 if (LOG.isDebugEnabled()) { 1253 LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_REINDEX_STARTED_0)); 1254 } 1255 CmsObject cms = m_adminCms; 1256 if (!isOnline) { 1257 OpenCms.initCmsObject(m_adminCms); 1258 cms.getRequestContext().setCurrentProject( 1259 cms.readProject((CmsUUID)eventData.get(I_CmsEventListener.KEY_PROJECTID))); 1260 } 1261 @SuppressWarnings("unchecked") 1262 List<CmsResource> resources = (List<CmsResource>)eventData.get(I_CmsEventListener.KEY_RESOURCES); 1263 I_CmsReport report = (I_CmsReport)eventData.get(I_CmsEventListener.KEY_REPORT); 1264 List<CmsResource> resourcesToIndex = new ArrayList<>(); 1265 for (CmsResource res : resources) { 1266 if (res.isFile()) { 1267 resourcesToIndex.add(res); 1268 } else { 1269 try { 1270 resourcesToIndex.addAll( 1271 cms.readResources(res, CmsResourceFilter.IGNORE_EXPIRATION, true)); 1272 } catch (CmsException e) { 1273 LOG.error(e, e); 1274 } 1275 } 1276 } 1277 // we reindex and prevent using cached results 1278 cleanExtractionCache(); 1279 List<CmsPublishedResource> publishedResourcesToIndex = resourcesToIndex.stream().map( 1280 res -> new CmsPublishedResource(res)).collect(Collectors.toList()); 1281 if (Boolean.TRUE.equals(eventData.get(I_CmsEventListener.KEY_REINDEX_RELATED))) { 1282 addAdditionallyAffectedResources(cms, publishedResourcesToIndex); 1283 } 1284 if (isOnline) { 1285 updateAllIndexes( 1286 m_adminCms, 1287 publishedResourcesToIndex, 1288 new CmsShellLogReport(CmsLocaleManager.MASTER_LOCALE)); 1289 } else { 1290 updateIndexOffline(report, publishedResourcesToIndex); 1291 } 1292 cms = null; 1293 if (null != user) { 1294 Locale l = OpenCms.getWorkplaceManager().getWorkplaceLocale(user); 1295 OpenCms.getSessionManager().sendBroadcast( 1296 null, 1297 Messages.get().getBundle(l).key(Messages.GUI_REINDEXING_SUCCESS_0), 1298 user, 1299 ContentMode.html); 1300 } 1301 if (LOG.isDebugEnabled()) { 1302 LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_REINDEX_FINISHED_0)); 1303 } 1304 1305 } catch (Throwable e) { 1306 if (null != user) { 1307 Locale l = OpenCms.getWorkplaceManager().getWorkplaceLocale(user); 1308 OpenCms.getSessionManager().sendBroadcast( 1309 null, 1310 Messages.get().getBundle(l).key(Messages.GUI_REINDEXING_FAILED_0), 1311 user, 1312 ContentMode.html); 1313 } 1314 if (LOG.isDebugEnabled()) { 1315 LOG.error( 1316 Messages.get().getBundle().key(Messages.ERR_EVENT_REINDEX_FAILED_1, event.getData()), 1317 e); 1318 } else if (LOG.isErrorEnabled()) { 1319 LOG.error(Messages.get().getBundle().key(Messages.ERR_EVENT_REINDEX_FAILED_1, event.getData())); 1320 } 1321 } finally { 1322 lock.unlock(); 1323 } 1324 break; 1325 default: 1326 // no operation 1327 } 1328 } 1329 1330 /** 1331 * Returns all Solr index.<p> 1332 * 1333 * @return all Solr indexes 1334 */ 1335 public List<CmsSolrIndex> getAllSolrIndexes() { 1336 1337 List<CmsSolrIndex> result = new ArrayList<CmsSolrIndex>(); 1338 for (String indexName : getIndexNames()) { 1339 CmsSolrIndex index = getIndexSolr(indexName); 1340 if (index != null) { 1341 result.add(index); 1342 } 1343 } 1344 return result; 1345 } 1346 1347 /** 1348 * Returns an analyzer for the given language.<p> 1349 * 1350 * The analyzer is selected according to the analyzer configuration.<p> 1351 * 1352 * @param locale the locale to get the analyzer for 1353 * @return the appropriate lucene analyzer 1354 * 1355 * @throws CmsSearchException if something goes wrong 1356 */ 1357 public Analyzer getAnalyzer(Locale locale) throws CmsSearchException { 1358 1359 Analyzer analyzer = null; 1360 String className = null; 1361 1362 CmsSearchAnalyzer analyzerConf = m_analyzers.get(locale); 1363 if (analyzerConf == null) { 1364 throw new CmsSearchException(Messages.get().container(Messages.ERR_ANALYZER_NOT_FOUND_1, locale)); 1365 } 1366 1367 try { 1368 analyzer = getAnalyzer(analyzerConf.getClassName()); 1369 } catch (Exception e) { 1370 throw new CmsSearchException(Messages.get().container(Messages.ERR_LOAD_ANALYZER_1, className), e); 1371 } 1372 1373 return analyzer; 1374 } 1375 1376 /** 1377 * Returns an unmodifiable view of the map that contains the {@link CmsSearchAnalyzer} list.<p> 1378 * 1379 * The keys in the map are {@link Locale} objects, and the values are {@link CmsSearchAnalyzer} objects. 1380 * 1381 * @return an unmodifiable view of the Analyzers Map 1382 */ 1383 public Map<Locale, CmsSearchAnalyzer> getAnalyzers() { 1384 1385 return Collections.unmodifiableMap(m_analyzers); 1386 } 1387 1388 /** 1389 * Returns the search analyzer for the given locale.<p> 1390 * 1391 * @param locale the locale to get the analyzer for 1392 * 1393 * @return the search analyzer for the given locale 1394 */ 1395 public CmsSearchAnalyzer getCmsSearchAnalyzer(Locale locale) { 1396 1397 return m_analyzers.get(locale); 1398 } 1399 1400 /** 1401 * Returns the name of the directory below WEB-INF/ where the search indexes are stored.<p> 1402 * 1403 * @return the name of the directory below WEB-INF/ where the search indexes are stored 1404 */ 1405 public String getDirectory() { 1406 1407 return m_path; 1408 } 1409 1410 /** 1411 * Returns the configured Solr home directory <code>null</code> if not set.<p> 1412 * 1413 * @return the Solr home directory 1414 */ 1415 public String getDirectorySolr() { 1416 1417 return m_solrConfig != null ? m_solrConfig.getHome() : null; 1418 } 1419 1420 /** 1421 * Returns the document factory configured under the provided name. 1422 * @param docTypeName the name of the document type. 1423 * @return the factory for the provided name. 1424 */ 1425 public I_CmsDocumentFactory getDocumentFactoryForName(String docTypeName) { 1426 1427 Map<String, I_CmsDocumentFactory> factoryMap = m_documentTypes.get(docTypeName); 1428 if (factoryMap != null) { 1429 Iterator<I_CmsDocumentFactory> factoryIt = factoryMap.values().iterator(); 1430 if (factoryIt.hasNext()) { 1431 return factoryMap.values().iterator().next(); 1432 } 1433 } 1434 return null; 1435 } 1436 1437 /** 1438 * Returns a document type config.<p> 1439 * 1440 * @param name the name of the document type config 1441 * @return the document type config. 1442 */ 1443 public CmsSearchDocumentType getDocumentTypeConfig(String name) { 1444 1445 // this is really used only for the search manager GUI, 1446 // so performance is not an issue and no lookup map is generated 1447 for (int i = 0; i < m_documentTypeConfigs.size(); i++) { 1448 CmsSearchDocumentType type = m_documentTypeConfigs.get(i); 1449 if (type.getName().equals(name)) { 1450 return type; 1451 } 1452 } 1453 return null; 1454 } 1455 1456 /** 1457 * Returns an unmodifiable view (read-only) of the DocumentTypeConfigs Map.<p> 1458 * 1459 * @return an unmodifiable view (read-only) of the DocumentTypeConfigs Map 1460 */ 1461 public List<CmsSearchDocumentType> getDocumentTypeConfigs() { 1462 1463 return Collections.unmodifiableList(m_documentTypeConfigs); 1464 } 1465 1466 /** 1467 * Returns the document type keys used to specify the correct document factory. 1468 * 1469 * @see #getDocumentTypeKeys(String, String) for detailed information on the returned keys. 1470 * 1471 * @param resource the resource to generate the list of document type keys for. 1472 * @return the document type keys. 1473 */ 1474 public List<String> getDocumentTypeKeys(CmsResource resource) { 1475 1476 // first get the MIME type of the resource 1477 String mimeType = OpenCms.getResourceManager().getMimeType(resource.getRootPath(), null, "unknown"); 1478 String resourceType = null; 1479 try { 1480 resourceType = OpenCms.getResourceManager().getResourceType(resource.getTypeId()).getTypeName(); 1481 } catch (CmsLoaderException e) { 1482 // ignore, unknown resource type, resource can not be indexed 1483 LOG.info(e.getLocalizedMessage(), e); 1484 } 1485 return getDocumentTypeKeys(resourceType, mimeType); 1486 } 1487 1488 /** 1489 * Returns the document type keys used to specify the correct document factory. 1490 * One resource typically has more than one key. The document factories are matched 1491 * in the provided order and the first matching factory is used. 1492 * 1493 * The keys for type name "typename" and mimetype "mimetype" would be a subset of: 1494 * <ul> 1495 * <li><code>typename_mimetype</code></li> 1496 * <li><code>typename</code></li> 1497 * <li>if <code>typename</code> is a sub-type of <code>containerpage</code> 1498 * <ul> 1499 * <li><code>containerpage_mimetype</code></li> 1500 * <li><code>containerpage</code></li> 1501 * </ul> 1502 * </li> 1503 * <li>if <code>typename</code> is a sub-type of <code>xmlcontent</code> 1504 * <ul> 1505 * <li><code>xmlcontent_mimetype</code></li> 1506 * <li><code>xmlcontent</code></li> 1507 * </ul> 1508 * </li> 1509 * <li><code>__unconfigured___mimetype</code></li> 1510 * <li><code>__unconfigured__</code></li> 1511 * <li><code>__all___mimetype</code></li> 1512 * <li><code>__all__</code></li> 1513 * <ul> 1514 * Note that all keys except the "__all__"-keys are only added as long as globally 1515 * there is no matching factory for the key. 1516 * This in particular means that a factory matching "typename" will never be used 1517 * if you have a factory for "typename__mimetype" - even if this is not configured 1518 * for the used index source. Eventually, the content will not be indexed in such cases. 1519 * @param resourceType the resource type to generate the list of document type keys for. 1520 * @param mimeType the mime type to generate the list of document type keys for. 1521 * @return the document type keys. 1522 */ 1523 public List<String> getDocumentTypeKeys(String resourceType, String mimeType) { 1524 1525 List<String> result = new ArrayList<>(8); 1526 if (null != resourceType) { 1527 String currentKey = A_CmsVfsDocument.getDocumentKey(resourceType, mimeType); 1528 result.add(currentKey); 1529 if (!m_extractionKeys.contains(currentKey)) { 1530 currentKey = A_CmsVfsDocument.getDocumentKey(resourceType, null); 1531 result.add(currentKey); 1532 if (!m_extractionKeys.contains(currentKey)) { 1533 boolean hasGlobalMatch = false; 1534 try { 1535 String containerpageTypeName = CmsResourceTypeXmlContainerPage.getStaticTypeName(); 1536 I_CmsResourceType type = OpenCms.getResourceManager().getResourceType(resourceType); 1537 if (!resourceType.equals(containerpageTypeName)) { 1538 if (type instanceof CmsResourceTypeXmlContainerPage) { 1539 if (!resourceType.equals(CmsResourceTypeXmlContainerPage.getStaticTypeName())) { 1540 currentKey = A_CmsVfsDocument.getDocumentKey(containerpageTypeName, mimeType); 1541 result.add(currentKey); 1542 hasGlobalMatch = m_extractionKeys.contains(currentKey); 1543 if (!hasGlobalMatch) { 1544 currentKey = A_CmsVfsDocument.getDocumentKey(containerpageTypeName, null); 1545 result.add(currentKey); 1546 hasGlobalMatch = m_extractionKeys.contains(currentKey); 1547 } 1548 } 1549 } 1550 } 1551 String xmlcontentTypeName = CmsResourceTypeXmlContent.getStaticTypeName(); 1552 if (!resourceType.equals(containerpageTypeName)) { 1553 if (!hasGlobalMatch && (type instanceof CmsResourceTypeXmlContent)) { 1554 currentKey = A_CmsVfsDocument.getDocumentKey(xmlcontentTypeName, mimeType); 1555 result.add(currentKey); 1556 hasGlobalMatch = m_extractionKeys.contains(currentKey); 1557 if (!hasGlobalMatch) { 1558 currentKey = A_CmsVfsDocument.getDocumentKey(xmlcontentTypeName, null); 1559 result.add(currentKey); 1560 hasGlobalMatch = m_extractionKeys.contains(currentKey); 1561 } 1562 } 1563 } 1564 } catch (Throwable t) { 1565 LOG.warn("Could not read type for name \"" + resourceType + "\".", t); 1566 } 1567 if (!hasGlobalMatch) { 1568 result.add( 1569 A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_UNCONFIGURED_TYPES, mimeType)); 1570 result.add( 1571 A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_UNCONFIGURED_TYPES, null)); 1572 } 1573 } 1574 } 1575 result.add(A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_TYPES, mimeType)); 1576 result.add(A_CmsVfsDocument.getDocumentKey(A_CmsVfsDocument.DEFAULT_ALL_TYPES, null)); 1577 } 1578 return result; 1579 1580 } 1581 1582 /** 1583 * Returns the map from document type keys to document factories with all entries for the provided document type names. 1584 * @param documentTypeNames list of document type names to generate the map for. 1585 * @return the map from document type keys to document factories. 1586 */ 1587 public Map<String, I_CmsDocumentFactory> getDocumentTypeMapForTypeNames(List<String> documentTypeNames) { 1588 1589 Map<String, I_CmsDocumentFactory> result = new LinkedHashMap<>(); 1590 if (null != documentTypeNames) { 1591 // Iterate the list in reverse order to prefer factories that are added by document types listed earlier. 1592 ListIterator<String> typesIterator = documentTypeNames.listIterator(documentTypeNames.size()); 1593 while (typesIterator.hasPrevious()) { 1594 Map<String, I_CmsDocumentFactory> factories = m_documentTypes.get(typesIterator.previous()); 1595 if (null != factories) { 1596 result.putAll(factories); 1597 } 1598 } 1599 } 1600 return result; 1601 } 1602 1603 /** 1604 * Returns the maximum age a text extraction result is kept in the cache (in hours).<p> 1605 * 1606 * @return the maximum age a text extraction result is kept in the cache (in hours) 1607 */ 1608 public float getExtractionCacheMaxAge() { 1609 1610 return m_extractionCacheMaxAge; 1611 } 1612 1613 /** 1614 * Returns the search field configuration with the given name.<p> 1615 * 1616 * In case no configuration is available with the given name, <code>null</code> is returned.<p> 1617 * 1618 * @param name the name to get the search field configuration for 1619 * 1620 * @return the search field configuration with the given name 1621 */ 1622 public I_CmsSearchFieldConfiguration getFieldConfiguration(String name) { 1623 1624 return m_fieldConfigurations.get(name); 1625 } 1626 1627 /** 1628 * Returns the unmodifieable List of configured {@link I_CmsSearchFieldConfiguration} entries.<p> 1629 * 1630 * @return the unmodifieable List of configured {@link I_CmsSearchFieldConfiguration} entries 1631 */ 1632 public List<I_CmsSearchFieldConfiguration> getFieldConfigurations() { 1633 1634 List<I_CmsSearchFieldConfiguration> result = new ArrayList<I_CmsSearchFieldConfiguration>( 1635 m_fieldConfigurations.values()); 1636 Collections.sort(result); 1637 return Collections.unmodifiableList(result); 1638 } 1639 1640 /** 1641 * Returns the Lucene search field configurations only.<p> 1642 * 1643 * @return the Lucene search field configurations 1644 */ 1645 public List<CmsLuceneFieldConfiguration> getFieldConfigurationsLucene() { 1646 1647 List<CmsLuceneFieldConfiguration> result = new ArrayList<CmsLuceneFieldConfiguration>(); 1648 for (I_CmsSearchFieldConfiguration conf : m_fieldConfigurations.values()) { 1649 if (conf instanceof CmsLuceneFieldConfiguration) { 1650 result.add((CmsLuceneFieldConfiguration)conf); 1651 } 1652 } 1653 Collections.sort(result); 1654 return Collections.unmodifiableList(result); 1655 } 1656 1657 /** 1658 * Returns the Solr search field configurations only.<p> 1659 * 1660 * @return the Solr search field configurations 1661 */ 1662 public List<CmsSolrFieldConfiguration> getFieldConfigurationsSolr() { 1663 1664 List<CmsSolrFieldConfiguration> result = new ArrayList<CmsSolrFieldConfiguration>(); 1665 for (I_CmsSearchFieldConfiguration conf : m_fieldConfigurations.values()) { 1666 if (conf instanceof CmsSolrFieldConfiguration) { 1667 result.add((CmsSolrFieldConfiguration)conf); 1668 } 1669 } 1670 Collections.sort(result); 1671 return Collections.unmodifiableList(result); 1672 } 1673 1674 /** 1675 * Returns the force unlock mode during indexing.<p> 1676 * 1677 * @return the force unlock mode during indexing 1678 */ 1679 public CmsSearchForceUnlockMode getForceunlock() { 1680 1681 return m_forceUnlockMode; 1682 } 1683 1684 /** 1685 * Returns the highlighter.<p> 1686 * 1687 * @return the highlighter 1688 */ 1689 public I_CmsTermHighlighter getHighlighter() { 1690 1691 return m_highlighter; 1692 } 1693 1694 /** 1695 * Returns the Lucene search index configured with the given name.<p> 1696 * The index must exist, otherwise <code>null</code> is returned. 1697 * 1698 * @param indexName then name of the requested search index 1699 * 1700 * @return the Lucene search index configured with the given name 1701 */ 1702 public I_CmsSearchIndex getIndex(String indexName) { 1703 1704 for (I_CmsSearchIndex index : m_indexes) { 1705 if (indexName.equalsIgnoreCase(index.getName())) { 1706 return index; 1707 } 1708 } 1709 return null; 1710 } 1711 1712 /** 1713 * Returns the seconds to wait for an index lock during an update operation.<p> 1714 * 1715 * @return the seconds to wait for an index lock during an update operation 1716 */ 1717 public int getIndexLockMaxWaitSeconds() { 1718 1719 return m_indexLockMaxWaitSeconds; 1720 } 1721 1722 /** 1723 * Returns the names of all configured indexes.<p> 1724 * 1725 * @return list of names 1726 */ 1727 public List<String> getIndexNames() { 1728 1729 List<String> indexNames = new ArrayList<String>(); 1730 for (int i = 0, n = m_indexes.size(); i < n; i++) { 1731 indexNames.add((m_indexes.get(i)).getName()); 1732 } 1733 1734 return indexNames; 1735 } 1736 1737 /** 1738 * Returns the Solr index configured with the given name.<p> 1739 * The index must exist, otherwise <code>null</code> is returned. 1740 * 1741 * @param indexName then name of the requested Solr index 1742 * @return the Solr index configured with the given name 1743 */ 1744 public CmsSolrIndex getIndexSolr(String indexName) { 1745 1746 I_CmsSearchIndex index = getIndex(indexName); 1747 if (index instanceof CmsSolrIndex) { 1748 return (CmsSolrIndex)index; 1749 } 1750 return null; 1751 } 1752 1753 /** 1754 * Returns a search index source for a specified source name.<p> 1755 * 1756 * @param sourceName the name of the index source 1757 * @return a search index source 1758 */ 1759 public CmsSearchIndexSource getIndexSource(String sourceName) { 1760 1761 return m_indexSources.get(sourceName); 1762 } 1763 1764 /** 1765 * Returns the max. excerpt length.<p> 1766 * 1767 * @return the max excerpt length 1768 */ 1769 public int getMaxExcerptLength() { 1770 1771 return m_maxExcerptLength; 1772 } 1773 1774 /** 1775 * Returns the maximal time to wait for re-indexing after a content is edited (in milliseconds).<p> 1776 * 1777 * @return the maximal time to wait for re-indexing after a content is edited (in milliseconds) 1778 */ 1779 public long getMaxIndexWaitTime() { 1780 1781 return m_maxIndexWaitTime; 1782 } 1783 1784 /** 1785 * Returns the maximum number of modifications before a commit in the search index is triggered.<p> 1786 * 1787 * @return the maximum number of modifications before a commit in the search index is triggered 1788 */ 1789 public int getMaxModificationsBeforeCommit() { 1790 1791 return m_maxModificationsBeforeCommit; 1792 } 1793 1794 /** 1795 * Returns the update frequency of the offline indexer in milliseconds.<p> 1796 * 1797 * @return the update frequency of the offline indexer in milliseconds 1798 */ 1799 public long getOfflineUpdateFrequency() { 1800 1801 return m_offlineUpdateFrequency; 1802 } 1803 1804 /** 1805 * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p> 1806 * 1807 * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances 1808 */ 1809 public List<I_CmsSearchIndex> getSearchIndexes() { 1810 1811 return Collections.unmodifiableList(m_indexes); 1812 } 1813 1814 /** 1815 * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p> 1816 * 1817 * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances 1818 */ 1819 public List<I_CmsSearchIndex> getSearchIndexesAll() { 1820 1821 return Collections.unmodifiableList(m_indexes); 1822 } 1823 1824 /** 1825 * Returns an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances.<p> 1826 * 1827 * @return an unmodifiable list of all configured <code>{@link I_CmsSearchIndex}</code> instances 1828 */ 1829 public List<CmsSolrIndex> getSearchIndexesSolr() { 1830 1831 List<CmsSolrIndex> indexes = new ArrayList<CmsSolrIndex>(); 1832 for (I_CmsSearchIndex index : m_indexes) { 1833 if (index instanceof CmsSolrIndex) { 1834 indexes.add((CmsSolrIndex)index); 1835 } 1836 } 1837 return Collections.unmodifiableList(indexes); 1838 } 1839 1840 /** 1841 * Returns an unmodifiable view (read-only) of the SearchIndexSources Map.<p> 1842 * 1843 * @return an unmodifiable view (read-only) of the SearchIndexSources Map 1844 */ 1845 public Map<String, CmsSearchIndexSource> getSearchIndexSources() { 1846 1847 return Collections.unmodifiableMap(m_indexSources); 1848 } 1849 1850 /** 1851 * Return singleton instance of the OpenCms spellchecker.<p> 1852 * 1853 * @return instance of CmsSolrSpellchecker. 1854 */ 1855 public CmsSolrSpellchecker getSolrDictionary() { 1856 1857 // get the core container that contains one core for each configured index 1858 if (m_coreContainer == null) { 1859 m_coreContainer = createCoreContainer(); 1860 } 1861 return CmsSolrSpellchecker.getInstance(m_coreContainer); 1862 } 1863 1864 /** 1865 * Returns the Solr configuration.<p> 1866 * 1867 * @return the Solr configuration 1868 */ 1869 public CmsSolrConfiguration getSolrServerConfiguration() { 1870 1871 return m_solrConfig; 1872 } 1873 1874 /** 1875 * Returns the timeout to abandon threads indexing a resource.<p> 1876 * 1877 * @return the timeout to abandon threads indexing a resource 1878 */ 1879 public long getTimeout() { 1880 1881 return m_timeout; 1882 } 1883 1884 /** 1885 * Initializes the search manager.<p> 1886 * 1887 * @param cms the cms object 1888 * 1889 * @throws CmsRoleViolationException in case the given opencms object does not have <code>{@link CmsRole#WORKPLACE_MANAGER}</code> permissions 1890 */ 1891 public void initialize(CmsObject cms) throws CmsRoleViolationException { 1892 1893 OpenCms.getRoleManager().checkRole(cms, CmsRole.WORKPLACE_MANAGER); 1894 try { 1895 // store the Admin cms to index Cms resources 1896 m_adminCms = OpenCms.initCmsObject(cms); 1897 } catch (CmsException e) { 1898 // this should never happen 1899 LOG.error(e.getLocalizedMessage(), e); 1900 } 1901 // make sure the site root is the root site 1902 m_adminCms.getRequestContext().setSiteRoot("/"); 1903 1904 // create the extraction result cache 1905 m_extractionResultCache = new CmsExtractionResultCache( 1906 OpenCms.getSystemInfo().getAbsoluteRfsPathRelativeToWebInf(getDirectory()), 1907 "/extractCache"); 1908 initializeFieldConfigurations(); 1909 initializeIndexes(); 1910 initOfflineIndexes(); 1911 1912 // register this object as event listener 1913 OpenCms.addCmsEventListener( 1914 this, 1915 new int[] { 1916 I_CmsEventListener.EVENT_CLEAR_CACHES, 1917 I_CmsEventListener.EVENT_PUBLISH_PROJECT, 1918 I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES, 1919 I_CmsEventListener.EVENT_REINDEX_OFFLINE, 1920 I_CmsEventListener.EVENT_REINDEX_ONLINE}); 1921 } 1922 1923 /** 1924 * Calls {@link I_CmsSearchFieldConfiguration#init()} for all registered field configurations. 1925 */ 1926 public void initializeFieldConfigurations() { 1927 1928 for (I_CmsSearchFieldConfiguration config : m_fieldConfigurations.values()) { 1929 config.init(); 1930 } 1931 1932 } 1933 1934 /** 1935 * Initializes all configured document types, index sources and search indexes.<p> 1936 * 1937 * This method needs to be called if after a change in the index configuration has been made. 1938 */ 1939 public void initializeIndexes() { 1940 1941 initAvailableDocumentTypes(); 1942 initIndexSources(); 1943 initSearchIndexes(); 1944 } 1945 1946 /** 1947 * Initialize the offline index handler, require after an offline index has been added.<p> 1948 */ 1949 public void initOfflineIndexes() { 1950 1951 // check which indexes are configured as offline indexes 1952 List<I_CmsSearchIndex> offlineIndexes = new ArrayList<I_CmsSearchIndex>(); 1953 Iterator<I_CmsSearchIndex> i = m_indexes.iterator(); 1954 while (i.hasNext()) { 1955 I_CmsSearchIndex index = i.next(); 1956 if (I_CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) { 1957 // this is an offline index 1958 offlineIndexes.add(index); 1959 } 1960 } 1961 m_offlineIndexes = offlineIndexes; 1962 m_offlineHandler.initialize(); 1963 1964 } 1965 1966 /** 1967 * Initializes the spell check index.<p> 1968 * 1969 * @param adminCms the ROOT_ADMIN cms context 1970 */ 1971 public void initSpellcheckIndex(CmsObject adminCms) { 1972 1973 if (CmsSpellcheckDictionaryIndexer.updatingIndexNecessesary(adminCms)) { 1974 final CmsSolrSpellchecker spellchecker = OpenCms.getSearchManager().getSolrDictionary(); 1975 if (spellchecker != null) { 1976 1977 Runnable initRunner = new Runnable() { 1978 1979 public void run() { 1980 1981 try { 1982 spellchecker.parseAndAddDictionaries(adminCms); 1983 } catch (CmsRoleViolationException e) { 1984 LOG.error(e.getLocalizedMessage(), e); 1985 } 1986 } 1987 }; 1988 new Thread(initRunner).start(); 1989 } 1990 } 1991 } 1992 1993 /** 1994 * Returns if the offline indexing is paused.<p> 1995 * 1996 * @return <code>true</code> if the offline indexing is paused 1997 */ 1998 public boolean isOfflineIndexingPaused() { 1999 2000 return m_offlineUpdateFrequency == Long.MAX_VALUE; 2001 } 2002 2003 /** 2004 * Updates the indexes from as a scheduled job.<p> 2005 * 2006 * @param cms the OpenCms user context to use when reading resources from the VFS 2007 * @param parameters the parameters for the scheduled job 2008 * 2009 * @throws Exception if something goes wrong 2010 * 2011 * @return the String to write in the scheduler log 2012 * 2013 * @see org.opencms.scheduler.I_CmsScheduledJob#launch(CmsObject, Map) 2014 */ 2015 public String launch(CmsObject cms, Map<String, String> parameters) throws Exception { 2016 2017 CmsSearchManager manager = OpenCms.getSearchManager(); 2018 2019 I_CmsReport report = null; 2020 boolean writeLog = Boolean.valueOf(parameters.get(JOB_PARAM_WRITELOG)).booleanValue(); 2021 2022 if (writeLog) { 2023 report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class); 2024 } 2025 2026 List<String> updateList = null; 2027 String indexList = parameters.get(JOB_PARAM_INDEXLIST); 2028 if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(indexList)) { 2029 // index list has been provided as job parameter 2030 updateList = new ArrayList<String>(); 2031 String[] indexNames = CmsStringUtil.splitAsArray(indexList, '|'); 2032 for (int i = 0; i < indexNames.length; i++) { 2033 // check if the index actually exists 2034 if (manager.getIndex(indexNames[i]) != null) { 2035 updateList.add(indexNames[i]); 2036 } else { 2037 if (LOG.isWarnEnabled()) { 2038 LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexNames[i])); 2039 } 2040 } 2041 } 2042 } 2043 2044 long startTime = System.currentTimeMillis(); 2045 2046 if (updateList == null) { 2047 // all indexes need to be updated 2048 manager.rebuildAllIndexes(report); 2049 } else { 2050 // rebuild only the selected indexes 2051 manager.rebuildIndexes(updateList, report); 2052 } 2053 2054 long runTime = System.currentTimeMillis() - startTime; 2055 2056 String finishMessage = Messages.get().getBundle().key( 2057 Messages.LOG_REBUILD_INDEXES_FINISHED_1, 2058 CmsStringUtil.formatRuntime(runTime)); 2059 2060 if (LOG.isInfoEnabled()) { 2061 LOG.info(finishMessage); 2062 } 2063 return finishMessage; 2064 } 2065 2066 /** 2067 * Pauses the offline indexing and returns a pause request id that has to be used for resuming offline indexing again.<p> 2068 * May take some time, because the indexes are updated first.<p> 2069 * 2070 *@return the pause request id. The id has to be given to the {@link #resumeOfflineIndexing(CmsUUID)} method to resume offline indexing. 2071 */ 2072 public CmsUUID pauseOfflineIndexing() { 2073 2074 CmsUUID pauseId = new CmsUUID(); 2075 synchronized (m_pauseRequests) { 2076 if (m_pauseRequests.isEmpty()) { 2077 LOG.info("Pausing offline indexing."); 2078 m_configuredOfflineIndexingFrequency = m_offlineUpdateFrequency; 2079 m_offlineUpdateFrequency = Long.MAX_VALUE; 2080 updateOfflineIndexes(0); 2081 } 2082 m_pauseRequests.add(pauseId); 2083 if (LOG.isDebugEnabled()) { 2084 LOG.debug("Added pause request with id " + pauseId); 2085 } 2086 } 2087 return pauseId; 2088 } 2089 2090 /** 2091 * Rebuilds (if required creates) all configured indexes.<p> 2092 * 2093 * @param report the report object to write messages (or <code>null</code>) 2094 * 2095 * @throws CmsException if something goes wrong 2096 */ 2097 public void rebuildAllIndexes(I_CmsReport report) throws CmsException { 2098 2099 OFFLINE_LOCK.lock(true); 2100 try { 2101 ONLINE_LOCK.lock(true); 2102 try { 2103 2104 CmsMessageContainer container = null; 2105 for (int i = 0, n = m_indexes.size(); i < n; i++) { 2106 // iterate all configured search indexes 2107 I_CmsSearchIndex searchIndex = m_indexes.get(i); 2108 try { 2109 // update the index 2110 updateIndex(searchIndex, report, null); 2111 } catch (CmsException e) { 2112 container = new CmsMessageContainer( 2113 Messages.get(), 2114 Messages.ERR_INDEX_REBUILD_ALL_1, 2115 new Object[] {searchIndex.getName()}); 2116 LOG.error( 2117 Messages.get().getBundle().key(Messages.ERR_INDEX_REBUILD_ALL_1, searchIndex.getName()), 2118 e); 2119 } 2120 } 2121 // clean up the extraction result cache 2122 cleanExtractionCache(); 2123 if (container != null) { 2124 // throw stored exception 2125 throw new CmsSearchException(container); 2126 } 2127 } finally { 2128 ONLINE_LOCK.unlock(); 2129 } 2130 } finally { 2131 OFFLINE_LOCK.unlock(); 2132 } 2133 } 2134 2135 /** 2136 * Rebuilds (if required creates) the index with the given name.<p> 2137 * 2138 * @param indexName the name of the index to rebuild 2139 * @param report the report object to write messages (or <code>null</code>) 2140 * 2141 * @throws CmsException if something goes wrong 2142 */ 2143 public void rebuildIndex(String indexName, I_CmsReport report) throws CmsException { 2144 2145 I_CmsSearchIndex index = getIndex(indexName); 2146 CmsPriorityLock lock = I_CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode()) 2147 ? OFFLINE_LOCK 2148 : ONLINE_LOCK; 2149 lock.lock(true); 2150 try { 2151 // update the index 2152 updateIndex(index, report, null); 2153 // clean up the extraction result cache 2154 cleanExtractionCache(); 2155 } finally { 2156 lock.unlock(); 2157 2158 } 2159 } 2160 2161 /** 2162 * Rebuilds (if required creates) the List of indexes with the given name.<p> 2163 * 2164 * @param indexNames the names (String) of the index to rebuild 2165 * @param report the report object to write messages (or <code>null</code>) 2166 * 2167 * @throws CmsException if something goes wrong 2168 */ 2169 public void rebuildIndexes(List<String> indexNames, I_CmsReport report) throws CmsException { 2170 2171 Iterator<String> i = indexNames.iterator(); 2172 while (i.hasNext()) { 2173 String indexName = i.next(); 2174 // get the search index by name 2175 I_CmsSearchIndex index = getIndex(indexName); 2176 if (index != null) { 2177 CmsPriorityLock lock = I_CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode()) 2178 ? OFFLINE_LOCK 2179 : ONLINE_LOCK; 2180 try { 2181 lock.lock(true); 2182 updateIndex(index, report, null); 2183 } finally { 2184 lock.unlock(); 2185 2186 } 2187 } else { 2188 if (LOG.isWarnEnabled()) { 2189 LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName)); 2190 } 2191 } 2192 } 2193 // clean up the extraction result cache 2194 cleanExtractionCache(); 2195 } 2196 2197 /** 2198 * Registers a new Solr core for the given index.<p> 2199 * 2200 * @param index the index to register a new Solr core for 2201 * 2202 * @throws CmsConfigurationException if no Solr server is configured 2203 */ 2204 @SuppressWarnings("resource") 2205 public void registerSolrIndex(CmsSolrIndex index) throws CmsConfigurationException { 2206 2207 if ((m_solrConfig == null) || !m_solrConfig.isEnabled()) { 2208 // No solr server configured 2209 throw new CmsConfigurationException(Messages.get().container(Messages.ERR_SOLR_NOT_ENABLED_0)); 2210 } 2211 2212 if (index.getServerUrl() != null) { // Use the index-specific Solr-Server if present. 2213 index.setSolrServer(new Builder().withBaseSolrUrl(index.getServerUrl()).build()); 2214 } else if (m_solrConfig.getServerUrl() != null) { // Use the globally configured external Solr-Server if present. 2215 // HTTP Server configured 2216 // TODO Implement multi core support for HTTP server 2217 // @see http://lucidworks.lucidimagination.com/display/solr/Configuring+solr.xml 2218 index.setSolrServer(new Builder().withBaseSolrUrl(m_solrConfig.getServerUrl()).build()); 2219 } else { // Default to the embedded Solr Server 2220 2221 // get the core container that contains one core for each configured index 2222 if (m_coreContainer == null) { 2223 m_coreContainer = createCoreContainer(); 2224 } 2225 2226 // unload the existing core if it exists to avoid problems with forced unlock. 2227 if (m_coreContainer.getAllCoreNames().contains(index.getCoreName())) { 2228 m_coreContainer.unload(index.getCoreName(), false, false, true); 2229 } 2230 // ensure that all locks on the index are gone 2231 ensureIndexIsUnlocked(index.getPath()); 2232 2233 // load the core to the container 2234 File dataDir = new File(index.getPath()); 2235 if (!dataDir.exists()) { 2236 dataDir.mkdirs(); 2237 if (CmsLog.INIT.isInfoEnabled()) { 2238 CmsLog.INIT.info( 2239 Messages.get().getBundle().key( 2240 Messages.INIT_SOLR_INDEX_DIR_CREATED_2, 2241 index.getName(), 2242 index.getPath())); 2243 } 2244 } 2245 File instanceDir = new File( 2246 m_solrConfig.getHome() + FileSystems.getDefault().getSeparator() + index.getName()); 2247 if (!instanceDir.exists()) { 2248 instanceDir.mkdirs(); 2249 if (CmsLog.INIT.isInfoEnabled()) { 2250 CmsLog.INIT.info( 2251 Messages.get().getBundle().key( 2252 Messages.INIT_SOLR_INDEX_DIR_CREATED_2, 2253 index.getName(), 2254 index.getPath())); 2255 } 2256 } 2257 2258 // create the core 2259 // TODO: suboptimal - forces always the same schema 2260 SolrCore core = null; 2261 try { 2262 // creation includes registration. 2263 // TODO: this was the old code: core = m_coreContainer.create(descriptor, false); 2264 Map<String, String> properties = new HashMap<String, String>(3); 2265 properties.put(CoreDescriptor.CORE_DATADIR, dataDir.getAbsolutePath()); 2266 properties.put(CoreDescriptor.CORE_CONFIGSET, "default"); 2267 core = m_coreContainer.create(index.getCoreName(), instanceDir.toPath(), properties, false); 2268 } catch (NullPointerException e) { 2269 if (core != null) { 2270 core.close(); 2271 } 2272 throw new CmsConfigurationException( 2273 Messages.get().container( 2274 Messages.ERR_SOLR_SERVER_NOT_CREATED_3, 2275 index.getName() + " (" + index.getCoreName() + ")", 2276 index.getPath(), 2277 m_solrConfig.getSolrConfigFile().getAbsolutePath()), 2278 e); 2279 } 2280 2281 if (index.isNoSolrServerSet()) { 2282 index.setSolrServer(new EmbeddedSolrServer(m_coreContainer, index.getCoreName())); 2283 } 2284 if (CmsLog.INIT.isInfoEnabled()) { 2285 CmsLog.INIT.info( 2286 Messages.get().getBundle().key( 2287 Messages.INIT_SOLR_SERVER_CREATED_1, 2288 index.getName() + " (" + index.getCoreName() + ")")); 2289 } 2290 } 2291 } 2292 2293 /** 2294 * Removes this field configuration from the OpenCms configuration (if it is not used any more).<p> 2295 * 2296 * @param fieldConfiguration the field configuration to remove from the configuration 2297 * 2298 * @return true if remove was successful, false if preconditions for removal are ok but the given 2299 * field configuration was unknown to the manager. 2300 * 2301 * @throws CmsIllegalStateException if the given field configuration is still used by at least one 2302 * <code>{@link I_CmsSearchIndex}</code>. 2303 * 2304 */ 2305 public boolean removeSearchFieldConfiguration(I_CmsSearchFieldConfiguration fieldConfiguration) 2306 throws CmsIllegalStateException { 2307 2308 // never remove the standard field configuration 2309 if (fieldConfiguration.getName().equals(CmsSearchFieldConfiguration.STR_STANDARD)) { 2310 throw new CmsIllegalStateException( 2311 Messages.get().container( 2312 Messages.ERR_INDEX_CONFIGURATION_DELETE_STANDARD_1, 2313 fieldConfiguration.getName())); 2314 } 2315 // validation if removal will be granted 2316 Iterator<I_CmsSearchIndex> itIndexes = m_indexes.iterator(); 2317 I_CmsSearchIndex idx; 2318 // the list for collecting indexes that use the given field configuration 2319 List<I_CmsSearchIndex> referrers = new ArrayList<I_CmsSearchIndex>(); 2320 I_CmsSearchFieldConfiguration refFieldConfig; 2321 while (itIndexes.hasNext()) { 2322 idx = itIndexes.next(); 2323 refFieldConfig = idx.getFieldConfiguration(); 2324 if (refFieldConfig.equals(fieldConfiguration)) { 2325 referrers.add(idx); 2326 } 2327 } 2328 if (referrers.size() > 0) { 2329 throw new CmsIllegalStateException( 2330 Messages.get().container( 2331 Messages.ERR_INDEX_CONFIGURATION_DELETE_2, 2332 fieldConfiguration.getName(), 2333 referrers.toString())); 2334 } 2335 2336 // remove operation (no exception) 2337 return m_fieldConfigurations.remove(fieldConfiguration.getName()) != null; 2338 2339 } 2340 2341 /** 2342 * Removes a search field from the field configuration.<p> 2343 * 2344 * @param fieldConfiguration the field configuration 2345 * @param field field to remove from the field configuration 2346 * 2347 * @return true if remove was successful, false if preconditions for removal are ok but the given 2348 * field was unknown. 2349 */ 2350 public boolean removeSearchFieldConfigurationField( 2351 I_CmsSearchFieldConfiguration fieldConfiguration, 2352 CmsSearchField field) { 2353 2354 if (LOG.isInfoEnabled()) { 2355 LOG.info( 2356 Messages.get().getBundle().key( 2357 Messages.LOG_REMOVE_FIELDCONFIGURATION_FIELD_INDEX_2, 2358 field.getName(), 2359 fieldConfiguration.getName())); 2360 } 2361 2362 return fieldConfiguration.getFields().remove(field); 2363 } 2364 2365 /** 2366 * Removes a search field mapping from the given field.<p> 2367 * 2368 * @param field the field 2369 * @param mapping mapping to remove from the field 2370 * 2371 * @return true if remove was successful, false if preconditions for removal are ok but the given 2372 * mapping was unknown. 2373 * 2374 * @throws CmsIllegalStateException if the given mapping is the last mapping inside the given field. 2375 */ 2376 public boolean removeSearchFieldMapping(CmsLuceneField field, CmsSearchFieldMapping mapping) 2377 throws CmsIllegalStateException { 2378 2379 if (field.getMappings().size() < 2) { 2380 throw new CmsIllegalStateException( 2381 Messages.get().container( 2382 Messages.ERR_FIELD_MAPPING_DELETE_2, 2383 mapping.getType().toString(), 2384 field.getName())); 2385 } else { 2386 2387 if (LOG.isInfoEnabled()) { 2388 LOG.info( 2389 Messages.get().getBundle().key( 2390 Messages.LOG_REMOVE_FIELD_MAPPING_INDEX_2, 2391 mapping.toString(), 2392 field.getName())); 2393 } 2394 return field.getMappings().remove(mapping); 2395 } 2396 } 2397 2398 /** 2399 * Removes a search index from the configuration.<p> 2400 * 2401 * @param searchIndex the search index to remove 2402 */ 2403 public void removeSearchIndex(I_CmsSearchIndex searchIndex) { 2404 2405 // shut down index to remove potential config files of Solr indexes 2406 searchIndex.shutDown(); 2407 if (searchIndex instanceof CmsSolrIndex) { 2408 CmsSolrIndex solrIndex = (CmsSolrIndex)searchIndex; 2409 m_coreContainer.unload(solrIndex.getCoreName(), true, true, true); 2410 } 2411 m_indexes.remove(searchIndex); 2412 initOfflineIndexes(); 2413 2414 if (LOG.isInfoEnabled()) { 2415 LOG.info( 2416 Messages.get().getBundle().key( 2417 Messages.LOG_REMOVE_SEARCH_INDEX_2, 2418 searchIndex.getName(), 2419 searchIndex.getProject())); 2420 } 2421 } 2422 2423 /** 2424 * Removes all indexes included in the given list (which must contain the name of an index to remove).<p> 2425 * 2426 * @param indexNames the names of the index to remove 2427 */ 2428 public void removeSearchIndexes(List<String> indexNames) { 2429 2430 Iterator<String> i = indexNames.iterator(); 2431 while (i.hasNext()) { 2432 String indexName = i.next(); 2433 // get the search index by name 2434 I_CmsSearchIndex index = getIndex(indexName); 2435 if (index != null) { 2436 // remove the index 2437 removeSearchIndex(index); 2438 } else { 2439 if (LOG.isWarnEnabled()) { 2440 LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName)); 2441 } 2442 } 2443 } 2444 } 2445 2446 /** 2447 * Removes this indexsource from the OpenCms configuration (if it is not used any more).<p> 2448 * 2449 * @param indexsource the indexsource to remove from the configuration 2450 * 2451 * @return true if remove was successful, false if preconditions for removal are ok but the given 2452 * searchindex was unknown to the manager. 2453 * 2454 * @throws CmsIllegalStateException if the given indexsource is still used by at least one 2455 * <code>{@link I_CmsSearchIndex}</code>. 2456 * 2457 */ 2458 public boolean removeSearchIndexSource(CmsSearchIndexSource indexsource) throws CmsIllegalStateException { 2459 2460 // validation if removal will be granted 2461 Iterator<I_CmsSearchIndex> itIndexes = m_indexes.iterator(); 2462 I_CmsSearchIndex idx; 2463 // the list for collecting indexes that use the given index source 2464 List<I_CmsSearchIndex> referrers = new ArrayList<I_CmsSearchIndex>(); 2465 // the current list of referred index sources of the iterated index 2466 List<CmsSearchIndexSource> refsources; 2467 while (itIndexes.hasNext()) { 2468 idx = itIndexes.next(); 2469 refsources = idx.getSources(); 2470 if (refsources != null) { 2471 if (refsources.contains(indexsource)) { 2472 referrers.add(idx); 2473 } 2474 } 2475 } 2476 if (referrers.size() > 0) { 2477 throw new CmsIllegalStateException( 2478 Messages.get().container( 2479 Messages.ERR_INDEX_SOURCE_DELETE_2, 2480 indexsource.getName(), 2481 referrers.toString())); 2482 } 2483 2484 // remove operation (no exception) 2485 return m_indexSources.remove(indexsource.getName()) != null; 2486 2487 } 2488 2489 /** 2490 * Resumes offline indexing if it was paused and no pause for another pauseId is still present.<p> 2491 * @param pauseId the id of the pause request, which now allows for resuming. 2492 */ 2493 public void resumeOfflineIndexing(CmsUUID pauseId) { 2494 2495 synchronized (m_pauseRequests) { 2496 if (!m_pauseRequests.contains(pauseId)) { 2497 try { 2498 throw new IllegalArgumentException(); 2499 } catch (IllegalArgumentException e) { 2500 LOG.warn("Cannot resume for pause request " + pauseId + ". The request id is unknown.", e); 2501 } 2502 } else { 2503 m_pauseRequests.remove(pauseId); 2504 if (LOG.isDebugEnabled()) { 2505 LOG.debug( 2506 "Removed pause request " 2507 + pauseId 2508 + " from pause requests. Remaining pauses are: " 2509 + m_pauseRequests); 2510 } 2511 if (m_pauseRequests.isEmpty()) { 2512 LOG.info("Resuming offline indexing."); 2513 setOfflineUpdateFrequency( 2514 m_configuredOfflineIndexingFrequency > 0 2515 ? m_configuredOfflineIndexingFrequency 2516 : DEFAULT_OFFLINE_UPDATE_FREQNENCY); 2517 } 2518 } 2519 } 2520 } 2521 2522 /** 2523 * Sets the name of the directory below WEB-INF/ where the search indexes are stored.<p> 2524 * 2525 * @param value the name of the directory below WEB-INF/ where the search indexes are stored 2526 */ 2527 public void setDirectory(String value) { 2528 2529 m_path = value; 2530 } 2531 2532 /** 2533 * Sets the maximum age a text extraction result is kept in the cache (in hours).<p> 2534 * 2535 * @param extractionCacheMaxAge the maximum age for a text extraction result to set 2536 */ 2537 public void setExtractionCacheMaxAge(float extractionCacheMaxAge) { 2538 2539 m_extractionCacheMaxAge = extractionCacheMaxAge; 2540 } 2541 2542 /** 2543 * Sets the maximum age a text extraction result is kept in the cache (in hours) as a String.<p> 2544 * 2545 * @param extractionCacheMaxAge the maximum age for a text extraction result to set 2546 */ 2547 public void setExtractionCacheMaxAge(String extractionCacheMaxAge) { 2548 2549 try { 2550 setExtractionCacheMaxAge(Float.parseFloat(extractionCacheMaxAge)); 2551 } catch (NumberFormatException e) { 2552 LOG.error( 2553 Messages.get().getBundle().key( 2554 Messages.LOG_PARSE_EXTRACTION_CACHE_AGE_FAILED_2, 2555 extractionCacheMaxAge, 2556 Float.valueOf(DEFAULT_EXTRACTION_CACHE_MAX_AGE)), 2557 e); 2558 setExtractionCacheMaxAge(DEFAULT_EXTRACTION_CACHE_MAX_AGE); 2559 } 2560 } 2561 2562 /** 2563 * Sets the unlock mode during indexing.<p> 2564 * 2565 * @param value the value 2566 */ 2567 public void setForceunlock(String value) { 2568 2569 m_forceUnlockMode = CmsSearchForceUnlockMode.valueOf(value); 2570 } 2571 2572 /** 2573 * Sets the highlighter.<p> 2574 * 2575 * A highlighter is a class implementing org.opencms.search.documents.I_TermHighlighter.<p> 2576 * 2577 * @param highlighter the package/class name of the highlighter 2578 */ 2579 public void setHighlighter(String highlighter) { 2580 2581 try { 2582 m_highlighter = (I_CmsTermHighlighter)Class.forName(highlighter).newInstance(); 2583 } catch (Exception e) { 2584 m_highlighter = null; 2585 LOG.error(e.getLocalizedMessage(), e); 2586 } 2587 } 2588 2589 /** 2590 * Sets the seconds to wait for an index lock during an update operation.<p> 2591 * 2592 * @param value the seconds to wait for an index lock during an update operation 2593 */ 2594 public void setIndexLockMaxWaitSeconds(int value) { 2595 2596 m_indexLockMaxWaitSeconds = value; 2597 } 2598 2599 /** 2600 * Sets the max. excerpt length.<p> 2601 * 2602 * @param maxExcerptLength the max. excerpt length to set 2603 */ 2604 public void setMaxExcerptLength(int maxExcerptLength) { 2605 2606 m_maxExcerptLength = maxExcerptLength; 2607 } 2608 2609 /** 2610 * Sets the max. excerpt length as a String.<p> 2611 * 2612 * @param maxExcerptLength the max. excerpt length to set 2613 */ 2614 public void setMaxExcerptLength(String maxExcerptLength) { 2615 2616 try { 2617 setMaxExcerptLength(Integer.parseInt(maxExcerptLength)); 2618 } catch (Exception e) { 2619 LOG.error( 2620 Messages.get().getBundle().key( 2621 Messages.LOG_PARSE_EXCERPT_LENGTH_FAILED_2, 2622 maxExcerptLength, 2623 Integer.valueOf(DEFAULT_EXCERPT_LENGTH)), 2624 e); 2625 setMaxExcerptLength(DEFAULT_EXCERPT_LENGTH); 2626 } 2627 } 2628 2629 /** 2630 * Sets the maximal wait time for offline index updates after edit operations.<p> 2631 * 2632 * @param maxIndexWaitTime the maximal wait time to set in milliseconds 2633 */ 2634 public void setMaxIndexWaitTime(long maxIndexWaitTime) { 2635 2636 m_maxIndexWaitTime = maxIndexWaitTime; 2637 } 2638 2639 /** 2640 * Sets the maximal wait time for offline index updates after edit operations.<p> 2641 * 2642 * @param maxIndexWaitTime the maximal wait time to set in milliseconds 2643 */ 2644 public void setMaxIndexWaitTime(String maxIndexWaitTime) { 2645 2646 try { 2647 setMaxIndexWaitTime(Long.parseLong(maxIndexWaitTime)); 2648 } catch (Exception e) { 2649 LOG.error( 2650 Messages.get().getBundle().key( 2651 Messages.LOG_PARSE_MAX_INDEX_WAITTIME_FAILED_2, 2652 maxIndexWaitTime, 2653 Long.valueOf(DEFAULT_MAX_INDEX_WAITTIME)), 2654 e); 2655 setMaxIndexWaitTime(DEFAULT_MAX_INDEX_WAITTIME); 2656 } 2657 } 2658 2659 /** 2660 * Sets the maximum number of modifications before a commit in the search index is triggered.<p> 2661 * 2662 * @param maxModificationsBeforeCommit the maximum number of modifications to set 2663 */ 2664 public void setMaxModificationsBeforeCommit(int maxModificationsBeforeCommit) { 2665 2666 m_maxModificationsBeforeCommit = maxModificationsBeforeCommit; 2667 } 2668 2669 /** 2670 * Sets the maximum number of modifications before a commit in the search index is triggered as a string.<p> 2671 * 2672 * @param value the maximum number of modifications to set 2673 */ 2674 public void setMaxModificationsBeforeCommit(String value) { 2675 2676 try { 2677 setMaxModificationsBeforeCommit(Integer.parseInt(value)); 2678 } catch (Exception e) { 2679 LOG.error( 2680 Messages.get().getBundle().key( 2681 Messages.LOG_PARSE_MAXCOMMIT_FAILED_2, 2682 value, 2683 Integer.valueOf(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT)), 2684 e); 2685 setMaxModificationsBeforeCommit(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT); 2686 } 2687 } 2688 2689 /** 2690 * Sets the update frequency of the offline indexer in milliseconds.<p> 2691 * 2692 * @param offlineUpdateFrequency the update frequency in milliseconds to set 2693 */ 2694 public void setOfflineUpdateFrequency(long offlineUpdateFrequency) { 2695 2696 m_offlineUpdateFrequency = offlineUpdateFrequency; 2697 updateOfflineIndexes(0); 2698 } 2699 2700 /** 2701 * Sets the update frequency of the offline indexer in milliseconds.<p> 2702 * 2703 * @param offlineUpdateFrequency the update frequency in milliseconds to set 2704 */ 2705 public void setOfflineUpdateFrequency(String offlineUpdateFrequency) { 2706 2707 try { 2708 setOfflineUpdateFrequency(Long.parseLong(offlineUpdateFrequency)); 2709 } catch (Exception e) { 2710 LOG.error( 2711 Messages.get().getBundle().key( 2712 Messages.LOG_PARSE_OFFLINE_UPDATE_FAILED_2, 2713 offlineUpdateFrequency, 2714 Long.valueOf(DEFAULT_OFFLINE_UPDATE_FREQNENCY)), 2715 e); 2716 setOfflineUpdateFrequency(DEFAULT_OFFLINE_UPDATE_FREQNENCY); 2717 } 2718 } 2719 2720 /** 2721 * Sets the Solr configuration.<p> 2722 * 2723 * @param config the Solr configuration 2724 */ 2725 public void setSolrServerConfiguration(CmsSolrConfiguration config) { 2726 2727 m_solrConfig = config; 2728 } 2729 2730 /** 2731 * Sets the timeout to abandon threads indexing a resource.<p> 2732 * 2733 * @param value the timeout in milliseconds 2734 */ 2735 public void setTimeout(long value) { 2736 2737 m_timeout = value; 2738 } 2739 2740 /** 2741 * Sets the timeout to abandon threads indexing a resource as a String.<p> 2742 * 2743 * @param value the timeout in milliseconds 2744 */ 2745 public void setTimeout(String value) { 2746 2747 try { 2748 setTimeout(Long.parseLong(value)); 2749 } catch (Exception e) { 2750 LOG.error( 2751 Messages.get().getBundle().key( 2752 Messages.LOG_PARSE_TIMEOUT_FAILED_2, 2753 value, 2754 Long.valueOf(DEFAULT_TIMEOUT)), 2755 e); 2756 setTimeout(DEFAULT_TIMEOUT); 2757 } 2758 } 2759 2760 /** 2761 * Shuts down the search manager.<p> 2762 * 2763 * This will cause all search indices to be shut down.<p> 2764 */ 2765 public void shutDown() { 2766 2767 m_instantPublishIndexQueue.shutdown(); 2768 2769 if (m_offlineIndexThread != null) { 2770 m_offlineIndexThread.shutDown(); 2771 } 2772 2773 if (m_offlineHandler != null) { 2774 OpenCms.removeCmsEventListener(m_offlineHandler); 2775 } 2776 2777 Iterator<I_CmsSearchIndex> i = m_indexes.iterator(); 2778 while (i.hasNext()) { 2779 I_CmsSearchIndex index = i.next(); 2780 index.shutDown(); 2781 index = null; 2782 } 2783 m_indexes.clear(); 2784 2785 shutDownSolrContainer(); 2786 2787 if (CmsLog.INIT.isInfoEnabled()) { 2788 CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SHUTDOWN_MANAGER_0)); 2789 } 2790 } 2791 2792 /** 2793 * Updates all offline indexes.<p> 2794 * 2795 * Can be used to force an index update when it's not convenient to wait until the 2796 * offline update interval has eclipsed.<p> 2797 * 2798 * Since the offline indexes still need some time to update the new resources, 2799 * the method waits for at most the configurable <code>maxIndexWaitTime</code> 2800 * to ensure that updating is finished. 2801 * 2802 * @see #updateOfflineIndexes(long) 2803 * 2804 */ 2805 public void updateOfflineIndexes() { 2806 2807 updateOfflineIndexes(getMaxIndexWaitTime()); 2808 } 2809 2810 /** 2811 * Updates all offline indexes.<p> 2812 * 2813 * Can be used to force an index update when it's not convenient to wait until the 2814 * offline update interval has eclipsed.<p> 2815 * 2816 * Since the offline index will still need some time to update the new resources even if it runs directly, 2817 * a wait time of 2500 or so should be given in order to make sure the index finished updating. 2818 * 2819 * @param waitTime milliseconds to wait after the offline update index was notified of the changes 2820 */ 2821 public void updateOfflineIndexes(long waitTime) { 2822 2823 if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) { 2824 // notify existing thread of update frequency change 2825 if (LOG.isDebugEnabled()) { 2826 LOG.debug(Messages.get().getBundle().key(Messages.LOG_OI_UPDATE_INTERRUPT_0)); 2827 } 2828 m_offlineIndexThread.interrupt(); 2829 if (waitTime > 0) { 2830 m_offlineIndexThread.getWaitHandle().enter(waitTime); 2831 } 2832 } 2833 } 2834 2835 /** 2836 * Collects the resources whose indexed document depends on one of the updated resources.<p> 2837 * We take transitive dependencies into account and handle cyclic dependencies correctly as well. 2838 * 2839 * @param adminCms an OpenCms user context with Admin permissions 2840 * @param updateResources the resources to be re-indexed 2841 * 2842 * @return the updated list of resource to re-index 2843 */ 2844 protected List<CmsPublishedResource> addAdditionallyAffectedResources( 2845 CmsObject adminCms, 2846 List<CmsPublishedResource> updateResources) { 2847 2848 if (updateResources.size() > 0) { 2849 Set<CmsPublishedResource> updateResourceSet = new HashSet<>(updateResources); 2850 Collection<CmsPublishedResource> resourcesToCheck = updateResourceSet; 2851 Collection<CmsPublishedResource> additionalResources = Collections.emptySet(); 2852 do { 2853 additionalResources = findRelatedContainerPages(adminCms, updateResourceSet, resourcesToCheck); 2854 additionalResources.addAll( 2855 addIndexContentRelatedResources(adminCms, updateResourceSet, resourcesToCheck)); 2856 updateResources.addAll(additionalResources); 2857 updateResourceSet.addAll(additionalResources); 2858 resourcesToCheck = additionalResources; 2859 } while (resourcesToCheck.size() > 0); 2860 } 2861 return updateResources; 2862 } 2863 2864 /** 2865 * Collects the resources whose indexed document depends on one of the updated resources.<p> 2866 * 2867 * @param adminCms an OpenCms user context with Admin permissions 2868 * @param updateResources the resources to be re-indexed 2869 * @param updateResourcesToCheck the resources to check additionally affected resources for, subset of updateResources 2870 * 2871 * @return the list of resources that need to be additionally re-index 2872 */ 2873 protected Collection<CmsPublishedResource> addIndexContentRelatedResources( 2874 CmsObject adminCms, 2875 Collection<CmsPublishedResource> updateResources, 2876 Collection<CmsPublishedResource> updateResourcesToCheck) { 2877 2878 Collection<CmsPublishedResource> additionalUpdateResources = new HashSet<>(); 2879 for (CmsPublishedResource checkedRes : updateResourcesToCheck) { 2880 try { 2881 CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(checkedRes.getStructureId()); 2882 filter = filter.filterType(CmsRelationType.INDEX_CONTENT); 2883 List<CmsRelation> relations = adminCms.readRelations(filter); 2884 for (CmsRelation relation : relations) { 2885 CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL); 2886 CmsPublishedResource additionalPubRes = new CmsPublishedResource(res); 2887 if (!updateResources.contains(additionalPubRes)) { 2888 additionalUpdateResources.add(additionalPubRes); 2889 } 2890 } 2891 } catch (CmsException e) { 2892 LOG.error(e.getLocalizedMessage(), e); 2893 } 2894 } 2895 return additionalUpdateResources; 2896 } 2897 2898 /** 2899 * Cleans up the extraction result cache.<p> 2900 */ 2901 protected void cleanExtractionCache() { 2902 2903 // clean up the extraction result cache 2904 m_extractionResultCache.cleanCache(m_extractionCacheMaxAge); 2905 } 2906 2907 /** 2908 * Collects the related containerpages to the resources that have been published.<p> 2909 * 2910 * @param adminCms an OpenCms user context with Admin permissions 2911 * @param updateResources the resources to be re-indexed 2912 * @param updateResourcesToCheck the resources to check additionally affected resources for, subset of updateResources 2913 * 2914 * @return the list of resources that need to be additionally re-index 2915 */ 2916 protected Collection<CmsPublishedResource> findRelatedContainerPages( 2917 CmsObject adminCms, 2918 Collection<CmsPublishedResource> updateResources, 2919 Collection<CmsPublishedResource> updateResourcesToCheck) { 2920 2921 CmsResourceManager resMan = OpenCms.getResourceManager(); 2922 Collection<CmsPublishedResource> additionalUpdateResources = new HashSet<>(); 2923 2924 Set<CmsResource> containerPages = new HashSet<CmsResource>(); 2925 int containerPageTypeId = -1; 2926 try { 2927 containerPageTypeId = CmsResourceTypeXmlContainerPage.getContainerPageTypeId(); 2928 } catch (CmsLoaderException e) { 2929 // will happen during setup, when container page type is not available yet 2930 LOG.info(e.getLocalizedMessage(), e); 2931 } 2932 if (containerPageTypeId != -1) { 2933 for (CmsPublishedResource pubRes : updateResourcesToCheck) { 2934 try { 2935 if (resMan.getResourceType(pubRes.getType()) instanceof CmsResourceTypeXmlContent) { 2936 if (!isGroup(pubRes.getType())) { 2937 CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId( 2938 pubRes.getStructureId()).filterStrong(); 2939 List<CmsRelation> relations = adminCms.readRelations(filter); 2940 for (CmsRelation relation : relations) { 2941 CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL); 2942 if (CmsResourceTypeXmlContainerPage.isContainerPage(res)) { 2943 containerPages.add(res); 2944 if (CmsDetailOnlyContainerUtil.isDetailContainersPage( 2945 adminCms, 2946 adminCms.getSitePath(res))) { 2947 addDetailContent(adminCms, containerPages, adminCms.getSitePath(res)); 2948 } 2949 } 2950 } 2951 } 2952 } 2953 if (containerPageTypeId == pubRes.getType()) { 2954 addDetailContent( 2955 adminCms, 2956 containerPages, 2957 adminCms.getRequestContext().removeSiteRoot(pubRes.getRootPath())); 2958 } 2959 } catch (CmsException e) { 2960 LOG.error(e.getLocalizedMessage(), e); 2961 } 2962 } 2963 // add all found container pages as published resource objects to the list 2964 for (CmsResource page : containerPages) { 2965 CmsPublishedResource pubCont = new CmsPublishedResource(page); 2966 if (!updateResources.contains(pubCont)) { 2967 // ensure container page is added only once 2968 additionalUpdateResources.add(pubCont); 2969 } 2970 } 2971 } 2972 return additionalUpdateResources; 2973 } 2974 2975 /** 2976 * Returns the set of names of all configured document types.<p> 2977 * 2978 * @return the set of names of all configured document types 2979 */ 2980 protected List<String> getDocumentTypes() { 2981 2982 return Collections.unmodifiableList(new ArrayList<String>(m_documentTypes.keySet())); 2983 } 2984 2985 /** 2986 * Returns the a offline project used for offline indexing.<p> 2987 * 2988 * @return the offline project if available 2989 */ 2990 protected CmsProject getOfflineIndexProject() { 2991 2992 CmsProject result = null; 2993 for (I_CmsSearchIndex index : m_offlineIndexes) { 2994 try { 2995 result = m_adminCms.readProject(index.getProject()); 2996 2997 if (!result.isOnlineProject()) { 2998 break; 2999 } 3000 } catch (Exception e) { 3001 // may be a missconfigured index, ignore 3002 LOG.error(e.getLocalizedMessage(), e); 3003 } 3004 } 3005 return result; 3006 } 3007 3008 /** 3009 * Returns a new thread manager for the indexing threads.<p> 3010 * 3011 * @return a new thread manager for the indexing threads 3012 */ 3013 protected CmsIndexingThreadManager getThreadManager() { 3014 3015 return new CmsIndexingThreadManager(m_timeout, m_maxModificationsBeforeCommit); 3016 } 3017 3018 /** 3019 * Initializes the available Cms resource types to be indexed.<p> 3020 * 3021 * A map stores document factories keyed by a string representing 3022 * a colon separated list of Cms resource types and/or mimetypes.<p> 3023 * 3024 * The keys of this map are used to trigger a document factory to convert 3025 * a Cms resource into a Lucene index document.<p> 3026 * 3027 * A document factory is a class implementing the interface 3028 * {@link org.opencms.search.documents.I_CmsDocumentFactory}.<p> 3029 */ 3030 protected void initAvailableDocumentTypes() { 3031 3032 CmsSearchDocumentType documenttype = null; 3033 String className = null; 3034 String name = null; 3035 I_CmsDocumentFactory documentFactory = null; 3036 List<String> resourceTypes = null; 3037 List<String> mimeTypes = null; 3038 Class<?> c = null; 3039 3040 m_documentTypes = new LinkedHashMap<String, Map<String, I_CmsDocumentFactory>>(); 3041 3042 for (int i = 0, n = m_documentTypeConfigs.size(); i < n; i++) { 3043 3044 documenttype = m_documentTypeConfigs.get(i); 3045 name = documenttype.getName(); 3046 3047 try { 3048 className = documenttype.getClassName(); 3049 resourceTypes = documenttype.getResourceTypes(); 3050 mimeTypes = documenttype.getMimeTypes(); 3051 3052 if (name == null) { 3053 throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_NAME_0)); 3054 } 3055 if (className == null) { 3056 throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_CLASS_DEF_0)); 3057 } 3058 if (resourceTypes.size() == 0) { 3059 throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_RESOURCETYPE_DEF_0)); 3060 } 3061 3062 try { 3063 c = Class.forName(className); 3064 documentFactory = (I_CmsDocumentFactory)c.getConstructor(new Class[] {String.class}).newInstance( 3065 new Object[] {name}); 3066 } catch (ClassNotFoundException exc) { 3067 throw new CmsIndexException( 3068 Messages.get().container(Messages.ERR_DOCCLASS_NOT_FOUND_1, className), 3069 exc); 3070 } catch (Exception exc) { 3071 throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCCLASS_INIT_1, className), exc); 3072 } 3073 3074 if (documentFactory.isUsingCache()) { 3075 // init cache if used by the factory 3076 documentFactory.setCache(m_extractionResultCache); 3077 } 3078 3079 Map<String, I_CmsDocumentFactory> matchingTypes = new HashMap<>(); 3080 for (Iterator<String> keyIt = documentFactory.getDocumentKeys( 3081 resourceTypes, 3082 mimeTypes).iterator(); keyIt.hasNext();) { 3083 String key = keyIt.next(); 3084 matchingTypes.put(key, documentFactory); 3085 m_extractionKeys.add(key); 3086 } 3087 m_documentTypes.put(name, matchingTypes); 3088 3089 } catch (CmsException e) { 3090 if (LOG.isWarnEnabled()) { 3091 LOG.warn(Messages.get().getBundle().key(Messages.LOG_DOCTYPE_CONFIG_FAILED_1, name), e); 3092 } 3093 } 3094 } 3095 } 3096 3097 /** 3098 * Initializes the index sources. 3099 */ 3100 protected void initIndexSources() { 3101 3102 for (CmsSearchIndexSource source : m_indexSources.values()) { 3103 source.init(); 3104 } 3105 } 3106 3107 /** 3108 * Initializes the configured search indexes.<p> 3109 * 3110 * This initializes also the list of Cms resources types 3111 * to be indexed by an index source.<p> 3112 */ 3113 protected void initSearchIndexes() { 3114 3115 I_CmsSearchIndex index = null; 3116 for (int i = 0, n = m_indexes.size(); i < n; i++) { 3117 index = m_indexes.get(i); 3118 // reset disabled flag 3119 index.setEnabled(true); 3120 // check if the index has been configured correctly 3121 if (index.checkConfiguration(m_adminCms)) { 3122 // the index is configured correctly 3123 try { 3124 index.initialize(); 3125 } catch (Exception e) { 3126 if (CmsLog.INIT.isWarnEnabled()) { 3127 // in this case the index will be disabled 3128 CmsLog.INIT.warn(Messages.get().getBundle().key(Messages.INIT_SEARCH_INIT_FAILED_1, index), e); 3129 } 3130 } 3131 } 3132 // output a log message if the index was successfully configured or not 3133 if (CmsLog.INIT.isInfoEnabled()) { 3134 if (index.isEnabled()) { 3135 CmsLog.INIT.info( 3136 Messages.get().getBundle().key(Messages.INIT_INDEX_CONFIGURED_2, index, index.getProject())); 3137 } else { 3138 CmsLog.INIT.warn( 3139 Messages.get().getBundle().key( 3140 Messages.INIT_INDEX_NOT_CONFIGURED_2, 3141 index, 3142 index.getProject())); 3143 } 3144 } 3145 } 3146 } 3147 3148 /** 3149 * Checks, if the index should be rebuilt/updated at all by the search manager. 3150 * @param index the index to check. 3151 * @return a flag, indicating if the index should be rebuilt/updated at all. 3152 */ 3153 protected boolean shouldUpdateAtAll(I_CmsSearchIndex index) { 3154 3155 if (I_CmsSearchIndex.REBUILD_MODE_NEVER.equals(index.getRebuildMode())) { 3156 LOG.debug(Messages.get().getBundle().key(Messages.LOG_SKIP_REBUILD_FOR_MODE_NEVER_1, index.getName())); 3157 return false; 3158 } else { 3159 return true; 3160 } 3161 3162 } 3163 3164 /** 3165 * Incrementally updates all indexes that have their rebuild mode set to <code>"auto"</code> 3166 * after resources have been published.<p> 3167 * 3168 * @param adminCms an OpenCms user context with Admin permissions 3169 * @param publishHistoryId the history ID of the published project 3170 * @param report the report to write the output to 3171 */ 3172 protected void updateAllIndexes(CmsObject adminCms, CmsUUID publishHistoryId, I_CmsReport report) { 3173 3174 int oldPriority = Thread.currentThread().getPriority(); 3175 ONLINE_LOCK.lock(true); 3176 try { 3177 Thread.currentThread().setPriority(Thread.MIN_PRIORITY); 3178 List<CmsPublishedResource> publishedResources; 3179 try { 3180 // read the list of all published resources 3181 publishedResources = adminCms.readPublishedResources(publishHistoryId); 3182 } catch (CmsException e) { 3183 LOG.error( 3184 Messages.get().getBundle().key(Messages.LOG_READING_CHANGED_RESOURCES_FAILED_1, publishHistoryId), 3185 e); 3186 return; 3187 } 3188 List<CmsPublishedResource> updateResources = computeUpdateResources(adminCms, publishedResources); 3189 updateAllIndexes(adminCms, updateResources, report); 3190 } finally { 3191 ONLINE_LOCK.unlock(); 3192 Thread.currentThread().setPriority(oldPriority); 3193 } 3194 } 3195 3196 /** 3197 * Incrementally updates all indexes that have their rebuild mode set to <code>"auto"</code>.<p> 3198 * 3199 * @param adminCms an OpenCms user context with Admin permissions 3200 * @param updateResources the resources to update 3201 * @param report the report to write the output to 3202 */ 3203 protected void updateAllIndexes( 3204 CmsObject adminCms, 3205 List<CmsPublishedResource> updateResources, 3206 I_CmsReport report) { 3207 3208 try { 3209 ONLINE_LOCK.lock(true); 3210 if (!updateResources.isEmpty()) { 3211 // sort the resource to update 3212 Collections.sort(updateResources); 3213 // only update the indexes if the list of remaining published resources is not empty 3214 Iterator<I_CmsSearchIndex> i = m_indexes.iterator(); 3215 while (i.hasNext()) { 3216 I_CmsSearchIndex index = i.next(); 3217 if (I_CmsSearchIndex.REBUILD_MODE_AUTO.equals(index.getRebuildMode())) { 3218 // only update indexes which have the rebuild mode set to "auto" 3219 try { 3220 updateIndex(index, report, updateResources); 3221 } catch (CmsException e) { 3222 LOG.error( 3223 Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()), 3224 e); 3225 } 3226 } 3227 } 3228 } 3229 // clean up the extraction result cache 3230 cleanExtractionCache(); 3231 } finally { 3232 ONLINE_LOCK.unlock(); 3233 } 3234 3235 } 3236 3237 /** 3238 * Updates (if required creates) the index with the given name.<p> 3239 * 3240 * If the optional List of <code>{@link CmsPublishedResource}</code> instances is provided, the index will be 3241 * incrementally updated for these resources only. If this List is <code>null</code> or empty, 3242 * the index will be fully rebuild.<p> 3243 * 3244 * @param index the index to update or rebuild 3245 * @param report the report to write output messages to 3246 * @param resourcesToIndex an (optional) list of <code>{@link CmsPublishedResource}</code> objects to update in the index 3247 * 3248 * @throws CmsException if something goes wrong 3249 */ 3250 protected void updateIndex(I_CmsSearchIndex index, I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) 3251 throws CmsException { 3252 3253 if (shouldUpdateAtAll(index)) { 3254 CmsPriorityLock lock = I_CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode()) 3255 ? OFFLINE_LOCK 3256 : ONLINE_LOCK; 3257 try { 3258 lock.lock(true); 3259 3260 // copy the stored admin context for the indexing 3261 CmsObject cms = OpenCms.initCmsObject(m_adminCms); 3262 // make sure a report is available 3263 if (report == null) { 3264 report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class); 3265 } 3266 3267 // check if the index has been configured correctly 3268 if (!index.checkConfiguration(cms)) { 3269 // the index is disabled 3270 return; 3271 } 3272 3273 // set site root and project for this index 3274 cms.getRequestContext().setSiteRoot("/"); 3275 // switch to the index project 3276 cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject())); 3277 3278 if ((resourcesToIndex == null) || resourcesToIndex.isEmpty()) { 3279 // rebuild the complete index 3280 3281 updateIndexCompletely(cms, index, report); 3282 } else { 3283 updateIndexIncremental(cms, index, report, resourcesToIndex); 3284 } 3285 } finally { 3286 lock.unlock(); 3287 } 3288 } 3289 } 3290 3291 /** 3292 * The method updates all OpenCms documents that are indexed. 3293 * @param cms the OpenCms user context to use for accessing the VFS 3294 * @param index the index to update 3295 * @param report the report to write output messages to 3296 * @throws CmsIndexException thrown if indexing fails for some reason 3297 */ 3298 @SuppressWarnings("null") 3299 protected void updateIndexCompletely(CmsObject cms, I_CmsSearchIndex index, I_CmsReport report) 3300 throws CmsIndexException { 3301 3302 // create a new thread manager for the indexing threads 3303 CmsIndexingThreadManager threadManager = getThreadManager(); 3304 3305 boolean isOfflineIndex = false; 3306 if (I_CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) { 3307 // disable offline indexing while the complete index is rebuild 3308 isOfflineIndex = true; 3309 index.setRebuildMode(I_CmsSearchIndex.REBUILD_MODE_MANUAL); 3310 // re-initialize the offline indexes, this will disable this offline index 3311 initOfflineIndexes(); 3312 } 3313 3314 I_CmsIndexWriter writer = null; 3315 try { 3316 // create a backup of the existing index 3317 CmsSearchIndex indexInternal = null; 3318 String backup = null; 3319 if (index instanceof CmsSearchIndex) { 3320 indexInternal = (CmsSearchIndex)index; 3321 backup = indexInternal.createIndexBackup(); 3322 if (backup != null) { 3323 indexInternal.indexSearcherOpen(backup); 3324 } 3325 } 3326 3327 // create a new index writer 3328 writer = index.getIndexWriter(report, true); 3329 if (writer instanceof I_CmsSolrIndexWriter) { 3330 try { 3331 ((I_CmsSolrIndexWriter)writer).deleteAllDocuments(); 3332 } catch (IOException e) { 3333 LOG.error(e.getMessage(), e); 3334 } 3335 } 3336 3337 // output start information on the report 3338 report.println( 3339 Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_BEGIN_1, index.getName()), 3340 I_CmsReport.FORMAT_HEADLINE); 3341 3342 // iterate all configured index sources of this index 3343 Iterator<CmsSearchIndexSource> sources = index.getSources().iterator(); 3344 while (sources.hasNext()) { 3345 // get the next index source 3346 CmsSearchIndexSource source = sources.next(); 3347 // create the indexer 3348 I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index); 3349 // new index creation, use all resources from the index source 3350 indexer.rebuildIndex(writer, threadManager, source); 3351 3352 // wait for indexing threads to finish 3353 while (threadManager.isRunning()) { 3354 try { 3355 Thread.sleep(500); 3356 } catch (InterruptedException e) { 3357 // just continue with the loop after interruption 3358 LOG.info(e.getLocalizedMessage(), e); 3359 } 3360 } 3361 3362 // commit and optimize the index after each index source has been finished 3363 try { 3364 writer.commit(); 3365 } catch (IOException e) { 3366 if (LOG.isWarnEnabled()) { 3367 LOG.warn( 3368 Messages.get().getBundle().key( 3369 Messages.LOG_IO_INDEX_WRITER_COMMIT_2, 3370 index.getName(), 3371 index.getPath()), 3372 e); 3373 } 3374 } 3375 try { 3376 writer.optimize(); 3377 } catch (IOException e) { 3378 if (LOG.isWarnEnabled()) { 3379 LOG.warn( 3380 Messages.get().getBundle().key( 3381 Messages.LOG_IO_INDEX_WRITER_OPTIMIZE_2, 3382 index.getName(), 3383 index.getPath()), 3384 e); 3385 } 3386 } 3387 } 3388 3389 // we are sure here that indexInternal is not null 3390 if (backup != null) { 3391 // remove the backup after the files have been re-indexed 3392 indexInternal.indexSearcherClose(); 3393 indexInternal.removeIndexBackup(backup); 3394 } 3395 3396 // output finish information on the report 3397 report.println( 3398 Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_END_1, index.getName()), 3399 I_CmsReport.FORMAT_HEADLINE); 3400 3401 } finally { 3402 if (writer != null) { 3403 try { 3404 writer.close(); 3405 } catch (IOException e) { 3406 if (LOG.isWarnEnabled()) { 3407 LOG.warn( 3408 Messages.get().getBundle().key( 3409 Messages.LOG_IO_INDEX_WRITER_CLOSE_2, 3410 index.getPath(), 3411 index.getName()), 3412 e); 3413 } 3414 } 3415 } 3416 if (isOfflineIndex) { 3417 // reset the mode of the offline index 3418 index.setRebuildMode(I_CmsSearchIndex.REBUILD_MODE_OFFLINE); 3419 // re-initialize the offline indexes, this will re-enable this index 3420 initOfflineIndexes(); 3421 } 3422 // index has changed - initialize the index searcher instance 3423 index.onIndexChanged(true); 3424 } 3425 3426 // show information about indexing runtime 3427 threadManager.reportStatistics(report); 3428 } 3429 3430 /** 3431 * Incrementally updates the given index.<p> 3432 * 3433 * @param cms the OpenCms user context to use for accessing the VFS 3434 * @param index the index to update 3435 * @param report the report to write output messages to 3436 * @param resourcesToIndex a list of <code>{@link CmsPublishedResource}</code> objects to update in the index 3437 * 3438 * @throws CmsException if something goes wrong 3439 */ 3440 protected void updateIndexIncremental( 3441 CmsObject cms, 3442 I_CmsSearchIndex index, 3443 I_CmsReport report, 3444 List<CmsPublishedResource> resourcesToIndex) 3445 throws CmsException { 3446 3447 CmsPriorityLock lock = I_CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode()) 3448 ? OFFLINE_LOCK 3449 : ONLINE_LOCK; 3450 lock.lock(true); 3451 try { 3452 // update the existing index 3453 List<CmsSearchIndexUpdateData> updateCollections = new ArrayList<CmsSearchIndexUpdateData>(); 3454 3455 boolean hasResourcesToDelete = false; 3456 boolean hasResourcesToUpdate = false; 3457 3458 // iterate all configured index sources of this index 3459 Iterator<CmsSearchIndexSource> sources = index.getSources().iterator(); 3460 while (sources.hasNext()) { 3461 // get the next index source 3462 CmsSearchIndexSource source = sources.next(); 3463 // create the indexer 3464 I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index); 3465 // collect the resources to update 3466 CmsSearchIndexUpdateData updateData = indexer.getUpdateData(source, resourcesToIndex); 3467 if (!updateData.isEmpty()) { 3468 // add the update collection to the internal pipeline 3469 updateCollections.add(updateData); 3470 hasResourcesToDelete = hasResourcesToDelete | updateData.hasResourcesToDelete(); 3471 hasResourcesToUpdate = hasResourcesToUpdate | updateData.hasResourceToUpdate(); 3472 } 3473 } 3474 3475 // only start index modification if required 3476 if (hasResourcesToDelete || hasResourcesToUpdate) { 3477 // output start information on the report 3478 report.println( 3479 Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_BEGIN_1, index.getName()), 3480 I_CmsReport.FORMAT_HEADLINE); 3481 3482 I_CmsIndexWriter writer = null; 3483 try { 3484 // obtain an index writer that updates the current index 3485 writer = index.getIndexWriter(report, false); 3486 3487 if (hasResourcesToDelete) { 3488 // delete the resource from the index 3489 Iterator<CmsSearchIndexUpdateData> i = updateCollections.iterator(); 3490 while (i.hasNext()) { 3491 CmsSearchIndexUpdateData updateCollection = i.next(); 3492 if (updateCollection.hasResourcesToDelete()) { 3493 updateCollection.getIndexer().deleteResources( 3494 writer, 3495 updateCollection.getResourcesToDelete()); 3496 } 3497 } 3498 } 3499 3500 if (hasResourcesToUpdate) { 3501 // create a new thread manager 3502 CmsIndexingThreadManager threadManager = getThreadManager(); 3503 3504 Iterator<CmsSearchIndexUpdateData> i = updateCollections.iterator(); 3505 while (i.hasNext()) { 3506 CmsSearchIndexUpdateData updateCollection = i.next(); 3507 if (updateCollection.hasResourceToUpdate()) { 3508 updateCollection.getIndexer().updateResources( 3509 writer, 3510 threadManager, 3511 updateCollection.getResourcesToUpdate()); 3512 } 3513 } 3514 3515 // wait for indexing threads to finish 3516 while (threadManager.isRunning()) { 3517 try { 3518 Thread.sleep(500); 3519 } catch (InterruptedException e) { 3520 // just continue with the loop after interruption 3521 LOG.info(e.getLocalizedMessage(), e); 3522 } 3523 } 3524 } 3525 } finally { 3526 // close the index writer 3527 if (writer != null) { 3528 try { 3529 writer.commit(); 3530 } catch (IOException e) { 3531 LOG.error( 3532 Messages.get().getBundle().key( 3533 Messages.LOG_IO_INDEX_WRITER_COMMIT_2, 3534 index.getName(), 3535 index.getPath()), 3536 e); 3537 } 3538 } 3539 // index has changed - initialize the index searcher instance 3540 index.onIndexChanged(false); 3541 } 3542 3543 // output finish information on the report 3544 report.println( 3545 Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_END_1, index.getName()), 3546 I_CmsReport.FORMAT_HEADLINE); 3547 } 3548 } finally { 3549 lock.unlock(); 3550 } 3551 } 3552 3553 /** 3554 * Updates the offline search indexes for the given list of resources.<p> 3555 * 3556 * @param report the report to write the index information to 3557 * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index 3558 */ 3559 protected void updateIndexOffline(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) { 3560 3561 CmsObject cms = m_adminCms; 3562 try { 3563 // copy the administration context for the indexing 3564 cms = OpenCms.initCmsObject(m_adminCms); 3565 // set site root and project for this index 3566 cms.getRequestContext().setSiteRoot("/"); 3567 } catch (CmsException e) { 3568 LOG.error(e.getLocalizedMessage(), e); 3569 } 3570 3571 Iterator<I_CmsSearchIndex> j = m_offlineIndexes.iterator(); 3572 while (j.hasNext()) { 3573 I_CmsSearchIndex index = j.next(); 3574 if (index.getSources() != null) { 3575 try { 3576 // switch to the index project 3577 cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject())); 3578 updateIndexIncremental(cms, index, report, resourcesToIndex); 3579 } catch (CmsException e) { 3580 LOG.error(Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()), e); 3581 } 3582 } 3583 } 3584 } 3585 3586 /** 3587 * Checks if the given containerpage is used as a detail containers and adds the related detail content to the resource set.<p> 3588 * 3589 * @param adminCms the cms context 3590 * @param containerPages the containerpages 3591 * @param containerPage the container page site path 3592 */ 3593 private void addDetailContent(CmsObject adminCms, Set<CmsResource> containerPages, String containerPage) { 3594 3595 if (CmsDetailOnlyContainerUtil.isDetailContainersPage(adminCms, containerPage)) { 3596 3597 try { 3598 CmsResource detailRes = adminCms.readResource( 3599 CmsDetailOnlyContainerUtil.getDetailContentPath(containerPage), 3600 CmsResourceFilter.IGNORE_EXPIRATION); 3601 containerPages.add(detailRes); 3602 } catch (Throwable e) { 3603 if (LOG.isWarnEnabled()) { 3604 LOG.warn(e.getLocalizedMessage(), e); 3605 } 3606 } 3607 } 3608 } 3609 3610 private List<CmsPublishedResource> computeUpdateResources( 3611 CmsObject cms, 3612 List<CmsPublishedResource> publishedResources) { 3613 3614 Set<CmsUUID> bothNewAndDeleted = getIdsOfPublishResourcesWhichAreBothNewAndDeleted(publishedResources); 3615 // When published resources with both states 'new' and 'deleted' exist in the same publish job history, the resource has been moved 3616 3617 List<CmsPublishedResource> updateResources = new ArrayList<CmsPublishedResource>(); 3618 for (CmsPublishedResource res : publishedResources) { 3619 if (res.getState().isUnchanged()) { 3620 // unchanged resources don't need to be indexed after publish 3621 continue; 3622 } 3623 if (res.getState().isDeleted() || res.getState().isNew() || res.getState().isChanged()) { 3624 if (updateResources.contains(res)) { 3625 // resource may have been added as a sibling of another resource 3626 // in this case we make sure to use the value from the publish list because of the "deleted" flag 3627 boolean hasMoved = bothNewAndDeleted.contains(res.getStructureId()) 3628 || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_DESTINATION) 3629 || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_SOURCE); 3630 // check it this is a moved resource with source / target info, in this case we need both entries 3631 if (!hasMoved) { 3632 // if the resource was moved, we must contain both entries 3633 updateResources.remove(res); 3634 } 3635 // "equals()" implementation of published resource checks for id, 3636 // so the removed value may have a different "deleted" or "modified" status value 3637 updateResources.add(res); 3638 } else { 3639 // resource not yet contained in the list 3640 updateResources.add(res); 3641 // check for the siblings (not for deleted resources, these are already gone) 3642 if (!res.getState().isDeleted() && (res.getSiblingCount() > 1)) { 3643 // this resource has siblings 3644 try { 3645 // read siblings from the online project 3646 List<CmsResource> siblings = cms.readSiblings(res.getRootPath(), CmsResourceFilter.ALL); 3647 Iterator<CmsResource> itSib = siblings.iterator(); 3648 while (itSib.hasNext()) { 3649 // check all siblings 3650 CmsResource sibling = itSib.next(); 3651 CmsPublishedResource sib = new CmsPublishedResource(sibling); 3652 if (!updateResources.contains(sib)) { 3653 // ensure sibling is added only once 3654 updateResources.add(sib); 3655 } 3656 } 3657 } catch (CmsException e) { 3658 // ignore, just use the original resource 3659 if (LOG.isWarnEnabled()) { 3660 LOG.warn( 3661 Messages.get().getBundle().key( 3662 Messages.LOG_UNABLE_TO_READ_SIBLINGS_1, 3663 res.getRootPath()), 3664 e); 3665 } 3666 } 3667 } 3668 } 3669 } 3670 } 3671 3672 addAdditionallyAffectedResources(cms, updateResources); 3673 return updateResources; 3674 } 3675 3676 /** 3677 * Creates the Solr core container.<p> 3678 * 3679 * @return the created core container 3680 */ 3681 private CoreContainer createCoreContainer() { 3682 3683 CoreContainer container = null; 3684 try { 3685 // get the core container 3686 // still no core container: create it 3687 container = CoreContainer.createAndLoad( 3688 Paths.get(m_solrConfig.getHome()), 3689 m_solrConfig.getSolrFile().toPath()); 3690 if (CmsLog.INIT.isInfoEnabled()) { 3691 CmsLog.INIT.info( 3692 Messages.get().getBundle().key( 3693 Messages.INIT_SOLR_CORE_CONTAINER_CREATED_2, 3694 m_solrConfig.getHome(), 3695 m_solrConfig.getSolrFile().getName())); 3696 } 3697 } catch (Exception e) { 3698 LOG.error( 3699 Messages.get().getBundle().key( 3700 Messages.ERR_SOLR_CORE_CONTAINER_NOT_CREATED_1, 3701 m_solrConfig.getSolrFile().getAbsolutePath()), 3702 e); 3703 } 3704 return container; 3705 3706 } 3707 3708 /** 3709 * Remove write.lock file in the data directory to ensure the index is unlocked. 3710 * @param dataDir the data directory of the Solr index that should be unlocked. 3711 */ 3712 private void ensureIndexIsUnlocked(String dataDir) { 3713 3714 Collection<File> lockFiles = new ArrayList<File>(2); 3715 lockFiles.add( 3716 new File( 3717 CmsFileUtil.addTrailingSeparator(CmsFileUtil.addTrailingSeparator(dataDir) + "index") + "write.lock")); 3718 lockFiles.add( 3719 new File( 3720 CmsFileUtil.addTrailingSeparator(CmsFileUtil.addTrailingSeparator(dataDir) + "spellcheck") 3721 + "write.lock")); 3722 for (File lockFile : lockFiles) { 3723 if (lockFile.exists()) { 3724 lockFile.delete(); 3725 LOG.warn( 3726 "Forcely unlocking index with data dir \"" 3727 + dataDir 3728 + "\" by removing file \"" 3729 + lockFile.getAbsolutePath() 3730 + "\"."); 3731 } 3732 } 3733 } 3734 3735 /** 3736 * Returns the report in the given event data, if <code>null</code> 3737 * a new log report is used.<p> 3738 * 3739 * @param event the event to get the report for 3740 * 3741 * @return the report 3742 */ 3743 private I_CmsReport getEventReport(CmsEvent event) { 3744 3745 I_CmsReport report = null; 3746 if (event.getData() != null) { 3747 report = (I_CmsReport)event.getData().get(I_CmsEventListener.KEY_REPORT); 3748 } 3749 if (report == null) { 3750 report = new CmsLogReport(Locale.ENGLISH, getClass()); 3751 } 3752 return report; 3753 } 3754 3755 /** 3756 * Gets all structure ids for which published resources of both states 'new' and 'deleted' exist in the given list.<p> 3757 * 3758 * @param publishedResources a list of published resources 3759 * 3760 * @return the set of structure ids that satisfy the condition above 3761 */ 3762 private Set<CmsUUID> getIdsOfPublishResourcesWhichAreBothNewAndDeleted( 3763 List<CmsPublishedResource> publishedResources) { 3764 3765 Set<CmsUUID> result = new HashSet<CmsUUID>(); 3766 Set<CmsUUID> deletedSet = new HashSet<CmsUUID>(); 3767 for (CmsPublishedResource pubRes : publishedResources) { 3768 if (pubRes.getState().isNew()) { 3769 result.add(pubRes.getStructureId()); 3770 } 3771 if (pubRes.getState().isDeleted()) { 3772 deletedSet.add(pubRes.getStructureId()); 3773 } 3774 } 3775 result.retainAll(deletedSet); 3776 return result; 3777 } 3778 3779 /** 3780 * Checks if the given type id belongs to a group type. 3781 * 3782 * @param type the type id to check 3783 * @return true if the type is a group type 3784 */ 3785 private boolean isGroup(int type) { 3786 3787 for (String groupType : groupTypes) { 3788 if (OpenCms.getResourceManager().matchResourceType(groupType, type)) { 3789 return true; 3790 } 3791 } 3792 return false; 3793 3794 } 3795 3796 /** 3797 * Shuts down the Solr core container.<p> 3798 */ 3799 private void shutDownSolrContainer() { 3800 3801 if (m_coreContainer != null) { 3802 for (SolrCore core : m_coreContainer.getCores()) { 3803 // do not unload spellcheck core because otherwise the core.properties file is removed 3804 // even when calling m_coreContainer.unload(core.getName(), false, false, false); 3805 if (!core.getName().equals(CmsSolrSpellchecker.SPELLCHECKER_INDEX_CORE)) { 3806 m_coreContainer.unload(core.getName(), false, false, true); 3807 } 3808 } 3809 m_coreContainer.shutdown(); 3810 if (CmsLog.INIT.isInfoEnabled()) { 3811 CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SOLR_SHUTDOWN_SUCCESS_0)); 3812 } 3813 m_coreContainer = null; 3814 } 3815 } 3816 3817}