001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software GmbH & Co. KG, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.search; 029 030import org.opencms.db.CmsPublishedResource; 031import org.opencms.file.CmsObject; 032import org.opencms.file.CmsProject; 033import org.opencms.file.CmsResource; 034import org.opencms.file.CmsResourceFilter; 035import org.opencms.main.CmsException; 036import org.opencms.main.CmsLog; 037import org.opencms.report.I_CmsReport; 038import org.opencms.util.CmsUUID; 039 040import java.io.IOException; 041import java.util.ArrayList; 042import java.util.Iterator; 043import java.util.List; 044 045import org.apache.commons.logging.Log; 046 047/** 048 * An indexer indexing {@link CmsResource} based content from the OpenCms VFS.<p> 049 * 050 * @since 6.0.0 051 */ 052public class CmsVfsIndexer implements I_CmsIndexer { 053 054 /** The log object for this class. */ 055 private static final Log LOG = CmsLog.getLog(CmsVfsIndexer.class); 056 057 // Note: The following member variables must all be "protected" (not "private") since 058 // in case the indexer is extended, the factory method "newInstance()" needs to set them. 059 060 /** The OpenCms user context to use when reading resources from the VFS during indexing. */ 061 protected CmsObject m_cms; 062 063 /** The index. */ 064 protected I_CmsSearchIndex m_index; 065 066 /** The report. */ 067 protected I_CmsReport m_report; 068 069 /** 070 * @see org.opencms.search.I_CmsIndexer#deleteResources(org.opencms.search.I_CmsIndexWriter, java.util.List) 071 */ 072 public void deleteResources(I_CmsIndexWriter indexWriter, List<CmsPublishedResource> resourcesToDelete) { 073 074 if ((resourcesToDelete == null) || resourcesToDelete.isEmpty()) { 075 // nothing to delete 076 return; 077 } 078 079 // contains all resources already deleted to avoid multiple deleting in case of siblings 080 List<CmsUUID> resourcesAlreadyDeleted = new ArrayList<CmsUUID>(resourcesToDelete.size()); 081 082 Iterator<CmsPublishedResource> i = resourcesToDelete.iterator(); 083 while (i.hasNext()) { 084 // iterate all resources in the given list of resources to delete 085 CmsPublishedResource res = i.next(); 086 if (!resourcesAlreadyDeleted.contains(res.getStructureId())) { 087 // ensure siblings are only deleted once per update 088 resourcesAlreadyDeleted.add(res.getStructureId()); 089 if (!res.isFolder() && !CmsResource.isTemporaryFileName(res.getRootPath())) { 090 // now delete the resource from the index 091 deleteResource(indexWriter, res); 092 } 093 } 094 } 095 } 096 097 /** 098 * Returns the OpenCms user context used by this indexer.<p> 099 * 100 * @return the OpenCms user context used by this indexer 101 */ 102 public CmsObject getCms() { 103 104 return m_cms; 105 } 106 107 /** 108 * Returns the OpenCms search index updated by this indexer.<p> 109 * 110 * @return the OpenCms search index updated by this indexer 111 */ 112 public I_CmsSearchIndex getIndex() { 113 114 return m_index; 115 } 116 117 /** 118 * Returns the report used by this indexer.<p> 119 * 120 * @return the report used by this indexer 121 */ 122 public I_CmsReport getReport() { 123 124 return m_report; 125 } 126 127 /** 128 * @see org.opencms.search.I_CmsIndexer#getUpdateData(org.opencms.search.CmsSearchIndexSource, java.util.List) 129 */ 130 public CmsSearchIndexUpdateData getUpdateData( 131 CmsSearchIndexSource source, 132 List<CmsPublishedResource> publishedResources) { 133 134 // create a new update collection from this indexer and the given index source 135 CmsSearchIndexUpdateData result = new CmsSearchIndexUpdateData(source, this); 136 137 Iterator<CmsPublishedResource> i = publishedResources.iterator(); 138 while (i.hasNext()) { 139 // check all published resources if they match this indexer / source 140 CmsPublishedResource pubRes = i.next(); 141 // VFS resources will always have a structure id 142 if (!pubRes.getStructureId().isNullUUID()) { 143 // use utility method from CmsProject to check if published resource is "inside" this index source 144 if (CmsProject.isInsideProject(source.getResourcesNames(), pubRes.getRootPath())) { 145 // the resource is "inside" this index source 146 addResourceToUpdateData(pubRes, result); 147 } 148 } 149 } 150 return result; 151 } 152 153 /** 154 * The default indexer is not able to resolve locale dependencies between documents.<p> 155 * 156 * @see org.opencms.search.I_CmsIndexer#isLocaleDependenciesEnable() 157 */ 158 public boolean isLocaleDependenciesEnable() { 159 160 return false; 161 } 162 163 /** 164 * @see org.opencms.search.I_CmsIndexer#newInstance(org.opencms.file.CmsObject, org.opencms.report.I_CmsReport, org.opencms.search.I_CmsSearchIndex) 165 */ 166 public I_CmsIndexer newInstance(CmsObject cms, I_CmsReport report, I_CmsSearchIndex index) { 167 168 CmsVfsIndexer indexer = null; 169 try { 170 indexer = getClass().newInstance(); 171 indexer.m_cms = cms; 172 indexer.m_report = report; 173 indexer.m_index = index; 174 } catch (Exception e) { 175 LOG.error( 176 Messages.get().getBundle().key( 177 Messages.ERR_INDEXSOURCE_INDEXER_CLASS_NAME_2, 178 getClass().getName(), 179 CmsVfsIndexer.class), 180 e); 181 } 182 return indexer; 183 } 184 185 /** 186 * @see org.opencms.search.I_CmsIndexer#rebuildIndex(org.opencms.search.I_CmsIndexWriter, org.opencms.search.CmsIndexingThreadManager, org.opencms.search.CmsSearchIndexSource) 187 */ 188 public void rebuildIndex( 189 I_CmsIndexWriter writer, 190 CmsIndexingThreadManager threadManager, 191 CmsSearchIndexSource source) { 192 193 List<String> resourceNames = source.getResourcesNames(); 194 Iterator<String> i = resourceNames.iterator(); 195 while (i.hasNext()) { 196 // read the resources from all configured source folders 197 String resourceName = i.next(); 198 List<CmsResource> resources = null; 199 try { 200 // read all resources (only files) below the given path 201 resources = m_cms.readResources(resourceName, CmsResourceFilter.IGNORE_EXPIRATION.addRequireFile()); 202 } catch (CmsException e) { 203 if (m_report != null) { 204 m_report.println( 205 Messages.get().container( 206 Messages.RPT_UNABLE_TO_READ_SOURCE_2, 207 resourceName, 208 e.getLocalizedMessage()), 209 I_CmsReport.FORMAT_WARNING); 210 } 211 if (LOG.isWarnEnabled()) { 212 LOG.warn( 213 Messages.get().getBundle().key( 214 Messages.LOG_UNABLE_TO_READ_SOURCE_2, 215 resourceName, 216 m_index.getName()), 217 e); 218 } 219 } 220 if (resources != null) { 221 // iterate all resources found in the folder 222 Iterator<CmsResource> j = resources.iterator(); 223 while (j.hasNext()) { 224 // now update all the resources individually 225 CmsResource resource = j.next(); 226 updateResource(writer, threadManager, resource); 227 } 228 } 229 } 230 } 231 232 /** 233 * @see org.opencms.search.I_CmsIndexer#updateResources(org.opencms.search.I_CmsIndexWriter, org.opencms.search.CmsIndexingThreadManager, java.util.List) 234 */ 235 public void updateResources( 236 I_CmsIndexWriter writer, 237 CmsIndexingThreadManager threadManager, 238 List<CmsPublishedResource> resourcesToUpdate) { 239 240 if ((resourcesToUpdate == null) || resourcesToUpdate.isEmpty()) { 241 // nothing to update 242 return; 243 } 244 245 // contains all resources already updated to avoid multiple updates in case of siblings 246 List<String> resourcesAlreadyUpdated = new ArrayList<String>(resourcesToUpdate.size()); 247 248 // index all resources that are in the given list 249 Iterator<CmsPublishedResource> i = resourcesToUpdate.iterator(); 250 while (i.hasNext()) { 251 CmsPublishedResource res = i.next(); 252 CmsResource resource = null; 253 if (!CmsResource.isTemporaryFileName(res.getRootPath())) { 254 try { 255 resource = m_cms.readResource(res.getRootPath(), CmsResourceFilter.IGNORE_EXPIRATION); 256 } catch (CmsException e) { 257 if (LOG.isWarnEnabled()) { 258 LOG.warn( 259 Messages.get().getBundle().key( 260 Messages.LOG_UNABLE_TO_READ_RESOURCE_2, 261 res.getRootPath(), 262 m_index.getName()), 263 e); 264 } 265 } 266 267 if (resource != null) { 268 if (!resourcesAlreadyUpdated.contains(resource.getRootPath())) { 269 // ensure resources are only indexed once per update 270 resourcesAlreadyUpdated.add(resource.getRootPath()); 271 updateResource(writer, threadManager, resource); 272 } 273 } 274 } 275 } 276 } 277 278 /** 279 * Adds a given published resource to the provided search index update data.<p> 280 * 281 * This method decides if the resource has to be included in the "update" or "delete" list.<p> 282 * 283 * @param pubRes the published resource to add 284 * @param updateData the search index update data to add the resource to 285 */ 286 protected void addResourceToUpdateData(CmsPublishedResource pubRes, CmsSearchIndexUpdateData updateData) { 287 288 if (pubRes.getState().isDeleted()) { 289 // deleted resource just needs to be removed 290 updateData.addResourceToDelete(pubRes); 291 } else if (pubRes.getState().isNew() || pubRes.getState().isChanged() || pubRes.getState().isUnchanged()) { 292 updateData.addResourceToUpdate(pubRes); 293 } 294 } 295 296 /** 297 * Deletes a resource with the given index writer.<p> 298 * 299 * @param indexWriter the index writer to resource the resource with 300 * @param resource the root path of the resource to delete 301 */ 302 protected void deleteResource(I_CmsIndexWriter indexWriter, CmsPublishedResource resource) { 303 304 try { 305 if (LOG.isInfoEnabled()) { 306 LOG.info(Messages.get().getBundle().key(Messages.LOG_DELETING_FROM_INDEX_1, resource.getRootPath())); 307 } 308 // delete all documents with this term from the index 309 indexWriter.deleteDocument(resource); 310 } catch (IOException e) { 311 if (LOG.isWarnEnabled()) { 312 LOG.warn( 313 Messages.get().getBundle().key( 314 Messages.LOG_IO_INDEX_DOCUMENT_DELETE_2, 315 resource.getRootPath(), 316 m_index.getName()), 317 e); 318 } 319 } 320 } 321 322 /** 323 * Checks if the published resource is inside the time window set with release and expiration date.<p> 324 * 325 * @param resource the published resource to check 326 * @return true if the published resource is inside the time window, otherwise false 327 */ 328 protected boolean isResourceInTimeWindow(CmsPublishedResource resource) { 329 330 return m_cms.existsResource( 331 m_cms.getRequestContext().removeSiteRoot(resource.getRootPath()), 332 CmsResourceFilter.DEFAULT); 333 } 334 335 /** 336 * Updates (writes) a single resource in the index.<p> 337 * 338 * @param writer the index writer to use 339 * @param threadManager the thread manager to use when extracting the document text 340 * @param resource the resource to update 341 */ 342 protected void updateResource( 343 I_CmsIndexWriter writer, 344 CmsIndexingThreadManager threadManager, 345 CmsResource resource) { 346 347 if (resource.isFolder() || resource.isTemporaryFile()) { 348 // don't ever index folders or temporary files 349 return; 350 } 351 try { 352 // create the index thread for the resource 353 threadManager.createIndexingThread(this, writer, resource); 354 } catch (Throwable e) { 355 // Only runtime exceptions can appear here. 356 if (m_report != null) { 357 m_report.println( 358 Messages.get().container(Messages.RPT_SEARCH_INDEXING_FAILED_0), 359 I_CmsReport.FORMAT_WARNING); 360 } 361 if (LOG.isWarnEnabled()) { 362 LOG.warn( 363 Messages.get().getBundle().key( 364 Messages.ERR_INDEX_RESOURCE_FAILED_2, 365 resource.getRootPath(), 366 m_index.getName()), 367 e); 368 } 369 } 370 } 371 372 /** 373 * Updates a resource with the given index writer and the new document provided.<p> 374 * 375 * @param indexWriter the index writer to update the resource with 376 * @param rootPath the root path of the resource to update 377 * @param doc the new document for the resource 378 */ 379 protected void updateResource(I_CmsIndexWriter indexWriter, String rootPath, I_CmsSearchDocument doc) { 380 381 try { 382 indexWriter.updateDocument(rootPath, doc); 383 } catch (Exception e) { 384 if (LOG.isWarnEnabled()) { 385 LOG.warn( 386 Messages.get().getBundle().key( 387 Messages.LOG_IO_INDEX_DOCUMENT_UPDATE_2, 388 rootPath, 389 m_index.getName()), 390 e); 391 } 392 } 393 } 394}