001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software GmbH & Co. KG, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.search; 029 030import org.opencms.file.CmsObject; 031import org.opencms.file.CmsResource; 032import org.opencms.main.CmsException; 033import org.opencms.main.CmsLog; 034import org.opencms.report.I_CmsReport; 035import org.opencms.search.documents.CmsIndexNoContentException; 036import org.opencms.search.documents.I_CmsDocumentFactory; 037 038import org.apache.commons.logging.Log; 039 040/** 041 * Implements the indexing method for a single resource as thread.<p> 042 * 043 * The indexing of a single resource is wrapped into a thread 044 * in order to prevent the overall indexer from hanging.<p> 045 * 046 * @since 6.0.0 047 */ 048public class CmsIndexingThread extends Thread { 049 050 /** The log object for this class. */ 051 private static final Log LOG = CmsLog.getLog(CmsIndexingThread.class); 052 053 /** The cms object. */ 054 private CmsObject m_cms; 055 056 /** The counter to output for the report. */ 057 private int m_count; 058 059 /** The current index. */ 060 private I_CmsSearchIndex m_index; 061 062 /** The current report. */ 063 private I_CmsReport m_report; 064 065 /** The resource to index. */ 066 private CmsResource m_res; 067 068 /** The result document. */ 069 private I_CmsSearchDocument m_result; 070 071 /** Flag, indicating if a default document for the resource should be created. */ 072 private boolean m_addDefaultDocument = true; 073 074 /** 075 * Create a new indexing thread.<p> 076 * 077 * @param cms the current OpenCms user context 078 * @param res the resource to index 079 * @param index the index to update the resource in 080 * @param count the report count 081 * @param report the report to write the output to 082 */ 083 public CmsIndexingThread(CmsObject cms, CmsResource res, I_CmsSearchIndex index, int count, I_CmsReport report) { 084 085 super("OpenCms: Indexing '" + res.getName() + "'"); 086 087 m_cms = cms; 088 m_res = res; 089 m_index = index; 090 m_count = count; 091 m_report = report; 092 m_result = null; 093 } 094 095 /** 096 * Returns the document created by this indexer thread.<p> 097 * 098 * In case the resource could not be indexed, <code>null</code> is returned.<p> 099 * 100 * @return the document created by this indexer thread 101 */ 102 public I_CmsSearchDocument getResult() { 103 104 if ((null == m_result) && m_addDefaultDocument) { 105 if (LOG.isWarnEnabled()) { 106 LOG.warn( 107 "Creating default document without content for " 108 + m_res.getRootPath() 109 + " in index " 110 + m_index.getName()); 111 } 112 return createDefaultIndexDocument(); 113 } 114 return m_result; 115 } 116 117 /** 118 * Starts the thread to index a single resource.<p> 119 * 120 * @see java.lang.Runnable#run() 121 */ 122 @Override 123 public void run() { 124 125 // flag for logging in the "final" block 126 boolean docOk = false; 127 try { 128 129 // create the index document 130 m_result = createIndexDocument(m_cms, m_res, m_index, m_count, m_report); 131 docOk = true; 132 133 // check if the thread was interrupted 134 if (isInterrupted() && LOG.isDebugEnabled()) { 135 LOG.debug( 136 Messages.get().getBundle().key(Messages.LOG_ABANDONED_THREAD_FINISHED_1, m_res.getRootPath())); 137 } 138 139 } catch (CmsIndexNoContentException e) { 140 // Ignore exception caused by empty documents, so that the report is not messed up with error message 141 m_report.println( 142 org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_OK_0), 143 I_CmsReport.FORMAT_OK); 144 } catch (Throwable exc) { 145 if (m_report != null) { 146 m_report.println( 147 org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_FAILED_0), 148 I_CmsReport.FORMAT_ERROR); 149 m_report.println( 150 org.opencms.report.Messages.get().container( 151 org.opencms.report.Messages.RPT_ARGUMENT_1, 152 exc.toString()), 153 I_CmsReport.FORMAT_ERROR); 154 } 155 if (LOG.isErrorEnabled()) { 156 LOG.error( 157 Messages.get().getBundle().key( 158 Messages.ERR_INDEX_RESOURCE_FAILED_2, 159 m_res.getRootPath(), 160 m_index.getName()), 161 exc); 162 } 163 // set flag to avoid logging in finally block 164 docOk = true; 165 } finally { 166 if (!docOk) { 167 // apparently there was a Throwable that causes an issue 168 if (m_report != null) { 169 m_report.println( 170 org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_FAILED_0), 171 I_CmsReport.FORMAT_ERROR); 172 m_report.println( 173 Messages.get().container( 174 Messages.ERR_INDEX_RESOURCE_FAILED_2, 175 m_res.getRootPath(), 176 m_index.getName()), 177 I_CmsReport.FORMAT_ERROR); 178 } 179 if (LOG.isErrorEnabled()) { 180 LOG.error( 181 Messages.get().getBundle().key( 182 Messages.ERR_INDEX_RESOURCE_FAILED_2, 183 m_res.getRootPath(), 184 m_index.getName())); 185 } 186 } 187 } 188 } 189 190 /** 191 * Creates a document for the resource without extracting the content. The aim is to get a content indexed, 192 * even if extraction runs into a timeout. 193 * 194 * @return the document for the resource generated if the content is discarded, 195 * i.e., only meta information are indexed. 196 */ 197 protected I_CmsSearchDocument createDefaultIndexDocument() { 198 199 try { 200 return m_index.getFieldConfiguration().createDocument(m_cms, m_res, m_index, null); 201 } catch (CmsException e) { 202 LOG.error( 203 "Default document for " 204 + m_res.getRootPath() 205 + " and index " 206 + m_index.getName() 207 + " could not be created.", 208 e); 209 return null; 210 } 211 } 212 213 /** 214 * Creates the search index document.<p> 215 * 216 * @param cms the current OpenCms user context 217 * @param res the resource to index 218 * @param index the index to update the resource in 219 * @param count the report count 220 * @param report the report to write the output to 221 * 222 * @return the created search index document 223 * 224 * @throws CmsException in case of issues while creating the search index document 225 */ 226 protected I_CmsSearchDocument createIndexDocument( 227 CmsObject cms, 228 CmsResource res, 229 I_CmsSearchIndex index, 230 int count, 231 I_CmsReport report) 232 throws CmsException { 233 234 I_CmsSearchDocument result = null; 235 236 if (report != null) { 237 report.print( 238 org.opencms.report.Messages.get().container( 239 org.opencms.report.Messages.RPT_SUCCESSION_1, 240 String.valueOf(count)), 241 I_CmsReport.FORMAT_NOTE); 242 report.print(Messages.get().container(Messages.RPT_SEARCH_INDEXING_FILE_BEGIN_0), I_CmsReport.FORMAT_NOTE); 243 report.print( 244 org.opencms.report.Messages.get().container( 245 org.opencms.report.Messages.RPT_ARGUMENT_1, 246 report.removeSiteRoot(res.getRootPath()))); 247 report.print( 248 org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_DOTS_0), 249 I_CmsReport.FORMAT_DEFAULT); 250 } 251 252 if (m_index.excludeFromIndex(m_cms, m_res)) { 253 m_addDefaultDocument = false; 254 } else { 255 // resource is to be included in the index 256 I_CmsDocumentFactory documentFactory = index.getDocumentFactory(res); 257 if (documentFactory != null) { 258 // some resources e.g. JSP do not have a default document factory 259 if (LOG.isDebugEnabled()) { 260 LOG.debug( 261 Messages.get().getBundle().key( 262 Messages.LOG_INDEXING_WITH_FACTORY_2, 263 res.getRootPath(), 264 documentFactory.getName())); 265 } 266 // create the document 267 result = documentFactory.createDocument(cms, res, index); 268 } else { 269 m_addDefaultDocument = false; 270 } 271 } 272 if (result == null) { 273 // this resource is not contained in the given search index or locale did not match 274 if (report != null) { 275 report.println( 276 org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_SKIPPED_0), 277 I_CmsReport.FORMAT_NOTE); 278 } 279 if (LOG.isDebugEnabled()) { 280 LOG.debug(Messages.get().getBundle().key(Messages.LOG_SKIPPED_1, res.getRootPath())); 281 } 282 } else { 283 // index document was successfully created 284 if ((m_report != null)) { 285 m_report.println( 286 org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_OK_0), 287 I_CmsReport.FORMAT_OK); 288 } 289 } 290 291 return result; 292 } 293}