001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.search;
029
030import org.opencms.file.CmsObject;
031import org.opencms.file.CmsResource;
032import org.opencms.main.CmsException;
033import org.opencms.main.CmsLog;
034import org.opencms.report.I_CmsReport;
035import org.opencms.search.documents.CmsIndexNoContentException;
036import org.opencms.search.documents.I_CmsDocumentFactory;
037
038import org.apache.commons.logging.Log;
039
040/**
041 * Implements the indexing method for a single resource as thread.<p>
042 *
043 * The indexing of a single resource is wrapped into a thread
044 * in order to prevent the overall indexer from hanging.<p>
045 *
046 * @since 6.0.0
047 */
048public class CmsIndexingThread extends Thread {
049
050    /** The log object for this class. */
051    private static final Log LOG = CmsLog.getLog(CmsIndexingThread.class);
052
053    /** The cms object. */
054    private CmsObject m_cms;
055
056    /** The counter to output for the report. */
057    private int m_count;
058
059    /** The current index. */
060    private I_CmsSearchIndex m_index;
061
062    /** The current report. */
063    private I_CmsReport m_report;
064
065    /** The resource to index. */
066    private CmsResource m_res;
067
068    /** The result document. */
069    private I_CmsSearchDocument m_result;
070
071    /** Flag, indicating if a default document for the resource should be created. */
072    private boolean m_addDefaultDocument = true;
073
074    /**
075     * Create a new indexing thread.<p>
076     *
077     * @param cms the current OpenCms user context
078     * @param res the resource to index
079     * @param index the index to update the resource in
080     * @param count the report count
081     * @param report the report to write the output to
082     */
083    public CmsIndexingThread(CmsObject cms, CmsResource res, I_CmsSearchIndex index, int count, I_CmsReport report) {
084
085        super("OpenCms: Indexing '" + res.getName() + "'");
086
087        m_cms = cms;
088        m_res = res;
089        m_index = index;
090        m_count = count;
091        m_report = report;
092        m_result = null;
093    }
094
095    /**
096     * Returns the document created by this indexer thread.<p>
097     *
098     * In case the resource could not be indexed, <code>null</code> is returned.<p>
099     *
100     * @return the document created by this indexer thread
101     */
102    public I_CmsSearchDocument getResult() {
103
104        if ((null == m_result) && m_addDefaultDocument) {
105            if (LOG.isWarnEnabled()) {
106                LOG.warn(
107                    "Creating default document without content for "
108                        + m_res.getRootPath()
109                        + " in index "
110                        + m_index.getName());
111            }
112            return createDefaultIndexDocument();
113        }
114        return m_result;
115    }
116
117    /**
118     * Starts the thread to index a single resource.<p>
119     *
120     * @see java.lang.Runnable#run()
121     */
122    @Override
123    public void run() {
124
125        // flag for logging in the "final" block
126        boolean docOk = false;
127        try {
128
129            // create the index document
130            m_result = createIndexDocument(m_cms, m_res, m_index, m_count, m_report);
131            docOk = true;
132
133            // check if the thread was interrupted
134            if (isInterrupted() && LOG.isDebugEnabled()) {
135                LOG.debug(
136                    Messages.get().getBundle().key(Messages.LOG_ABANDONED_THREAD_FINISHED_1, m_res.getRootPath()));
137            }
138
139        } catch (CmsIndexNoContentException e) {
140            // Ignore exception caused by empty documents, so that the report is not messed up with error message
141            m_report.println(
142                org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_OK_0),
143                I_CmsReport.FORMAT_OK);
144        } catch (Throwable exc) {
145            if (m_report != null) {
146                m_report.println(
147                    org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_FAILED_0),
148                    I_CmsReport.FORMAT_ERROR);
149                m_report.println(
150                    org.opencms.report.Messages.get().container(
151                        org.opencms.report.Messages.RPT_ARGUMENT_1,
152                        exc.toString()),
153                    I_CmsReport.FORMAT_ERROR);
154            }
155            if (LOG.isErrorEnabled()) {
156                LOG.error(
157                    Messages.get().getBundle().key(
158                        Messages.ERR_INDEX_RESOURCE_FAILED_2,
159                        m_res.getRootPath(),
160                        m_index.getName()),
161                    exc);
162            }
163            // set flag to avoid logging in finally block
164            docOk = true;
165        } finally {
166            if (!docOk) {
167                // apparently there was a Throwable that causes an issue
168                if (m_report != null) {
169                    m_report.println(
170                        org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_FAILED_0),
171                        I_CmsReport.FORMAT_ERROR);
172                    m_report.println(
173                        Messages.get().container(
174                            Messages.ERR_INDEX_RESOURCE_FAILED_2,
175                            m_res.getRootPath(),
176                            m_index.getName()),
177                        I_CmsReport.FORMAT_ERROR);
178                }
179                if (LOG.isErrorEnabled()) {
180                    LOG.error(
181                        Messages.get().getBundle().key(
182                            Messages.ERR_INDEX_RESOURCE_FAILED_2,
183                            m_res.getRootPath(),
184                            m_index.getName()));
185                }
186            }
187        }
188    }
189
190    /**
191     * Creates a document for the resource without extracting the content. The aim is to get a content indexed,
192     * even if extraction runs into a timeout.
193     *
194     * @return the document for the resource generated if the content is discarded,
195     *         i.e., only meta information are indexed.
196     */
197    protected I_CmsSearchDocument createDefaultIndexDocument() {
198
199        try {
200            return m_index.getFieldConfiguration().createDocument(m_cms, m_res, m_index, null);
201        } catch (CmsException e) {
202            LOG.error(
203                "Default document for "
204                    + m_res.getRootPath()
205                    + " and index "
206                    + m_index.getName()
207                    + " could not be created.",
208                e);
209            return null;
210        }
211    }
212
213    /**
214     * Creates the search index document.<p>
215     *
216     * @param cms the current OpenCms user context
217     * @param res the resource to index
218     * @param index the index to update the resource in
219     * @param count the report count
220     * @param report the report to write the output to
221     *
222     * @return the created search index document
223     *
224     * @throws CmsException in case of issues while creating the search index document
225     */
226    protected I_CmsSearchDocument createIndexDocument(
227        CmsObject cms,
228        CmsResource res,
229        I_CmsSearchIndex index,
230        int count,
231        I_CmsReport report)
232    throws CmsException {
233
234        I_CmsSearchDocument result = null;
235
236        if (report != null) {
237            report.print(
238                org.opencms.report.Messages.get().container(
239                    org.opencms.report.Messages.RPT_SUCCESSION_1,
240                    String.valueOf(count)),
241                I_CmsReport.FORMAT_NOTE);
242            report.print(Messages.get().container(Messages.RPT_SEARCH_INDEXING_FILE_BEGIN_0), I_CmsReport.FORMAT_NOTE);
243            report.print(
244                org.opencms.report.Messages.get().container(
245                    org.opencms.report.Messages.RPT_ARGUMENT_1,
246                    report.removeSiteRoot(res.getRootPath())));
247            report.print(
248                org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_DOTS_0),
249                I_CmsReport.FORMAT_DEFAULT);
250        }
251
252        if (m_index.excludeFromIndex(m_cms, m_res)) {
253            m_addDefaultDocument = false;
254        } else {
255            // resource is to be included in the index
256            I_CmsDocumentFactory documentFactory = index.getDocumentFactory(res);
257            if (documentFactory != null) {
258                // some resources e.g. JSP do not have a default document factory
259                if (LOG.isDebugEnabled()) {
260                    LOG.debug(
261                        Messages.get().getBundle().key(
262                            Messages.LOG_INDEXING_WITH_FACTORY_2,
263                            res.getRootPath(),
264                            documentFactory.getName()));
265                }
266                // create the document
267                result = documentFactory.createDocument(cms, res, index);
268            } else {
269                m_addDefaultDocument = false;
270            }
271        }
272        if (result == null) {
273            // this resource is not contained in the given search index or locale did not match
274            if (report != null) {
275                report.println(
276                    org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_SKIPPED_0),
277                    I_CmsReport.FORMAT_NOTE);
278            }
279            if (LOG.isDebugEnabled()) {
280                LOG.debug(Messages.get().getBundle().key(Messages.LOG_SKIPPED_1, res.getRootPath()));
281            }
282        } else {
283            // index document was successfully created
284            if ((m_report != null)) {
285                m_report.println(
286                    org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_OK_0),
287                    I_CmsReport.FORMAT_OK);
288            }
289        }
290
291        return result;
292    }
293}