001/* 002 * File : $Source$ 003 * Date : $Date$ 004 * Version: $Revision$ 005 * 006 * This library is part of OpenCms - 007 * the Open Source Content Management System 008 * 009 * Copyright (C) 2002 - 2009 Alkacon Software (http://www.alkacon.com) 010 * 011 * This library is free software; you can redistribute it and/or 012 * modify it under the terms of the GNU Lesser General Public 013 * License as published by the Free Software Foundation; either 014 * version 2.1 of the License, or (at your option) any later version. 015 * 016 * This library is distributed in the hope that it will be useful, 017 * but WITHOUT ANY WARRANTY; without even the implied warranty of 018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 019 * Lesser General Public License for more details. 020 * 021 * For further information about Alkacon Software, please see the 022 * company website: http://www.alkacon.com 023 * 024 * For further information about OpenCms, please see the 025 * project website: http://www.opencms.org 026 * 027 * You should have received a copy of the GNU Lesser General Public 028 * License along with this library; if not, write to the Free Software 029 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 030 */ 031 032package org.opencms.search.solr; 033 034import org.opencms.db.CmsPublishedResource; 035import org.opencms.main.CmsLog; 036import org.opencms.main.OpenCms; 037import org.opencms.search.CmsSearchUtil; 038import org.opencms.search.I_CmsSearchDocument; 039import org.opencms.search.fields.CmsSearchField; 040 041import java.io.IOException; 042import java.text.ParseException; 043import java.util.Calendar; 044import java.util.Date; 045import java.util.List; 046 047import org.apache.commons.logging.Log; 048import org.apache.solr.client.solrj.SolrClient; 049import org.apache.solr.client.solrj.SolrServerException; 050import org.apache.solr.common.SolrException; 051import org.apache.solr.common.SolrInputDocument; 052 053/** 054 * Implements the index writer for the Solr server used by OpenCms.<p> 055 * 056 * @since 8.5.0 057 */ 058public class CmsSolrIndexWriter implements I_CmsSolrIndexWriter { 059 060 /** The log object for this class. */ 061 protected static final Log LOG = CmsLog.getLog(CmsSolrIndexWriter.class); 062 063 /** The time to wait before a commit is sent to the Solr index. */ 064 private int m_commitMs = Long.valueOf( 065 OpenCms.getSearchManager().getSolrServerConfiguration().getSolrCommitMs()).intValue(); 066 067 /** The Solr index. */ 068 private CmsSolrIndex m_index; 069 070 /** The Solr client. */ 071 private SolrClient m_server; 072 073 /** 074 * Constructor to create a Solr index writer.<p> 075 * 076 * @param client the client to use 077 */ 078 public CmsSolrIndexWriter(SolrClient client) { 079 080 this(client, null); 081 } 082 083 /** 084 * Creates a new index writer based on the provided standard Lucene IndexWriter for the 085 * provided OpenCms search index instance.<p> 086 * 087 * The OpenCms search instance is currently used only for improved logging of the 088 * index operations.<p> 089 * 090 * @param client the standard Lucene IndexWriter to use as delegate 091 * @param index the OpenCms search index instance this writer to supposed to write to 092 */ 093 public CmsSolrIndexWriter(SolrClient client, CmsSolrIndex index) { 094 095 m_index = index; 096 m_server = client; 097 if (m_index != null) { 098 LOG.info( 099 Messages.get().getBundle().key( 100 Messages.LOG_SOLR_WRITER_CREATE_2, 101 m_index.getName(), 102 m_index.getPath())); 103 } 104 } 105 106 /** 107 * @see org.opencms.search.I_CmsIndexWriter#close() 108 */ 109 public void close() { 110 111 // nothing to do here 112 } 113 114 /** 115 * @see org.opencms.search.I_CmsIndexWriter#commit() 116 */ 117 public void commit() throws IOException { 118 119 if ((m_server != null) && (m_index != null)) { 120 try { 121 LOG.info( 122 Messages.get().getBundle().key( 123 Messages.LOG_SOLR_WRITER_COMMIT_2, 124 m_index.getName(), 125 m_index.getPath())); 126 m_server.commit(m_index.getCoreName()); 127 } catch (SolrServerException e) { 128 throw new IOException(e.getLocalizedMessage(), e); 129 } 130 } 131 } 132 133 /** 134 * @see org.opencms.search.solr.I_CmsSolrIndexWriter#deleteAllDocuments() 135 */ 136 public void deleteAllDocuments() throws IOException { 137 138 if ((m_server != null) && (m_index != null)) { 139 try { 140 LOG.info( 141 Messages.get().getBundle().key( 142 Messages.LOG_SOLR_WRITER_DELETE_ALL_2, 143 m_index.getName(), 144 m_index.getPath())); 145 m_server.deleteByQuery(m_index.getCoreName(), "*:*", m_commitMs); 146 } catch (SolrServerException e) { 147 throw new IOException(e.getLocalizedMessage(), e); 148 } 149 } 150 } 151 152 /** 153 * @see org.opencms.search.I_CmsIndexWriter#deleteDocument(org.opencms.db.CmsPublishedResource) 154 */ 155 public void deleteDocument(CmsPublishedResource resource) throws IOException { 156 157 if ((m_server != null) && (m_index != null)) { 158 try { 159 LOG.info( 160 Messages.get().getBundle().key( 161 Messages.LOG_SOLR_WRITER_DOC_DELETE_3, 162 resource.getRootPath(), 163 m_index.getName(), 164 m_index.getPath())); 165 m_server.deleteByQuery(m_index.getCoreName(), "id:" + resource.getStructureId().toString(), m_commitMs); 166 } catch (SolrServerException e) { 167 throw new IOException(e.getLocalizedMessage(), e); 168 } catch (SolrException e) { 169 throw new IOException(e.getLocalizedMessage(), e); 170 } 171 } 172 } 173 174 /** 175 * @see org.opencms.search.I_CmsIndexWriter#optimize() 176 */ 177 public void optimize() { 178 179 // optimization is not recommended 180 // should be configured within solrconfig.xml 181 } 182 183 /** 184 * Updates a search document without removing it beforehand. Use for migration purposes only. 185 * @param searchDocument the search document. 186 * @throws IOException if the update fails 187 */ 188 public void updateDocument(I_CmsSearchDocument searchDocument) throws IOException { 189 190 SolrInputDocument inputDoc = (SolrInputDocument)searchDocument.getDocument(); 191 try { 192 m_server.add(m_index.getCoreName(), inputDoc, m_commitMs); 193 } catch (SolrServerException e) { 194 LOG.error(e.getLocalizedMessage(), e); 195 } 196 } 197 198 /** 199 * @see org.opencms.search.I_CmsIndexWriter#updateDocument(java.lang.String, org.opencms.search.I_CmsSearchDocument) 200 */ 201 public void updateDocument(String rootPath, I_CmsSearchDocument document) throws IOException { 202 203 if ((m_server != null) && (m_index != null)) { 204 205 if (document.getDocument() != null) { 206 try { 207 m_server.deleteByQuery(m_index.getCoreName(), "path:\"" + rootPath + "\"", m_commitMs); 208 } catch (Exception e1) { 209 LOG.error(e1.getLocalizedMessage(), e1); 210 } 211 try { 212 LOG.info( 213 Messages.get().getBundle().key( 214 Messages.LOG_SOLR_WRITER_DOC_UPDATE_3, 215 rootPath, 216 m_index.getName(), 217 m_index.getPath())); 218 addDocumentInstances(document); 219 } catch (SolrServerException e) { 220 throw new IOException(e.getLocalizedMessage(), e); 221 } 222 } 223 } 224 } 225 226 /** 227 * Adds Solr documents to the index for the {@link I_CmsSearchDocument}. 228 * Documents for serial dates are added for each occurrence once with the date of the respective occurrence. 229 * @param document the document for the indexed resource 230 * @throws SolrServerException thrown if adding the document to the index fails 231 * @throws IOException thrown if adding the document to the index fails 232 */ 233 private void addDocumentInstances(I_CmsSearchDocument document) throws SolrServerException, IOException { 234 235 List<String> serialDates = document.getMultivaluedFieldAsStringList(CmsSearchField.FIELD_SERIESDATES); 236 SolrInputDocument inputDoc = (SolrInputDocument)document.getDocument(); 237 String id = inputDoc.getFieldValue(CmsSearchField.FIELD_ID).toString(); 238 if (null != serialDates) { 239 // NOTE: We can assume the following two arrays have the same length as serialDates. 240 List<String> serialDatesEnd = document.getMultivaluedFieldAsStringList( 241 CmsSearchField.FIELD_SERIESDATES_END); 242 List<String> serialDatesCurrentTill = document.getMultivaluedFieldAsStringList( 243 CmsSearchField.FIELD_SERIESDATES_CURRENT_TILL); 244 for (int i = 0; i < serialDates.size(); i++) { 245 String date = serialDates.get(i); 246 String endDate = serialDatesEnd.get(i); 247 String endDateRange = endDate; 248 if (!date.equals(endDate)) { 249 try { 250 Date parsed = CmsSearchUtil.parseDate(endDate); 251 Calendar calendar = Calendar.getInstance(); 252 calendar.setTime(parsed); 253 calendar.add(Calendar.SECOND, -1); 254 endDateRange = CmsSearchUtil.getDateAsIso8601(calendar.getTime()); 255 } catch (ParseException e) { 256 LOG.error(e.getLocalizedMessage(), e); 257 } 258 } 259 String dateRange = "[" + date + " TO " + endDateRange + "]"; 260 String currentTillDate = serialDatesCurrentTill.get(i); 261 inputDoc.setField(CmsSearchField.FIELD_INSTANCEDATE + CmsSearchField.FIELD_POSTFIX_DATE, date); 262 inputDoc.setField(CmsSearchField.FIELD_INSTANCEDATE_END + CmsSearchField.FIELD_POSTFIX_DATE, endDate); 263 inputDoc.setField( 264 CmsSearchField.FIELD_INSTANCEDATE_RANGE + CmsSearchField.FIELD_POSTFIX_DATE_RANGE, 265 dateRange); 266 inputDoc.setField( 267 CmsSearchField.FIELD_INSTANCEDATE_CURRENT_TILL + CmsSearchField.FIELD_POSTFIX_DATE, 268 currentTillDate); 269 for (String locale : document.getMultivaluedFieldAsStringList(CmsSearchField.FIELD_CONTENT_LOCALES)) { 270 inputDoc.setField( 271 CmsSearchField.FIELD_INSTANCEDATE + "_" + locale + CmsSearchField.FIELD_POSTFIX_DATE, 272 date); 273 inputDoc.setField( 274 CmsSearchField.FIELD_INSTANCEDATE_END + "_" + locale + CmsSearchField.FIELD_POSTFIX_DATE, 275 endDate); 276 inputDoc.setField( 277 CmsSearchField.FIELD_INSTANCEDATE_RANGE 278 + "_" 279 + locale 280 + CmsSearchField.FIELD_POSTFIX_DATE_RANGE, 281 dateRange); 282 inputDoc.setField( 283 CmsSearchField.FIELD_INSTANCEDATE_CURRENT_TILL 284 + "_" 285 + locale 286 + CmsSearchField.FIELD_POSTFIX_DATE, 287 currentTillDate); 288 } 289 String newId = id + String.format("-%04d", Integer.valueOf(i + 1)); 290 inputDoc.setField(CmsSearchField.FIELD_SOLR_ID, newId); 291 //remove fields that should not be part of the index, but were used to transport extra-information on date series 292 inputDoc.removeField(CmsSearchField.FIELD_SERIESDATES_END); 293 inputDoc.removeField(CmsSearchField.FIELD_SERIESDATES_CURRENT_TILL); 294 m_server.add(m_index.getCoreName(), inputDoc, m_commitMs); 295 } 296 } else { 297 inputDoc.setField(CmsSearchField.FIELD_SOLR_ID, id); 298 m_server.add(m_index.getCoreName(), inputDoc, m_commitMs); 299 } 300 301 } 302}