001/* 002 * File : $Source$ 003 * Date : $Date$ 004 * Version: $Revision$ 005 * 006 * This library is part of OpenCms - 007 * the Open Source Content Management System 008 * 009 * Copyright (C) 2002 - 2009 Alkacon Software (http://www.alkacon.com) 010 * 011 * This library is free software; you can redistribute it and/or 012 * modify it under the terms of the GNU Lesser General Public 013 * License as published by the Free Software Foundation; either 014 * version 2.1 of the License, or (at your option) any later version. 015 * 016 * This library is distributed in the hope that it will be useful, 017 * but WITHOUT ANY WARRANTY; without even the implied warranty of 018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 019 * Lesser General Public License for more details. 020 * 021 * For further information about Alkacon Software, please see the 022 * company website: http://www.alkacon.com 023 * 024 * For further information about OpenCms, please see the 025 * project website: http://www.opencms.org 026 * 027 * You should have received a copy of the GNU Lesser General Public 028 * License along with this library; if not, write to the Free Software 029 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 030 */ 031 032package org.opencms.search; 033 034import org.apache.lucene.document.*; 035import org.apache.lucene.index.IndexOptions; 036import org.apache.lucene.index.IndexableField; 037import org.apache.tika.io.IOUtils; 038import org.opencms.main.CmsRuntimeException; 039import org.opencms.main.OpenCms; 040import org.opencms.relations.CmsCategory; 041import org.opencms.search.fields.CmsLuceneField; 042import org.opencms.search.fields.CmsSearchField; 043import org.opencms.search.fields.CmsSearchFieldConfiguration; 044 045import java.io.IOException; 046import java.text.ParseException; 047import java.util.*; 048 049/** 050 * A Lucene search document implementation.<p> 051 */ 052public class CmsLuceneDocument implements I_CmsSearchDocument { 053 054 /** 055 * Type for a stored-only field. 056 */ 057 public static final FieldType NOT_STORED_ANALYSED_TYPE; 058 059 /** 060 * Type for a stored-and analyzed fields. 061 */ 062 public static final FieldType STORED_ANALYSED_TYPE; 063 064 /** 065 * Type for a stored-only field. 066 */ 067 public static final FieldType STORED_NOT_ANALYSED_TYPE; 068 069 static { 070 STORED_ANALYSED_TYPE = new FieldType(); 071 STORED_ANALYSED_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); 072 STORED_ANALYSED_TYPE.setOmitNorms(false); 073 STORED_ANALYSED_TYPE.setStored(true); 074 STORED_ANALYSED_TYPE.setTokenized(true); 075 STORED_ANALYSED_TYPE.setStoreTermVectorPositions(false); 076 STORED_ANALYSED_TYPE.setStoreTermVectorOffsets(false); 077 STORED_ANALYSED_TYPE.setStoreTermVectors(false); 078 STORED_ANALYSED_TYPE.freeze(); 079 } 080 081 static { 082 NOT_STORED_ANALYSED_TYPE = new FieldType(); 083 NOT_STORED_ANALYSED_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); 084 NOT_STORED_ANALYSED_TYPE.setOmitNorms(false); 085 NOT_STORED_ANALYSED_TYPE.setStored(false); 086 NOT_STORED_ANALYSED_TYPE.setTokenized(true); 087 NOT_STORED_ANALYSED_TYPE.setStoreTermVectorPositions(false); 088 NOT_STORED_ANALYSED_TYPE.setStoreTermVectorOffsets(false); 089 NOT_STORED_ANALYSED_TYPE.setStoreTermVectors(false); 090 NOT_STORED_ANALYSED_TYPE.freeze(); 091 } 092 093 static { 094 STORED_NOT_ANALYSED_TYPE = new FieldType(); 095 STORED_NOT_ANALYSED_TYPE.setIndexOptions(IndexOptions.DOCS); 096 STORED_NOT_ANALYSED_TYPE.setOmitNorms(false); 097 STORED_NOT_ANALYSED_TYPE.setStored(true); 098 STORED_NOT_ANALYSED_TYPE.setTokenized(false); 099 STORED_NOT_ANALYSED_TYPE.setStoreTermVectorPositions(false); 100 STORED_NOT_ANALYSED_TYPE.setStoreTermVectorOffsets(false); 101 STORED_NOT_ANALYSED_TYPE.setStoreTermVectors(false); 102 STORED_NOT_ANALYSED_TYPE.freeze(); 103 } 104 105 /** The Lucene document. */ 106 private Document m_doc; 107 108 /** The fields stored in this document. */ 109 private Map<String, Field> m_fields; 110 111 /** Holds the score for this document. */ 112 private float m_score; 113 114 /** 115 * Public constructor.<p> 116 * 117 * @param doc the Lucene document 118 */ 119 public CmsLuceneDocument(Document doc) { 120 121 m_doc = doc; 122 m_fields = new HashMap<String, Field>(); 123 } 124 125 /** 126 * Generate a list of date terms for the optimized date range search.<p> 127 * 128 * @param date the date for get the date terms for 129 * 130 * @return a list of date terms for the optimized date range search 131 * 132 * @see CmsSearchIndex#getDateRangeSpan(long, long) 133 */ 134 public static String getDateTerms(long date) { 135 136 Calendar cal = Calendar.getInstance(OpenCms.getLocaleManager().getTimeZone()); 137 cal.setTimeInMillis(date); 138 String day = CmsSearchIndex.DATES[cal.get(5)]; 139 String month = CmsSearchIndex.DATES[(cal.get(2) + 1)]; 140 String year = String.valueOf(cal.get(1)); 141 142 StringBuffer result = new StringBuffer(); 143 result.append(year); 144 result.append(month); 145 result.append(day); 146 result.append(' '); 147 result.append(year); 148 result.append(month); 149 result.append(' '); 150 result.append(year); 151 return result.toString(); 152 } 153 154 /** 155 * @see org.opencms.search.I_CmsSearchDocument#addCategoryField(java.util.List) 156 */ 157 public void addCategoryField(List<CmsCategory> categories) { 158 159 if ((categories != null) && (categories.size() > 0)) { 160 161 StringBuffer categoryBuffer = new StringBuffer(128); 162 for (CmsCategory category : categories) { 163 categoryBuffer.append(category.getPath()); 164 categoryBuffer.append(' '); 165 } 166 if (categoryBuffer.length() > 0) { 167 Field field = new Field( 168 CmsSearchField.FIELD_CATEGORY, 169 categoryBuffer.toString().toLowerCase(), 170 STORED_ANALYSED_TYPE); 171 add(field); 172 } 173 } else { 174 // synthetic "unknown" category if no category property defined for resource 175 Field field = new Field( 176 CmsSearchField.FIELD_CATEGORY, 177 CmsSearchCategoryCollector.UNKNOWN_CATEGORY, 178 STORED_ANALYSED_TYPE); 179 add(field); 180 } 181 } 182 183 /** 184 * @see org.opencms.search.I_CmsSearchDocument#addContentField(byte[]) 185 */ 186 public void addContentField(byte[] data) { 187 188 Field field = new StoredField(CmsSearchField.FIELD_CONTENT_BLOB, data); 189 m_doc.add(field); 190 } 191 192 /** 193 * @see org.opencms.search.I_CmsSearchDocument#addContentLocales(java.util.Collection) 194 */ 195 public void addContentLocales(Collection<Locale> locales) { 196 197 // Lucene documents are not localized by defualt: Nothing to do here 198 } 199 200 /** 201 * @see org.opencms.search.I_CmsSearchDocument#addDateField(java.lang.String, long, boolean) 202 */ 203 public void addDateField(String name, long date, boolean analyzed) { 204 205 Field field = new Field( 206 name, 207 DateTools.dateToString(new Date(date), DateTools.Resolution.MILLISECOND), 208 STORED_NOT_ANALYSED_TYPE); 209 add(field); 210 211 if (analyzed) { 212 field = new Field( 213 name + CmsSearchField.FIELD_DATE_LOOKUP_SUFFIX, 214 getDateTerms(date), 215 NOT_STORED_ANALYSED_TYPE); 216 add(field); 217 } 218 } 219 220 /** 221 * @see org.opencms.search.I_CmsSearchDocument#addFileSizeField(int) 222 */ 223 public void addFileSizeField(int length) { 224 225 // a default lucene implementation does not have a field for the file size 226 } 227 228 /** 229 * @see org.opencms.search.I_CmsSearchDocument#addPathField(java.lang.String) 230 */ 231 public void addPathField(String rootPath) { 232 233 String parentFolders = CmsSearchFieldConfiguration.getParentFolderTokens(rootPath); 234 Field field = new Field(CmsSearchField.FIELD_PARENT_FOLDERS, parentFolders, NOT_STORED_ANALYSED_TYPE); 235 add(field); 236 } 237 238 /** 239 * @see org.opencms.search.I_CmsSearchDocument#addResourceLocales(java.util.Collection) 240 */ 241 public void addResourceLocales(Collection<Locale> locales) { 242 243 // A default lucene document has only one locale. 244 } 245 246 /** 247 * @see org.opencms.search.I_CmsSearchDocument#addRootPathField(java.lang.String) 248 */ 249 public void addRootPathField(String rootPath) { 250 251 add(new StringField(CmsSearchField.FIELD_PATH, rootPath, Field.Store.YES)); 252 } 253 254 /** 255 * @see org.opencms.search.I_CmsSearchDocument#addSearchField(org.opencms.search.fields.CmsSearchField, java.lang.String) 256 */ 257 public void addSearchField(CmsSearchField field, String value) { 258 259 if (field instanceof CmsLuceneField) { 260 add(((CmsLuceneField)field).createField(value)); 261 } else { 262 throw (new CmsRuntimeException( 263 Messages.get().container(Messages.LOG_INVALID_FIELD_CLASS_1, field.getClass().getName()))); 264 } 265 } 266 267 /** 268 * @see org.opencms.search.I_CmsSearchDocument#addSuffixField(java.lang.String) 269 */ 270 public void addSuffixField(String suffix) { 271 272 add(new StringField(CmsSearchField.FIELD_SUFFIX, suffix, Field.Store.YES)); 273 } 274 275 /** 276 * @see org.opencms.search.I_CmsSearchDocument#addTypeField(java.lang.String) 277 */ 278 public void addTypeField(String typeName) { 279 280 add(new StringField(CmsSearchField.FIELD_TYPE, typeName, Field.Store.YES)); 281 } 282 283 /** 284 * @see org.opencms.search.I_CmsSearchDocument#getContentBlob() 285 */ 286 public byte[] getContentBlob() { 287 288 IndexableField fieldContentBlob = m_doc.getField(CmsSearchField.FIELD_CONTENT_BLOB); 289 if (fieldContentBlob != null) { 290 try { 291 if (fieldContentBlob.readerValue() != null) { 292 return IOUtils.toByteArray(fieldContentBlob.readerValue()); 293 } 294 } catch (IOException e) { 295 // TODO: 296 } 297 } 298 return null; 299 } 300 301 /** 302 * @see org.opencms.search.I_CmsSearchDocument#getDocument() 303 */ 304 public Object getDocument() { 305 306 return m_doc; 307 } 308 309 /** 310 * @see org.opencms.search.I_CmsSearchDocument#getFieldNames() 311 */ 312 public List<String> getFieldNames() { 313 314 List<String> result = new ArrayList<String>(); 315 for (IndexableField field : m_doc.getFields()) { 316 result.add(field.name()); 317 } 318 return result; 319 } 320 321 /** 322 * @see org.opencms.search.I_CmsSearchDocument#getFieldValueAsDate(java.lang.String) 323 */ 324 public Date getFieldValueAsDate(String fieldName) { 325 326 String contentDate = getFieldValueAsString(fieldName); 327 if (contentDate != null) { 328 try { 329 return new Date(DateTools.stringToTime(contentDate)); 330 } catch (ParseException e) { 331 // ignore and assume the given field name does not refer a date field 332 } 333 } 334 return null; 335 } 336 337 /** 338 * @see org.opencms.search.I_CmsSearchDocument#getFieldValueAsString(java.lang.String) 339 */ 340 public String getFieldValueAsString(String fieldName) { 341 342 IndexableField fieldValue = m_doc.getField(fieldName); 343 if (fieldValue != null) { 344 return fieldValue.stringValue(); 345 } 346 return null; 347 } 348 349 /** 350 * @see org.opencms.search.I_CmsSearchDocument#getMultivaluedFieldAsStringList(java.lang.String) 351 */ 352 public List<String> getMultivaluedFieldAsStringList(String fieldName) { 353 354 return Collections.singletonList(getFieldValueAsString(fieldName)); 355 } 356 357 /** 358 * @see org.opencms.search.I_CmsSearchDocument#getPath() 359 */ 360 public String getPath() { 361 362 return getFieldValueAsString(CmsSearchField.FIELD_PATH); 363 } 364 365 /** 366 * @see org.opencms.search.I_CmsSearchDocument#getScore() 367 */ 368 public float getScore() { 369 370 return m_score; 371 } 372 373 /** 374 * @see org.opencms.search.I_CmsSearchDocument#getType() 375 */ 376 public String getType() { 377 378 return getFieldValueAsString(CmsSearchField.FIELD_TYPE); 379 } 380 381 /** 382 * @see org.opencms.search.I_CmsSearchDocument#setScore(float) 383 */ 384 public void setScore(float score) { 385 386 m_score = score; 387 } 388 389 /** 390 * Adds a field to this document.<p> 391 * 392 * @param f the field to add 393 */ 394 private void add(Field f) { 395 396 m_fields.put(f.name(), f); 397 m_doc.add(f); 398 } 399}