001/* 002 * File : $Source$ 003 * Date : $Date$ 004 * Version: $Revision$ 005 * 006 * This library is part of OpenCms - 007 * the Open Source Content Management System 008 * 009 * Copyright (C) 2002 - 2009 Alkacon Software (https://www.alkacon.com) 010 * 011 * This library is free software; you can redistribute it and/or 012 * modify it under the terms of the GNU Lesser General Public 013 * License as published by the Free Software Foundation; either 014 * version 2.1 of the License, or (at your option) any later version. 015 * 016 * This library is distributed in the hope that it will be useful, 017 * but WITHOUT ANY WARRANTY; without even the implied warranty of 018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 019 * Lesser General Public License for more details. 020 * 021 * For further information about Alkacon Software, please see the 022 * company website: https://www.alkacon.com 023 * 024 * For further information about OpenCms, please see the 025 * project website: https://www.opencms.org 026 * 027 * You should have received a copy of the GNU Lesser General Public 028 * License along with this library; if not, write to the Free Software 029 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 030 */ 031 032package org.opencms.search; 033 034import org.opencms.main.CmsRuntimeException; 035import org.opencms.main.OpenCms; 036import org.opencms.relations.CmsCategory; 037import org.opencms.search.fields.CmsLuceneField; 038import org.opencms.search.fields.CmsSearchField; 039import org.opencms.search.fields.CmsSearchFieldConfiguration; 040 041import java.io.IOException; 042import java.nio.charset.StandardCharsets; 043import java.text.ParseException; 044import java.util.ArrayList; 045import java.util.Calendar; 046import java.util.Collection; 047import java.util.Collections; 048import java.util.Date; 049import java.util.HashMap; 050import java.util.List; 051import java.util.Locale; 052import java.util.Map; 053 054import org.apache.lucene.document.DateTools; 055import org.apache.lucene.document.Document; 056import org.apache.lucene.document.Field; 057import org.apache.lucene.document.FieldType; 058import org.apache.lucene.document.StoredField; 059import org.apache.lucene.document.StringField; 060import org.apache.lucene.index.IndexOptions; 061import org.apache.lucene.index.IndexableField; 062 063/** 064 * A Lucene search document implementation.<p> 065 */ 066public class CmsLuceneDocument implements I_CmsSearchDocument { 067 068 /** 069 * Type for a stored-only field. 070 */ 071 public static final FieldType NOT_STORED_ANALYSED_TYPE; 072 073 /** 074 * Type for a stored-and analyzed fields. 075 */ 076 public static final FieldType STORED_ANALYSED_TYPE; 077 078 /** 079 * Type for a stored-only field. 080 */ 081 public static final FieldType STORED_NOT_ANALYSED_TYPE; 082 083 static { 084 STORED_ANALYSED_TYPE = new FieldType(); 085 STORED_ANALYSED_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); 086 STORED_ANALYSED_TYPE.setOmitNorms(false); 087 STORED_ANALYSED_TYPE.setStored(true); 088 STORED_ANALYSED_TYPE.setTokenized(true); 089 STORED_ANALYSED_TYPE.setStoreTermVectorPositions(false); 090 STORED_ANALYSED_TYPE.setStoreTermVectorOffsets(false); 091 STORED_ANALYSED_TYPE.setStoreTermVectors(false); 092 STORED_ANALYSED_TYPE.freeze(); 093 } 094 095 static { 096 NOT_STORED_ANALYSED_TYPE = new FieldType(); 097 NOT_STORED_ANALYSED_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); 098 NOT_STORED_ANALYSED_TYPE.setOmitNorms(false); 099 NOT_STORED_ANALYSED_TYPE.setStored(false); 100 NOT_STORED_ANALYSED_TYPE.setTokenized(true); 101 NOT_STORED_ANALYSED_TYPE.setStoreTermVectorPositions(false); 102 NOT_STORED_ANALYSED_TYPE.setStoreTermVectorOffsets(false); 103 NOT_STORED_ANALYSED_TYPE.setStoreTermVectors(false); 104 NOT_STORED_ANALYSED_TYPE.freeze(); 105 } 106 107 static { 108 STORED_NOT_ANALYSED_TYPE = new FieldType(); 109 STORED_NOT_ANALYSED_TYPE.setIndexOptions(IndexOptions.DOCS); 110 STORED_NOT_ANALYSED_TYPE.setOmitNorms(false); 111 STORED_NOT_ANALYSED_TYPE.setStored(true); 112 STORED_NOT_ANALYSED_TYPE.setTokenized(false); 113 STORED_NOT_ANALYSED_TYPE.setStoreTermVectorPositions(false); 114 STORED_NOT_ANALYSED_TYPE.setStoreTermVectorOffsets(false); 115 STORED_NOT_ANALYSED_TYPE.setStoreTermVectors(false); 116 STORED_NOT_ANALYSED_TYPE.freeze(); 117 } 118 119 /** The Lucene document. */ 120 private Document m_doc; 121 122 /** The fields stored in this document. */ 123 private Map<String, Field> m_fields; 124 125 /** Holds the score for this document. */ 126 private float m_score; 127 128 /** 129 * Public constructor.<p> 130 * 131 * @param doc the Lucene document 132 */ 133 public CmsLuceneDocument(Document doc) { 134 135 m_doc = doc; 136 m_fields = new HashMap<String, Field>(); 137 } 138 139 /** 140 * Generate a list of date terms for the optimized date range search.<p> 141 * 142 * @param date the date for get the date terms for 143 * 144 * @return a list of date terms for the optimized date range search 145 * 146 * @see CmsSearchIndex#getDateRangeSpan(long, long) 147 */ 148 public static String getDateTerms(long date) { 149 150 Calendar cal = Calendar.getInstance(OpenCms.getLocaleManager().getTimeZone()); 151 cal.setTimeInMillis(date); 152 String day = CmsSearchIndex.DATES[cal.get(5)]; 153 String month = CmsSearchIndex.DATES[(cal.get(2) + 1)]; 154 String year = String.valueOf(cal.get(1)); 155 156 StringBuffer result = new StringBuffer(); 157 result.append(year); 158 result.append(month); 159 result.append(day); 160 result.append(' '); 161 result.append(year); 162 result.append(month); 163 result.append(' '); 164 result.append(year); 165 return result.toString(); 166 } 167 168 /** 169 * @see org.opencms.search.I_CmsSearchDocument#addCategoryField(java.util.List) 170 */ 171 public void addCategoryField(List<CmsCategory> categories) { 172 173 if ((categories != null) && (categories.size() > 0)) { 174 175 StringBuffer categoryBuffer = new StringBuffer(128); 176 for (CmsCategory category : categories) { 177 categoryBuffer.append(category.getPath()); 178 categoryBuffer.append(' '); 179 } 180 if (categoryBuffer.length() > 0) { 181 Field field = new Field( 182 CmsSearchField.FIELD_CATEGORY, 183 categoryBuffer.toString().toLowerCase(), 184 STORED_ANALYSED_TYPE); 185 add(field); 186 } 187 } else { 188 // synthetic "unknown" category if no category property defined for resource 189 Field field = new Field( 190 CmsSearchField.FIELD_CATEGORY, 191 CmsSearchCategoryCollector.UNKNOWN_CATEGORY, 192 STORED_ANALYSED_TYPE); 193 add(field); 194 } 195 } 196 197 /** 198 * @see org.opencms.search.I_CmsSearchDocument#addContentField(byte[]) 199 */ 200 public void addContentField(byte[] data) { 201 202 Field field = new StoredField(CmsSearchField.FIELD_CONTENT_BLOB, data); 203 m_doc.add(field); 204 } 205 206 /** 207 * @see org.opencms.search.I_CmsSearchDocument#addContentLocales(java.util.Collection) 208 */ 209 public void addContentLocales(Collection<Locale> locales) { 210 211 // Lucene documents are not localized by defualt: Nothing to do here 212 } 213 214 /** 215 * @see org.opencms.search.I_CmsSearchDocument#addDateField(java.lang.String, long, boolean) 216 */ 217 public void addDateField(String name, long date, boolean analyzed) { 218 219 Field field = new Field( 220 name, 221 DateTools.dateToString(new Date(date), DateTools.Resolution.MILLISECOND), 222 STORED_NOT_ANALYSED_TYPE); 223 add(field); 224 225 if (analyzed) { 226 field = new Field( 227 name + CmsSearchField.FIELD_DATE_LOOKUP_SUFFIX, 228 getDateTerms(date), 229 NOT_STORED_ANALYSED_TYPE); 230 add(field); 231 } 232 } 233 234 /** 235 * @see org.opencms.search.I_CmsSearchDocument#addFileSizeField(int) 236 */ 237 public void addFileSizeField(int length) { 238 239 // a default lucene implementation does not have a field for the file size 240 } 241 242 /** 243 * @see org.opencms.search.I_CmsSearchDocument#addPathField(java.lang.String) 244 */ 245 public void addPathField(String rootPath) { 246 247 String parentFolders = CmsSearchFieldConfiguration.getParentFolderTokens(rootPath); 248 Field field = new Field(CmsSearchField.FIELD_PARENT_FOLDERS, parentFolders, NOT_STORED_ANALYSED_TYPE); 249 add(field); 250 } 251 252 /** 253 * @see org.opencms.search.I_CmsSearchDocument#addResourceLocales(java.util.Collection) 254 */ 255 public void addResourceLocales(Collection<Locale> locales) { 256 257 // A default lucene document has only one locale. 258 } 259 260 /** 261 * @see org.opencms.search.I_CmsSearchDocument#addRootPathField(java.lang.String) 262 */ 263 public void addRootPathField(String rootPath) { 264 265 add(new StringField(CmsSearchField.FIELD_PATH, rootPath, Field.Store.YES)); 266 } 267 268 /** 269 * @see org.opencms.search.I_CmsSearchDocument#addSearchField(org.opencms.search.fields.CmsSearchField, java.lang.String) 270 */ 271 public void addSearchField(CmsSearchField field, String value) { 272 273 if (field instanceof CmsLuceneField) { 274 add(((CmsLuceneField)field).createField(value)); 275 } else { 276 throw (new CmsRuntimeException( 277 Messages.get().container(Messages.LOG_INVALID_FIELD_CLASS_1, field.getClass().getName()))); 278 } 279 } 280 281 /** 282 * @see org.opencms.search.I_CmsSearchDocument#addSuffixField(java.lang.String) 283 */ 284 public void addSuffixField(String suffix) { 285 286 add(new StringField(CmsSearchField.FIELD_SUFFIX, suffix, Field.Store.YES)); 287 } 288 289 /** 290 * @see org.opencms.search.I_CmsSearchDocument#addTypeField(java.lang.String) 291 */ 292 public void addTypeField(String typeName) { 293 294 add(new StringField(CmsSearchField.FIELD_TYPE, typeName, Field.Store.YES)); 295 } 296 297 /** 298 * @see org.opencms.search.I_CmsSearchDocument#getContentBlob() 299 */ 300 public byte[] getContentBlob() { 301 302 IndexableField fieldContentBlob = m_doc.getField(CmsSearchField.FIELD_CONTENT_BLOB); 303 if (fieldContentBlob != null) { 304 try { 305 if (fieldContentBlob.readerValue() != null) { 306 return org.apache.commons.io.IOUtils.toByteArray( 307 fieldContentBlob.readerValue(), 308 StandardCharsets.UTF_8); 309 } 310 } catch (IOException e) { 311 // TODO: 312 } 313 } 314 return null; 315 } 316 317 /** 318 * @see org.opencms.search.I_CmsSearchDocument#getDocument() 319 */ 320 public Object getDocument() { 321 322 return m_doc; 323 } 324 325 /** 326 * @see org.opencms.search.I_CmsSearchDocument#getFieldNames() 327 */ 328 public List<String> getFieldNames() { 329 330 List<String> result = new ArrayList<String>(); 331 for (IndexableField field : m_doc.getFields()) { 332 result.add(field.name()); 333 } 334 return result; 335 } 336 337 /** 338 * @see org.opencms.search.I_CmsSearchDocument#getFieldValueAsDate(java.lang.String) 339 */ 340 public Date getFieldValueAsDate(String fieldName) { 341 342 String contentDate = getFieldValueAsString(fieldName); 343 if (contentDate != null) { 344 try { 345 return new Date(DateTools.stringToTime(contentDate)); 346 } catch (ParseException e) { 347 // ignore and assume the given field name does not refer a date field 348 } 349 } 350 return null; 351 } 352 353 /** 354 * @see org.opencms.search.I_CmsSearchDocument#getFieldValueAsString(java.lang.String) 355 */ 356 public String getFieldValueAsString(String fieldName) { 357 358 IndexableField fieldValue = m_doc.getField(fieldName); 359 if (fieldValue != null) { 360 return fieldValue.stringValue(); 361 } 362 return null; 363 } 364 365 /** 366 * @see org.opencms.search.I_CmsSearchDocument#getMultivaluedFieldAsStringList(java.lang.String) 367 */ 368 public List<String> getMultivaluedFieldAsStringList(String fieldName) { 369 370 return Collections.singletonList(getFieldValueAsString(fieldName)); 371 } 372 373 /** 374 * @see org.opencms.search.I_CmsSearchDocument#getPath() 375 */ 376 public String getPath() { 377 378 return getFieldValueAsString(CmsSearchField.FIELD_PATH); 379 } 380 381 /** 382 * @see org.opencms.search.I_CmsSearchDocument#getScore() 383 */ 384 public float getScore() { 385 386 return m_score; 387 } 388 389 /** 390 * @see org.opencms.search.I_CmsSearchDocument#getType() 391 */ 392 public String getType() { 393 394 return getFieldValueAsString(CmsSearchField.FIELD_TYPE); 395 } 396 397 /** 398 * @see org.opencms.search.I_CmsSearchDocument#setScore(float) 399 */ 400 public void setScore(float score) { 401 402 m_score = score; 403 } 404 405 /** 406 * Adds a field to this document.<p> 407 * 408 * @param f the field to add 409 */ 410 private void add(Field f) { 411 412 m_fields.put(f.name(), f); 413 m_doc.add(f); 414 } 415}