001/*
002 * File   : $Source$
003 * Date   : $Date$
004 * Version: $Revision$
005 *
006 * This library is part of OpenCms -
007 * the Open Source Content Management System
008 *
009 * Copyright (C) 2002 - 2009 Alkacon Software (https://www.alkacon.com)
010 *
011 * This library is free software; you can redistribute it and/or
012 * modify it under the terms of the GNU Lesser General Public
013 * License as published by the Free Software Foundation; either
014 * version 2.1 of the License, or (at your option) any later version.
015 *
016 * This library is distributed in the hope that it will be useful,
017 * but WITHOUT ANY WARRANTY; without even the implied warranty of
018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
019 * Lesser General Public License for more details.
020 *
021 * For further information about Alkacon Software, please see the
022 * company website: https://www.alkacon.com
023 *
024 * For further information about OpenCms, please see the
025 * project website: https://www.opencms.org
026 *
027 * You should have received a copy of the GNU Lesser General Public
028 * License along with this library; if not, write to the Free Software
029 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
030 */
031
032package org.opencms.search;
033
034import org.opencms.main.CmsRuntimeException;
035import org.opencms.main.OpenCms;
036import org.opencms.relations.CmsCategory;
037import org.opencms.search.fields.CmsLuceneField;
038import org.opencms.search.fields.CmsSearchField;
039import org.opencms.search.fields.CmsSearchFieldConfiguration;
040
041import java.io.IOException;
042import java.nio.charset.StandardCharsets;
043import java.text.ParseException;
044import java.util.ArrayList;
045import java.util.Calendar;
046import java.util.Collection;
047import java.util.Collections;
048import java.util.Date;
049import java.util.HashMap;
050import java.util.List;
051import java.util.Locale;
052import java.util.Map;
053
054import org.apache.lucene.document.DateTools;
055import org.apache.lucene.document.Document;
056import org.apache.lucene.document.Field;
057import org.apache.lucene.document.FieldType;
058import org.apache.lucene.document.StoredField;
059import org.apache.lucene.document.StringField;
060import org.apache.lucene.index.IndexOptions;
061import org.apache.lucene.index.IndexableField;
062
063/**
064 * A Lucene search document implementation.<p>
065 */
066public class CmsLuceneDocument implements I_CmsSearchDocument {
067
068    /**
069     * Type for a stored-only field.
070     */
071    public static final FieldType NOT_STORED_ANALYSED_TYPE;
072
073    /**
074     * Type for a stored-and analyzed fields.
075     */
076    public static final FieldType STORED_ANALYSED_TYPE;
077
078    /**
079     * Type for a stored-only field.
080     */
081    public static final FieldType STORED_NOT_ANALYSED_TYPE;
082
083    static {
084        STORED_ANALYSED_TYPE = new FieldType();
085        STORED_ANALYSED_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
086        STORED_ANALYSED_TYPE.setOmitNorms(false);
087        STORED_ANALYSED_TYPE.setStored(true);
088        STORED_ANALYSED_TYPE.setTokenized(true);
089        STORED_ANALYSED_TYPE.setStoreTermVectorPositions(false);
090        STORED_ANALYSED_TYPE.setStoreTermVectorOffsets(false);
091        STORED_ANALYSED_TYPE.setStoreTermVectors(false);
092        STORED_ANALYSED_TYPE.freeze();
093    }
094
095    static {
096        NOT_STORED_ANALYSED_TYPE = new FieldType();
097        NOT_STORED_ANALYSED_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
098        NOT_STORED_ANALYSED_TYPE.setOmitNorms(false);
099        NOT_STORED_ANALYSED_TYPE.setStored(false);
100        NOT_STORED_ANALYSED_TYPE.setTokenized(true);
101        NOT_STORED_ANALYSED_TYPE.setStoreTermVectorPositions(false);
102        NOT_STORED_ANALYSED_TYPE.setStoreTermVectorOffsets(false);
103        NOT_STORED_ANALYSED_TYPE.setStoreTermVectors(false);
104        NOT_STORED_ANALYSED_TYPE.freeze();
105    }
106
107    static {
108        STORED_NOT_ANALYSED_TYPE = new FieldType();
109        STORED_NOT_ANALYSED_TYPE.setIndexOptions(IndexOptions.DOCS);
110        STORED_NOT_ANALYSED_TYPE.setOmitNorms(false);
111        STORED_NOT_ANALYSED_TYPE.setStored(true);
112        STORED_NOT_ANALYSED_TYPE.setTokenized(false);
113        STORED_NOT_ANALYSED_TYPE.setStoreTermVectorPositions(false);
114        STORED_NOT_ANALYSED_TYPE.setStoreTermVectorOffsets(false);
115        STORED_NOT_ANALYSED_TYPE.setStoreTermVectors(false);
116        STORED_NOT_ANALYSED_TYPE.freeze();
117    }
118
119    /** The Lucene document. */
120    private Document m_doc;
121
122    /** The fields stored in this document. */
123    private Map<String, Field> m_fields;
124
125    /** Holds the score for this document. */
126    private float m_score;
127
128    /**
129     * Public constructor.<p>
130     *
131     * @param doc the Lucene document
132     */
133    public CmsLuceneDocument(Document doc) {
134
135        m_doc = doc;
136        m_fields = new HashMap<String, Field>();
137    }
138
139    /**
140     * Generate a list of date terms for the optimized date range search.<p>
141     *
142     * @param date the date for get the date terms for
143     *
144     * @return a list of date terms for the optimized date range search
145     *
146     * @see CmsSearchIndex#getDateRangeSpan(long, long)
147     */
148    public static String getDateTerms(long date) {
149
150        Calendar cal = Calendar.getInstance(OpenCms.getLocaleManager().getTimeZone());
151        cal.setTimeInMillis(date);
152        String day = CmsSearchIndex.DATES[cal.get(5)];
153        String month = CmsSearchIndex.DATES[(cal.get(2) + 1)];
154        String year = String.valueOf(cal.get(1));
155
156        StringBuffer result = new StringBuffer();
157        result.append(year);
158        result.append(month);
159        result.append(day);
160        result.append(' ');
161        result.append(year);
162        result.append(month);
163        result.append(' ');
164        result.append(year);
165        return result.toString();
166    }
167
168    /**
169     * @see org.opencms.search.I_CmsSearchDocument#addCategoryField(java.util.List)
170     */
171    public void addCategoryField(List<CmsCategory> categories) {
172
173        if ((categories != null) && (categories.size() > 0)) {
174
175            StringBuffer categoryBuffer = new StringBuffer(128);
176            for (CmsCategory category : categories) {
177                categoryBuffer.append(category.getPath());
178                categoryBuffer.append(' ');
179            }
180            if (categoryBuffer.length() > 0) {
181                Field field = new Field(
182                    CmsSearchField.FIELD_CATEGORY,
183                    categoryBuffer.toString().toLowerCase(),
184                    STORED_ANALYSED_TYPE);
185                add(field);
186            }
187        } else {
188            // synthetic "unknown" category if no category property defined for resource
189            Field field = new Field(
190                CmsSearchField.FIELD_CATEGORY,
191                CmsSearchCategoryCollector.UNKNOWN_CATEGORY,
192                STORED_ANALYSED_TYPE);
193            add(field);
194        }
195    }
196
197    /**
198     * @see org.opencms.search.I_CmsSearchDocument#addContentField(byte[])
199     */
200    public void addContentField(byte[] data) {
201
202        Field field = new StoredField(CmsSearchField.FIELD_CONTENT_BLOB, data);
203        m_doc.add(field);
204    }
205
206    /**
207     * @see org.opencms.search.I_CmsSearchDocument#addContentLocales(java.util.Collection)
208     */
209    public void addContentLocales(Collection<Locale> locales) {
210
211        // Lucene documents are not localized by defualt: Nothing to do here
212    }
213
214    /**
215     * @see org.opencms.search.I_CmsSearchDocument#addDateField(java.lang.String, long, boolean)
216     */
217    public void addDateField(String name, long date, boolean analyzed) {
218
219        Field field = new Field(
220            name,
221            DateTools.dateToString(new Date(date), DateTools.Resolution.MILLISECOND),
222            STORED_NOT_ANALYSED_TYPE);
223        add(field);
224
225        if (analyzed) {
226            field = new Field(
227                name + CmsSearchField.FIELD_DATE_LOOKUP_SUFFIX,
228                getDateTerms(date),
229                NOT_STORED_ANALYSED_TYPE);
230            add(field);
231        }
232    }
233
234    /**
235     * @see org.opencms.search.I_CmsSearchDocument#addFileSizeField(int)
236     */
237    public void addFileSizeField(int length) {
238
239        // a default lucene implementation does not have a field for the file size
240    }
241
242    /**
243     * @see org.opencms.search.I_CmsSearchDocument#addPathField(java.lang.String)
244     */
245    public void addPathField(String rootPath) {
246
247        String parentFolders = CmsSearchFieldConfiguration.getParentFolderTokens(rootPath);
248        Field field = new Field(CmsSearchField.FIELD_PARENT_FOLDERS, parentFolders, NOT_STORED_ANALYSED_TYPE);
249        add(field);
250    }
251
252    /**
253     * @see org.opencms.search.I_CmsSearchDocument#addResourceLocales(java.util.Collection)
254     */
255    public void addResourceLocales(Collection<Locale> locales) {
256
257        // A default lucene document has only one locale.
258    }
259
260    /**
261     * @see org.opencms.search.I_CmsSearchDocument#addRootPathField(java.lang.String)
262     */
263    public void addRootPathField(String rootPath) {
264
265        add(new StringField(CmsSearchField.FIELD_PATH, rootPath, Field.Store.YES));
266    }
267
268    /**
269     * @see org.opencms.search.I_CmsSearchDocument#addSearchField(org.opencms.search.fields.CmsSearchField, java.lang.String)
270     */
271    public void addSearchField(CmsSearchField field, String value) {
272
273        if (field instanceof CmsLuceneField) {
274            add(((CmsLuceneField)field).createField(value));
275        } else {
276            throw (new CmsRuntimeException(
277                Messages.get().container(Messages.LOG_INVALID_FIELD_CLASS_1, field.getClass().getName())));
278        }
279    }
280
281    /**
282     * @see org.opencms.search.I_CmsSearchDocument#addSuffixField(java.lang.String)
283     */
284    public void addSuffixField(String suffix) {
285
286        add(new StringField(CmsSearchField.FIELD_SUFFIX, suffix, Field.Store.YES));
287    }
288
289    /**
290     * @see org.opencms.search.I_CmsSearchDocument#addTypeField(java.lang.String)
291     */
292    public void addTypeField(String typeName) {
293
294        add(new StringField(CmsSearchField.FIELD_TYPE, typeName, Field.Store.YES));
295    }
296
297    /**
298     * @see org.opencms.search.I_CmsSearchDocument#getContentBlob()
299     */
300    public byte[] getContentBlob() {
301
302        IndexableField fieldContentBlob = m_doc.getField(CmsSearchField.FIELD_CONTENT_BLOB);
303        if (fieldContentBlob != null) {
304            try {
305                if (fieldContentBlob.readerValue() != null) {
306                    return org.apache.commons.io.IOUtils.toByteArray(
307                        fieldContentBlob.readerValue(),
308                        StandardCharsets.UTF_8);
309                }
310            } catch (IOException e) {
311                // TODO:
312            }
313        }
314        return null;
315    }
316
317    /**
318     * @see org.opencms.search.I_CmsSearchDocument#getDocument()
319     */
320    public Object getDocument() {
321
322        return m_doc;
323    }
324
325    /**
326     * @see org.opencms.search.I_CmsSearchDocument#getFieldNames()
327     */
328    public List<String> getFieldNames() {
329
330        List<String> result = new ArrayList<String>();
331        for (IndexableField field : m_doc.getFields()) {
332            result.add(field.name());
333        }
334        return result;
335    }
336
337    /**
338     * @see org.opencms.search.I_CmsSearchDocument#getFieldValueAsDate(java.lang.String)
339     */
340    public Date getFieldValueAsDate(String fieldName) {
341
342        String contentDate = getFieldValueAsString(fieldName);
343        if (contentDate != null) {
344            try {
345                return new Date(DateTools.stringToTime(contentDate));
346            } catch (ParseException e) {
347                // ignore and assume the given field name does not refer a date field
348            }
349        }
350        return null;
351    }
352
353    /**
354     * @see org.opencms.search.I_CmsSearchDocument#getFieldValueAsString(java.lang.String)
355     */
356    public String getFieldValueAsString(String fieldName) {
357
358        IndexableField fieldValue = m_doc.getField(fieldName);
359        if (fieldValue != null) {
360            return fieldValue.stringValue();
361        }
362        return null;
363    }
364
365    /**
366     * @see org.opencms.search.I_CmsSearchDocument#getMultivaluedFieldAsStringList(java.lang.String)
367     */
368    public List<String> getMultivaluedFieldAsStringList(String fieldName) {
369
370        return Collections.singletonList(getFieldValueAsString(fieldName));
371    }
372
373    /**
374     * @see org.opencms.search.I_CmsSearchDocument#getPath()
375     */
376    public String getPath() {
377
378        return getFieldValueAsString(CmsSearchField.FIELD_PATH);
379    }
380
381    /**
382     * @see org.opencms.search.I_CmsSearchDocument#getScore()
383     */
384    public float getScore() {
385
386        return m_score;
387    }
388
389    /**
390     * @see org.opencms.search.I_CmsSearchDocument#getType()
391     */
392    public String getType() {
393
394        return getFieldValueAsString(CmsSearchField.FIELD_TYPE);
395    }
396
397    /**
398     * @see org.opencms.search.I_CmsSearchDocument#setScore(float)
399     */
400    public void setScore(float score) {
401
402        m_score = score;
403    }
404
405    /**
406     * Adds a field to this document.<p>
407     *
408     * @param f the field to add
409     */
410    private void add(Field f) {
411
412        m_fields.put(f.name(), f);
413        m_doc.add(f);
414    }
415}