001/*
002 * File   : $Source$
003 * Date   : $Date$
004 * Version: $Revision$
005 *
006 * This library is part of OpenCms -
007 * the Open Source Content Management System
008 *
009 * Copyright (C) 2002 - 2009 Alkacon Software (http://www.alkacon.com)
010 *
011 * This library is free software; you can redistribute it and/or
012 * modify it under the terms of the GNU Lesser General Public
013 * License as published by the Free Software Foundation; either
014 * version 2.1 of the License, or (at your option) any later version.
015 *
016 * This library is distributed in the hope that it will be useful,
017 * but WITHOUT ANY WARRANTY; without even the implied warranty of
018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
019 * Lesser General Public License for more details.
020 *
021 * For further information about Alkacon Software, please see the
022 * company website: http://www.alkacon.com
023 *
024 * For further information about OpenCms, please see the
025 * project website: http://www.opencms.org
026 *
027 * You should have received a copy of the GNU Lesser General Public
028 * License along with this library; if not, write to the Free Software
029 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
030 */
031
032package org.opencms.search;
033
034import org.apache.lucene.document.*;
035import org.apache.lucene.index.IndexOptions;
036import org.apache.lucene.index.IndexableField;
037import org.apache.tika.io.IOUtils;
038import org.opencms.main.CmsRuntimeException;
039import org.opencms.main.OpenCms;
040import org.opencms.relations.CmsCategory;
041import org.opencms.search.fields.CmsLuceneField;
042import org.opencms.search.fields.CmsSearchField;
043import org.opencms.search.fields.CmsSearchFieldConfiguration;
044
045import java.io.IOException;
046import java.text.ParseException;
047import java.util.*;
048
049/**
050 * A Lucene search document implementation.<p>
051 */
052public class CmsLuceneDocument implements I_CmsSearchDocument {
053
054    /**
055     * Type for a stored-only field.
056     */
057    public static final FieldType NOT_STORED_ANALYSED_TYPE;
058
059    /**
060     * Type for a stored-and analyzed fields.
061     */
062    public static final FieldType STORED_ANALYSED_TYPE;
063
064    /**
065     * Type for a stored-only field.
066     */
067    public static final FieldType STORED_NOT_ANALYSED_TYPE;
068
069    static {
070        STORED_ANALYSED_TYPE = new FieldType();
071        STORED_ANALYSED_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
072        STORED_ANALYSED_TYPE.setOmitNorms(false);
073        STORED_ANALYSED_TYPE.setStored(true);
074        STORED_ANALYSED_TYPE.setTokenized(true);
075        STORED_ANALYSED_TYPE.setStoreTermVectorPositions(false);
076        STORED_ANALYSED_TYPE.setStoreTermVectorOffsets(false);
077        STORED_ANALYSED_TYPE.setStoreTermVectors(false);
078        STORED_ANALYSED_TYPE.freeze();
079    }
080
081    static {
082        NOT_STORED_ANALYSED_TYPE = new FieldType();
083        NOT_STORED_ANALYSED_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
084        NOT_STORED_ANALYSED_TYPE.setOmitNorms(false);
085        NOT_STORED_ANALYSED_TYPE.setStored(false);
086        NOT_STORED_ANALYSED_TYPE.setTokenized(true);
087        NOT_STORED_ANALYSED_TYPE.setStoreTermVectorPositions(false);
088        NOT_STORED_ANALYSED_TYPE.setStoreTermVectorOffsets(false);
089        NOT_STORED_ANALYSED_TYPE.setStoreTermVectors(false);
090        NOT_STORED_ANALYSED_TYPE.freeze();
091    }
092
093    static {
094        STORED_NOT_ANALYSED_TYPE = new FieldType();
095        STORED_NOT_ANALYSED_TYPE.setIndexOptions(IndexOptions.DOCS);
096        STORED_NOT_ANALYSED_TYPE.setOmitNorms(false);
097        STORED_NOT_ANALYSED_TYPE.setStored(true);
098        STORED_NOT_ANALYSED_TYPE.setTokenized(false);
099        STORED_NOT_ANALYSED_TYPE.setStoreTermVectorPositions(false);
100        STORED_NOT_ANALYSED_TYPE.setStoreTermVectorOffsets(false);
101        STORED_NOT_ANALYSED_TYPE.setStoreTermVectors(false);
102        STORED_NOT_ANALYSED_TYPE.freeze();
103    }
104
105    /** The Lucene document. */
106    private Document m_doc;
107
108    /** The fields stored in this document. */
109    private Map<String, Field> m_fields;
110
111    /** Holds the score for this document. */
112    private float m_score;
113
114    /**
115     * Public constructor.<p>
116     *
117     * @param doc the Lucene document
118     */
119    public CmsLuceneDocument(Document doc) {
120
121        m_doc = doc;
122        m_fields = new HashMap<String, Field>();
123    }
124
125    /**
126     * Generate a list of date terms for the optimized date range search.<p>
127     *
128     * @param date the date for get the date terms for
129     *
130     * @return a list of date terms for the optimized date range search
131     *
132     * @see CmsSearchIndex#getDateRangeSpan(long, long)
133     */
134    public static String getDateTerms(long date) {
135
136        Calendar cal = Calendar.getInstance(OpenCms.getLocaleManager().getTimeZone());
137        cal.setTimeInMillis(date);
138        String day = CmsSearchIndex.DATES[cal.get(5)];
139        String month = CmsSearchIndex.DATES[(cal.get(2) + 1)];
140        String year = String.valueOf(cal.get(1));
141
142        StringBuffer result = new StringBuffer();
143        result.append(year);
144        result.append(month);
145        result.append(day);
146        result.append(' ');
147        result.append(year);
148        result.append(month);
149        result.append(' ');
150        result.append(year);
151        return result.toString();
152    }
153
154    /**
155     * @see org.opencms.search.I_CmsSearchDocument#addCategoryField(java.util.List)
156     */
157    public void addCategoryField(List<CmsCategory> categories) {
158
159        if ((categories != null) && (categories.size() > 0)) {
160
161            StringBuffer categoryBuffer = new StringBuffer(128);
162            for (CmsCategory category : categories) {
163                categoryBuffer.append(category.getPath());
164                categoryBuffer.append(' ');
165            }
166            if (categoryBuffer.length() > 0) {
167                Field field = new Field(
168                    CmsSearchField.FIELD_CATEGORY,
169                    categoryBuffer.toString().toLowerCase(),
170                    STORED_ANALYSED_TYPE);
171                add(field);
172            }
173        } else {
174            // synthetic "unknown" category if no category property defined for resource
175            Field field = new Field(
176                CmsSearchField.FIELD_CATEGORY,
177                CmsSearchCategoryCollector.UNKNOWN_CATEGORY,
178                STORED_ANALYSED_TYPE);
179            add(field);
180        }
181    }
182
183    /**
184     * @see org.opencms.search.I_CmsSearchDocument#addContentField(byte[])
185     */
186    public void addContentField(byte[] data) {
187
188        Field field = new StoredField(CmsSearchField.FIELD_CONTENT_BLOB, data);
189        m_doc.add(field);
190    }
191
192    /**
193     * @see org.opencms.search.I_CmsSearchDocument#addContentLocales(java.util.Collection)
194     */
195    public void addContentLocales(Collection<Locale> locales) {
196
197        // Lucene documents are not localized by defualt: Nothing to do here
198    }
199
200    /**
201     * @see org.opencms.search.I_CmsSearchDocument#addDateField(java.lang.String, long, boolean)
202     */
203    public void addDateField(String name, long date, boolean analyzed) {
204
205        Field field = new Field(
206            name,
207            DateTools.dateToString(new Date(date), DateTools.Resolution.MILLISECOND),
208            STORED_NOT_ANALYSED_TYPE);
209        add(field);
210
211        if (analyzed) {
212            field = new Field(
213                name + CmsSearchField.FIELD_DATE_LOOKUP_SUFFIX,
214                getDateTerms(date),
215                NOT_STORED_ANALYSED_TYPE);
216            add(field);
217        }
218    }
219
220    /**
221     * @see org.opencms.search.I_CmsSearchDocument#addFileSizeField(int)
222     */
223    public void addFileSizeField(int length) {
224
225        // a default lucene implementation does not have a field for the file size
226    }
227
228    /**
229     * @see org.opencms.search.I_CmsSearchDocument#addPathField(java.lang.String)
230     */
231    public void addPathField(String rootPath) {
232
233        String parentFolders = CmsSearchFieldConfiguration.getParentFolderTokens(rootPath);
234        Field field = new Field(CmsSearchField.FIELD_PARENT_FOLDERS, parentFolders, NOT_STORED_ANALYSED_TYPE);
235        add(field);
236    }
237
238    /**
239     * @see org.opencms.search.I_CmsSearchDocument#addResourceLocales(java.util.Collection)
240     */
241    public void addResourceLocales(Collection<Locale> locales) {
242
243        // A default lucene document has only one locale.
244    }
245
246    /**
247     * @see org.opencms.search.I_CmsSearchDocument#addRootPathField(java.lang.String)
248     */
249    public void addRootPathField(String rootPath) {
250
251        add(new StringField(CmsSearchField.FIELD_PATH, rootPath, Field.Store.YES));
252    }
253
254    /**
255     * @see org.opencms.search.I_CmsSearchDocument#addSearchField(org.opencms.search.fields.CmsSearchField, java.lang.String)
256     */
257    public void addSearchField(CmsSearchField field, String value) {
258
259        if (field instanceof CmsLuceneField) {
260            add(((CmsLuceneField)field).createField(value));
261        } else {
262            throw (new CmsRuntimeException(
263                Messages.get().container(Messages.LOG_INVALID_FIELD_CLASS_1, field.getClass().getName())));
264        }
265    }
266
267    /**
268     * @see org.opencms.search.I_CmsSearchDocument#addSuffixField(java.lang.String)
269     */
270    public void addSuffixField(String suffix) {
271
272        add(new StringField(CmsSearchField.FIELD_SUFFIX, suffix, Field.Store.YES));
273    }
274
275    /**
276     * @see org.opencms.search.I_CmsSearchDocument#addTypeField(java.lang.String)
277     */
278    public void addTypeField(String typeName) {
279
280        add(new StringField(CmsSearchField.FIELD_TYPE, typeName, Field.Store.YES));
281    }
282
283    /**
284     * @see org.opencms.search.I_CmsSearchDocument#getContentBlob()
285     */
286    public byte[] getContentBlob() {
287
288        IndexableField fieldContentBlob = m_doc.getField(CmsSearchField.FIELD_CONTENT_BLOB);
289        if (fieldContentBlob != null) {
290            try {
291                if (fieldContentBlob.readerValue() != null) {
292                    return IOUtils.toByteArray(fieldContentBlob.readerValue());
293                }
294            } catch (IOException e) {
295                // TODO:
296            }
297        }
298        return null;
299    }
300
301    /**
302     * @see org.opencms.search.I_CmsSearchDocument#getDocument()
303     */
304    public Object getDocument() {
305
306        return m_doc;
307    }
308
309    /**
310     * @see org.opencms.search.I_CmsSearchDocument#getFieldNames()
311     */
312    public List<String> getFieldNames() {
313
314        List<String> result = new ArrayList<String>();
315        for (IndexableField field : m_doc.getFields()) {
316            result.add(field.name());
317        }
318        return result;
319    }
320
321    /**
322     * @see org.opencms.search.I_CmsSearchDocument#getFieldValueAsDate(java.lang.String)
323     */
324    public Date getFieldValueAsDate(String fieldName) {
325
326        String contentDate = getFieldValueAsString(fieldName);
327        if (contentDate != null) {
328            try {
329                return new Date(DateTools.stringToTime(contentDate));
330            } catch (ParseException e) {
331                // ignore and assume the given field name does not refer a date field
332            }
333        }
334        return null;
335    }
336
337    /**
338     * @see org.opencms.search.I_CmsSearchDocument#getFieldValueAsString(java.lang.String)
339     */
340    public String getFieldValueAsString(String fieldName) {
341
342        IndexableField fieldValue = m_doc.getField(fieldName);
343        if (fieldValue != null) {
344            return fieldValue.stringValue();
345        }
346        return null;
347    }
348
349    /**
350     * @see org.opencms.search.I_CmsSearchDocument#getMultivaluedFieldAsStringList(java.lang.String)
351     */
352    public List<String> getMultivaluedFieldAsStringList(String fieldName) {
353
354        return Collections.singletonList(getFieldValueAsString(fieldName));
355    }
356
357    /**
358     * @see org.opencms.search.I_CmsSearchDocument#getPath()
359     */
360    public String getPath() {
361
362        return getFieldValueAsString(CmsSearchField.FIELD_PATH);
363    }
364
365    /**
366     * @see org.opencms.search.I_CmsSearchDocument#getScore()
367     */
368    public float getScore() {
369
370        return m_score;
371    }
372
373    /**
374     * @see org.opencms.search.I_CmsSearchDocument#getType()
375     */
376    public String getType() {
377
378        return getFieldValueAsString(CmsSearchField.FIELD_TYPE);
379    }
380
381    /**
382     * @see org.opencms.search.I_CmsSearchDocument#setScore(float)
383     */
384    public void setScore(float score) {
385
386        m_score = score;
387    }
388
389    /**
390     * Adds a field to this document.<p>
391     *
392     * @param f the field to add
393     */
394    private void add(Field f) {
395
396        m_fields.put(f.name(), f);
397        m_doc.add(f);
398    }
399}