001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.search;
029
030import org.opencms.main.CmsLog;
031import org.opencms.search.fields.CmsSearchField;
032import org.opencms.util.CmsStringUtil;
033
034import java.io.IOException;
035import java.util.HashMap;
036import java.util.Iterator;
037import java.util.Map;
038import java.util.TreeMap;
039
040import org.apache.commons.logging.Log;
041import org.apache.lucene.document.Document;
042import org.apache.lucene.index.LeafReaderContext;
043import org.apache.lucene.search.IndexSearcher;
044import org.apache.lucene.search.ScoreMode;
045import org.apache.lucene.search.SimpleCollector;
046
047/**
048 * Collects category information during a search process.<p>
049 *
050 * <b>Please note:</b> The calculation of the category count slows down the search time by an order
051 * of magnitude. Make sure that you only use this feature if it's really required!
052 * Be especially careful if your search result list can become large (> 1000 documents), since in this case
053 * overall system performance will certainly be impacted considerably when calculating the categories.<p>
054 *
055 * @since 6.0.0
056 */
057public class CmsSearchCategoryCollector extends SimpleCollector {
058
059    /**
060     * Class with an increasing counter to avoid multiple look ups and
061     * object creations when dealing with the category count.<p>
062     */
063    private static class CmsCategroyCount {
064
065        /** The category count. */
066        int m_count;
067
068        /**
069         * Creates a new instance with a initial count of 1.<p>
070         */
071        CmsCategroyCount() {
072
073            m_count = 1;
074        }
075
076        /**
077         * Increases the count by one.<p>
078         */
079        void inc() {
080
081            m_count++;
082        }
083
084        /**
085         * Creates an Integer for this count.<p>
086         *
087         * @return an Integer for this count
088         */
089        Integer toInteger() {
090
091            return Integer.valueOf(m_count);
092        }
093    }
094
095    /** Category used in case the document belongs to no category. */
096    public static final String UNKNOWN_CATEGORY = "unknown";
097
098    /** The log object for this class. */
099    private static final Log LOG = CmsLog.getLog(CmsSearchCategoryCollector.class);
100
101    /** The internal map of the categories found. */
102    private Map<String, CmsCategroyCount> m_categories;
103
104    /** The index of the document reader. */
105    private int m_docBase;
106
107    /** The index searcher used. */
108    private IndexSearcher m_searcher;
109
110    /**
111     * Creates a new category search collector instance.<p>
112     *
113     * @param searcher the index searcher used
114     */
115    public CmsSearchCategoryCollector(IndexSearcher searcher) {
116
117        super();
118        m_docBase = 0;
119        m_searcher = searcher;
120        m_categories = new HashMap<String, CmsCategroyCount>();
121    }
122
123    /**
124     * Convenience method to format a map of categories in a nice 2 column list, for example
125     * for display of debugging output.<p>
126     *
127     * @param categories the map to format
128     * @return the formatted category map
129     */
130    public static final String formatCategoryMap(Map<String, Integer> categories) {
131
132        StringBuffer result = new StringBuffer(256);
133        result.append("Total categories: ");
134        result.append(categories.size());
135        result.append('\n');
136        Iterator<Map.Entry<String, Integer>> i = categories.entrySet().iterator();
137        while (i.hasNext()) {
138            Map.Entry<String, Integer> entry = i.next();
139            result.append(CmsStringUtil.padRight(entry.getKey(), 30));
140            result.append(entry.getValue().intValue());
141            result.append('\n');
142        }
143        return result.toString();
144    }
145
146    /**
147     * @see org.apache.lucene.search.SimpleCollector#collect(int)
148     */
149    @Override
150    public void collect(int id) {
151
152        String category = null;
153        int rebasedId = m_docBase + id;
154        try {
155            Document doc = m_searcher.doc(rebasedId);
156            category = doc.get(CmsSearchField.FIELD_CATEGORY);
157        } catch (IOException e) {
158            // category will be null
159            if (LOG.isDebugEnabled()) {
160                LOG.debug(
161                    Messages.get().getBundle().key(Messages.LOG_READ_CATEGORY_FAILED_1, Integer.valueOf(rebasedId)),
162                    e);
163            }
164
165        }
166        if (category == null) {
167            category = UNKNOWN_CATEGORY;
168        }
169        CmsCategroyCount count = m_categories.get(category);
170        if (count != null) {
171            count.inc();
172        } else {
173            count = new CmsCategroyCount();
174            m_categories.put(category, count);
175        }
176    }
177
178    /**
179     * Returns the category count result, the returned map
180     * contains Strings (category names) mapped to an Integer (the count).<p>
181     *
182     * @return the category count result
183     */
184    public Map<String, Integer> getCategoryCountResult() {
185
186        Map<String, Integer> result = new TreeMap<String, Integer>();
187        Iterator<Map.Entry<String, CmsCategroyCount>> i = m_categories.entrySet().iterator();
188        while (i.hasNext()) {
189            Map.Entry<String, CmsCategroyCount> entry = i.next();
190            result.put(entry.getKey(), entry.getValue().toInteger());
191        }
192        return result;
193    }
194
195    /**
196     * @see org.apache.lucene.search.Collector#scoreMode()
197     */
198    public ScoreMode scoreMode() {
199
200        // we do not need scores
201        return ScoreMode.COMPLETE_NO_SCORES;
202    }
203
204    /**
205     * @see java.lang.Object#toString()
206     */
207    @Override
208    public String toString() {
209
210        return formatCategoryMap(getCategoryCountResult());
211    }
212
213    /**
214     * @see org.apache.lucene.search.SimpleCollector#doSetNextReader(org.apache.lucene.index.LeafReaderContext)
215     */
216    @Override
217    protected void doSetNextReader(LeafReaderContext ctx) {
218
219        m_docBase = ctx.docBase;
220    }
221}