/*
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software GmbH & Co. KG, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.opencms.search;

import org.opencms.main.CmsLog;
import org.opencms.search.fields.CmsSearchField;
import org.opencms.util.CmsStringUtil;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;

import org.apache.commons.logging.Log;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector;

/**
 * Collects category information during a search process.<p>
 *
 * For every collected document the value of the field {@link CmsSearchField#FIELD_CATEGORY}
 * is read from the index and counted; documents without a readable category are counted
 * under {@link #UNKNOWN_CATEGORY}.<p>
 *
 * <b>Please note:</b> The calculation of the category count slows down the search time by an order
 * of magnitude. Make sure that you only use this feature if it's really required!
 * Be especially careful if your search result list can become large (&gt; 1000 documents), since in this case
 * overall system performance will certainly be impacted considerably when calculating the categories.<p>
 *
 * @since 6.0.0
 */
public class CmsSearchCategoryCollector extends SimpleCollector {

    /**
     * Class with an increasing counter to avoid multiple look ups and
     * object creations when dealing with the category count.<p>
     */
    private static class CmsCategoryCount {

        /** The category count. */
        int m_count;

        /**
         * Creates a new instance with an initial count of 1.<p>
         */
        CmsCategoryCount() {

            m_count = 1;
        }

        /**
         * Increases the count by one.<p>
         */
        void inc() {

            m_count++;
        }

        /**
         * Creates an Integer for this count.<p>
         *
         * @return an Integer for this count
         */
        Integer toInteger() {

            return Integer.valueOf(m_count);
        }
    }

    /** Category used in case the document belongs to no category. */
    public static final String UNKNOWN_CATEGORY = "unknown";

    /** The log object for this class. */
    private static final Log LOG = CmsLog.getLog(CmsSearchCategoryCollector.class);

    /** The internal map of the categories found, keyed by category name. */
    private final Map<String, CmsCategoryCount> m_categories;

    /** The doc base of the reader context currently being collected, see {@link #doSetNextReader(LeafReaderContext)}. */
    private int m_docBase;

    /** The index searcher used to read the documents. */
    private final IndexSearcher m_searcher;

    /**
     * Creates a new category search collector instance.<p>
     *
     * @param searcher the index searcher used
     */
    public CmsSearchCategoryCollector(IndexSearcher searcher) {

        super();
        m_docBase = 0;
        m_searcher = searcher;
        m_categories = new HashMap<String, CmsCategoryCount>();
    }

    /**
     * Convenience method to format a map of categories in a nice 2 column list, for example
     * for display of debugging output.<p>
     *
     * The first line contains the total number of categories, followed by one line per map entry
     * with the category name padded to 30 characters and the count.<p>
     *
     * @param categories the map to format
     * @return the formatted category map
     */
    public static final String formatCategoryMap(Map<String, Integer> categories) {

        // the buffer never leaves this method, so the unsynchronized builder is sufficient
        StringBuilder result = new StringBuilder(256);
        result.append("Total categories: ");
        result.append(categories.size());
        result.append('\n');
        Iterator<Map.Entry<String, Integer>> i = categories.entrySet().iterator();
        while (i.hasNext()) {
            Map.Entry<String, Integer> entry = i.next();
            result.append(CmsStringUtil.padRight(entry.getKey(), 30));
            // appending the Integer directly prints the same digits but tolerates a null count
            result.append(entry.getValue());
            result.append('\n');
        }
        return result.toString();
    }

    /**
     * Reads the category of the given document and increases the count for this category.<p>
     *
     * The given id is offset by the doc base of the current reader context before the
     * document is read from the index searcher. If reading the document fails, or the document
     * has no category field, the document is counted as {@link #UNKNOWN_CATEGORY}.<p>
     *
     * @see org.apache.lucene.search.SimpleCollector#collect(int)
     */
    @Override
    public void collect(int id) {

        String category = null;
        int rebasedId = m_docBase + id;
        try {
            Document doc = m_searcher.doc(rebasedId);
            category = doc.get(CmsSearchField.FIELD_CATEGORY);
        } catch (IOException e) {
            // deliberate best effort: category stays null and is counted as unknown below
            if (LOG.isDebugEnabled()) {
                LOG.debug(
                    Messages.get().getBundle().key(Messages.LOG_READ_CATEGORY_FAILED_1, Integer.valueOf(rebasedId)),
                    e);
            }
        }
        if (category == null) {
            category = UNKNOWN_CATEGORY;
        }
        CmsCategoryCount count = m_categories.get(category);
        if (count != null) {
            count.inc();
        } else {
            // a new counter already starts with a count of 1
            count = new CmsCategoryCount();
            m_categories.put(category, count);
        }
    }

    /**
     * Returns the category count result, the returned map
     * contains Strings (category names) mapped to an Integer (the count).<p>
     *
     * The result is a new {@link TreeMap}, so the categories are sorted by name and
     * changes to the returned map do not affect this collector.<p>
     *
     * @return the category count result
     */
    public Map<String, Integer> getCategoryCountResult() {

        Map<String, Integer> result = new TreeMap<String, Integer>();
        Iterator<Map.Entry<String, CmsCategoryCount>> i = m_categories.entrySet().iterator();
        while (i.hasNext()) {
            Map.Entry<String, CmsCategoryCount> entry = i.next();
            result.put(entry.getKey(), entry.getValue().toInteger());
        }
        return result;
    }

    /**
     * @see org.apache.lucene.search.Collector#scoreMode()
     */
    @Override
    public ScoreMode scoreMode() {

        // we do not need scores
        return ScoreMode.COMPLETE_NO_SCORES;
    }

    /**
     * Returns the current category count formatted with {@link #formatCategoryMap(Map)}.<p>
     *
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {

        return formatCategoryMap(getCategoryCountResult());
    }

    /**
     * Stores the doc base of the given reader context, which is added to the
     * ids passed to {@link #collect(int)}.<p>
     *
     * @see org.apache.lucene.search.SimpleCollector#doSetNextReader(org.apache.lucene.index.LeafReaderContext)
     */
    @Override
    protected void doSetNextReader(LeafReaderContext ctx) {

        m_docBase = ctx.docBase;
    }
}