001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.search.fields; 029 030import org.opencms.file.CmsPropertyDefinition; 031 032import java.util.ArrayList; 033import java.util.HashMap; 034import java.util.HashSet; 035import java.util.Iterator; 036import java.util.List; 037import java.util.Map; 038import java.util.Set; 039 040import org.apache.lucene.analysis.Analyzer; 041import org.apache.lucene.analysis.core.WhitespaceAnalyzer; 042import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper; 043 044/** 045 * Describes a configuration of fields that are used in building a search index.<p> 046 * 047 * @since 7.0.0 048 */ 049public class CmsLuceneFieldConfiguration extends CmsSearchFieldConfiguration { 050 051 /** 052 * The default for the standard search configuration.<p> 053 * 054 * This defines the default that is used in case no "standard" field configuration 055 * is defined in <code>opencms-search.xml</code>.<p> 056 */ 057 public static final CmsLuceneFieldConfiguration DEFAULT_STANDARD = createStandardConfiguration(); 058 059 /** The description for the standard field configuration. */ 060 public static final String STR_STANDARD_DESCRIPTION = "The standard OpenCms search index field configuration."; 061 062 /** The fields that will be returned by a regular search (all stored and not lazy fields). */ 063 private static Set<String> m_returnFields = new HashSet<String>(); 064 065 /** The serial version id. */ 066 private static final long serialVersionUID = 8011265789649614792L; 067 068 static { 069 m_returnFields.add(CmsSearchField.FIELD_CATEGORY); 070 m_returnFields.add(CmsSearchField.FIELD_DATE_CONTENT); 071 m_returnFields.add(CmsSearchField.FIELD_DATE_CREATED); 072 m_returnFields.add(CmsSearchField.FIELD_DATE_EXPIRED); 073 m_returnFields.add(CmsSearchField.FIELD_DATE_LASTMODIFIED); 074 m_returnFields.add(CmsSearchField.FIELD_DATE_RELEASED); 075 m_returnFields.add(CmsSearchField.FIELD_PARENT_FOLDERS); 076 m_returnFields.add(CmsSearchField.FIELD_PATH); 077 m_returnFields.add(CmsSearchField.FIELD_SUFFIX); 078 m_returnFields.add(CmsSearchField.FIELD_TYPE); 079 } 080 081 /** Contains all names of the fields that are used in the excerpt. */ 082 private List<String> m_excerptFieldNames; 083 084 /** The field added flag. */ 085 private boolean m_fieldAdded; 086 087 /** 088 * Creates the default standard search configuration.<p> 089 * 090 * This defines the default that is used in case no "standard" field configuration 091 * is defined in <code>opencms-search.xml</code>.<p> 092 * 093 * @return the default standard search configuration 094 */ 095 private static CmsLuceneFieldConfiguration createStandardConfiguration() { 096 097 CmsLuceneFieldConfiguration result = new CmsLuceneFieldConfiguration(); 098 result.setName(STR_STANDARD); 099 result.setDescription(STR_STANDARD_DESCRIPTION); 100 101 CmsLuceneField field; 102 // content mapping, store as compressed value 103 field = new CmsLuceneField( 104 CmsSearchField.FIELD_CONTENT, 105 "%(key.field.content)", 106 true, 107 true, 108 true, 109 true, 110 true, 111 null, 112 null); 113 field.addMapping(new CmsSearchFieldMapping(CmsSearchFieldMappingType.CONTENT, null, true)); 114 result.addField(field); 115 116 // title mapping as a keyword 117 field = new CmsLuceneField( 118 CmsSearchField.FIELD_TITLE, 119 CmsLuceneField.IGNORE_DISPLAY_NAME, 120 true, 121 true, 122 false, 123 false, 124 null); 125 field.addMapping( 126 new CmsSearchFieldMapping(CmsSearchFieldMappingType.PROPERTY, CmsPropertyDefinition.PROPERTY_TITLE, true)); 127 result.addField(field); 128 129 // title mapping as indexed field 130 field = new CmsLuceneField(CmsSearchField.FIELD_TITLE_UNSTORED, "%(key.field.title)", false, true); 131 field.addMapping( 132 new CmsSearchFieldMapping(CmsSearchFieldMappingType.PROPERTY, CmsPropertyDefinition.PROPERTY_TITLE, true)); 133 result.addField(field); 134 135 // mapping of "Keywords" property to search field with the same name 136 field = new CmsLuceneField(CmsSearchField.FIELD_KEYWORDS, "%(key.field.keywords)", true, true); 137 field.addMapping( 138 new CmsSearchFieldMapping( 139 CmsSearchFieldMappingType.PROPERTY, 140 CmsPropertyDefinition.PROPERTY_KEYWORDS, 141 true)); 142 result.addField(field); 143 144 // mapping of "Description" property to search field with the same name 145 field = new CmsLuceneField(CmsSearchField.FIELD_DESCRIPTION, "%(key.field.description)", true, true); 146 field.addMapping( 147 new CmsSearchFieldMapping( 148 CmsSearchFieldMappingType.PROPERTY, 149 CmsPropertyDefinition.PROPERTY_DESCRIPTION, 150 true)); 151 result.addField(field); 152 153 // "meta" field is a combination of "Title", "Keywords" and "Description" properties 154 field = new CmsLuceneField(CmsSearchField.FIELD_META, "%(key.field.meta)", false, true); 155 field.addMapping( 156 new CmsSearchFieldMapping(CmsSearchFieldMappingType.PROPERTY, CmsPropertyDefinition.PROPERTY_TITLE, true)); 157 field.addMapping( 158 new CmsSearchFieldMapping( 159 CmsSearchFieldMappingType.PROPERTY, 160 CmsPropertyDefinition.PROPERTY_KEYWORDS, 161 true)); 162 field.addMapping( 163 new CmsSearchFieldMapping( 164 CmsSearchFieldMappingType.PROPERTY, 165 CmsPropertyDefinition.PROPERTY_DESCRIPTION, 166 true)); 167 result.addField(field); 168 169 return result; 170 } 171 172 /** 173 * 174 * @see org.opencms.search.fields.CmsSearchFieldConfiguration#addField(org.opencms.search.fields.CmsSearchField) 175 */ 176 @Override 177 public void addField(CmsSearchField field) { 178 179 super.addField(field); 180 m_fieldAdded = true; 181 } 182 183 /** 184 * Returns an analyzer that wraps the given base analyzer with the analyzers of this individual field configuration.<p> 185 * 186 * @param analyzer the base analyzer to wrap 187 * 188 * @return an analyzer that wraps the given base analyzer with the analyzers of this individual field configuration 189 */ 190 public Analyzer getAnalyzer(Analyzer analyzer) { 191 192 // parent folder and last modified lookup fields must use whitespace analyzer 193 WhitespaceAnalyzer ws = new WhitespaceAnalyzer(); 194 Map<String, Analyzer> analyzers = new HashMap<String, Analyzer>(); 195 // first make map the default hard coded fields 196 analyzers.put(CmsSearchField.FIELD_PARENT_FOLDERS, ws); 197 analyzers.put(CmsSearchField.FIELD_CATEGORY, ws); 198 analyzers.put(CmsSearchField.FIELD_DATE_LASTMODIFIED_LOOKUP, ws); 199 analyzers.put(CmsSearchField.FIELD_DATE_CREATED_LOOKUP, ws); 200 201 for (CmsLuceneField field : getLuceneFields()) { 202 Analyzer fieldAnalyzer = field.getAnalyzer(); 203 if (fieldAnalyzer != null) { 204 // this field has an individual analyzer configured 205 analyzers.put(field.getName(), fieldAnalyzer); 206 } 207 } 208 // return the individual field configured analyzer 209 return new PerFieldAnalyzerWrapper(analyzer, analyzers); 210 } 211 212 /** 213 * Returns a list of all field names (Strings) that are used in generating the search excerpt.<p> 214 * 215 * @return a list of all field names (Strings) that are used in generating the search excerpt 216 */ 217 public List<String> getExcerptFieldNames() { 218 219 if (m_excerptFieldNames == null) { 220 // lazy initialize the field names 221 m_excerptFieldNames = new ArrayList<String>(); 222 Iterator<CmsSearchField> i = getFields().iterator(); 223 while (i.hasNext()) { 224 CmsLuceneField field = (CmsLuceneField)i.next(); 225 if (field.isInExcerptAndStored()) { 226 m_excerptFieldNames.add(field.getName()); 227 } 228 } 229 } 230 231 // create a copy of the list to prevent changes in other classes 232 return new ArrayList<String>(m_excerptFieldNames); 233 } 234 235 /** 236 * Returns the field names used for the excerpt generation.<p> 237 * 238 * @return the field names used for the excerpt generation 239 */ 240 public Set<String> getExcerptFields() { 241 242 return new HashSet<String>(getExcerptFieldNames()); 243 } 244 245 /** 246 * Returns a list of the concrete Lucene search fields.<p> 247 * 248 * @return a list of lucene search fields 249 */ 250 public List<CmsLuceneField> getLuceneFields() { 251 252 List<CmsLuceneField> result = new ArrayList<CmsLuceneField>(); 253 for (CmsSearchField field : getFields()) { 254 if (field instanceof CmsLuceneField) { 255 result.add((CmsLuceneField)field); 256 } 257 } 258 return result; 259 } 260 261 /** 262 * Returns the field names used for a regular result.<p> 263 * 264 * @return the field names used for a regular result 265 */ 266 public Set<String> getReturnFields() { 267 268 if (m_fieldAdded) { 269 for (CmsSearchField field : getLuceneFields()) { 270 if (field.isStored() && !LAZY_FIELDS.contains(field.getName())) { 271 m_returnFields.add(field.getName()); 272 } 273 } 274 } 275 m_fieldAdded = false; 276 return m_returnFields; 277 } 278}