001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.search.solr.updateprocessors; 029 030import java.util.regex.Pattern; 031import java.util.regex.PatternSyntaxException; 032 033import org.apache.solr.common.SolrException; 034import org.apache.solr.common.SolrException.ErrorCode; 035import org.apache.solr.common.util.NamedList; 036import org.apache.solr.request.SolrQueryRequest; 037import org.apache.solr.response.SolrQueryResponse; 038import org.apache.solr.update.processor.UpdateRequestProcessor; 039import org.apache.solr.update.processor.UpdateRequestProcessorFactory; 040 041/** 042 * An updated processor that applies a configured regex to any 043 * CharSequence values found in the source field, replaces 044 * any matches with the configured replacement string, and writes 045 * the resulting string to the target field. 046 * 047 * <p> 048 * For example, with the configuration listed below, the sequence in field <code>path</code> 049 * will be matched against the regex <code>(.*)_([a-z]{2}(?:_[A-Z]{2})?)((?:\.[^\.]*)?)$</code>, 050 * where matched parts will be replaced by <code>$1$3</code>, i.e., the first and third group of the match. 051 * The resulting sequence will be written to <code>path_remove_locale</code>. 052 * </p> 053 * 054 * <pre class="prettyprint"> 055 * <processor class="org.opencms.search.solr.updateprocessors.CmsSolrCopyModifiedUpateProcessorFactory"> 056 * <str name="source">path</str> 057 * <str name="target">path_remove_locale</str> 058 * <str name="regex">(.*)_([a-z]{2}(?:_[A-Z]{2})?)((?:\.[^\.]*)?)$</str> 059 * <str name="replacement">$1$3</str> 060 * </processor></pre> 061 * 062 * <p> 063 * If, e.g., a document with value "document_de.txt" in field <code>source</code> is processed, the field 064 * <code>path_remove_locale</code> with value "document.txt will be added. 065 * </p> 066 * 067 * <p> 068 * To add the update processor to your installation, define an update processor chain as in the following example. 069 * </p> 070 * 071 * <pre class="prettyprint"> 072 * <updateRequestProcessorChain name="mychain" default="true"> 073 * <processor class="org.opencms.search.solr.updateprocessors.CmsSolrCopyModifiedUpateProcessorFactory"> 074 * <str name="source">path</str> 075 * <str name="target">path_remove_locale</str> 076 * <str name="regex">(.*)_([a-z]{2}(?:_[A-Z]{2})?)((?:\.[^\.]*)?)$</str> 077 * <str name="replacement">$1$3</str> 078 * </processor> 079 * <processor class="solr.LogUpdateProcessorFactory" /> 080 * <processor class="solr.RunUpdateProcessorFactory" /> 081 * </updateRequestProcessorChain></pre> 082 * 083 * @see org.apache.solr.update.processor.UpdateRequestProcessorChain 084 * 085 * @see java.util.regex.Pattern 086 */ 087public class CmsSolrCopyModifiedUpateProcessorFactory extends UpdateRequestProcessorFactory { 088 089 /** Name of the parameter, the regex is provided. */ 090 private static final String PARAM_REGEX = "regex"; 091 /** Name of the parameter, the replacement string is provided. */ 092 private static final String PARAM_REPLACEMENT = "replacement"; 093 /** Name of the parameter, the source field is provided. */ 094 private static final String PARAM_SOURCE = "source"; 095 /** Name of the parameter, the target field is provided. */ 096 private static final String PARAM_TARGET = "target"; 097 098 /** The pattern to match the source against. */ 099 private Pattern m_regex; 100 /** The replacement string for matches. */ 101 private String m_replacement; 102 /** The field, the value that is matched against is read from. */ 103 private String m_source; 104 /** The field, the modified value is written to. */ 105 private String m_target; 106 107 /** 108 * @see org.apache.solr.update.processor.UpdateRequestProcessorFactory#getInstance(org.apache.solr.request.SolrQueryRequest, org.apache.solr.response.SolrQueryResponse, org.apache.solr.update.processor.UpdateRequestProcessor) 109 */ 110 @Override 111 public UpdateRequestProcessor getInstance( 112 SolrQueryRequest req, 113 SolrQueryResponse rsp, 114 UpdateRequestProcessor next) { 115 116 return new CmsSolrCopyModifiedUpateProcessor(m_source, m_target, m_regex, m_replacement, next); 117 } 118 119 /** 120 * Read the parameters on initialization. 121 * 122 * @see org.apache.solr.update.processor.UpdateRequestProcessorFactory#init(org.apache.solr.common.util.NamedList) 123 */ 124 @Override 125 public void init(NamedList args) { 126 127 Object regex = args.remove(PARAM_REGEX); 128 if (null == regex) { 129 throw new SolrException(ErrorCode.SERVER_ERROR, "Missing required init parameter: " + PARAM_REGEX); 130 } 131 try { 132 m_regex = Pattern.compile(regex.toString()); 133 } catch (PatternSyntaxException e) { 134 throw new SolrException(ErrorCode.SERVER_ERROR, "Invalid regex: " + regex, e); 135 } 136 137 Object replacement = args.remove(PARAM_REPLACEMENT); 138 if (null == replacement) { 139 throw new SolrException(ErrorCode.SERVER_ERROR, "Missing required init parameter: " + PARAM_REPLACEMENT); 140 } 141 m_replacement = replacement.toString(); 142 143 Object source = args.remove(PARAM_SOURCE); 144 if (null == source) { 145 throw new SolrException(ErrorCode.SERVER_ERROR, "Missing required init parameter: " + PARAM_SOURCE); 146 } 147 m_source = source.toString(); 148 149 Object target = args.remove(PARAM_TARGET); 150 if (null == target) { 151 throw new SolrException(ErrorCode.SERVER_ERROR, "Missing required init parameter: " + PARAM_TARGET); 152 } 153 m_target = target.toString(); 154 155 } 156 157}