001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.search.solr.updateprocessors;
029
030import java.util.regex.Pattern;
031import java.util.regex.PatternSyntaxException;
032
033import org.apache.solr.common.SolrException;
034import org.apache.solr.common.SolrException.ErrorCode;
035import org.apache.solr.common.util.NamedList;
036import org.apache.solr.request.SolrQueryRequest;
037import org.apache.solr.response.SolrQueryResponse;
038import org.apache.solr.update.processor.UpdateRequestProcessor;
039import org.apache.solr.update.processor.UpdateRequestProcessorFactory;
040
041/**
042 * An updated processor that applies a configured regex to any
043 * CharSequence values found in the source field, replaces
044 * any matches with the configured replacement string, and writes
045 * the resulting string to the target field.
046 *
047 * <p>
048 * For example, with the configuration listed below, the sequence in field <code>path</code>
049 * will be matched against the regex <code>(.*)_([a-z]{2}(?:_[A-Z]{2})?)((?:\.[^\.]*)?)$</code>,
050 * where matched parts will be replaced by <code>$1$3</code>, i.e., the first and third group of the match.
051 * The resulting sequence will be written to <code>path_remove_locale</code>.
052 * </p>
053 *
054 * <pre class="prettyprint">
055 * &lt;processor class="org.opencms.search.solr.updateprocessors.CmsSolrCopyModifiedUpateProcessorFactory"&gt;
056 *   &lt;str name="source"&gt;path&lt;/str&gt;
057 *   &lt;str name="target"&gt;path_remove_locale&lt;/str&gt;
058 *   &lt;str name="regex"&gt;(.*)_([a-z]{2}(?:_[A-Z]{2})?)((?:\.[^\.]*)?)$&lt;/str&gt;
059 *   &lt;str name="replacement"&gt;$1$3&lt;/str&gt;
060 * &lt;/processor&gt;</pre>
061 *
062 * <p>
063 * If, e.g., a document with value "document_de.txt" in field <code>source</code> is processed, the field
064 * <code>path_remove_locale</code> with value "document.txt will be added.
065 * </p>
066 *
067 * <p>
068 * To add the update processor to your installation, define an update processor chain as in the following example.
069 * </p>
070 *
071 * <pre class="prettyprint">
072 * &lt;updateRequestProcessorChain name="mychain" default="true"&gt;
073 *   &lt;processor class="org.opencms.search.solr.updateprocessors.CmsSolrCopyModifiedUpateProcessorFactory"&gt;
074 *     &lt;str name="source"&gt;path&lt;/str&gt;
075 *     &lt;str name="target"&gt;path_remove_locale&lt;/str&gt;
076 *     &lt;str name="regex"&gt;(.*)_([a-z]{2}(?:_[A-Z]{2})?)((?:\.[^\.]*)?)$&lt;/str&gt;
077 *     &lt;str name="replacement"&gt;$1$3&lt;/str&gt;
078 *   &lt;/processor&gt;
079 *   &lt;processor class="solr.LogUpdateProcessorFactory" /&gt;
080 *   &lt;processor class="solr.RunUpdateProcessorFactory" /&gt;
081 * &lt;/updateRequestProcessorChain&gt;</pre>
082 *
083 * @see org.apache.solr.update.processor.UpdateRequestProcessorChain
084 *
085 * @see java.util.regex.Pattern
086 */
087public class CmsSolrCopyModifiedUpateProcessorFactory extends UpdateRequestProcessorFactory {
088
089    /** Name of the parameter, the regex is provided. */
090    private static final String PARAM_REGEX = "regex";
091    /** Name of the parameter, the replacement string is provided. */
092    private static final String PARAM_REPLACEMENT = "replacement";
093    /** Name of the parameter, the source field is provided. */
094    private static final String PARAM_SOURCE = "source";
095    /** Name of the parameter, the target field is provided. */
096    private static final String PARAM_TARGET = "target";
097
098    /** The pattern to match the source against. */
099    private Pattern m_regex;
100    /** The replacement string for matches. */
101    private String m_replacement;
102    /** The field, the value that is matched against is read from. */
103    private String m_source;
104    /** The field, the modified value is written to. */
105    private String m_target;
106
107    /**
108     * @see org.apache.solr.update.processor.UpdateRequestProcessorFactory#getInstance(org.apache.solr.request.SolrQueryRequest, org.apache.solr.response.SolrQueryResponse, org.apache.solr.update.processor.UpdateRequestProcessor)
109     */
110    @Override
111    public UpdateRequestProcessor getInstance(
112        SolrQueryRequest req,
113        SolrQueryResponse rsp,
114        UpdateRequestProcessor next) {
115
116        return new CmsSolrCopyModifiedUpateProcessor(m_source, m_target, m_regex, m_replacement, next);
117    }
118
119    /**
120     * Read the parameters on initialization.
121     *
122     * @see org.apache.solr.update.processor.UpdateRequestProcessorFactory#init(org.apache.solr.common.util.NamedList)
123     */
124    @Override
125    public void init(NamedList args) {
126
127        Object regex = args.remove(PARAM_REGEX);
128        if (null == regex) {
129            throw new SolrException(ErrorCode.SERVER_ERROR, "Missing required init parameter: " + PARAM_REGEX);
130        }
131        try {
132            m_regex = Pattern.compile(regex.toString());
133        } catch (PatternSyntaxException e) {
134            throw new SolrException(ErrorCode.SERVER_ERROR, "Invalid regex: " + regex, e);
135        }
136
137        Object replacement = args.remove(PARAM_REPLACEMENT);
138        if (null == replacement) {
139            throw new SolrException(ErrorCode.SERVER_ERROR, "Missing required init parameter: " + PARAM_REPLACEMENT);
140        }
141        m_replacement = replacement.toString();
142
143        Object source = args.remove(PARAM_SOURCE);
144        if (null == source) {
145            throw new SolrException(ErrorCode.SERVER_ERROR, "Missing required init parameter: " + PARAM_SOURCE);
146        }
147        m_source = source.toString();
148
149        Object target = args.remove(PARAM_TARGET);
150        if (null == target) {
151            throw new SolrException(ErrorCode.SERVER_ERROR, "Missing required init parameter: " + PARAM_TARGET);
152        }
153        m_target = target.toString();
154
155    }
156
157}