001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (C) Alkacon Software (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.util;
029
import org.opencms.cache.CmsVfsMemoryObjectCache;
import org.opencms.file.CmsFile;
import org.opencms.file.CmsObject;
import org.opencms.i18n.CmsEncoder;
import org.opencms.main.CmsException;
import org.opencms.main.CmsLog;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

import org.apache.commons.logging.Log;

import org.owasp.validator.html.AntiSamy;
import org.owasp.validator.html.CleanResults;
import org.owasp.validator.html.Policy;
import org.owasp.validator.html.PolicyException;
import org.owasp.validator.html.ScanException;
050
051/**
052 * This class is responsible for automatically escaping parameters in Flex requests. It keeps track
053 * of which parameters to escape (or not escape), and which parameters need to be processed by AntiSamy.<p>
054 */
055public class CmsParameterEscaper {
056
057    /** The file name of the default policy. */
058    public static final String DEFAULT_POLICY = "antisamy-opencms.xml";
059
060    /** The default policy, which is used when no policy path is given. */
061    protected static Policy defaultPolicy;
062
063    /** The logger instance for this class. */
064    private static final Log LOG = CmsLog.getLog(CmsParameterEscaper.class);
065
066    /** The AntiSamy instance for cleaning HTML. */
067    private AntiSamy m_antiSamy;
068
069    /** The names of parameters which need to be HTML-cleaned. */
070    private Set<String> m_cleanHtml = new HashSet<String>();
071
072    /** The dummy value to replace invalid values with (if this is set, it replaces XML escaping). */
073    private String m_dummyValue;
074
075    /** Set of parameter names which should still replaced even if the dummy value is set. */
076    private Set<String> m_escapeInvalid = new HashSet<>();
077
078    /** The names of parameters which shouldn't be escaped. */
079    private Set<String> m_exceptions = new HashSet<String>();
080
081    static {
082        try {
083            // Don't hardcode the resource path, use the package of this class as the location
084            String packageName = CmsParameterEscaper.class.getPackage().getName();
085            String resourceName = packageName.replace(".", "/") + "/" + DEFAULT_POLICY;
086            InputStream stream = CmsParameterEscaper.class.getClassLoader().getResourceAsStream(resourceName);
087            Policy policy = Policy.getInstance(stream);
088            defaultPolicy = policy;
089        } catch (PolicyException e) {
090            LOG.error(e.getLocalizedMessage(), e);
091        }
092    }
093
094    /**
095     * Helper method for reading an AntiSamy policy file from the VFS.<p>
096     *
097     * @param cms the current CMS context
098     * @param sitePath the site path of the policy file
099     *
100     * @return the policy object for the given path
101     */
102    public static Policy readPolicy(CmsObject cms, String sitePath) {
103
104        try {
105            CmsFile policyFile = cms.readFile(sitePath);
106            ByteArrayInputStream input = new ByteArrayInputStream(policyFile.getContents());
107
108            // we use the deprecated method here because it is the only way to load
109            // a policy directly from the VFS.
110            Policy policy = Policy.getInstance(input);
111            return policy;
112        } catch (CmsException e) {
113            LOG.error("Could not read Antisamy policy file");
114            LOG.error(e.getLocalizedMessage(), e);
115            return null;
116        } catch (PolicyException e) {
117            LOG.error("Invalid Antisamy policy read from " + sitePath);
118            LOG.error(e.getLocalizedMessage(), e);
119            return null;
120        }
121    }
122
123    /**
124     * Creates a new AntiSamy instance for a given policy path.<p>
125     *
126     * @param cms the current CMS context
127     * @param policyPath the policy site path
128     *
129     * @return the new AntiSamy instance
130     */
131    public AntiSamy createAntiSamy(CmsObject cms, String policyPath) {
132
133        String rootPath = cms.addSiteRoot(policyPath);
134        Policy policy = null;
135        if (policyPath != null) {
136            Object cacheValue = CmsVfsMemoryObjectCache.getVfsMemoryObjectCache().getCachedObject(cms, rootPath);
137            if (cacheValue == null) {
138                policy = readPolicy(cms, policyPath);
139                if (policy != null) {
140                    CmsVfsMemoryObjectCache.getVfsMemoryObjectCache().putCachedObject(cms, rootPath, policy);
141                }
142            } else {
143                policy = (Policy)cacheValue;
144            }
145        }
146        if (policy == null) {
147            policy = defaultPolicy;
148        }
149        if (policy != null) {
150            return new AntiSamy(policy);
151        }
152        return null;
153    }
154
155    /**
156     * Enables the AntiSamy HTML cleaning for some parameters.<p>
157     *
158     * @param cms the current CMS context
159     * @param policyPath the policy site path in the VFS
160     * @param params the parameters for which HTML cleaning should be  enabled
161     */
162    public void enableAntiSamy(CmsObject cms, String policyPath, Set<String> params) {
163
164        m_antiSamy = createAntiSamy(cms, policyPath);
165        m_cleanHtml = params;
166    }
167
168    /**
169     * Escapes a single parameter value.<p>
170     *
171     * @param name the name of the parameter
172     * @param value the value of the parameter
173     *
174     * @return the escaped parameter value
175     */
176    public String escape(String name, String value) {
177
178        if (value == null) {
179            return null;
180        }
181        if (m_exceptions.contains(name)) {
182            return value;
183        }
184        LOG.info("Escaping parameter '" + name + "' with value '" + value + "'");
185        if (m_cleanHtml.contains(name)) {
186            return filterAntiSamy(name, value);
187        }
188        return escapeSimple(name, value);
189    }
190
191    /**
192     * Escapes an array of parameter values.<p>
193     *
194     * @param name the parameter name
195     * @param values the parameter values
196     *
197     * @return the escaped parameter values
198     */
199    public String[] escape(String name, String[] values) {
200
201        if (values == null) {
202            return null;
203        }
204        if (m_exceptions.contains(name)) {
205            return values;
206        }
207        boolean cleanHtml = m_cleanHtml.contains(name);
208        String[] result = new String[values.length];
209        for (int i = 0; i < values.length; i++) {
210            if (cleanHtml) {
211                result[i] = filterAntiSamy(name, values[i]);
212            } else {
213                result[i] = escapeSimple(name, values[i]);
214            }
215        }
216        return result;
217    }
218
219    /**
220     * Filters HTML input using the internal AntiSamy instance.<p>
221     *
222     * @param name the parameter name
223     * @param html the HTML to filter
224     *
225     * @return the filtered HTML
226     */
227    public String filterAntiSamy(String name, String html) {
228
229        if (m_antiSamy == null) {
230            LOG.warn("Antisamy policy invalid, using simple escaping as a fallback");
231            return escapeSimple(name, html);
232        }
233        try {
234            CleanResults results = m_antiSamy.scan(html);
235            if (results.getNumberOfErrors() > 0) {
236                LOG.info("Antisamy error messages:");
237                for (Object message : results.getErrorMessages()) {
238                    LOG.info(message);
239                }
240            }
241            return results.getCleanHTML();
242        } catch (PolicyException e) {
243            LOG.error(e.getLocalizedMessage(), e);
244            return escapeSimple(name, html);
245        } catch (ScanException e) {
246            LOG.error(e.getLocalizedMessage(), e);
247            return escapeSimple(name, html);
248        }
249    }
250
251    /**
252     * Sets the dummy value.<p>
253     *
254     * If the dummy value is set, then values which would otherwise be XML-escaped will be replaced with the dummy value instead.
255     *
256     * @param dummyValue the new value
257     */
258    public void setDummyValue(String dummyValue) {
259
260        m_dummyValue = dummyValue;
261    }
262
263    /**
264     * Sets the parameters which should be escaped even if the dummy value is set.
265     *
266     * @param escapeInvalidList the collection of parameters which should be escaped even if the dummy value is set
267     */
268    public void setEscapeInvalid(Collection<String> escapeInvalidList) {
269
270        m_escapeInvalid = new HashSet<>(escapeInvalidList);
271    }
272
273    /**
274     * Sets the set of names of parameters which shouldn't be escaped.<p>
275     *
276     * @param exceptions a set of parameter names
277     */
278    public void setExceptions(Collection<String> exceptions) {
279
280        m_exceptions = new HashSet<String>(exceptions);
281    }
282
283    /**
284     * Default escape function that doesn't do HTML filtering.
285     * @param name the parameter name
286     * @param value the parameter value
287     *
288     * @return the escaped value
289     */
290    protected String escapeSimple(String name, String value) {
291
292        String result = CmsEncoder.escapeXml(value);
293        if ((m_dummyValue != null) && !result.equals(value) && !m_escapeInvalid.contains(name)) {
294            return name + "_" + m_dummyValue;
295        } else {
296            return result;
297        }
298    }
299
300}