001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.util;
029
030import org.opencms.main.CmsLog;
031
032import java.util.Random;
033
034import org.apache.commons.logging.Log;
035import org.apache.oro.text.PatternCache;
036import org.apache.oro.text.PatternCacheFIFO;
037import org.apache.oro.text.perl.MalformedPerl5PatternException;
038import org.apache.oro.text.perl.Perl5Util;
039import org.apache.oro.text.regex.MalformedPatternException;
040
041/**
042 * Provides a resource name / path translation facility.<p>
043 *
044 * This facility is used for translating new file names that contain
045 * illegal chars to legal names. This feature is most useful (and currently
046 * only used) for uploaded files. It is also applied to uploaded ZIP directories
047 * that are extracted after upload.
048 * The rules that are used for resource name translation are available from
049 * {@link org.opencms.file.CmsRequestContext#getFileTranslator()}.<p>
050 *
051 * Optionally, resource name translation is also applied to all files read
052 * from the VFS, so it can be used for accessing files out of their usual context.
053 * This feature is called directory translation, and the configured directory
054 * translations are available from {@link org.opencms.file.CmsRequestContext#getDirectoryTranslator()}.<p>
055 *
056 * Directory translation was originally required for backward compatibility
057 * to the directory layout before OpenCms 5.0 beta 2. In a modern installation,
058 * directory translation is usually disabled.<p>
059 *
060 * The translations can be configured in <code>opencms-vfs.xml</code>
061 * in the <code>opencms\vfs\resources\translations</code> node.<p>
062 *
063 * The default file name translation setting is:<br>
064 * <pre>
065 * &lt;filetranslations enabled="true"&gt;
066 *    &lt;translation&gt;s#[\s]+#_#g&lt;/translation&gt;
067 *    &lt;translation&gt;s#\\#/#g&lt;/translation&gt;
068 *    &lt;translation&gt;s#&auml;#ae#g&lt;/translation&gt;
069 *    &lt;translation&gt;s#&Auml;#Ae#g&lt;/translation&gt;
070 *    &lt;translation&gt;s#&ouml;#oe#g&lt;/translation&gt;
071 *    &lt;translation&gt;s#&Ouml;#Oe#g&lt;/translation&gt;
072 *    &lt;translation&gt;s#&uuml;#ue#g&lt;/translation&gt;
073 *    &lt;translation&gt;s#&Uuml;#Ue#g&lt;/translation&gt;
074 *    &lt;translation&gt;s#&szlig;#ss#g&lt;/translation&gt;
075 *    &lt;translation&gt;s#[^0-9a-zA-Z_$~\.\-\/]#!#g&lt;/translation&gt;
076 *    &lt;translation&gt;s#!+#x#g&lt;/translation&gt;
077 * &lt;/filetranslations&gt;
078 * </pre><p>
079 *
080 * Directory translation is usually no longer required and is therefore disabled by default.
081 * The directory translation setting to convert an OpenCms 5.0 to 6.0 VFS is:<br>
082 * <pre>
083 * &lt;foldertranslations enabled="true"&gt;
084 *    &lt;translation&gt;s#/content/bodys/(.*)#/system/bodies/$1#&lt;/translation&gt;
085 *    &lt;translation&gt;s#/pics/system/(.*)#/system/workplace/resources/$1#&lt;/translation&gt;
086 *    &lt;translation&gt;s#/pics/(.*)#/system/galleries/pics/$1#&lt;/translation&gt;
087 *    &lt;translation&gt;s#/download/(.*)#/system/galleries/download/$1#&lt;/translation&gt;
088 *    &lt;translation&gt;s#/externallinks/(.*)#/system/galleries/externallinks/$1#&lt;/translation&gt;
089 *    &lt;translation&gt;s#/htmlgalleries/(.*)#/system/galleries/htmlgalleries/$1#&lt;/translation&gt;
090 *    &lt;translation&gt;s#/content/(.*)#/system/$1#&lt;/translation&gt;
091 * &lt;/foldertranslations&gt;
092 * </pre><p>
093 *
094 * @since 6.0.0
095 */
096public class CmsResourceTranslator {
097
098    /** The log object for this class. */
099    private static final Log LOG = CmsLog.getLog(CmsResourceTranslator.class);
100
101    /** Keep an array of Perl5Util to randomly select for use, because using just a single one causes contention problems under load (since they're synchronized). */
102    private Perl5Util[] m_perl5Utils = new Perl5Util[32];
103
104    /** Flag to indicate if one or more matchings should be tried. */
105    private boolean m_continueMatching;
106
107    /** Internal array containing the translations from opencms.properties. */
108    private String[] m_translations;
109
110    /** Random number generator for randomly choosing Perl5Utils. */
111    private Random m_random = new Random(42l);
112
113    /**
114     * Constructor for the CmsResourceTranslator.
115     *
116     * @param translations The array of translations read from the
117     *      opencms,properties
118     * @param continueMatching if <code>true</code>, matching will continue after
119     *      the first match was found
120     */
121    public CmsResourceTranslator(String[] translations, boolean continueMatching) {
122
123        super();
124        m_translations = translations;
125        m_continueMatching = continueMatching;
126        for (int i = 0; i < m_perl5Utils.length; i++) {
127            m_perl5Utils[i] = new Perl5Util(buildPatternCache());
128        }
129        if (LOG.isInfoEnabled()) {
130            LOG.info(
131                Messages.get().getBundle().key(
132                    Messages.LOG_NUM_TRANSLATION_RULES_INITIALIZED_1,
133                    Integer.valueOf(translations.length)));
134        }
135    }
136
137    /**
138     * Returns a copy of the initialized translation rules.<p>
139     *
140     * @return String[] a copy of the initialized translation rules
141     */
142    public String[] getTranslations() {
143
144        String[] copy = new String[m_translations.length];
145        System.arraycopy(m_translations, 0, copy, 0, m_translations.length);
146        return copy;
147    }
148
149    /**
150     * Translate a resource name according to the expressions set in
151     * <code>opencms-vfs.xml</code>. If no match is found,
152     * the resource name is returned unchanged.<p>
153     *
154     * @param resourceName The resource name to translate
155     * @return The translated name of the resource
156     */
157    public String translateResource(String resourceName) {
158
159        if (m_translations.length == 0) {
160            // no translations defined
161            return resourceName;
162        }
163        if (resourceName == null) {
164            return null;
165        }
166
167        StringBuffer result;
168        String current = resourceName;
169        int size = current.length() * 2;
170
171        Perl5Util perl5Util = getPerl5Util();
172        for (int i = 0; i < m_translations.length; i++) {
173            result = new StringBuffer(size);
174            try {
175                if (perl5Util.substitute(result, m_translations[i], current) != 0) {
176
177                    if (m_continueMatching) {
178                        // continue matching
179                        current = result.toString();
180                    } else {
181                        // first pattern matched, return the result
182                        if (LOG.isDebugEnabled()) {
183                            LOG.debug(
184                                Messages.get().getBundle().key(
185                                    Messages.LOG_TRANSLATION_MATCH_3,
186                                    Integer.valueOf(i),
187                                    resourceName,
188                                    result));
189                        }
190                        // Return first match result
191                        return result.toString();
192                    }
193                }
194            } catch (MalformedPerl5PatternException e) {
195                LOG.error(
196                    Messages.get().getBundle().key(Messages.LOG_MALFORMED_TRANSLATION_RULE_1, m_translations[i]),
197                    e);
198            }
199        }
200
201        // the pattern matched, return the result
202        if (LOG.isDebugEnabled()) {
203            LOG.debug(Messages.get().getBundle().key(Messages.LOG_TRANSLATION_MATCH_2, resourceName, current));
204        }
205        // return last translation (or original if no matching translation found)
206        return current;
207    }
208
209    /**
210     * Builds a pattern cache from the stored list of substitutions.
211     *
212     * @return the pattern cache
213     */
214    private PatternCache buildPatternCache() {
215
216        PatternCacheFIFO cache = new PatternCacheFIFO(m_translations.length + 1);
217        for (int i = 0; i < m_translations.length; i++) {
218            try {
219                cache.addPattern(m_translations[i]);
220            } catch (MalformedPatternException e) {
221                LOG.error(
222                    Messages.get().getBundle().key(Messages.LOG_MALFORMED_TRANSLATION_RULE_1, m_translations[i]),
223                    e);
224            }
225        }
226        return cache;
227    }
228
229    /**
230     * Gets the perl5util instance for the current thread.
231     *
232     * @return the perl5util instance for the current thread
233     */
234    private Perl5Util getPerl5Util() {
235
236        int index = m_random.nextInt(m_perl5Utils.length);
237        return m_perl5Utils[index];
238    }
239}