001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (https://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: https://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: https://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.i18n;
029
030import org.opencms.json.JSONArray;
031import org.opencms.json.JSONException;
032import org.opencms.main.CmsLog;
033import org.opencms.main.OpenCms;
034import org.opencms.util.CmsStringUtil;
035
036import java.io.UnsupportedEncodingException;
037import java.net.IDN;
038import java.net.URI;
039import java.net.URISyntaxException;
040import java.net.URLDecoder;
041import java.net.URLEncoder;
042import java.nio.CharBuffer;
043import java.nio.charset.Charset;
044import java.nio.charset.CharsetEncoder;
045import java.util.HashMap;
046import java.util.List;
047import java.util.Map;
048import java.util.Random;
049import java.util.regex.Matcher;
050import java.util.regex.Pattern;
051
052import org.apache.commons.codec.binary.Base64;
053import org.apache.commons.lang3.StringUtils;
054import org.apache.commons.logging.Log;
055import org.apache.http.client.utils.URIBuilder;
056
057import com.google.common.base.Strings;
058import com.google.common.collect.Lists;
059
060/**
061 * The OpenCms CmsEncoder class provides static methods to decode and encode data.<p>
062 *
063 * The methods in this class are substitutes for <code>java.net.URLEncoder.encode()</code> and
064 * <code>java.net.URLDecoder.decode()</code>. Use the methods from this class in all OpenCms
065 * core classes to ensure the encoding is always handled the same way.<p>
066 *
067 * The de- and encoding uses the same coding mechanism as JavaScript, special characters are
068 * replaced with <code>%hex</code> where hex is a two digit hex number.<p>
069 *
070 * <b>Note:</b> On the client side (browser) instead of using the deprecated <code>escape</code>
 * and <code>unescape</code> JavaScript functions, always use the <code>encodeURIComponent</code> and
072 * <code>decodeURIComponent</code> functions. Only these work properly with unicode characters.<p>
073 *
074 * @since 6.0.0
075 */
076public final class CmsEncoder {
077
    /** Non-alphanumeric characters used for Base64 encoding. */
    public static final String BASE64_EXTRA = "+/=";

    /**
     * Characters used as replacements for non-alphanumeric Base64 characters when using Base64 for request parameters.<p>
     *
     * The characters correspond by position to those in {@link #BASE64_EXTRA}.
     */
    public static final String BASE64_EXTRA_REPLACEMENTS = "-_.";

    /** Constant for the standard <code>ISO-8859-1</code> encoding. */
    public static final String ENCODING_ISO_8859_1 = "ISO-8859-1";

    /** Constant for the standard <code>US-ASCII</code> encoding. */
    public static final String ENCODING_US_ASCII = "US-ASCII";

    /**
     * Constant for the standard <code>UTF-8</code> encoding.<p>
     *
     * Default encoding for JavaScript decodeUriComponent methods is <code>UTF-8</code> by w3c standard.
     */
    public static final String ENCODING_UTF_8 = "UTF-8";

    /**
     * The regex pattern to match decimal numeric HTML entities like <code>&amp;#8364;</code>;
     * group 1 captures the decimal digits.
     */
    private static final Pattern ENTITIY_PATTERN = Pattern.compile("\\&#(\\d+);");

    /** The prefix for HTML entities. */
    private static final String ENTITY_PREFIX = "&#";

    /** The replacement for HTML entity prefix in parameters. */
    private static final String ENTITY_REPLACEMENT = "$$";

    /** The log object for this class. */
    private static final Log LOG = CmsLog.getLog(CmsEncoder.class);

    /** A cache for encoding name lookup. */
    private static Map<String, String> m_encodingCache = new HashMap<String, String>(16);

    /**
     * Shared random number generator.<p>
     *
     * NOTE(review): not referenced in the visible part of this file; presumably used by the
     * byte obfuscation helpers - confirm against the rest of the class.
     */
    private static Random m_random = new Random();

    /** The plus entity, i.e. the character <code>+</code> written as a zero padded numeric HTML entity. */
    private static final String PLUS_ENTITY = ENTITY_PREFIX + "043;";

    /**
     * Pattern for decomposing the authority section of an URI.<p>
     *
     * Group 1 is the optional user info including the trailing <code>@</code>, group 2 the host and
     * group 3 is meant to be the optional port including the leading colon.<p>
     *
     * NOTE(review): group 2 is greedy and group 3 is optional, so group 2 always consumes a trailing
     * <code>:port</code> and group 3 never participates in a match.
     */
    public static final Pattern AUTHORITY_PATTERN = Pattern.compile("^(.*?@)?(.*)(:[0-9]+)?$");
119
    /**
     * Hidden constructor, this final class only provides static methods and constants.<p>
     */
    private CmsEncoder() {

        // empty
    }
127
    /**
     * Adjusts the given String by making sure all characters that can be displayed
     * in the given charset are contained as chars, whereas all other non-displayable
     * characters are converted to HTML entities.<p>
     *
     * Just calls {@link #decodeHtmlEntities(String)} first and feeds the result
     * to {@link #encodeHtmlEntities(String, String)}.<p>
     *
     * @param input the input to adjust the HTML encoding for
     * @param encoding the charset to encode the result with
     *
     * @return the input with the decoded/encoded HTML entities
     */
    public static String adjustHtmlEncoding(String input, String encoding) {

        return encodeHtmlEntities(decodeHtmlEntities(input), encoding);
    }
145
146    /**
147     * Changes the encoding of a byte array that represents a String.<p>
148     *
149     * @param input the byte array to convert
150     * @param oldEncoding the current encoding of the byte array
151     * @param newEncoding the new encoding of the byte array
152     *
153     * @return the byte array encoded in the new encoding
154     */
155    public static byte[] changeEncoding(byte[] input, String oldEncoding, String newEncoding) {
156
157        if ((oldEncoding == null) || (newEncoding == null)) {
158            return input;
159        }
160        if (oldEncoding.trim().equalsIgnoreCase(newEncoding.trim())) {
161            return input;
162        }
163        byte[] result = input;
164        try {
165            result = (new String(input, oldEncoding)).getBytes(newEncoding);
166        } catch (UnsupportedEncodingException e) {
167            // return value will be input value
168        }
169        return result;
170    }
171
172    /**
173     * Converts the host of an URI to Punycode.<p>
174     *
175     * This is needed when we want to do redirects to hosts with host names containing international characters like umlauts.<p>
176     *
177     * @param uriString the URI
178     * @return the converted URI
179     */
180    public static String convertHostToPunycode(String uriString) {
181
182        if (uriString.indexOf(":") >= 0) {
183            try {
184                URI uri = new URI(uriString);
185                String authority = uri.getAuthority(); // getHost won't work when we have non-ASCII domain characters
186                Matcher matcher = AUTHORITY_PATTERN.matcher(authority);
187                if (matcher.matches()) {
188                    authority = Strings.nullToEmpty(matcher.group(1))
189                        + IDN.toASCII(matcher.group(2))
190                        + Strings.nullToEmpty(matcher.group(3));
191                }
192                URI uriWithCorrectedHost = new URI(uri.getScheme(), authority, null, null, null);
193                URIBuilder builder = new URIBuilder(uri);
194                builder.setHost(uriWithCorrectedHost.getHost());
195                builder.setPort(uriWithCorrectedHost.getPort());
196                builder.setUserInfo(uriWithCorrectedHost.getUserInfo());
197                uriString = builder.build().toASCIIString();
198            } catch (URISyntaxException e) {
199                LOG.error(e.getLocalizedMessage(), e);
200            }
201        }
202        return uriString;
203    }
204
205    /**
206     * Creates a String out of a byte array with the specified encoding, falling back
207     * to the system default in case the encoding name is not valid.<p>
208     *
209     * Use this method as a replacement for <code>new String(byte[], encoding)</code>
210     * to avoid possible encoding problems.<p>
211     *
212     * @param bytes the bytes to decode
213     * @param encoding the encoding scheme to use for decoding the bytes
214     *
215     * @return the bytes decoded to a String
216     */
217    public static String createString(byte[] bytes, String encoding) {
218
219        String enc = encoding.intern();
220        if (enc != OpenCms.getSystemInfo().getDefaultEncoding()) {
221            enc = lookupEncoding(enc, null);
222        }
223        if (enc != null) {
224            try {
225                return new String(bytes, enc);
226            } catch (UnsupportedEncodingException e) {
227                // this can _never_ happen since the charset was looked up first
228            }
229        } else {
230            if (LOG.isWarnEnabled()) {
231                LOG.warn(Messages.get().getBundle().key(Messages.ERR_UNSUPPORTED_VM_ENCODING_1, encoding));
232            }
233            enc = OpenCms.getSystemInfo().getDefaultEncoding();
234            try {
235                return new String(bytes, enc);
236            } catch (UnsupportedEncodingException e) {
237                // this can also _never_ happen since the default encoding is always valid
238            }
239        }
240        // this code is unreachable in practice
241        LOG.error(Messages.get().getBundle().key(Messages.ERR_ENCODING_ISSUES_1, encoding));
242        return null;
243    }
244
    /**
     * Decodes a String using UTF-8 encoding, which is the standard for http data transmission
     * with GET and POST requests.<p>
     *
     * Delegates to {@link #decode(String, String)} with {@link #ENCODING_UTF_8}.<p>
     *
     * @param source the String to decode
     *
     * @return String the decoded source String
     */
    public static String decode(String source) {

        return decode(source, ENCODING_UTF_8);
    }
257
258    /**
259     * This method is a substitute for <code>URLDecoder.decode()</code>.
260     * Use this in all OpenCms core classes to ensure the encoding is
261     * always handled the same way.<p>
262     *
263     * In case you don't know what encoding to use, set the value of
264     * the <code>encoding</code> parameter to <code>null</code>.
265     * This method will then default to UTF-8 encoding, which is probably the right one.<p>
266     *
267     * @param source The string to decode
268     * @param encoding The encoding to use (if null, the system default is used)
269     *
270     * @return The decoded source String
271     */
272    public static String decode(String source, String encoding) {
273
274        if (source == null) {
275            return null;
276        }
277        if (encoding != null) {
278            try {
279                return URLDecoder.decode(source, encoding);
280            } catch (java.io.UnsupportedEncodingException e) {
281                // will fallback to default
282            }
283        }
284        // fallback to default decoding
285        try {
286            return URLDecoder.decode(source, ENCODING_UTF_8);
287        } catch (java.io.UnsupportedEncodingException e) {
288            // ignore
289        }
290        return source;
291    }
292
293    /**
294     * Decodes HTML entity references like <code>&amp;#8364;</code>.
295     *
296     * @param input the input to decode the HTML entities in
297     * @return the input with the decoded HTML entities
298     *
299     * @see #encodeHtmlEntities(String, String)
300     */
301    public static String decodeHtmlEntities(String input) {
302
303        Matcher matcher = ENTITIY_PATTERN.matcher(input);
304        StringBuffer result = new StringBuffer(input.length());
305        while (matcher.find()) {
306            String value = matcher.group(1);
307            int c = Integer.valueOf(value).intValue();
308            String replacement = new String(Character.toChars(c));
309            matcher.appendReplacement(result, replacement);
310        }
311        matcher.appendTail(result);
312        return result.toString();
313    }
314
315    /**
316     * Decodes HTML entity references like <code>&amp;#8364;</code> that are contained in the
317     * String to a regular character, but only if that character is contained in the given
318     * encodings charset.<p>
319     *
320     * @param input the input to decode the HTML entities in
321     * @param encoding the charset to decode the input for
322     * @return the input with the decoded HTML entities
323     *
324     * @see #encodeHtmlEntities(String, String)
325     */
326    @Deprecated
327    public static String decodeHtmlEntities(String input, String encoding) {
328
329        Matcher matcher = ENTITIY_PATTERN.matcher(input);
330        StringBuffer result = new StringBuffer(input.length());
331        Charset charset = Charset.forName(encoding);
332        CharsetEncoder encoder = charset.newEncoder();
333
334        while (matcher.find()) {
335            String entity = matcher.group();
336            String value = entity.substring(2, entity.length() - 1);
337            int c = Integer.valueOf(value).intValue();
338
339            if (c < 128) {
340                // first 128 chars are contained in almost every charset
341                entity = new String(new char[] {(char)c});
342                // this is intended as performance improvement since
343                // the canEncode() operation appears quite CPU heavy
344            } else if (encoder.canEncode((char)c)) {
345                // encoder can encode this char
346                entity = new String(new char[] {(char)c});
347            }
348            matcher.appendReplacement(result, entity);
349        }
350        matcher.appendTail(result);
351        return result.toString();
352    }
353
    /**
     * Decodes a string used as parameter in an uri in a way independent of other encodings/decodings applied before.<p>
     *
     * The entity replacement <code>$$</code> is first turned back into the HTML entity prefix
     * <code>&amp;#</code>, then the entities are decoded for the system default encoding.<p>
     *
     * @param input the encoded parameter string
     *
     * @return the decoded parameter string
     *
     * @see #encodeParameter(String)
     */
    public static String decodeParameter(String input) {

        // restore the "&#" entity prefix that was replaced with "$$" for transport
        String result = CmsStringUtil.substitute(input, ENTITY_REPLACEMENT, ENTITY_PREFIX);
        return CmsEncoder.decodeHtmlEntities(result, OpenCms.getSystemInfo().getDefaultEncoding());
    }
368
369    /**
370     * Decodes a parameter which has been encoded from a string list using encodeStringsAsBase64Parameter.<p>
371     *
372     * @param data the data to decode
373     * @return the list of strings
374     */
375    public static List<String> decodeStringsFromBase64Parameter(String data) {
376
377        data = StringUtils.replaceChars(data, BASE64_EXTRA_REPLACEMENTS, BASE64_EXTRA);
378        byte[] bytes = deobfuscateBytes(Base64.decodeBase64(data));
379        try {
380            JSONArray json = new JSONArray(new String(bytes, "UTF-8"));
381            List<String> result = Lists.newArrayList();
382            for (int i = 0; i < json.length(); i++) {
383                result.add(json.getString(i));
384            }
385            return result;
386        } catch (UnsupportedEncodingException e) {
387            // TODO Auto-generated catch block
388            e.printStackTrace();
389        } catch (JSONException e) {
390            throw new IllegalArgumentException("Decoding failed: " + data, e);
391        }
392        return null;
393    }
394
    /**
     * Encodes a String using UTF-8 encoding, which is the standard for http data transmission
     * with GET and POST requests.<p>
     *
     * Delegates to {@link #encode(String, String)} with {@link #ENCODING_UTF_8}.<p>
     *
     * @param source the String to encode
     *
     * @return String the encoded source String
     */
    public static String encode(String source) {

        return encode(source, ENCODING_UTF_8);
    }
407
408    /**
409     * This method is a substitute for <code>URLEncoder.encode()</code>.
410     * Use this in all OpenCms core classes to ensure the encoding is
411     * always handled the same way.<p>
412     *
413     * In case you don't know what encoding to use, set the value of
414     * the <code>encoding</code> parameter to <code>null</code>.
415     * This method will then default to UTF-8 encoding, which is probably the right one.<p>
416     *
417     * @param source the String to encode
418     * @param encoding the encoding to use (if null, the system default is used)
419     *
420     * @return the encoded source String
421     */
422    public static String encode(String source, String encoding) {
423
424        if (source == null) {
425            return null;
426        }
427        if (encoding != null) {
428            try {
429                return URLEncoder.encode(source, encoding);
430            } catch (java.io.UnsupportedEncodingException e) {
431                // will fallback to default
432            }
433        }
434        // fallback to default encoding
435        try {
436            return URLEncoder.encode(source, ENCODING_UTF_8);
437        } catch (java.io.UnsupportedEncodingException e) {
438            // ignore
439        }
440        return source;
441    }
442
443    /**
444     * Encodes all characters that are contained in the String which can not displayed
445     * in the given encodings charset with HTML entity references
446     * like <code>&amp;#8364;</code>.<p>
447     *
448     * This is required since a Java String is
449     * internally always stored as Unicode, meaning it can contain almost every character, but
450     * the HTML charset used might not support all such characters.<p>
451     *
452     * @param input the input to encode for HTML
453     * @param encoding the charset to encode the result with
454     *
455     * @return the input with the encoded HTML entities
456     *
457     * @see #decodeHtmlEntities(String, String)
458     */
459    public static String encodeHtmlEntities(String input, String encoding) {
460
461        StringBuffer result = new StringBuffer(input.length() * 2);
462        Charset charset = Charset.forName(encoding);
463        CharsetEncoder encoder = charset.newEncoder();
464        input.codePoints().forEach(codepoint -> {
465            char[] charsForCodepoint = Character.toChars(codepoint);
466            boolean isSimple = (charsForCodepoint.length == 1) && (charsForCodepoint[0] < 128);
467            if (isSimple || encoder.canEncode(new String(charsForCodepoint))) {
468                result.append(charsForCodepoint);
469            } else {
470                result.append(ENTITY_PREFIX);
471                result.append(codepoint);
472                result.append(";");
473            }
474        });
475        return result.toString();
476    }
477
478    /**
479     * Encodes all characters that are contained in the String which can not displayed
480     * in the given encodings charset with Java escaping like <code>\u20ac</code>.<p>
481     *
482     * This can be used to escape values used in Java property files.<p>
483     *
484     * @param input the input to encode for Java
485     * @param encoding the charset to encode the result with
486     *
487     * @return the input with the encoded Java entities
488     */
489    public static String encodeJavaEntities(String input, String encoding) {
490
491        StringBuffer result = new StringBuffer(input.length() * 2);
492        CharBuffer buffer = CharBuffer.wrap(input.toCharArray());
493        Charset charset = Charset.forName(encoding);
494        CharsetEncoder encoder = charset.newEncoder();
495        for (int i = 0; i < buffer.length(); i++) {
496            int c = buffer.get(i);
497            if (c < 128) {
498                // first 128 chars are contained in almost every charset
499                result.append((char)c);
500                // this is intended as performance improvement since
501                // the canEncode() operation appears quite CPU heavy
502            } else if (encoder.canEncode((char)c)) {
503                // encoder can encode this char
504                result.append((char)c);
505            } else {
506                // append Java entity reference
507                result.append("\\u");
508                String hex = Integer.toHexString(c);
509                int pad = 4 - hex.length();
510                for (int p = 0; p < pad; p++) {
511                    result.append('0');
512                }
513                result.append(hex);
514            }
515        }
516        return result.toString();
517    }
518
    /**
     * Encodes a string used as parameter in an uri in a way independent of other encodings/decodings applied later.<p>
     *
     * Used to ensure that GET parameters are not wrecked by wrong or incompatible configuration settings.
     * In order to ensure this, the String is first encoded with html entities for any character that cannot be encoded
     * in US-ASCII; additionally, the plus sign is also encoded to avoid problems with the white-space replacer.
     * Finally, the entity prefix is replaced with characters not used as delimiters in urls.<p>
     *
     * @param input the parameter string
     *
     * @return the encoded parameter string
     *
     * @see #decodeParameter(String)
     */
    public static String encodeParameter(String input) {

        // everything outside US-ASCII becomes a numeric entity
        String result = CmsEncoder.encodeHtmlEntities(input, CmsEncoder.ENCODING_US_ASCII);
        // the plus sign becomes an entity as well
        result = CmsStringUtil.substitute(result, "+", PLUS_ENTITY);
        // finally swap the "&#" entity prefix for "$$"
        return CmsStringUtil.substitute(result, ENTITY_PREFIX, ENTITY_REPLACEMENT);
    }
537
538    /**
539     * Encode a list of strings as base64 data to be used in a request parameter.<p>
540     *
541     * @param strings the strings to encode
542     * @return the resulting base64 data
543     */
544    public static String encodeStringsAsBase64Parameter(List<String> strings) {
545
546        JSONArray array = new JSONArray();
547        for (String string : strings) {
548            array.put(string);
549        }
550        byte[] bytes;
551        try {
552            // use obfuscateBytes here to to make the output look more random
553            bytes = obfuscateBytes(array.toString().getBytes("UTF-8"));
554        } catch (UnsupportedEncodingException e) {
555            // should never happen
556            e.printStackTrace();
557            throw new RuntimeException(e);
558        }
559        String result = Base64.encodeBase64String(bytes);
560        result = StringUtils.replaceChars(result, BASE64_EXTRA, BASE64_EXTRA_REPLACEMENTS);
561        return result;
562    }
563
    /**
     * Encodes a String in a way similar to the JavaScript "encodeURIcomponent" function,
     * using "UTF-8" for character encoding.<p>
     *
     * JavaScript "decodeURIcomponent" can decode Strings that have been encoded using this method.<p>
     *
     * <b>Directly exposed for JSP EL</b>, not through {@link org.opencms.jsp.util.CmsJspElFunctions}.<p>
     *
     * @param source The text to be encoded
     *
     * @return The encoded string
     *
     * @see #escape(String, String)
     */
    public static String escape(String source) {

        return escape(source, ENCODING_UTF_8);
    }
582
    /**
     * Encodes a String in a way similar to the JavaScript "encodeURIcomponent" function.<p>
     *
     * JavaScript "decodeURIcomponent" can decode Strings that have been encoded using this method,
     * provided "UTF-8" has been used as encoding.<p>
     *
     * <b>Directly exposed for JSP EL</b>, not through {@link org.opencms.jsp.util.CmsJspElFunctions}.<p>
     *
     * @param source The text to be encoded
     * @param encoding the encoding type
     *
     * @return The encoded string
     */
    public static String escape(String source, String encoding) {

        // the standard encode call turns a blank into "+", which is replaced with "%20" here
        return CmsStringUtil.substitute(encode(source, encoding), "+", "%20");
    }
601
602    /**
603     * Escapes special characters in a HTML-String with their number-based
604     * entity representation, for example &amp; becomes &amp;#38;.<p>
605     *
606     * A character <code>num</code> is replaced if<br>
607     * <code>((ch != 32) && ((ch > 122) || (ch < 48) || (ch == 60) || (ch == 62)))</code><p>
608     *
609     * @param source the String to escape
610     *
611     * @return String the escaped String
612     *
613     * @see #escapeXml(String)
614     */
615    public static String escapeHtml(String source) {
616
617        if (source == null) {
618            return null;
619        }
620        StringBuffer result = new StringBuffer(source.length() * 2);
621        for (int i = 0; i < source.length(); i++) {
622            int ch = source.charAt(i);
623            // avoid escaping already escaped characters
624            if (ch == 38) {
625                int terminatorIndex = source.indexOf(";", i);
626                if (terminatorIndex > 0) {
627                    if (source.substring(i + 1, terminatorIndex).matches("#[0-9]+|lt|gt|amp|quote")) {
628                        result.append(source.substring(i, terminatorIndex + 1));
629                        // Skip remaining chars up to (and including) ";"
630                        i = terminatorIndex;
631                        continue;
632                    }
633                }
634            }
635            if ((ch != 32) && ((ch > 122) || (ch < 48) || (ch == 60) || (ch == 62))) {
636                result.append(ENTITY_PREFIX);
637                result.append(ch);
638                result.append(";");
639            } else {
640                result.append((char)ch);
641            }
642        }
643        return new String(result);
644    }
645
646    /**
647     * Escapes non ASCII characters in a HTML-String with their number-based
648     * entity representation, for example &amp; becomes &amp;#38;.<p>
649     *
650     * A character <code>num</code> is replaced if<br>
651     * <code>(ch > 255)</code><p>
652     *
653     * @param source the String to escape
654     *
655     * @return String the escaped String
656     *
657     * @see #escapeXml(String)
658     */
659    public static String escapeNonAscii(String source) {
660
661        if (source == null) {
662            return null;
663        }
664        StringBuffer result = new StringBuffer(source.length() * 2);
665        for (int i = 0; i < source.length(); i++) {
666            int ch = source.charAt(i);
667            if (ch > 255) {
668                result.append(ENTITY_PREFIX);
669                result.append(ch);
670                result.append(";");
671            } else {
672                result.append((char)ch);
673            }
674        }
675        return new String(result);
676    }
677
678    /**
679     * A simple method to avoid injection.<p>
680     *
681     * Replaces all single quotes to double single quotes in the value parameter of the SQL statement.<p>
682     *
683     * @param source the String to escape SQL from
684     * @return the escaped value of the parameter source
685     */
686    public static String escapeSql(String source) {
687
688        return source.replaceAll("'", "''");
689    }
690
691    /**
692     * Escapes the wildcard characters in a string which will be used as the pattern for a SQL LIKE clause.<p>
693     *
694     * @param pattern the pattern
695     * @param escapeChar the character which should be used as the escape character
696     *
697     * @return the escaped pattern
698     */
699    public static String escapeSqlLikePattern(String pattern, char escapeChar) {
700
701        char[] special = new char[] {escapeChar, '%', '_'};
702        String result = pattern;
703        for (char charToEscape : special) {
704            result = result.replaceAll("" + charToEscape, "" + escapeChar + charToEscape);
705        }
706        return result;
707    }
708
709    /**
710     * Encodes a String in a way similar JavaScript "encodeURIcomponent" function.<p>
711     *
712     * Multiple blanks are encoded _multiply_ with <code>%20</code>.<p>
713     *
714     * @param source The text to be encoded
715     * @param encoding the encoding type
716     *
717     * @return The encoded String
718     */
719    public static String escapeWBlanks(String source, String encoding) {
720
721        if (CmsStringUtil.isEmpty(source)) {
722            return source;
723        }
724        StringBuffer ret = new StringBuffer(source.length() * 2);
725
726        // URLEncode the text string
727        // this produces a very similar encoding to JavaSscript encoding,
728        // except the blank which is not encoded into "%20" instead of "+"
729
730        String enc = encode(source, encoding);
731        for (int z = 0; z < enc.length(); z++) {
732            char c = enc.charAt(z);
733            if (c == '+') {
734                ret.append("%20");
735            } else {
736                ret.append(c);
737            }
738        }
739        return ret.toString();
740    }
741
    /**
     * Escapes a String so it may be printed as text content or attribute
     * value in a HTML page or an XML file.<p>
     *
     * This method replaces the following characters in a String:
     * <ul>
     * <li><b>&lt;</b> with &amp;lt;
     * <li><b>&gt;</b> with &amp;gt;
     * <li><b>&amp;</b> with &amp;amp;
     * <li><b>&quot;</b> with &amp;quot;
     * <li><b>&#39;</b> with &amp;apos;
     * </ul><p>
     *
     * Calls {@link #escapeXml(String, boolean)} with <code>false</code>, so an ampersand that
     * starts a numeric entity like &amp;#8364; is not escaped again.<p>
     *
     * @param source the string to escape
     *
     * @return the escaped string
     *
     * @see #escapeHtml(String)
     */
    public static String escapeXml(String source) {

        return escapeXml(source, false);
    }
764
765    /**
766     * Escapes a String so it may be printed as text content or attribute
767     * value in a HTML page or an XML file.<p>
768     *
769     * This method replaces the following characters in a String:
770     * <ul>
771     * <li><b>&lt;</b> with &amp;lt;
772     * <li><b>&gt;</b> with &amp;gt;
773     * <li><b>&amp;</b> with &amp;amp;
774     * <li><b>&quot;</b> with &amp;quot;
775     * </ul><p>
776     *
777     * @param source the string to escape
778     * @param doubleEscape if <code>false</code>, all entities that already are escaped are left untouched
779     *
780     * @return the escaped string
781     *
782     * @see #escapeHtml(String)
783     */
784    public static String escapeXml(String source, boolean doubleEscape) {
785
786        if (source == null) {
787            return null;
788        }
789        StringBuffer result = new StringBuffer(source.length() * 2);
790
791        for (int i = 0; i < source.length(); ++i) {
792            char ch = source.charAt(i);
793            switch (ch) {
794                case '<':
795                    result.append("&lt;");
796                    break;
797                case '>':
798                    result.append("&gt;");
799                    break;
800                case '&':
801                    // don't escape already escaped international and special characters
802                    if (!doubleEscape) {
803                        int terminatorIndex = source.indexOf(";", i);
804                        if (terminatorIndex > 0) {
805                            if (source.substring(i + 1, terminatorIndex).matches("#[0-9]+")) {
806                                result.append(ch);
807                                break;
808                            }
809                        }
810                    }
811                    // note that to other "break" in the above "if" block
812                    result.append("&amp;");
813                    break;
814                case '"':
815                    result.append("&quot;");
816                    break;
817                case '\'':
818                    result.append("&apos;");
819                    break;
820                default:
821                    result.append(ch);
822            }
823        }
824        return new String(result);
825    }
826
827    /**
828     * Checks if a given encoding name is actually supported, and if so
829     * resolves it to it's canonical name, if not it returns the given fallback
830     * value.<p>
831     *
832     * Charsets have a set of aliases. For example, valid aliases for "UTF-8"
833     * are "UTF8", "utf-8" or "utf8". This method resolves any given valid charset name
834     * to it's "canonical" form, so that simple String comparison can be used
835     * when checking charset names internally later.<p>
836     *
837     * Please see <a href="http://www.iana.org/assignments/character-sets">http://www.iana.org/assignments/character-sets</a>
838     * for a list of valid charset alias names.<p>
839     *
840     * @param encoding the encoding to check and resolve
841     * @param fallback the fallback encoding scheme
842     *
843     * @return the resolved encoding name, or the fallback value
844     */
845    public static String lookupEncoding(String encoding, String fallback) {
846
847        String result = m_encodingCache.get(encoding);
848        if (result != null) {
849            return result;
850        }
851
852        try {
853            result = Charset.forName(encoding).name();
854            m_encodingCache.put(encoding, result);
855            return result;
856        } catch (Throwable t) {
857            // we will use the default value as fallback
858        }
859
860        return fallback;
861    }
862
863    /**
864     * Re-decodes a String that has not been correctly decoded and thus has scrambled
865     * character bytes.<p>
866     *
867     * This is an equivalent to the JavaScript "decodeURIComponent" function.
868     * It converts from the default "UTF-8" to the currently selected system encoding.<p>
869     *
870     * @param input the String to convert
871     *
872     * @return String the converted String
873     */
874    public static String redecodeUriComponent(String input) {
875
876        if (input == null) {
877            return input;
878        }
879        return new String(
880            changeEncoding(input.getBytes(), ENCODING_UTF_8, OpenCms.getSystemInfo().getDefaultEncoding()));
881    }
882
883    /**
884     * Decodes a String in a way similar to the JavaScript "decodeURIcomponent" function,
885     * using "UTF-8" for character encoding.<p>
886     *
887     * This method can decode Strings that have been encoded in JavaScript with "encodeURIcomponent".<p>
888     *
889     * <b>Directly exposed for JSP EL<b>, not through {@link org.opencms.jsp.util.CmsJspElFunctions}.<p>
890     *
891     * @param source The String to be decoded
892     *
893     * @return The decoded String
894     */
895    public static String unescape(String source) {
896
897        return unescape(source, ENCODING_UTF_8);
898    }
899
900    /**
901     * Decodes a String in a way similar to the JavaScript "decodeURIcomponent" function.<p>
902     *
903     * This method can decode Strings that have been encoded in JavaScript with "encodeURIcomponent",
904     * provided "UTF-8" is used as encoding.<p>
905     *
906     * <b>Directly exposed for JSP EL<b>, not through {@link org.opencms.jsp.util.CmsJspElFunctions}.<p>
907     *
908     * @param source The String to be decoded
909     * @param encoding the encoding type
910     *
911     * @return The decoded String
912     */
913    public static String unescape(String source, String encoding) {
914
915        if (source == null) {
916            return null;
917        }
918        int len = source.length();
919        // to use standard decoder we need to replace '+' with "%20" (space)
920        StringBuffer preparedSource = new StringBuffer(len);
921        for (int i = 0; i < len; i++) {
922            char c = source.charAt(i);
923            if (c == '+') {
924                preparedSource.append("%20");
925            } else {
926                preparedSource.append(c);
927            }
928        }
929        return decode(preparedSource.toString(), encoding);
930    }
931
932    /**
933     * Decrypts a byte array obfuscated with 'obfuscateBytes'.<p>
934     *
935     * @param source the source
936     * @return the resuvlt
937     */
938    private static byte[] deobfuscateBytes(byte[] source) {
939
940        byte[] result = new byte[source.length - 1];
941        System.arraycopy(source, 1, result, 0, source.length - 1);
942        for (int i = 0; i < result.length; i++) {
943            result[i] = (byte)(0xFF & (result[i] ^ source[0]));
944        }
945        return result;
946    }
947
948    /**
949     * Simple "obfuscation" for byte arrays using random numbers.<p>
950     *
951     * @param source the source array
952     * @return the result
953     */
954    private static byte[] obfuscateBytes(byte[] source) {
955
956        byte[] s = new byte[1];
957        m_random.nextBytes(s);
958        byte[] result = new byte[source.length + 1];
959        System.arraycopy(source, 0, result, 1, source.length);
960        result[0] = s[0];
961        for (int i = 1; i < result.length; i++) {
962            result[i] = (byte)(0xFF & (result[i] ^ s[0]));
963        }
964        return result;
965    }
966
967}