001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software GmbH & Co. KG, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.i18n; 029 030import org.opencms.json.JSONArray; 031import org.opencms.json.JSONException; 032import org.opencms.main.CmsLog; 033import org.opencms.main.OpenCms; 034import org.opencms.util.CmsStringUtil; 035 036import java.io.UnsupportedEncodingException; 037import java.net.IDN; 038import java.net.URI; 039import java.net.URISyntaxException; 040import java.net.URLDecoder; 041import java.net.URLEncoder; 042import java.nio.CharBuffer; 043import java.nio.charset.Charset; 044import java.nio.charset.CharsetEncoder; 045import java.util.HashMap; 046import java.util.List; 047import java.util.Map; 048import java.util.Random; 049import java.util.regex.Matcher; 050import java.util.regex.Pattern; 051 052import org.apache.commons.codec.binary.Base64; 053import org.apache.commons.lang3.StringUtils; 054import org.apache.commons.logging.Log; 055import org.apache.http.client.utils.URIBuilder; 056 057import com.google.common.collect.Lists; 058 059/** 060 * The OpenCms CmsEncoder class provides static methods to decode and encode data.<p> 061 * 062 * The methods in this class are substitutes for <code>java.net.URLEncoder.encode()</code> and 063 * <code>java.net.URLDecoder.decode()</code>. Use the methods from this class in all OpenCms 064 * core classes to ensure the encoding is always handled the same way.<p> 065 * 066 * The de- and encoding uses the same coding mechanism as JavaScript, special characters are 067 * replaced with <code>%hex</code> where hex is a two digit hex number.<p> 068 * 069 * <b>Note:</b> On the client side (browser) instead of using the deprecated <code>escape</code> 070 * and <code>unescape</code> JavaScript functions, always the use <code>encodeURIComponent</code> and 071 * <code>decodeURIComponent</code> functions. Only these work properly with unicode characters.<p> 072 * 073 * @since 6.0.0 074 */ 075public final class CmsEncoder { 076 077 /** Non-alphanumeric characters used for Base64 encoding. */ 078 public static final String BASE64_EXTRA = "+/="; 079 080 /** Characters used as replacements for non-alphanumeric Base64 characters when using Base64 for request parameters. */ 081 public static final String BASE64_EXTRA_REPLACEMENTS = "-_."; 082 083 /** Constant for the standard <code>ISO-8859-1</code> encoding. */ 084 public static final String ENCODING_ISO_8859_1 = "ISO-8859-1"; 085 086 /** Constant for the standard <code>US-ASCII</code> encoding. */ 087 public static final String ENCODING_US_ASCII = "US-ASCII"; 088 089 /** 090 * Constant for the standard <code>UTF-8</code> encoding.<p> 091 * 092 * Default encoding for JavaScript decodeUriComponent methods is <code>UTF-8</code> by w3c standard. 093 */ 094 public static final String ENCODING_UTF_8 = "UTF-8"; 095 096 /** The regex pattern to match HTML entities. */ 097 private static final Pattern ENTITIY_PATTERN = Pattern.compile("\\&#(\\d+);"); 098 099 /** The prefix for HTML entities. */ 100 private static final String ENTITY_PREFIX = "&#"; 101 102 /** The replacement for HTML entity prefix in parameters. */ 103 private static final String ENTITY_REPLACEMENT = "$$"; 104 105 /** The log object for this class. */ 106 private static final Log LOG = CmsLog.getLog(CmsEncoder.class); 107 108 /** A cache for encoding name lookup. */ 109 private static Map<String, String> m_encodingCache = new HashMap<String, String>(16); 110 111 private static Random m_random = new Random(); 112 113 /** The plus entity. */ 114 private static final String PLUS_ENTITY = ENTITY_PREFIX + "043;"; 115 116 /** 117 * Constructor.<p> 118 */ 119 private CmsEncoder() { 120 121 // empty 122 } 123 124 /** 125 * Adjusts the given String by making sure all characters that can be displayed 126 * in the given charset are contained as chars, whereas all other non-displayable 127 * characters are converted to HTML entities.<p> 128 * 129 * Just calls {@link #decodeHtmlEntities(String)} first and feeds the result 130 * to {@link #encodeHtmlEntities(String, String)}. <p> 131 * 132 * @param input the input to adjust the HTML encoding for 133 * @param encoding the charset to encode the result with\ 134 * 135 * @return the input with the decoded/encoded HTML entities 136 */ 137 public static String adjustHtmlEncoding(String input, String encoding) { 138 139 return encodeHtmlEntities(decodeHtmlEntities(input), encoding); 140 } 141 142 /** 143 * Changes the encoding of a byte array that represents a String.<p> 144 * 145 * @param input the byte array to convert 146 * @param oldEncoding the current encoding of the byte array 147 * @param newEncoding the new encoding of the byte array 148 * 149 * @return the byte array encoded in the new encoding 150 */ 151 public static byte[] changeEncoding(byte[] input, String oldEncoding, String newEncoding) { 152 153 if ((oldEncoding == null) || (newEncoding == null)) { 154 return input; 155 } 156 if (oldEncoding.trim().equalsIgnoreCase(newEncoding.trim())) { 157 return input; 158 } 159 byte[] result = input; 160 try { 161 result = (new String(input, oldEncoding)).getBytes(newEncoding); 162 } catch (UnsupportedEncodingException e) { 163 // return value will be input value 164 } 165 return result; 166 } 167 168 /** 169 * Converts the host of an URI to Punycode.<p> 170 * 171 * This is needed when we want to do redirects to hosts with host names containing international characters like umlauts.<p> 172 * 173 * @param uriString the URI 174 * @return the converted URI 175 */ 176 public static String convertHostToPunycode(String uriString) { 177 178 if (uriString.indexOf(":") >= 0) { 179 try { 180 URI uri = new URI(uriString); 181 String authority = uri.getAuthority(); // getHost won't work when we have special characters 182 int colonPos = authority.indexOf(':'); 183 if (colonPos >= 0) { 184 authority = IDN.toASCII(authority.substring(0, colonPos)) + authority.substring(colonPos); 185 } else { 186 authority = IDN.toASCII(authority); 187 } 188 URI uriWithCorrectedHost = new URI(uri.getScheme(), authority, null, null, null); 189 URIBuilder builder = new URIBuilder(uri); 190 builder.setHost(uriWithCorrectedHost.getHost()); 191 builder.setPort(uriWithCorrectedHost.getPort()); 192 builder.setUserInfo(uriWithCorrectedHost.getUserInfo()); 193 uriString = builder.build().toASCIIString(); 194 } catch (URISyntaxException e) { 195 LOG.error(e.getLocalizedMessage(), e); 196 } 197 } 198 return uriString; 199 } 200 201 /** 202 * Creates a String out of a byte array with the specified encoding, falling back 203 * to the system default in case the encoding name is not valid.<p> 204 * 205 * Use this method as a replacement for <code>new String(byte[], encoding)</code> 206 * to avoid possible encoding problems.<p> 207 * 208 * @param bytes the bytes to decode 209 * @param encoding the encoding scheme to use for decoding the bytes 210 * 211 * @return the bytes decoded to a String 212 */ 213 public static String createString(byte[] bytes, String encoding) { 214 215 String enc = encoding.intern(); 216 if (enc != OpenCms.getSystemInfo().getDefaultEncoding()) { 217 enc = lookupEncoding(enc, null); 218 } 219 if (enc != null) { 220 try { 221 return new String(bytes, enc); 222 } catch (UnsupportedEncodingException e) { 223 // this can _never_ happen since the charset was looked up first 224 } 225 } else { 226 if (LOG.isWarnEnabled()) { 227 LOG.warn(Messages.get().getBundle().key(Messages.ERR_UNSUPPORTED_VM_ENCODING_1, encoding)); 228 } 229 enc = OpenCms.getSystemInfo().getDefaultEncoding(); 230 try { 231 return new String(bytes, enc); 232 } catch (UnsupportedEncodingException e) { 233 // this can also _never_ happen since the default encoding is always valid 234 } 235 } 236 // this code is unreachable in practice 237 LOG.error(Messages.get().getBundle().key(Messages.ERR_ENCODING_ISSUES_1, encoding)); 238 return null; 239 } 240 241 /** 242 * Decodes a String using UTF-8 encoding, which is the standard for http data transmission 243 * with GET ant POST requests.<p> 244 * 245 * @param source the String to decode 246 * 247 * @return String the decoded source String 248 */ 249 public static String decode(String source) { 250 251 return decode(source, ENCODING_UTF_8); 252 } 253 254 /** 255 * This method is a substitute for <code>URLDecoder.decode()</code>. 256 * Use this in all OpenCms core classes to ensure the encoding is 257 * always handled the same way.<p> 258 * 259 * In case you don't know what encoding to use, set the value of 260 * the <code>encoding</code> parameter to <code>null</code>. 261 * This method will then default to UTF-8 encoding, which is probably the right one.<p> 262 * 263 * @param source The string to decode 264 * @param encoding The encoding to use (if null, the system default is used) 265 * 266 * @return The decoded source String 267 */ 268 public static String decode(String source, String encoding) { 269 270 if (source == null) { 271 return null; 272 } 273 if (encoding != null) { 274 try { 275 return URLDecoder.decode(source, encoding); 276 } catch (java.io.UnsupportedEncodingException e) { 277 // will fallback to default 278 } 279 } 280 // fallback to default decoding 281 try { 282 return URLDecoder.decode(source, ENCODING_UTF_8); 283 } catch (java.io.UnsupportedEncodingException e) { 284 // ignore 285 } 286 return source; 287 } 288 289 /** 290 * Decodes HTML entity references like <code>&#8364;</code>. 291 * 292 * @param input the input to decode the HTML entities in 293 * @return the input with the decoded HTML entities 294 * 295 * @see #encodeHtmlEntities(String, String) 296 */ 297 public static String decodeHtmlEntities(String input) { 298 299 Matcher matcher = ENTITIY_PATTERN.matcher(input); 300 StringBuffer result = new StringBuffer(input.length()); 301 while (matcher.find()) { 302 String value = matcher.group(1); 303 int c = Integer.valueOf(value).intValue(); 304 String replacement = new String(Character.toChars(c)); 305 matcher.appendReplacement(result, replacement); 306 } 307 matcher.appendTail(result); 308 return result.toString(); 309 } 310 311 /** 312 * Decodes HTML entity references like <code>&#8364;</code> that are contained in the 313 * String to a regular character, but only if that character is contained in the given 314 * encodings charset.<p> 315 * 316 * @param input the input to decode the HTML entities in 317 * @param encoding the charset to decode the input for 318 * @return the input with the decoded HTML entities 319 * 320 * @see #encodeHtmlEntities(String, String) 321 */ 322 @Deprecated 323 public static String decodeHtmlEntities(String input, String encoding) { 324 325 Matcher matcher = ENTITIY_PATTERN.matcher(input); 326 StringBuffer result = new StringBuffer(input.length()); 327 Charset charset = Charset.forName(encoding); 328 CharsetEncoder encoder = charset.newEncoder(); 329 330 while (matcher.find()) { 331 String entity = matcher.group(); 332 String value = entity.substring(2, entity.length() - 1); 333 int c = Integer.valueOf(value).intValue(); 334 335 if (c < 128) { 336 // first 128 chars are contained in almost every charset 337 entity = new String(new char[] {(char)c}); 338 // this is intended as performance improvement since 339 // the canEncode() operation appears quite CPU heavy 340 } else if (encoder.canEncode((char)c)) { 341 // encoder can encode this char 342 entity = new String(new char[] {(char)c}); 343 } 344 matcher.appendReplacement(result, entity); 345 } 346 matcher.appendTail(result); 347 return result.toString(); 348 } 349 350 /** 351 * Decodes a string used as parameter in an uri in a way independent of other encodings/decodings applied before.<p> 352 * 353 * @param input the encoded parameter string 354 * 355 * @return the decoded parameter string 356 * 357 * @see #encodeParameter(String) 358 */ 359 public static String decodeParameter(String input) { 360 361 String result = CmsStringUtil.substitute(input, ENTITY_REPLACEMENT, ENTITY_PREFIX); 362 return CmsEncoder.decodeHtmlEntities(result, OpenCms.getSystemInfo().getDefaultEncoding()); 363 } 364 365 /** 366 * Decodes a parameter which has been encoded from a string list using encodeStringsAsBase64Parameter.<p> 367 * 368 * @param data the data to decode 369 * @return the list of strings 370 */ 371 public static List<String> decodeStringsFromBase64Parameter(String data) { 372 373 data = StringUtils.replaceChars(data, BASE64_EXTRA_REPLACEMENTS, BASE64_EXTRA); 374 byte[] bytes = deobfuscateBytes(Base64.decodeBase64(data)); 375 try { 376 JSONArray json = new JSONArray(new String(bytes, "UTF-8")); 377 List<String> result = Lists.newArrayList(); 378 for (int i = 0; i < json.length(); i++) { 379 result.add(json.getString(i)); 380 } 381 return result; 382 } catch (UnsupportedEncodingException e) { 383 // TODO Auto-generated catch block 384 e.printStackTrace(); 385 } catch (JSONException e) { 386 throw new IllegalArgumentException("Decoding failed: " + data, e); 387 } 388 return null; 389 } 390 391 /** 392 * Encodes a String using UTF-8 encoding, which is the standard for http data transmission 393 * with GET ant POST requests.<p> 394 * 395 * @param source the String to encode 396 * 397 * @return String the encoded source String 398 */ 399 public static String encode(String source) { 400 401 return encode(source, ENCODING_UTF_8); 402 } 403 404 /** 405 * This method is a substitute for <code>URLEncoder.encode()</code>. 406 * Use this in all OpenCms core classes to ensure the encoding is 407 * always handled the same way.<p> 408 * 409 * In case you don't know what encoding to use, set the value of 410 * the <code>encoding</code> parameter to <code>null</code>. 411 * This method will then default to UTF-8 encoding, which is probably the right one.<p> 412 * 413 * @param source the String to encode 414 * @param encoding the encoding to use (if null, the system default is used) 415 * 416 * @return the encoded source String 417 */ 418 public static String encode(String source, String encoding) { 419 420 if (source == null) { 421 return null; 422 } 423 if (encoding != null) { 424 try { 425 return URLEncoder.encode(source, encoding); 426 } catch (java.io.UnsupportedEncodingException e) { 427 // will fallback to default 428 } 429 } 430 // fallback to default encoding 431 try { 432 return URLEncoder.encode(source, ENCODING_UTF_8); 433 } catch (java.io.UnsupportedEncodingException e) { 434 // ignore 435 } 436 return source; 437 } 438 439 /** 440 * Encodes all characters that are contained in the String which can not displayed 441 * in the given encodings charset with HTML entity references 442 * like <code>&#8364;</code>.<p> 443 * 444 * This is required since a Java String is 445 * internally always stored as Unicode, meaning it can contain almost every character, but 446 * the HTML charset used might not support all such characters.<p> 447 * 448 * @param input the input to encode for HTML 449 * @param encoding the charset to encode the result with 450 * 451 * @return the input with the encoded HTML entities 452 * 453 * @see #decodeHtmlEntities(String, String) 454 */ 455 public static String encodeHtmlEntities(String input, String encoding) { 456 457 StringBuffer result = new StringBuffer(input.length() * 2); 458 Charset charset = Charset.forName(encoding); 459 CharsetEncoder encoder = charset.newEncoder(); 460 input.codePoints().forEach(codepoint -> { 461 char[] charsForCodepoint = Character.toChars(codepoint); 462 boolean isSimple = (charsForCodepoint.length == 1) && (charsForCodepoint[0] < 128); 463 if (isSimple || encoder.canEncode(new String(charsForCodepoint))) { 464 result.append(charsForCodepoint); 465 } else { 466 result.append(ENTITY_PREFIX); 467 result.append(codepoint); 468 result.append(";"); 469 } 470 }); 471 return result.toString(); 472 } 473 474 /** 475 * Encodes all characters that are contained in the String which can not displayed 476 * in the given encodings charset with Java escaping like <code>\u20ac</code>.<p> 477 * 478 * This can be used to escape values used in Java property files.<p> 479 * 480 * @param input the input to encode for Java 481 * @param encoding the charset to encode the result with 482 * 483 * @return the input with the encoded Java entities 484 */ 485 public static String encodeJavaEntities(String input, String encoding) { 486 487 StringBuffer result = new StringBuffer(input.length() * 2); 488 CharBuffer buffer = CharBuffer.wrap(input.toCharArray()); 489 Charset charset = Charset.forName(encoding); 490 CharsetEncoder encoder = charset.newEncoder(); 491 for (int i = 0; i < buffer.length(); i++) { 492 int c = buffer.get(i); 493 if (c < 128) { 494 // first 128 chars are contained in almost every charset 495 result.append((char)c); 496 // this is intended as performance improvement since 497 // the canEncode() operation appears quite CPU heavy 498 } else if (encoder.canEncode((char)c)) { 499 // encoder can encode this char 500 result.append((char)c); 501 } else { 502 // append Java entity reference 503 result.append("\\u"); 504 String hex = Integer.toHexString(c); 505 int pad = 4 - hex.length(); 506 for (int p = 0; p < pad; p++) { 507 result.append('0'); 508 } 509 result.append(hex); 510 } 511 } 512 return result.toString(); 513 } 514 515 /** 516 * Encodes a string used as parameter in an uri in a way independent of other encodings/decodings applied later.<p> 517 * 518 * Used to ensure that GET parameters are not wrecked by wrong or incompatible configuration settings. 519 * In order to ensure this, the String is first encoded with html entities for any character that cannot encoded 520 * in US-ASCII; additionally, the plus sign is also encoded to avoid problems with the white-space replacer. 521 * Finally, the entity prefix is replaced with characters not used as delimiters in urls.<p> 522 * 523 * @param input the parameter string 524 * 525 * @return the encoded parameter string 526 */ 527 public static String encodeParameter(String input) { 528 529 String result = CmsEncoder.encodeHtmlEntities(input, CmsEncoder.ENCODING_US_ASCII); 530 result = CmsStringUtil.substitute(result, "+", PLUS_ENTITY); 531 return CmsStringUtil.substitute(result, ENTITY_PREFIX, ENTITY_REPLACEMENT); 532 } 533 534 /** 535 * Encode a list of strings as base64 data to be used in a request parameter.<p> 536 * 537 * @param strings the strings to encode 538 * @return the resulting base64 data 539 */ 540 public static String encodeStringsAsBase64Parameter(List<String> strings) { 541 542 JSONArray array = new JSONArray(); 543 for (String string : strings) { 544 array.put(string); 545 } 546 byte[] bytes; 547 try { 548 // use obfuscateBytes here to to make the output look more random 549 bytes = obfuscateBytes(array.toString().getBytes("UTF-8")); 550 } catch (UnsupportedEncodingException e) { 551 // should never happen 552 e.printStackTrace(); 553 throw new RuntimeException(e); 554 } 555 String result = Base64.encodeBase64String(bytes); 556 result = StringUtils.replaceChars(result, BASE64_EXTRA, BASE64_EXTRA_REPLACEMENTS); 557 return result; 558 } 559 560 /** 561 * Encodes a String in a way similar to the JavaScript "encodeURIcomponent" function, 562 * using "UTF-8" for character encoding encoding.<p> 563 * 564 * JavaScript "decodeURIcomponent" can decode Strings that have been encoded using this method.<p> 565 * 566 * <b>Directly exposed for JSP EL<b>, not through {@link org.opencms.jsp.util.CmsJspElFunctions}.<p> 567 * 568 * @param source The text to be encoded 569 * 570 * @return The encoded string 571 * 572 * @see #escape(String, String) 573 */ 574 public static String escape(String source) { 575 576 return escape(source, ENCODING_UTF_8); 577 } 578 579 /** 580 * Encodes a String in a way similar to the JavaScript "encodeURIcomponent" function.<p> 581 * 582 * JavaScript "decodeURIcomponent" can decode Strings that have been encoded using this method, 583 * provided "UTF-8" has been used as encoding.<p> 584 * 585 * <b>Directly exposed for JSP EL<b>, not through {@link org.opencms.jsp.util.CmsJspElFunctions}.<p> 586 * 587 * @param source The text to be encoded 588 * @param encoding the encoding type 589 * 590 * @return The encoded string 591 */ 592 public static String escape(String source, String encoding) { 593 594 // the blank is encoded into "+" not "%20" when using standard encode call 595 return CmsStringUtil.substitute(encode(source, encoding), "+", "%20"); 596 } 597 598 /** 599 * Escapes special characters in a HTML-String with their number-based 600 * entity representation, for example & becomes &#38;.<p> 601 * 602 * A character <code>num</code> is replaced if<br> 603 * <code>((ch != 32) && ((ch > 122) || (ch < 48) || (ch == 60) || (ch == 62)))</code><p> 604 * 605 * @param source the String to escape 606 * 607 * @return String the escaped String 608 * 609 * @see #escapeXml(String) 610 */ 611 public static String escapeHtml(String source) { 612 613 if (source == null) { 614 return null; 615 } 616 StringBuffer result = new StringBuffer(source.length() * 2); 617 for (int i = 0; i < source.length(); i++) { 618 int ch = source.charAt(i); 619 // avoid escaping already escaped characters 620 if (ch == 38) { 621 int terminatorIndex = source.indexOf(";", i); 622 if (terminatorIndex > 0) { 623 if (source.substring(i + 1, terminatorIndex).matches("#[0-9]+|lt|gt|amp|quote")) { 624 result.append(source.substring(i, terminatorIndex + 1)); 625 // Skip remaining chars up to (and including) ";" 626 i = terminatorIndex; 627 continue; 628 } 629 } 630 } 631 if ((ch != 32) && ((ch > 122) || (ch < 48) || (ch == 60) || (ch == 62))) { 632 result.append(ENTITY_PREFIX); 633 result.append(ch); 634 result.append(";"); 635 } else { 636 result.append((char)ch); 637 } 638 } 639 return new String(result); 640 } 641 642 /** 643 * Escapes non ASCII characters in a HTML-String with their number-based 644 * entity representation, for example & becomes &#38;.<p> 645 * 646 * A character <code>num</code> is replaced if<br> 647 * <code>(ch > 255)</code><p> 648 * 649 * @param source the String to escape 650 * 651 * @return String the escaped String 652 * 653 * @see #escapeXml(String) 654 */ 655 public static String escapeNonAscii(String source) { 656 657 if (source == null) { 658 return null; 659 } 660 StringBuffer result = new StringBuffer(source.length() * 2); 661 for (int i = 0; i < source.length(); i++) { 662 int ch = source.charAt(i); 663 if (ch > 255) { 664 result.append(ENTITY_PREFIX); 665 result.append(ch); 666 result.append(";"); 667 } else { 668 result.append((char)ch); 669 } 670 } 671 return new String(result); 672 } 673 674 /** 675 * A simple method to avoid injection.<p> 676 * 677 * Replaces all single quotes to double single quotes in the value parameter of the SQL statement.<p> 678 * 679 * @param source the String to escape SQL from 680 * @return the escaped value of the parameter source 681 */ 682 public static String escapeSql(String source) { 683 684 return source.replaceAll("'", "''"); 685 } 686 687 /** 688 * Escapes the wildcard characters in a string which will be used as the pattern for a SQL LIKE clause.<p> 689 * 690 * @param pattern the pattern 691 * @param escapeChar the character which should be used as the escape character 692 * 693 * @return the escaped pattern 694 */ 695 public static String escapeSqlLikePattern(String pattern, char escapeChar) { 696 697 char[] special = new char[] {escapeChar, '%', '_'}; 698 String result = pattern; 699 for (char charToEscape : special) { 700 result = result.replaceAll("" + charToEscape, "" + escapeChar + charToEscape); 701 } 702 return result; 703 } 704 705 /** 706 * Encodes a String in a way similar JavaScript "encodeURIcomponent" function.<p> 707 * 708 * Multiple blanks are encoded _multiply_ with <code>%20</code>.<p> 709 * 710 * @param source The text to be encoded 711 * @param encoding the encoding type 712 * 713 * @return The encoded String 714 */ 715 public static String escapeWBlanks(String source, String encoding) { 716 717 if (CmsStringUtil.isEmpty(source)) { 718 return source; 719 } 720 StringBuffer ret = new StringBuffer(source.length() * 2); 721 722 // URLEncode the text string 723 // this produces a very similar encoding to JavaSscript encoding, 724 // except the blank which is not encoded into "%20" instead of "+" 725 726 String enc = encode(source, encoding); 727 for (int z = 0; z < enc.length(); z++) { 728 char c = enc.charAt(z); 729 if (c == '+') { 730 ret.append("%20"); 731 } else { 732 ret.append(c); 733 } 734 } 735 return ret.toString(); 736 } 737 738 /** 739 * Escapes a String so it may be printed as text content or attribute 740 * value in a HTML page or an XML file.<p> 741 * 742 * This method replaces the following characters in a String: 743 * <ul> 744 * <li><b><</b> with &lt; 745 * <li><b>></b> with &gt; 746 * <li><b>&</b> with &amp; 747 * <li><b>"</b> with &quot; 748 * </ul><p> 749 * 750 * @param source the string to escape 751 * 752 * @return the escaped string 753 * 754 * @see #escapeHtml(String) 755 */ 756 public static String escapeXml(String source) { 757 758 return escapeXml(source, false); 759 } 760 761 /** 762 * Escapes a String so it may be printed as text content or attribute 763 * value in a HTML page or an XML file.<p> 764 * 765 * This method replaces the following characters in a String: 766 * <ul> 767 * <li><b><</b> with &lt; 768 * <li><b>></b> with &gt; 769 * <li><b>&</b> with &amp; 770 * <li><b>"</b> with &quot; 771 * </ul><p> 772 * 773 * @param source the string to escape 774 * @param doubleEscape if <code>false</code>, all entities that already are escaped are left untouched 775 * 776 * @return the escaped string 777 * 778 * @see #escapeHtml(String) 779 */ 780 public static String escapeXml(String source, boolean doubleEscape) { 781 782 if (source == null) { 783 return null; 784 } 785 StringBuffer result = new StringBuffer(source.length() * 2); 786 787 for (int i = 0; i < source.length(); ++i) { 788 char ch = source.charAt(i); 789 switch (ch) { 790 case '<': 791 result.append("<"); 792 break; 793 case '>': 794 result.append(">"); 795 break; 796 case '&': 797 // don't escape already escaped international and special characters 798 if (!doubleEscape) { 799 int terminatorIndex = source.indexOf(";", i); 800 if (terminatorIndex > 0) { 801 if (source.substring(i + 1, terminatorIndex).matches("#[0-9]+")) { 802 result.append(ch); 803 break; 804 } 805 } 806 } 807 // note that to other "break" in the above "if" block 808 result.append("&"); 809 break; 810 case '"': 811 result.append("""); 812 break; 813 case '\'': 814 result.append("'"); 815 break; 816 default: 817 result.append(ch); 818 } 819 } 820 return new String(result); 821 } 822 823 /** 824 * Checks if a given encoding name is actually supported, and if so 825 * resolves it to it's canonical name, if not it returns the given fallback 826 * value.<p> 827 * 828 * Charsets have a set of aliases. For example, valid aliases for "UTF-8" 829 * are "UTF8", "utf-8" or "utf8". This method resolves any given valid charset name 830 * to it's "canonical" form, so that simple String comparison can be used 831 * when checking charset names internally later.<p> 832 * 833 * Please see <a href="http://www.iana.org/assignments/character-sets">http://www.iana.org/assignments/character-sets</a> 834 * for a list of valid charset alias names.<p> 835 * 836 * @param encoding the encoding to check and resolve 837 * @param fallback the fallback encoding scheme 838 * 839 * @return the resolved encoding name, or the fallback value 840 */ 841 public static String lookupEncoding(String encoding, String fallback) { 842 843 String result = m_encodingCache.get(encoding); 844 if (result != null) { 845 return result; 846 } 847 848 try { 849 result = Charset.forName(encoding).name(); 850 m_encodingCache.put(encoding, result); 851 return result; 852 } catch (Throwable t) { 853 // we will use the default value as fallback 854 } 855 856 return fallback; 857 } 858 859 /** 860 * Re-decodes a String that has not been correctly decoded and thus has scrambled 861 * character bytes.<p> 862 * 863 * This is an equivalent to the JavaScript "decodeURIComponent" function. 864 * It converts from the default "UTF-8" to the currently selected system encoding.<p> 865 * 866 * @param input the String to convert 867 * 868 * @return String the converted String 869 */ 870 public static String redecodeUriComponent(String input) { 871 872 if (input == null) { 873 return input; 874 } 875 return new String( 876 changeEncoding(input.getBytes(), ENCODING_UTF_8, OpenCms.getSystemInfo().getDefaultEncoding())); 877 } 878 879 /** 880 * Decodes a String in a way similar to the JavaScript "decodeURIcomponent" function, 881 * using "UTF-8" for character encoding.<p> 882 * 883 * This method can decode Strings that have been encoded in JavaScript with "encodeURIcomponent".<p> 884 * 885 * <b>Directly exposed for JSP EL<b>, not through {@link org.opencms.jsp.util.CmsJspElFunctions}.<p> 886 * 887 * @param source The String to be decoded 888 * 889 * @return The decoded String 890 */ 891 public static String unescape(String source) { 892 893 return unescape(source, ENCODING_UTF_8); 894 } 895 896 /** 897 * Decodes a String in a way similar to the JavaScript "decodeURIcomponent" function.<p> 898 * 899 * This method can decode Strings that have been encoded in JavaScript with "encodeURIcomponent", 900 * provided "UTF-8" is used as encoding.<p> 901 * 902 * <b>Directly exposed for JSP EL<b>, not through {@link org.opencms.jsp.util.CmsJspElFunctions}.<p> 903 * 904 * @param source The String to be decoded 905 * @param encoding the encoding type 906 * 907 * @return The decoded String 908 */ 909 public static String unescape(String source, String encoding) { 910 911 if (source == null) { 912 return null; 913 } 914 int len = source.length(); 915 // to use standard decoder we need to replace '+' with "%20" (space) 916 StringBuffer preparedSource = new StringBuffer(len); 917 for (int i = 0; i < len; i++) { 918 char c = source.charAt(i); 919 if (c == '+') { 920 preparedSource.append("%20"); 921 } else { 922 preparedSource.append(c); 923 } 924 } 925 return decode(preparedSource.toString(), encoding); 926 } 927 928 /** 929 * Decrypts a byte array obfuscated with 'obfuscateBytes'.<p> 930 * 931 * @param source the source 932 * @return the resuvlt 933 */ 934 private static byte[] deobfuscateBytes(byte[] source) { 935 936 byte[] result = new byte[source.length - 1]; 937 System.arraycopy(source, 1, result, 0, source.length - 1); 938 for (int i = 0; i < result.length; i++) { 939 result[i] = (byte)(0xFF & (result[i] ^ source[0])); 940 } 941 return result; 942 } 943 944 /** 945 * Simple "obfuscation" for byte arrays using random numbers.<p> 946 * 947 * @param source the source array 948 * @return the result 949 */ 950 private static byte[] obfuscateBytes(byte[] source) { 951 952 byte[] s = new byte[1]; 953 m_random.nextBytes(s); 954 byte[] result = new byte[source.length + 1]; 955 System.arraycopy(source, 0, result, 1, source.length); 956 result[0] = s[0]; 957 for (int i = 1; i < result.length; i++) { 958 result[i] = (byte)(0xFF & (result[i] ^ s[0])); 959 } 960 return result; 961 } 962 963}