/*
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software GmbH & Co. KG, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.opencms.jsp.decorator;

import org.opencms.file.CmsObject;
import org.opencms.main.CmsException;
import org.opencms.main.CmsLog;
import org.opencms.util.CmsHtmlParser;
import org.opencms.util.CmsStringUtil;

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;

import org.htmlparser.Tag;
import org.htmlparser.Text;
import org.htmlparser.util.Translate;

/**
 * The CmsHtmlDecorator is the main object for processing the text decorations.<p>
 *
 * It uses the information of a <code>{@link CmsDecoratorConfiguration}</code> to process the
 * text decorations.
 *
 * @since 6.1.3
 */
public class CmsHtmlDecorator extends CmsHtmlParser {

    /**
     * Delimiters for first level string separation.<p>
     *
     * Special characters are listed both literally and as their HTML entity,
     * because the text nodes handed to the decorator may still contain entities.
     */
    private static final String[] DELIMITERS = {
        " ",
        ",",
        ".",
        ";",
        ":",
        "!",
        "(",
        ")",
        "'",
        "?",
        "/",
        "\u00A7",
        "\"",
        "&nbsp;",
        "&quot;",
        "\r\n",
        "\n"};

    /**
     * Delimiters for second level string separation.<p>
     *
     * Used to split a single first level word (for example "foo-bar") into its parts.
     */
    private static final String[] DELIMITERS_SECOND_LEVEL = {
        "-",
        "@",
        "/",
        "&frasl;",
        ".",
        ",",
        "(",
        ")",
        "{",
        "}",
        "[",
        "]",
        "\"",
        "&quot;",
        "!",
        "?",
        ";",
        "&",
        "&amp;",
        "%",
        "\u00A7",
        "&sect;"};

    /** Maximum number of following word list entries inspected in a forward lookup. */
    private static final int FORWARD_LOOKUP = 10;

    /** The log object for this class. */
    private static final Log LOG = CmsLog.getLog(CmsHtmlDecorator.class);

    /** Non translators, strings starting with those values must not be decoded. */
    private static final String[] NON_TRANSLATORS = {"&nbsp;", "&quot;"};

    /** The decoration configuration.<p> */
    I_CmsDecoratorConfiguration m_config;

    /** Decoration bundle to be used by the decorator. */
    CmsDecorationBundle m_decorations;

    /** The CmsObject, used to read the locale of the current request context. */
    private CmsObject m_cms;

    /** Decorate flag, <code>false</code> while the text of an excluded tag is processed. */
    private boolean m_decorate;

    /**
     * Constructor, creates a new CmsHtmlDecorator with a configuration created from the given CmsObject.<p>
     *
     * Delegates to {@link #CmsHtmlDecorator(CmsObject, I_CmsDecoratorConfiguration)} so that
     * the CmsObject is stored as well; it is required later on to read the request locale
     * when a decoration is appended.
     *
     * @param cms the CmsObject
     * @throws CmsException if something goes wrong
     */
    public CmsHtmlDecorator(CmsObject cms)
    throws CmsException {

        this(cms, new CmsDecoratorConfiguration(cms));
    }

    /**
     * Constructor, creates a new CmsHtmlDecorator with a given configuration.<p>
     *
     * @param cms the CmsObject
     * @param config the configuration to be used
     */
    public CmsHtmlDecorator(CmsObject cms, I_CmsDecoratorConfiguration config) {

        m_config = config;
        m_decorations = config.getDecorations();
        m_result = new StringBuffer(512);
        m_echo = true;
        m_decorate = true;
        m_cms = cms;
    }

    /**
     * Splits a String into substrings along the provided delimiter list and returns
     * the result as a List of Substrings.<p>
     *
     * At every step the delimiter with the smallest position in the remaining source wins;
     * if several delimiters match at the same position, the one listed first in the
     * delimiter array is used. An empty token between two directly adjacent delimiters
     * is added to the result as an empty String. Empty delimiters are ignored.
     *
     * @param source the String to split
     * @param delimiters the delimiters to split at
     * @param trim flag to indicate if leading and trailing whitespaces should be omitted
     * @param includeDelimiters flag to indicate if the delimiters should be included as well
     *
     * @return the List of splitted Substrings
     */
    public static List<String> splitAsList(String source, String[] delimiters, boolean trim, boolean includeDelimiters) {

        List<String> result = new ArrayList<String>();
        int length = source.length();
        int tokenStart = 0;

        while (true) {
            // find the delimiter with the earliest occurrence at or after the current position
            int delimiterPos = -1;
            String delimiter = null;
            for (String candidate : delimiters) {
                if (candidate.isEmpty()) {
                    // an empty delimiter matches everywhere without consuming input,
                    // the split would never terminate
                    continue;
                }
                int pos = source.indexOf(candidate, tokenStart);
                if ((pos > -1) && ((delimiterPos == -1) || (pos < delimiterPos))) {
                    delimiterPos = pos;
                    delimiter = candidate;
                }
            }
            if (delimiterPos == -1) {
                break;
            }

            // a zero-length item at the very start of the source is not seen as a token
            if ((tokenStart < delimiterPos) || ((tokenStart > 0) && (tokenStart < length))) {
                String token = source.substring(tokenStart, delimiterPos);
                result.add(trim ? token.trim() : token);
            }
            // add the delimiter to the list as well
            if (includeDelimiters) {
                result.add(delimiter);
            }
            tokenStart = delimiterPos + delimiter.length();
        }

        // is there a non-empty String to cut from the tail?
        if (tokenStart < length) {
            String tail = source.substring(tokenStart);
            result.add(trim ? tail.trim() : tail);
        }
        return result;
    }

    /**
     * Processes a HTML string and adds text decorations according to the decoration configuration.<p>
     *
     * @param html a string holding the HTML code that should be added with text decorations
     * @param encoding the encoding to be used
     * @return a HTML string with the decorations added.
     * @throws Exception if something goes wrong
     */
    public String doDecoration(String html, String encoding) throws Exception {

        return process(html, encoding);
    }

    /**
     * Resets the first occurrence flags of all decoration objects.<p>
     *
     * This is necessary if decoration objects should be used for processing more than once.
     */
    public void resetDecorationDefinitions() {

        m_config.resetMarkedDecorations();
    }

    /**
     * @see org.htmlparser.visitors.NodeVisitor#visitStringNode(org.htmlparser.Text)
     */
    @Override
    public void visitStringNode(Text text) {

        appendText(text.toPlainTextString(), DELIMITERS, true);
    }

    /**
     * @see org.htmlparser.visitors.NodeVisitor#visitTag(org.htmlparser.Tag)
     */
    @Override
    public void visitTag(Tag tag) {

        super.visitTag(tag);
        // decoration is switched off if either the tag itself is excluded
        // or the tag carries one of the excluded attributes
        m_decorate = !m_config.isExcluded(tag.getTagName()) && !m_config.isExcludedAttr(tag);
    }

    /**
     * Appends a text decoration to the output.<p>
     *
     * A lookup is made to find a text decoration for each word in the given text.
     * If a text decoration is found, the word will be decorated and added to the output.
     * If no text decoration is found, the word alone will be added to the output.<p>
     *
     * After the text has been processed the decorate flag is switched on again.
     *
     * @param text the text to add a text decoration for
     * @param delimiters delimiters for text separation
     * @param recursive flag for recursive search with the second level delimiters
     */
    private void appendText(String text, String[] delimiters, boolean recursive) {

        if (LOG.isDebugEnabled()) {
            LOG.debug(Messages.get().getBundle().key(Messages.LOG_HTML_DECORATOR_APPEND_TEXT_2, m_config, text));
        }

        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(text) && m_decorate) {

            // split the input into single words, the delimiters are kept as list entries
            List<String> wordList = splitAsList(text, delimiters, false, true);
            int wordCount = wordList.size();
            for (int i = 0; i < wordCount; i++) {
                String word = wordList.get(i);
                boolean alreadyDecorated = false;
                boolean decode = mustDecode(word, wordList, i);
                if (LOG.isDebugEnabled()) {
                    LOG.debug(Messages.get().getBundle().key(
                        Messages.LOG_HTML_DECORATOR_PROCESS_WORD_2,
                        word,
                        Boolean.valueOf(decode)));
                }

                // test if the word must be decoded
                if (decode) {
                    word = Translate.decode(word);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug(Messages.get().getBundle().key(Messages.LOG_HTML_DECORATOR_DECODED_WORD_1, word));
                    }
                }

                // test if the word is no delimiter
                // try to get a decoration if it is not
                CmsDecorationObject decObj = null;
                CmsDecorationObject wordDecObj = null;
                if (!hasDelimiter(word, delimiters)) {
                    wordDecObj = (CmsDecorationObject)m_decorations.get(word);
                }

                if (LOG.isDebugEnabled()) {
                    LOG.debug(Messages.get().getBundle().key(
                        Messages.LOG_HTML_DECORATOR_DECORATION_FOUND_2,
                        wordDecObj,
                        word));
                }

                if (recursive
                    && hasDelimiter(word, DELIMITERS_SECOND_LEVEL)
                    && !startsWithDelimiter(word, DELIMITERS_SECOND_LEVEL)) {
                    // the word itself consists of several parts divided by second level delimiters
                    // add the following symbol if possible to allow the second level decoration
                    // test to make a forward lookup as well
                    String secondLevel = word;
                    if (i < (wordCount - 1)) {
                        String nextWord = wordList.get(i + 1);
                        if (!" ".equals(nextWord)) {
                            // don't allow HTML entities to be split in the middle during the recursion!
                            String afterNextWord = "";
                            if (i < (wordCount - 2)) {
                                afterNextWord = wordList.get(i + 2);
                            }
                            if (nextWord.contains("&") && ";".equals(afterNextWord)) {
                                secondLevel = word + nextWord + ";";
                                i += 2;
                            } else {
                                secondLevel = word + nextWord;
                                i++;
                            }
                        }
                    }
                    // check if the result is modified by any second level decoration
                    int sizeBefore = m_result.length();
                    appendText(secondLevel, DELIMITERS_SECOND_LEVEL, false);
                    if (sizeBefore != m_result.length()) {
                        alreadyDecorated = true;
                    }

                } else {
                    // make a forward lookup to the next elements of the word list to check
                    // if the combination of word and delimiter can be found as a decoration key
                    // an example would be "Dr." which must be decorated with "Doctor"
                    StringBuilder decKey = new StringBuilder(word);
                    // never look beyond the end of the word list
                    int forwardLookup = Math.min(wordCount - i - 1, FORWARD_LOOKUP);
                    for (int j = 1; j <= forwardLookup; j++) {
                        decKey.append(wordList.get(i + j));
                        decObj = (CmsDecorationObject)m_decorations.get(decKey.toString());
                        if (LOG.isDebugEnabled()) {
                            LOG.debug(Messages.get().getBundle().key(
                                Messages.LOG_HTML_DECORATOR_DECORATION_FOUND_FWL_3,
                                decObj,
                                word,
                                Integer.valueOf(j)));
                        }
                        if (decObj != null) {
                            // build the decoration exactly once, so that the output
                            // does not depend on debug logging being enabled
                            String decoration = getDecoration(decObj, decKey.toString());
                            if (LOG.isDebugEnabled()) {
                                LOG.debug(Messages.get().getBundle().key(
                                    Messages.LOG_HTML_DECORATOR_DECORATION_APPEND_DECORATION_1,
                                    decoration));
                            }
                            // decorate the current word with the following delimiter(s)
                            m_result.append(decoration);
                            // important, we must skip the consumed elements of the list
                            i += j;
                            alreadyDecorated = true;
                            break;
                        }
                    }
                    if ((decObj == null) && (wordDecObj == null)) {
                        if (LOG.isDebugEnabled()) {
                            LOG.debug(Messages.get().getBundle().key(
                                Messages.LOG_HTML_DECORATOR_DECORATION_APPEND_WORD_1,
                                word));
                        }
                        // no decoration was found, use the word alone
                        m_result.append(word);
                    }
                }

                if ((wordDecObj != null) && !alreadyDecorated) {
                    // build the decoration exactly once, see above
                    String decoration = getDecoration(wordDecObj, word);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug(Messages.get().getBundle().key(
                            Messages.LOG_HTML_DECORATOR_DECORATION_APPEND_DECORATION_1,
                            decoration));
                    }
                    // decorate the current word
                    m_result.append(decoration);
                }
            }
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug(Messages.get().getBundle().key(
                    Messages.LOG_HTML_DECORATOR_DECORATION_APPEND_ORIGINALTEXT_1,
                    text));
            }
            m_result.append(text);
        }
        m_decorate = true;
    }

    /**
     * Builds the decorated content for a decoration key, using the locale of the current request context.<p>
     *
     * @param decoration the decoration object to use
     * @param key the text the decoration was found for
     * @return the decorated content
     */
    private String getDecoration(CmsDecorationObject decoration, String key) {

        return decoration.getContentDecoration(m_config, key, m_cms.getRequestContext().getLocale().toString());
    }

    /**
     * Checks if a word contains a given delimiter.<p>
     *
     * @param word the word to test
     * @param delimiters array of delimiter strings
     * @return true if the word contains at least one of the delimiters, false otherwise
     */
    private boolean hasDelimiter(String word, String[] delimiters) {

        for (String delimiter : delimiters) {
            if (word.indexOf(delimiter) > -1) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks if a word must be decoded.<p>
     *
     * A word is not decoded if it contains a "&amp;" and the following entry of the word list
     * starts with a ";", or if it starts with one of the non translator tokens.<p>
     *
     * @param word the word to test
     * @param wordList the list of words the word is taken from
     * @param count the position of the word in the list
     *
     * @return true if the word must be decoded, false otherwise
     */
    private boolean mustDecode(String word, List<String> wordList, int count) {

        String nextWord = null;
        if (count < (wordList.size() - 1)) {
            nextWord = wordList.get(count + 1);
        }
        // test if the current word contains a "&" and the following starts with a ";"
        // if so, we must not decode the word
        if ((nextWord != null) && (word.indexOf("&") > -1) && nextWord.startsWith(";")) {
            return false;
        }
        // now check if the word matches one of the non decoder tokens
        for (String nonTranslator : NON_TRANSLATORS) {
            if (word.startsWith(nonTranslator)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks if a word starts with a given delimiter.<p>
     *
     * @param word the word to test
     * @param delimiters array of delimiter strings
     * @return true if the word starts with at least one of the delimiters, false otherwise
     */
    private boolean startsWithDelimiter(String word, String[] delimiters) {

        for (String delimiter : delimiters) {
            if (word.startsWith(delimiter)) {
                return true;
            }
        }
        return false;
    }

}