001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.jsp.decorator;
029
030import org.opencms.file.CmsObject;
031import org.opencms.main.CmsException;
032import org.opencms.main.CmsLog;
033import org.opencms.util.CmsHtmlParser;
034import org.opencms.util.CmsStringUtil;
035
036import java.util.ArrayList;
037import java.util.List;
038
039import org.apache.commons.logging.Log;
040
041import org.htmlparser.Tag;
042import org.htmlparser.Text;
043import org.htmlparser.util.Translate;
044
045/**
046 * The CmsHtmlDecorator is the main object for processing the text decorations.<p>
047 *
048 * It uses the information of a <code>{@link CmsDecoratorConfiguration}</code> to process the
049 * text decorations.
050 *
051 * @since 6.1.3
052 */
053public class CmsHtmlDecorator extends CmsHtmlParser {
054
055    /** Delimiters for string seperation. */
056    private static final String[] DELIMITERS = {
057        " ",
058        ",",
059        ".",
060        ";",
061        ":",
062        "!",
063        "(",
064        ")",
065        "'",
066        "?",
067        "/",
068        "\u00A7",
069        "\"",
070        "&nbsp;",
071        "&quot;",
072        "\r\n",
073        "\n"};
074
075    /** Delimiters for second level string separation. */
076    private static final String[] DELIMITERS_SECOND_LEVEL = {
077        "-",
078        "@",
079        "/",
080        "&frasl;",
081        ".",
082        ",",
083        "(",
084        ")",
085        "{",
086        "}",
087        "[",
088        "]",
089        "\"",
090        "&quot;",
091        "!",
092        "?",
093        ";",
094        "&",
095        "&amp;",
096        "%",
097        "\u00A7",
098        "&sect;"};
099
100    /** Steps for forward lookup in workd list. */
101    private static final int FORWARD_LOOKUP = 10;
102
103    /** The log object for this class. */
104    private static final Log LOG = CmsLog.getLog(CmsHtmlDecorator.class);
105
106    /** Non translators, strings starting with those values must not be translated. */
107    private static final String[] NON_TRANSLATORS = {"&nbsp;", "&quot;"};
108
109    /** The decoration configuration.<p> */
110    I_CmsDecoratorConfiguration m_config;
111
112    /** Decoration bundle to be used by the decorator. */
113    CmsDecorationBundle m_decorations;
114
115    /** the CmsObject. */
116    private CmsObject m_cms;
117
118    /** decorate flag. */
119    private boolean m_decorate;
120
121    /**
122     * Constructor, creates a new, empty CmsHtmlDecorator.<p>
123     *
124     * @param cms the CmsObject
125     * @throws CmsException if something goes wrong
126     */
127    public CmsHtmlDecorator(CmsObject cms)
128    throws CmsException {
129
130        m_config = new CmsDecoratorConfiguration(cms);
131        m_decorations = m_config.getDecorations();
132        m_result = new StringBuffer(512);
133        m_echo = true;
134        m_decorate = true;
135
136    }
137
138    /**
139     * Constructor, creates a new CmsHtmlDecorator with a given configuration.<p>
140     *
141     * @param cms the CmsObject
142     * @param config the configuration to be used
143     *
144     */
145    public CmsHtmlDecorator(CmsObject cms, I_CmsDecoratorConfiguration config) {
146
147        m_config = config;
148        m_decorations = config.getDecorations();
149        m_result = new StringBuffer(512);
150        m_echo = true;
151        m_decorate = true;
152        m_cms = cms;
153    }
154
155    /**
156     * Splits a String into substrings along the provided delimiter list and returns
157     * the result as a List of Substrings.<p>
158     *
159     * @param source the String to split
160     * @param delimiters the delimiters to split at
161     * @param trim flag to indicate if leading and trailing whitespaces should be omitted
162     * @param includeDelimiters flag to indicate if the delimiters should be included as well
163     *
164     * @return the List of splitted Substrings
165     */
166    public static List<String> splitAsList(String source, String[] delimiters, boolean trim, boolean includeDelimiters) {
167
168        List<String> result = new ArrayList<String>();
169        String delimiter = "";
170        int i = 0;
171        int l = source.length();
172        int n = -1;
173        int max = Integer.MAX_VALUE;
174
175        // find the next delimiter
176        for (int j = 0; j < delimiters.length; j++) {
177            int delimPos = source.indexOf(delimiters[j]);
178            if (delimPos > -1) {
179                if (delimPos < max) {
180                    max = delimPos;
181                    n = delimPos;
182                    delimiter = delimiters[j];
183                }
184            }
185        }
186
187        while (n != -1) {
188            // zero - length items are not seen as tokens at start or end
189            if ((i < n) || ((i > 0) && (i < l))) {
190                result.add(trim ? source.substring(i, n).trim() : source.substring(i, n));
191                // add the delimiter to the list as well
192                if (includeDelimiters && ((n + delimiter.length()) <= l)) {
193                    result.add(source.substring(n, n + delimiter.length()));
194                }
195            } else {
196                // add the delimiter to the list as well
197                if (includeDelimiters && source.startsWith(delimiter)) {
198                    result.add(delimiter);
199                }
200            }
201            i = n + delimiter.length();
202
203            // find the next delimiter
204            max = Integer.MAX_VALUE;
205            n = -1;
206            for (int j = 0; j < delimiters.length; j++) {
207                int delimPos = source.indexOf(delimiters[j], i);
208                if (delimPos > -1) {
209                    if (delimPos < max) {
210                        max = delimPos;
211                        n = delimPos;
212                        delimiter = delimiters[j];
213                    }
214                }
215            }
216
217        }
218        // is there a non - empty String to cut from the tail?
219        if (n < 0) {
220            n = source.length();
221        }
222        if (i < n) {
223            result.add(trim ? source.substring(i).trim() : source.substring(i));
224        }
225        return result;
226    }
227
228    /**
229     * Processes a HTML string and adds text decorations according to the decoration configuration.<p>
230     *
231     * @param html a string holding the HTML code that should be added with text decorations
232     * @param encoding the encoding to be used
233     * @return a HTML string with the decorations added.
234     * @throws Exception if something goes wrong
235     */
236    public String doDecoration(String html, String encoding) throws Exception {
237
238        return process(html, encoding);
239    }
240
241    /**
242     * Resets the first occurance flags of all decoration objects.<p>
243     *
244     * This is nescessary if decoration objects should be used for processing more than once.     *
245     */
246    public void resetDecorationDefinitions() {
247
248        m_config.resetMarkedDecorations();
249    }
250
251    /**
252     * @see org.htmlparser.visitors.NodeVisitor#visitStringNode(org.htmlparser.Text)
253     */
254    @Override
255    public void visitStringNode(Text text) {
256
257        appendText(text.toPlainTextString(), DELIMITERS, true);
258    }
259
260    /**
261     * @see org.htmlparser.visitors.NodeVisitor#visitTag(org.htmlparser.Tag)
262     */
263    @Override
264    public void visitTag(Tag tag) {
265
266        super.visitTag(tag);
267        // get the tagname
268        String tagname = tag.getTagName();
269        // this is one of the tags that should not allow decoation
270        if (m_config.isExcluded(tagname)) {
271            m_decorate = false;
272        } else {
273            m_decorate = true;
274            // check if the tag has one of the exclusd attribute
275            if (m_config.isExcludedAttr(tag)) {
276                m_decorate = false;
277            }
278        }
279
280    }
281
282    /**
283     * Appends a text decoration to the output.<p>
284     *
285     * A lookup is made to find a text decoration for each word in the given text.
286     * If a text decoration is found, the word will be decorated and added to the output.
287     * If no text decoration is found, the word alone will be added to the output.
288     *
289     * @param text the text to add a text decoration for
290     * @param delimiters delimiters for text seperation
291     * @param recursive flag for recusrive search
292     */
293    private void appendText(String text, String[] delimiters, boolean recursive) {
294
295        if (LOG.isDebugEnabled()) {
296            LOG.debug(Messages.get().getBundle().key(Messages.LOG_HTML_DECORATOR_APPEND_TEXT_2, m_config, text));
297        }
298
299        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(text) && m_decorate) {
300
301            // split the input into single words
302            List<String> wordList = splitAsList(text, delimiters, false, true);
303            int wordCount = wordList.size();
304            for (int i = 0; i < wordCount; i++) {
305                String word = wordList.get(i);
306                boolean alreadyDecorated = false;
307                if (LOG.isDebugEnabled()) {
308                    LOG.debug(Messages.get().getBundle().key(
309                        Messages.LOG_HTML_DECORATOR_PROCESS_WORD_2,
310                        word,
311                        Boolean.valueOf(mustDecode(word, wordList, i))));
312                }
313
314                // test if the word must be decoded
315                if (mustDecode(word, wordList, i)) {
316                    word = Translate.decode(word);
317                    if (LOG.isDebugEnabled()) {
318                        LOG.debug(Messages.get().getBundle().key(Messages.LOG_HTML_DECORATOR_DECODED_WORD_1, word));
319                    }
320                }
321
322                // test if the word is no delimiter
323                // try to get a decoration if it is not
324                CmsDecorationObject decObj = null;
325                CmsDecorationObject wordDecObj = null;
326                if (!hasDelimiter(word, delimiters)) {
327                    wordDecObj = (CmsDecorationObject)m_decorations.get(word);
328                }
329
330                if (LOG.isDebugEnabled()) {
331                    LOG.debug(Messages.get().getBundle().key(
332                        Messages.LOG_HTML_DECORATOR_DECORATION_FOUND_2,
333                        wordDecObj,
334                        word));
335                }
336
337                // if there is a decoration object for this word, we must do the decoration
338                // if not, we must test if the word itself consists of several parts divided by
339                // second level delimiters
340                //if ((decObj == null)) {
341                if (recursive
342                    && hasDelimiter(word, DELIMITERS_SECOND_LEVEL)
343                    && !startsWithDelimiter(word, DELIMITERS_SECOND_LEVEL)) {
344                    // add the following symbol if possible to allow the second level decoration
345                    // test to make a forward lookup as well
346                    String secondLevel = word;
347                    if (i < (wordCount - 1)) {
348                        String nextWord = wordList.get(i + 1);
349                        if (!nextWord.equals(" ")) {
350                            //don't allow HTML entities to be split in the middle during the recursion!
351                            String afterNextWord = "";
352                            if (i < (wordCount - 2)) {
353                                afterNextWord = wordList.get(i + 2);
354                            }
355                            if (nextWord.contains("&") && afterNextWord.equals(";")) {
356                                secondLevel = word + nextWord + ";";
357                                i += 2;
358                            } else {
359                                secondLevel = word + nextWord;
360                                i++;
361                            }
362                        }
363                    }
364                    // check if the result is modified by any second level decoration
365                    int sizeBefore = m_result.length();
366                    appendText(secondLevel, DELIMITERS_SECOND_LEVEL, false);
367                    if (sizeBefore != m_result.length()) {
368                        alreadyDecorated = true;
369                    }
370
371                } else {
372                    // make a forward lookup to the next elements of the word list to check
373                    // if the combination of word and delimiter can be found as a decoration key
374                    // an example would be "Dr." wich must be decorated with "Doctor"
375                    StringBuffer decKey = new StringBuffer();
376                    decKey.append(word);
377                    // calculate how much forward looking must be made
378                    int forwardLookup = wordList.size() - i - 1;
379                    if (forwardLookup > FORWARD_LOOKUP) {
380                        forwardLookup = FORWARD_LOOKUP;
381                    }
382                    if (i < (wordCount - forwardLookup)) {
383                        for (int j = 1; j <= forwardLookup; j++) {
384                            decKey.append(wordList.get(i + j));
385                            decObj = (CmsDecorationObject)m_decorations.get(decKey.toString());
386                            if (LOG.isDebugEnabled()) {
387                                LOG.debug(Messages.get().getBundle().key(
388                                    Messages.LOG_HTML_DECORATOR_DECORATION_FOUND_FWL_3,
389                                    decObj,
390                                    word,
391                                    Integer.valueOf(j)));
392                            }
393                            if (decObj != null) {
394                                if (LOG.isDebugEnabled()) {
395                                    LOG.debug(Messages.get().getBundle().key(
396                                        Messages.LOG_HTML_DECORATOR_DECORATION_APPEND_DECORATION_1,
397                                        decObj.getContentDecoration(
398                                            m_config,
399                                            decKey.toString(),
400                                            m_cms.getRequestContext().getLocale().toString())));
401                                }
402                                // decorate the current word with the following delimiter
403                                m_result.append(decObj.getContentDecoration(
404                                    m_config,
405                                    decKey.toString(),
406                                    m_cms.getRequestContext().getLocale().toString()));
407                                // important, we must skip the next element of the list
408                                i += j;
409                                // reset the decObj
410                                alreadyDecorated = true;
411                                break;
412                            }
413                        }
414                    }
415                    if ((decObj == null) && (wordDecObj == null)) {
416                        if (LOG.isDebugEnabled()) {
417                            LOG.debug(Messages.get().getBundle().key(
418                                Messages.LOG_HTML_DECORATOR_DECORATION_APPEND_WORD_1,
419                                word));
420                        }
421                        // no decoration was found, use the word alone
422                        m_result.append(word);
423                    }
424                }
425                //} else {
426                if ((wordDecObj != null) && !alreadyDecorated) {
427                    if (LOG.isDebugEnabled()) {
428                        LOG.debug(Messages.get().getBundle().key(
429                            Messages.LOG_HTML_DECORATOR_DECORATION_APPEND_DECORATION_1,
430                            wordDecObj.getContentDecoration(
431                                m_config,
432                                word,
433                                m_cms.getRequestContext().getLocale().toString())));
434                    }
435                    // decorate the current word
436                    m_result.append(wordDecObj.getContentDecoration(
437                        m_config,
438                        word,
439                        m_cms.getRequestContext().getLocale().toString()));
440                }
441            }
442        } else {
443            if (LOG.isDebugEnabled()) {
444                LOG.debug(Messages.get().getBundle().key(
445                    Messages.LOG_HTML_DECORATOR_DECORATION_APPEND_ORIGINALTEXT_1,
446                    text));
447            }
448            m_result.append(text);
449        }
450        m_decorate = true;
451    }
452
453    /**
454     * Checks if a word contains a given delimiter.<p>
455     *
456     * @param word the word to test
457     * @param delimiters array of delimiter strings
458     * @return true if the word contains the delimiter, false otherwiese
459     */
460    private boolean hasDelimiter(String word, String[] delimiters) {
461
462        boolean delim = false;
463        for (int i = 0; i < delimiters.length; i++) {
464            if (word.indexOf(delimiters[i]) > -1) {
465                delim = true;
466                break;
467            }
468        }
469        return delim;
470    }
471
472    /**
473     * Checks if a word must be decoded.<p>
474     *
475     * The given word is compared to a negative list of words which must not be decoded.<p>
476     *
477     * @param word the word to test
478     * @param wordList the list of words which must not be decoded
479     * @param count the count in the list
480     *
481     * @return true if the word must be decoded, false otherweise
482     */
483    private boolean mustDecode(String word, List<String> wordList, int count) {
484
485        boolean decode = true;
486        String nextWord = null;
487
488        if (count < (wordList.size() - 1)) {
489            nextWord = wordList.get(count + 1);
490        }
491        // test if the current word contains a "&" and the following with a ";"
492        // if so, we must not decode the word
493        if ((nextWord != null) && (word.indexOf("&") > -1) && nextWord.startsWith(";")) {
494            return false;
495        } else {
496            // now scheck if the word matches one of the non decoder tokens
497            for (int i = 0; i < NON_TRANSLATORS.length; i++) {
498                if (word.startsWith(NON_TRANSLATORS[i])) {
499                    decode = false;
500                    break;
501                }
502            }
503        }
504        return decode;
505    }
506
507    /**
508     * Checks if a word starts with a given delimiter.<p>
509     *
510     * @param word the word to test
511     * @param delimiters array of delimiter strings
512     * @return true if the word starts with the delimiter, false otherwiese
513     */
514    private boolean startsWithDelimiter(String word, String[] delimiters) {
515
516        boolean delim = false;
517        for (int i = 0; i < delimiters.length; i++) {
518            if (word.startsWith(delimiters[i])) {
519                delim = true;
520                break;
521            }
522        }
523        return delim;
524    }
525
526}