001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.util;
029
030import java.util.List;
031
032import org.htmlparser.Remark;
033import org.htmlparser.Tag;
034import org.htmlparser.Text;
035import org.htmlparser.util.ParserException;
036
037/**
038 *
039 * Interface for a combination of a visitor of HTML documents along with the hook to start the
040 * parser / lexer that triggers the visit.
041 * <p>
042 * The <code>visit*</code> methods are callbacks that correspond to the methods of the same name in
043 * <code>org.htmlparser.visitors.NodeVisitor</code>.
044 *
045 * @since 6.1.3
046 *
047 */
048public interface I_CmsHtmlNodeVisitor {
049
050    /**
051     * Returns the configuration String of this visitor or the empty String if it was not provided
052     * before.
053     * <p>
054     *
055     * @return the configuration String of this visitor - by this contract never null but an empty
056     *         String if not provided.
057     *
058     * @see #setConfiguration(String)
059     */
060    String getConfiguration();
061
062    /**
063     * Returns the text extraction result.
064     * <p>
065     *
066     * @return the text extraction result
067     */
068    String getResult();
069
070    /**
071     * Extracts the text from the given html content, assuming the given html encoding.
072     * <p>
073     *
074     * @param html the content to extract the plain text from
075     * @param encoding the encoding to use
076     *
077     * @return the text extracted from the given html content
078     *
079     * @throws ParserException if something goes wrong
080     */
081    String process(String html, String encoding) throws ParserException;
082
083    /**
084     * Sets a configuration String for this visitor.
085     * <p>
086     *
087     * This will most likely be done with data from an xsd, a custom jsp tag, ...
088     * <p>
089     *
090     * @param configuration the configuration of this visitor to set.
091     */
092    void setConfiguration(String configuration);
093
094    /**
095     * Sets a list of upper case tag names for which parsing / visiting should not correct missing closing tags.<p>
096     *
097     * This has to be used before <code>{@link #process(String, String)}</code> is invoked to take effect.<p>
098     *
099     * @param noAutoCloseTags a list of upper case tag names for which parsing / visiting
100     *      should not correct missing closing tags to set.
101     */
102    void setNoAutoCloseTags(List<String> noAutoCloseTags);
103
104    /**
105     * Visitor method (callback) invoked when a closing Tag is encountered.
106     * <p>
107     *
108     * @param tag the tag that is ended.
109     *
110     * @see org.htmlparser.visitors.NodeVisitor#visitEndTag(org.htmlparser.Tag)
111     */
112    void visitEndTag(Tag tag);
113
114    /**
115     * Visitor method (callback) invoked when a remark Tag (HTML comment) is encountered.
116     * <p>
117     *
118     * @param remark the remark Tag to visit.
119     *
120     * @see org.htmlparser.visitors.NodeVisitor#visitRemarkNode(org.htmlparser.Remark)
121     */
122    void visitRemarkNode(Remark remark);
123
124    /**
125     *
126     * Visitor method (callback) invoked when a text node (string node) is encountered.
127     * <p>
128     *
129     * @param text the text that is visited.
130     *
131     * @see org.htmlparser.visitors.NodeVisitor#visitStringNode(org.htmlparser.Text)
132     */
133    void visitStringNode(Text text);
134
135    /**
136     * Visitor method (callback) invoked when a starting Tag is encountered.
137     * <p>
138     *
139     * @param tag the tag that is visited.
140     *
141     * @see org.htmlparser.visitors.NodeVisitor#visitTag(org.htmlparser.Tag)
142     */
143    void visitTag(Tag tag);
144
145}