001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.xml.types;
029
030import org.opencms.file.CmsObject;
031import org.opencms.i18n.CmsEncoder;
032import org.opencms.main.CmsLog;
033import org.opencms.main.CmsRuntimeException;
034import org.opencms.relations.CmsLink;
035import org.opencms.relations.CmsLinkUpdateUtil;
036import org.opencms.staticexport.CmsLinkProcessor;
037import org.opencms.staticexport.CmsLinkTable;
038import org.opencms.util.CmsHtmlConverter;
039import org.opencms.util.CmsHtmlExtractor;
040import org.opencms.util.CmsStringUtil;
041import org.opencms.xml.CmsXmlGenericWrapper;
042import org.opencms.xml.I_CmsXmlDocument;
043import org.opencms.xml.page.CmsXmlPage;
044import org.opencms.xml.xml2json.I_CmsJsonFormattableValue;
045
046import java.util.Iterator;
047import java.util.Locale;
048
049import org.apache.commons.logging.Log;
050
051import org.dom4j.Attribute;
052import org.dom4j.Element;
053import org.htmlparser.util.ParserException;
054
055/**
056 * Describes the XML content type "OpenCmsHtml".<p>
057 *
058 * @since 6.0.0
059 */
060public class CmsXmlHtmlValue extends A_CmsXmlContentValue implements I_CmsJsonFormattableValue {
061
062    /** The name of this type as used in the XML schema. */
063    public static final String TYPE_NAME = "OpenCmsHtml";
064
065    /** The log object for this class. */
066    private static final Log LOG = CmsLog.getLog(CmsXmlHtmlValue.class);
067
068    /** The schema definition String is located in a text for easier editing. */
069    private static String m_schemaDefinition;
070
071    /** Null value for plain text extraction errors. */
072    private static final String NULL_VALUE = "null";
073
074    /** Base type for single type instances, required for XML pages. */
075    private static final I_CmsXmlSchemaType TYPE_BASE = new CmsXmlHtmlValue("base", "1", "1");
076
077    /** The plain text value of the element node. */
078    private String m_plainTextValue;
079
080    /** The String value of the element node. */
081    private String m_stringValue;
082
083    /**
084     * Creates a new, empty schema type descriptor of type "OpenCmsHtml".<p>
085     */
086    public CmsXmlHtmlValue() {
087
088        // empty constructor is required for class registration
089    }
090
091    /**
092     * Creates a new XML content value of type "OpenCmsHtml".<p>
093     *
094     * @param document the XML content instance this value belongs to
095     * @param element the XML element that contains this value
096     * @param locale the locale this value is created for
097     */
098    public CmsXmlHtmlValue(I_CmsXmlDocument document, Element element, Locale locale) {
099
100        super(document, element, locale, TYPE_BASE);
101    }
102
103    /**
104     * Creates a new XML content value of type "OpenCmsHtml".<p>
105     *
106     * @param document the XML content instance this value belongs to
107     * @param element the XML element that contains this value
108     * @param locale the locale this value is created for
109     * @param type the type instance to create the value for
110     */
111    public CmsXmlHtmlValue(I_CmsXmlDocument document, Element element, Locale locale, I_CmsXmlSchemaType type) {
112
113        super(document, element, locale, type);
114    }
115
116    /**
117     * Creates a new schema type descriptor for the type "OpenCmsHtml".<p>
118     *
119     * @param name the name of the XML node containing the value according to the XML schema
120     * @param minOccurs minimum number of occurrences of this type according to the XML schema
121     * @param maxOccurs maximum number of occurrences of this type according to the XML schema
122     */
123    public CmsXmlHtmlValue(String name, String minOccurs, String maxOccurs) {
124
125        super(name, minOccurs, maxOccurs);
126    }
127
128    /**
129     * @see org.opencms.xml.types.A_CmsXmlContentValue#createValue(I_CmsXmlDocument, org.dom4j.Element, Locale)
130     */
131    public I_CmsXmlContentValue createValue(I_CmsXmlDocument document, Element element, Locale locale) {
132
133        return new CmsXmlHtmlValue(document, element, locale, this);
134    }
135
136    /**
137     * @see org.opencms.xml.types.I_CmsXmlSchemaType#generateXml(org.opencms.file.CmsObject, org.opencms.xml.I_CmsXmlDocument, org.dom4j.Element, java.util.Locale)
138     */
139    @Override
140    public Element generateXml(CmsObject cms, I_CmsXmlDocument document, Element root, Locale locale) {
141
142        Element element = root.addElement(getName());
143        int index = element.getParent().elements(element.getQName()).indexOf(element);
144        element.addAttribute(CmsXmlPage.ATTRIBUTE_NAME, getName() + index);
145        element.addElement(CmsXmlPage.NODE_LINKS);
146        element.addElement(CmsXmlPage.NODE_CONTENT);
147
148        // get the default value from the content handler
149        String defaultValue = document.getHandler().getDefault(cms, this, locale);
150        if (defaultValue != null) {
151            try {
152                I_CmsXmlContentValue value = createValue(document, element, locale);
153                value.setStringValue(cms, defaultValue);
154            } catch (CmsRuntimeException e) {
155                // should not happen if default value is correct
156                LOG.error(
157                    Messages.get().getBundle().key(Messages.ERR_XMLCONTENT_INVALID_ELEM_DEFAULT_1, defaultValue),
158                    e);
159                element.clearContent();
160            }
161        }
162        return element;
163    }
164
165    /**
166     * Returns the link table of this XML page element.<p>
167     *
168     * @return the link table of this XML page element
169     */
170    public CmsLinkTable getLinkTable() {
171
172        CmsLinkTable linkTable = new CmsLinkTable();
173        Element links = m_element.element(CmsXmlPage.NODE_LINKS);
174        if (links != null) {
175            Iterator<Element> itLinks = CmsXmlGenericWrapper.elementIterator(links, CmsXmlPage.NODE_LINK);
176            while (itLinks.hasNext()) {
177                Element lelem = itLinks.next();
178                linkTable.addLink(new CmsLink(lelem));
179            }
180        }
181        return linkTable;
182    }
183
184    /**
185     * @see org.opencms.xml.types.I_CmsXmlContentValue#getPlainText(org.opencms.file.CmsObject)
186     */
187    @Override
188    public String getPlainText(CmsObject cms) {
189
190        if (m_plainTextValue == null) {
191            try {
192                m_plainTextValue = CmsHtmlExtractor.extractText(getStringValue(cms), m_document.getEncoding());
193            } catch (Exception exc) {
194                m_plainTextValue = NULL_VALUE;
195            }
196        }
197        if (m_plainTextValue == NULL_VALUE) {
198            return null;
199        }
200        return m_plainTextValue;
201    }
202
203    /**
204     * @see org.opencms.xml.types.I_CmsXmlSchemaType#getSchemaDefinition()
205     */
206    public String getSchemaDefinition() {
207
208        // the schema definition is located in a separate file for easier editing
209        if (m_schemaDefinition == null) {
210            m_schemaDefinition = readSchemaDefinition("org/opencms/xml/types/XmlHtmlValue.xsd");
211        }
212        return m_schemaDefinition;
213    }
214
215    /**
216     * @see org.opencms.xml.types.I_CmsXmlContentValue#getStringValue(org.opencms.file.CmsObject)
217     */
218    public String getStringValue(CmsObject cms) {
219
220        if (m_stringValue == null) {
221            m_stringValue = createStringValue(cms, m_document);
222        }
223
224        return m_stringValue;
225    }
226
227    /**
228     * @see org.opencms.xml.types.A_CmsXmlContentValue#getTypeName()
229     */
230    public String getTypeName() {
231
232        return TYPE_NAME;
233    }
234
235    /**
236     * @see org.opencms.xml.types.A_CmsXmlContentValue#newInstance(java.lang.String, java.lang.String, java.lang.String)
237     */
238    public I_CmsXmlSchemaType newInstance(String name, String minOccurs, String maxOccurs) {
239
240        return new CmsXmlHtmlValue(name, minOccurs, maxOccurs);
241    }
242
243    /**
244     * @see org.opencms.xml.types.I_CmsXmlContentValue#setStringValue(org.opencms.file.CmsObject, java.lang.String)
245     */
246    public void setStringValue(CmsObject cms, String value) {
247
248        Element content = m_element.element(CmsXmlPage.NODE_CONTENT);
249        Element links = m_element.element(CmsXmlPage.NODE_LINKS);
250        CmsLinkProcessor linkProcessor = null;
251
252        String encoding = m_document.getEncoding();
253        linkProcessor = m_document.getLinkProcessor(cms, new CmsLinkTable());
254
255        String finalValue = value;
256        if (finalValue != null) {
257            // nested CDATA tags are not allowed, so replace CDATA tags with their contents
258            finalValue = finalValue.replaceAll("(?s)// <!\\[CDATA\\[(.*?)// \\]\\]>", "$1"); // special case for embedded Javascript
259            finalValue = finalValue.replaceAll("(?s)<!\\[CDATA\\[(.*?)\\]\\]>", "$1");
260        }
261        if (encoding != null) {
262            // ensure all chars in the given content are valid chars for the selected charset
263            finalValue = CmsEncoder.adjustHtmlEncoding(finalValue, encoding);
264        }
265
266        // remove unnecessary tags if required
267        String contentConversion = m_document.getConversion();
268        if (CmsHtmlConverter.isConversionEnabled(contentConversion)) {
269            CmsHtmlConverter converter = new CmsHtmlConverter(encoding, contentConversion);
270            finalValue = converter.convertToStringSilent(finalValue);
271            finalValue = fixNullCharacters(finalValue);
272        }
273        if (linkProcessor != null) {
274            try {
275                // replace links in HTML by macros and fill link table
276                finalValue = linkProcessor.replaceLinks(finalValue);
277            } catch (Exception exc) {
278                throw new CmsRuntimeException(Messages.get().container(Messages.ERR_HTML_DATA_PROCESSING_0), exc);
279            }
280        }
281
282        content.clearContent();
283        links.clearContent();
284
285        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(finalValue)) {
286            content.addCDATA(finalValue);
287            if (linkProcessor != null) {
288                // may be null in case of default value generation (i.e. setStringValue(String) was called)
289
290                CmsLinkTable linkTable = linkProcessor.getLinkTable();
291                for (Iterator<CmsLink> i = linkTable.iterator(); i.hasNext();) {
292                    CmsLink link = i.next();
293                    CmsLinkUpdateUtil.updateXmlForHtmlValue(
294                        link,
295                        link.getName(),
296                        links.addElement(CmsXmlPage.NODE_LINK));
297                }
298            }
299        }
300
301        // ensure the String value is re-calculated next time
302        m_stringValue = null;
303    }
304
305    /**
306     * @see org.opencms.xml.xml2json.I_CmsJsonFormattableValue#toJson(org.opencms.file.CmsObject)
307     */
308    public Object toJson(CmsObject cms) {
309
310        return getStringValue(cms);
311    }
312
313    /**
314     * JTidy sometimes erroneouslsy produces HTML containing 'null' characters (Unicode code point 0), which are
315     * invalid in an XML document. Until we find a way to prevent JTidy doing that, we remove the null characters
316     * from the HTML, and log a warning.<p>
317     *
318     * @param jtidyOutput the JTidy output
319     * @return the output with null characters removed
320     */
321    protected String fixNullCharacters(String jtidyOutput) {
322
323        String outputWithoutNullChars = jtidyOutput.replaceAll("\u0000", "");
324        if (jtidyOutput.length() != outputWithoutNullChars.length()) {
325            String context = "";
326            if (m_document.getFile() != null) {
327                context = "(file=" + m_document.getFile().getRootPath() + ")";
328            }
329            LOG.warn("HTML cleanup produced invalid null characters in output. " + context);
330            LOG.debug("HTML cleanup output = " + jtidyOutput);
331        }
332        return outputWithoutNullChars;
333    }
334
335    /**
336     * Creates the String value for this HTML value element.<p>
337     *
338     * @param cms an initialized instance of a CmsObject
339     * @param document the XML document this value belongs to
340     *
341     * @return the String value for this HTML value element
342     */
343    private String createStringValue(CmsObject cms, I_CmsXmlDocument document) {
344
345        Element data = m_element.element(CmsXmlPage.NODE_CONTENT);
346        if (data == null) {
347            String content = m_element.getText();
348            m_element.clearContent();
349            int index = m_element.getParent().elements(m_element.getQName()).indexOf(m_element);
350            m_element.addAttribute(CmsXmlPage.ATTRIBUTE_NAME, getName() + index);
351            m_element.addElement(CmsXmlPage.NODE_LINKS);
352            m_element.addElement(CmsXmlPage.NODE_CONTENT).addCDATA(content);
353            data = m_element.element(CmsXmlPage.NODE_CONTENT);
354        }
355        Attribute enabled = m_element.attribute(CmsXmlPage.ATTRIBUTE_ENABLED);
356
357        String content = "";
358        if ((enabled == null) || Boolean.valueOf(enabled.getText()).booleanValue()) {
359
360            content = data.getText();
361
362            CmsLinkTable linkTable = getLinkTable();
363            if (!linkTable.isEmpty()) {
364
365                // link processing: replace macros with links
366                CmsLinkProcessor linkProcessor = document.getLinkProcessor(cms, linkTable);
367                try {
368                    content = linkProcessor.processLinks(content);
369                } catch (ParserException e) {
370                    // should better not happen
371                    LOG.error(Messages.get().getBundle().key(Messages.ERR_XMLCONTENT_LINK_PROCESS_FAILED_0), e);
372                }
373            }
374        }
375        return content;
376    }
377}