001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.util;
029
030import org.opencms.i18n.CmsEncoder;
031import org.opencms.main.OpenCms;
032
033import java.io.IOException;
034import java.io.StringWriter;
035import java.io.Writer;
036
037import org.xml.sax.Attributes;
038import org.xml.sax.SAXException;
039import org.xml.sax.ext.LexicalHandler;
040import org.xml.sax.helpers.DefaultHandler;
041
042/**
043 * Simple SAX event handler that generates a XML (or HTML) file from the events caught.<p>
044 *
045 * This can be used for writing large XML files where keeping a DOM structure
046 * in memory might cause out-of-memory issues, like e.g. when writing the
047 * OpenCms export files.<p>
048 *
049 * It can also be used if a <code>{@link org.xml.sax.ContentHandler}</code> is needed that should
050 * generate a XML / HTML file from a series of SAX events.<p>
051 *
052 * @since 6.0.0
053 */
054public class CmsXmlSaxWriter extends DefaultHandler implements LexicalHandler {
055
056    /** The indentation to use. */
057    private static final String INDENT_STR = "\t";
058
059    /** The file encoding to use. */
060    private String m_encoding;
061
062    /**
063     * Indicates if characters that are not part of the selected encoding
064     * are to be replaced with the XML <code>&amp;#123;</code> entity representation
065     * in the generated output (not in CDATA elements).
066     */
067    private boolean m_escapeUnknownChars;
068
069    /** Indicates if XML entities are to be encoded in the generated output (not in CDATA elements). */
070    private boolean m_escapeXml;
071
072    /** The indentation level. */
073    private int m_indentLevel;
074
075    /** Indicates if a CDATA node is still open. */
076    private boolean m_isCdata;
077
078    /** The last element name written to the output. */
079    private String m_lastElementName;
080
081    /** Indicates if a CDATA node needs to be opened. */
082    private boolean m_openCdata;
083
084    /** Indicates if an element tag is still open. */
085    private boolean m_openElement;
086
087    /** The Writer to write the output to. */
088    private Writer m_writer;
089
090    /**
091     * Creates a SAX event handler that generates XML / HTML Strings from the events caught
092     * using a new <code>{@link StringWriter}</code> and the OpenCms default encoding.<p>
093     */
094    public CmsXmlSaxWriter() {
095
096        this(new StringWriter(), OpenCms.getSystemInfo().getDefaultEncoding());
097    }
098
099    /**
100     * Creates a SAX event handler that generates XML / HTML Strings from the events caught
101     * using a new <code>{@link StringWriter}</code> and the given encoding.<p>
102     *
103     * @param encoding the encoding for the XML file
104     */
105    public CmsXmlSaxWriter(String encoding) {
106
107        this(new StringWriter(), encoding);
108    }
109
110    /**
111     * Creates a SAX event handler that generates XML / HTML Strings from the events caught
112     * using a new <code>{@link StringWriter}</code> and the given encoding.<p>
113     *
114     * @param writer the Writer to write to output to
115     */
116    public CmsXmlSaxWriter(Writer writer) {
117
118        this(writer, OpenCms.getSystemInfo().getDefaultEncoding());
119    }
120
121    /**
122     * A SAX event handler that generates XML / HTML Strings from the events caught and writes them
123     * to the given Writer.<p>
124     *
125     * @param writer the Writer to write to output to
126     * @param encoding the encoding for the XML file
127     */
128    public CmsXmlSaxWriter(Writer writer, String encoding) {
129
130        m_writer = writer;
131        m_encoding = encoding;
132        m_indentLevel = 0;
133        m_escapeXml = true;
134        m_escapeUnknownChars = false;
135    }
136
137    /**
138     * @see org.xml.sax.ContentHandler#characters(char[], int, int)
139     */
140    @Override
141    public void characters(char[] buf, int offset, int len) throws SAXException {
142
143        if (len == 0) {
144            return;
145        }
146        if (m_openElement) {
147            write(">");
148            m_openElement = false;
149        }
150        if (m_openCdata) {
151            write("<![CDATA[");
152            m_openCdata = false;
153        }
154        if (m_escapeXml && !m_isCdata) {
155            // XML should be escaped and we are not in a CDATA node
156            String escaped = new String(buf, offset, len);
157            // escape HTML entities ('<' becomes '&lt;')
158            escaped = CmsEncoder.escapeXml(escaped, true);
159            if (m_escapeUnknownChars) {
160                // escape all chars that can not be displayed in the selected encoding (using '&#123;' entities)
161                escaped = CmsEncoder.adjustHtmlEncoding(escaped, getEncoding());
162            }
163            write(escaped);
164        } else {
165            // no escaping or in CDATA node
166            write(new String(buf, offset, len));
167        }
168    }
169
170    /**
171     * @see org.xml.sax.ext.LexicalHandler#comment(char[], int, int)
172     */
173    public void comment(char[] ch, int start, int length) {
174
175        // ignore
176    }
177
178    /**
179     * @see org.xml.sax.ext.LexicalHandler#endCDATA()
180     */
181    public void endCDATA() throws SAXException {
182
183        if (!m_openCdata) {
184            write("]]>");
185        }
186        m_openCdata = false;
187        m_isCdata = false;
188    }
189
190    /**
191     * @see org.xml.sax.ContentHandler#endDocument()
192     */
193    @Override
194    public void endDocument() throws SAXException {
195
196        try {
197            if (m_openElement) {
198                write("/>");
199                m_openElement = false;
200            }
201            writeNewLine();
202            m_writer.flush();
203        } catch (IOException e) {
204            throw new SAXException(Messages.get().getBundle().key(Messages.ERR_IOERROR_0), e);
205        }
206    }
207
208    /**
209     * @see org.xml.sax.ext.LexicalHandler#endDTD()
210     */
211    public void endDTD() {
212
213        // NOOP
214    }
215
216    /**
217     * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
218     */
219    @Override
220    public void endElement(String namespaceURI, String localName, String qualifiedName) throws SAXException {
221
222        String elementName = resolveName(localName, qualifiedName);
223        if (m_openElement) {
224            write("/>");
225        } else {
226            if (!elementName.equals(m_lastElementName)) {
227                writeNewLine();
228            }
229            write("</");
230            write(elementName);
231            write(">");
232        }
233        m_openElement = false;
234        m_indentLevel--;
235    }
236
237    /**
238     * @see org.xml.sax.ext.LexicalHandler#endEntity(java.lang.String)
239     */
240    public void endEntity(String name) {
241
242        // NOOP
243    }
244
245    /**
246     * Returns the encoding this XML Sax writer was initialized with.<p>
247     *
248     * @return the encoding this XML Sax writer was initialized with
249     */
250    public String getEncoding() {
251
252        return m_encoding;
253    }
254
255    /**
256     * Returns the Writer where the XML is written to.<p>
257     *
258     * @return the Writer where the XML is written to
259     */
260    public Writer getWriter() {
261
262        return m_writer;
263    }
264
265    /**
266     * Returns <code>true</code> if charactes that are not part of the selected encoding
267     * are to be replaced with the HTML <code>&amp;#123;</code> entity representation
268     * in the generated output (not in CDATA elements).<p>
269     *
270     * @return <code>true</code> if charactes that are not part of the selected encoding
271     *      are to be replaced with the HTML entity representation
272     */
273    public boolean isEscapeUnknownChars() {
274
275        return m_escapeUnknownChars;
276    }
277
278    /**
279     * Returns <code>true</code> if XML entities are to be encoded in the generated output (not in CDATA elements).<p>
280     *
281     * @return <code>true</code> if XML entities are to be encoded in the generated output (not in CDATA elements)
282     */
283    public boolean isEscapeXml() {
284
285        return m_escapeXml;
286    }
287
288    /**
289     * Sets the encoding to use for the generated output.<p>
290     *
291     * @param value the encoding to use for the generated output
292     */
293    public void setEncoding(String value) {
294
295        m_encoding = value;
296    }
297
298    /**
299     * If set to <code>true</code>, then charactes that are not part of the selected encoding
300     * are to be replaced with the XML <code>&amp;#123;</code> entity representation
301     * in the generated output (not in CDATA elements).<p>
302     *
303     * @param value indicates to escape unknown characters with XML entities or not
304     */
305    public void setEscapeUnknownChars(boolean value) {
306
307        m_escapeUnknownChars = value;
308    }
309
310    /**
311     * If set to <code>true</code>, then
312     * XML entities are to be encoded in the generated output (not in CDATA elements).<p>
313     *
314     * @param value indicates to to escape characters with XML entities or not
315     */
316    public void setEscapeXml(boolean value) {
317
318        m_escapeXml = value;
319    }
320
321    /**
322     * @see org.xml.sax.ext.LexicalHandler#startCDATA()
323     */
324    public void startCDATA() {
325
326        m_openCdata = true;
327        m_isCdata = true;
328    }
329
330    /**
331     * @see org.xml.sax.ContentHandler#startDocument()
332     */
333    @Override
334    public void startDocument() throws SAXException {
335
336        write("<?xml version=\"1.0\" encoding=\"");
337        write(m_encoding);
338        write("\"?>");
339        writeNewLine();
340    }
341
342    /**
343     * @see org.xml.sax.ext.LexicalHandler#startDTD(java.lang.String, java.lang.String, java.lang.String)
344     */
345    public void startDTD(String name, String publicId, String systemId) throws SAXException {
346
347        write("<!DOCTYPE ");
348        write(name);
349        if (publicId != null) {
350            write(" PUBLIC \"");
351            write(publicId);
352            write("\"");
353        }
354        if (systemId != null) {
355            write(" SYSTEM \"");
356            write(systemId);
357            write("\"");
358        }
359        write(">");
360        writeNewLine();
361    }
362
363    /**
364     * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
365     */
366    @Override
367    public void startElement(String namespaceURI, String localName, String qualifiedName, Attributes attributes)
368    throws SAXException {
369
370        if (m_openElement) {
371            write(">");
372            m_openElement = false;
373        }
374        // increase indent and write linebreak
375        m_indentLevel++;
376        writeNewLine();
377        // get element name and write entry
378        m_lastElementName = resolveName(localName, qualifiedName);
379        write("<");
380        write(m_lastElementName);
381        if (attributes != null) {
382            for (int i = 0; i < attributes.getLength(); i++) {
383                write(" ");
384                write(resolveName(attributes.getLocalName(i), attributes.getQName(i)));
385                write("=\"");
386                String value = attributes.getValue(i);
387                if (m_escapeXml) {
388                    // XML should be escaped
389                    // escape HTML entities ('<' becomes '&lt;')
390                    value = CmsEncoder.escapeXml(value, true);
391                    if (m_escapeUnknownChars) {
392                        // escape all chars that can not be displayed in the selected encoding (using '&#123;' entities)
393                        value = CmsEncoder.adjustHtmlEncoding(value, getEncoding());
394                    }
395                }
396                write(value);
397                write("\"");
398            }
399        }
400        m_openElement = true;
401    }
402
403    /**
404     * @see org.xml.sax.ext.LexicalHandler#startEntity(java.lang.String)
405     */
406    public void startEntity(String name) {
407
408        // ignore
409    }
410
411    /**
412     * Resolves the local vs. the qualified name.<p>
413     *
414     * If the local name is the empty String "", the qualified name is used.<p>
415     *
416     * @param localName the local name
417     * @param qualifiedName the qualified XML 1.0 name
418     * @return the resolved name to use
419     */
420    private String resolveName(String localName, String qualifiedName) {
421
422        if ((localName == null) || (localName.length() == 0)) {
423            return qualifiedName;
424        } else {
425            return localName;
426        }
427    }
428
429    /**
430     * Writes s String to the output stream.<p>
431     *
432     * @param s the String to write
433     * @throws SAXException in case of I/O errors
434     */
435    private void write(String s) throws SAXException {
436
437        try {
438            m_writer.write(s);
439        } catch (IOException e) {
440            throw new SAXException(Messages.get().getBundle().key(Messages.ERR_IOERROR_0), e);
441        }
442    }
443
444    /**
445     * Writes a linebreak to the output stream, also handles the indentation.<p>
446     *
447     * @throws SAXException in case of I/O errors
448     */
449    private void writeNewLine() throws SAXException {
450
451        try {
452            // write new line
453            m_writer.write("\r\n");
454            // write indentation
455            for (int i = 1; i < m_indentLevel; i++) {
456                m_writer.write(INDENT_STR);
457            }
458            // flush the stream
459            m_writer.flush();
460        } catch (IOException e) {
461            throw new SAXException(Messages.get().getBundle().key(Messages.ERR_IOERROR_0), e);
462        }
463    }
464}