/*
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software GmbH & Co. KG, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.opencms.util;

import org.opencms.i18n.CmsEncoder;
import org.opencms.main.OpenCms;

import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Simple SAX event handler that generates an XML (or HTML) file from the events caught.<p>
 *
 * This can be used for writing large XML files where keeping a DOM structure
 * in memory might cause out-of-memory issues, like e.g. when writing the
 * OpenCms export files.<p>
 *
 * It can also be used if a <code>{@link org.xml.sax.ContentHandler}</code> is needed that should
 * generate an XML / HTML file from a series of SAX events.<p>
 *
 * @since 6.0.0
 */
public class CmsXmlSaxWriter extends DefaultHandler implements LexicalHandler {

    /** The indentation to use. */
    private static final String INDENT_STR = "\t";

    /** The file encoding to use. */
    private String m_encoding;

    /**
     * Indicates if characters that are not part of the selected encoding
     * are to be replaced with the XML <code>&amp;#123;</code> entity representation
     * in the generated output (not in CDATA elements).
     */
    private boolean m_escapeUnknownChars;

    /** Indicates if XML entities are to be encoded in the generated output (not in CDATA elements). */
    private boolean m_escapeXml;

    /** The indentation level. */
    private int m_indentLevel;

    /** Indicates if a CDATA node is still open. */
    private boolean m_isCdata;

    /** The last element name written to the output. */
    private String m_lastElementName;

    /** Indicates if a CDATA node needs to be opened. */
    private boolean m_openCdata;

    /** Indicates if an element tag is still open. */
    private boolean m_openElement;

    /** The Writer to write the output to. */
    private final Writer m_writer;

    /**
     * Creates a SAX event handler that generates XML / HTML Strings from the events caught
     * using a new <code>{@link StringWriter}</code> and the OpenCms default encoding.<p>
     */
    public CmsXmlSaxWriter() {

        this(new StringWriter(), OpenCms.getSystemInfo().getDefaultEncoding());
    }

    /**
     * Creates a SAX event handler that generates XML / HTML Strings from the events caught
     * using a new <code>{@link StringWriter}</code> and the given encoding.<p>
     *
     * @param encoding the encoding for the XML file
     */
    public CmsXmlSaxWriter(String encoding) {

        this(new StringWriter(), encoding);
    }

    /**
     * Creates a SAX event handler that generates XML / HTML Strings from the events caught
     * and writes them to the given Writer using the OpenCms default encoding.<p>
     *
     * @param writer the Writer to write the output to
     */
    public CmsXmlSaxWriter(Writer writer) {

        this(writer, OpenCms.getSystemInfo().getDefaultEncoding());
    }

    /**
     * A SAX event handler that generates XML / HTML Strings from the events caught and writes them
     * to the given Writer.<p>
     *
     * XML escaping is enabled and escaping of unknown characters is disabled by default.<p>
     *
     * @param writer the Writer to write the output to
     * @param encoding the encoding for the XML file
     */
    public CmsXmlSaxWriter(Writer writer, String encoding) {

        m_writer = writer;
        m_encoding = encoding;
        m_indentLevel = 0;
        m_escapeXml = true;
        m_escapeUnknownChars = false;
    }

    /**
     * @see org.xml.sax.ContentHandler#characters(char[], int, int)
     */
    @Override
    public void characters(char[] buf, int offset, int len) throws SAXException {

        if (len == 0) {
            // nothing to write, keep a pending start tag / CDATA marker pending
            return;
        }
        closeStartTag();
        if (m_openCdata) {
            // the CDATA section is opened lazily, so empty CDATA sections produce no output
            write("<![CDATA[");
            m_openCdata = false;
        }
        String text = new String(buf, offset, len);
        if (m_escapeXml && !m_isCdata) {
            // XML should be escaped and we are not in a CDATA node
            text = escape(text);
        }
        write(text);
    }

    /**
     * @see org.xml.sax.ext.LexicalHandler#comment(char[], int, int)
     */
    public void comment(char[] ch, int start, int length) {

        // ignore
    }

    /**
     * @see org.xml.sax.ext.LexicalHandler#endCDATA()
     */
    public void endCDATA() throws SAXException {

        if (!m_openCdata) {
            // the opening marker has been written, so the section must be closed
            write("]]>");
        }
        m_openCdata = false;
        m_isCdata = false;
    }

    /**
     * @see org.xml.sax.ContentHandler#endDocument()
     */
    @Override
    public void endDocument() throws SAXException {

        try {
            if (m_openElement) {
                write("/>");
                m_openElement = false;
            }
            writeNewLine();
            m_writer.flush();
        } catch (IOException e) {
            throw new SAXException(Messages.get().getBundle().key(Messages.ERR_IOERROR_0), e);
        }
    }

    /**
     * @see org.xml.sax.ext.LexicalHandler#endDTD()
     */
    public void endDTD() {

        // NOOP
    }

    /**
     * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
     */
    @Override
    public void endElement(String namespaceURI, String localName, String qualifiedName) throws SAXException {

        String elementName = resolveName(localName, qualifiedName);
        if (m_openElement) {
            // the start tag has not been closed yet, so the element is empty
            write("/>");
        } else {
            if (!elementName.equals(m_lastElementName)) {
                // closing a different element than the one last opened: put the end tag on its own line
                writeNewLine();
            }
            write("</");
            write(elementName);
            write(">");
        }
        m_openElement = false;
        m_indentLevel--;
    }

    /**
     * @see org.xml.sax.ext.LexicalHandler#endEntity(java.lang.String)
     */
    public void endEntity(String name) {

        // NOOP
    }

    /**
     * Returns the encoding this XML Sax writer was initialized with.<p>
     *
     * @return the encoding this XML Sax writer was initialized with
     */
    public String getEncoding() {

        return m_encoding;
    }

    /**
     * Returns the Writer where the XML is written to.<p>
     *
     * @return the Writer where the XML is written to
     */
    public Writer getWriter() {

        return m_writer;
    }

    /**
     * Returns <code>true</code> if characters that are not part of the selected encoding
     * are to be replaced with the HTML <code>&amp;#123;</code> entity representation
     * in the generated output (not in CDATA elements).<p>
     *
     * @return <code>true</code> if characters that are not part of the selected encoding
     *      are to be replaced with the HTML entity representation
     */
    public boolean isEscapeUnknownChars() {

        return m_escapeUnknownChars;
    }

    /**
     * Returns <code>true</code> if XML entities are to be encoded in the generated output (not in CDATA elements).<p>
     *
     * @return <code>true</code> if XML entities are to be encoded in the generated output (not in CDATA elements)
     */
    public boolean isEscapeXml() {

        return m_escapeXml;
    }

    /**
     * Sets the encoding to use for the generated output.<p>
     *
     * @param value the encoding to use for the generated output
     */
    public void setEncoding(String value) {

        m_encoding = value;
    }

    /**
     * If set to <code>true</code>, then characters that are not part of the selected encoding
     * are to be replaced with the XML <code>&amp;#123;</code> entity representation
     * in the generated output (not in CDATA elements).<p>
     *
     * @param value indicates to escape unknown characters with XML entities or not
     */
    public void setEscapeUnknownChars(boolean value) {

        m_escapeUnknownChars = value;
    }

    /**
     * If set to <code>true</code>, then
     * XML entities are to be encoded in the generated output (not in CDATA elements).<p>
     *
     * @param value indicates to escape characters with XML entities or not
     */
    public void setEscapeXml(boolean value) {

        m_escapeXml = value;
    }

    /**
     * @see org.xml.sax.ext.LexicalHandler#startCDATA()
     */
    public void startCDATA() {

        // the opening marker is written with the first characters received
        m_openCdata = true;
        m_isCdata = true;
    }

    /**
     * @see org.xml.sax.ContentHandler#startDocument()
     */
    @Override
    public void startDocument() throws SAXException {

        write("<?xml version=\"1.0\" encoding=\"");
        write(m_encoding);
        write("\"?>");
        writeNewLine();
    }

    /**
     * Writes the document type declaration.<p>
     *
     * If a public identifier is given, the system identifier follows it as a plain quoted literal
     * (<code>PUBLIC "publicId" "systemId"</code>); the <code>SYSTEM</code> keyword is only written
     * if there is no public identifier.<p>
     *
     * @see org.xml.sax.ext.LexicalHandler#startDTD(java.lang.String, java.lang.String, java.lang.String)
     */
    public void startDTD(String name, String publicId, String systemId) throws SAXException {

        write("<!DOCTYPE ");
        write(name);
        if (publicId != null) {
            write(" PUBLIC \"");
            write(publicId);
            write("\"");
            if (systemId != null) {
                // after a public identifier the system literal must not be prefixed with SYSTEM
                write(" \"");
                write(systemId);
                write("\"");
            }
        } else if (systemId != null) {
            write(" SYSTEM \"");
            write(systemId);
            write("\"");
        }
        write(">");
        writeNewLine();
    }

    /**
     * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
     */
    @Override
    public void startElement(String namespaceURI, String localName, String qualifiedName, Attributes attributes)
    throws SAXException {

        closeStartTag();
        // increase indent and write linebreak
        m_indentLevel++;
        writeNewLine();
        // get element name and write entry
        m_lastElementName = resolveName(localName, qualifiedName);
        write("<");
        write(m_lastElementName);
        if (attributes != null) {
            for (int i = 0; i < attributes.getLength(); i++) {
                write(" ");
                write(resolveName(attributes.getLocalName(i), attributes.getQName(i)));
                write("=\"");
                String value = attributes.getValue(i);
                if (m_escapeXml) {
                    // XML should be escaped
                    value = escape(value);
                }
                write(value);
                write("\"");
            }
        }
        // the tag is left open so that an empty element can be closed with "/>"
        m_openElement = true;
    }

    /**
     * @see org.xml.sax.ext.LexicalHandler#startEntity(java.lang.String)
     */
    public void startEntity(String name) {

        // ignore
    }

    /**
     * Closes a start tag that is still open by writing the <code>&gt;</code> character.<p>
     *
     * @throws SAXException in case of I/O errors
     */
    private void closeStartTag() throws SAXException {

        if (m_openElement) {
            write(">");
            m_openElement = false;
        }
    }

    /**
     * Escapes the given value for the generated output.<p>
     *
     * XML entities are always escaped ('<' becomes '&amp;lt;'); characters that can not be displayed
     * in the selected encoding are additionally replaced by '&amp;#123;' style entities if
     * {@link #isEscapeUnknownChars()} has been enabled.<p>
     *
     * @param value the value to escape
     * @return the escaped value
     */
    private String escape(String value) {

        String result = CmsEncoder.escapeXml(value, true);
        if (m_escapeUnknownChars) {
            result = CmsEncoder.adjustHtmlEncoding(result, getEncoding());
        }
        return result;
    }

    /**
     * Resolves the local vs. the qualified name.<p>
     *
     * If the local name is <code>null</code> or the empty String "", the qualified name is used.<p>
     *
     * @param localName the local name
     * @param qualifiedName the qualified XML 1.0 name
     * @return the resolved name to use
     */
    private String resolveName(String localName, String qualifiedName) {

        if ((localName == null) || (localName.length() == 0)) {
            return qualifiedName;
        } else {
            return localName;
        }
    }

    /**
     * Writes a String to the output stream.<p>
     *
     * @param s the String to write
     * @throws SAXException in case of I/O errors
     */
    private void write(String s) throws SAXException {

        try {
            m_writer.write(s);
        } catch (IOException e) {
            throw new SAXException(Messages.get().getBundle().key(Messages.ERR_IOERROR_0), e);
        }
    }

    /**
     * Writes a linebreak to the output stream, also handles the indentation.<p>
     *
     * @throws SAXException in case of I/O errors
     */
    private void writeNewLine() throws SAXException {

        try {
            // write new line
            m_writer.write("\r\n");
            // write indentation, the first level is not indented
            for (int i = 1; i < m_indentLevel; i++) {
                m_writer.write(INDENT_STR);
            }
            // flush the stream
            m_writer.flush();
        } catch (IOException e) {
            throw new SAXException(Messages.get().getBundle().key(Messages.ERR_IOERROR_0), e);
        }
    }
}