001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software GmbH & Co. KG, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.xml; 029 030import org.opencms.file.CmsResource; 031import org.opencms.main.CmsLog; 032import org.opencms.util.CmsStringUtil; 033import org.opencms.xml.content.CmsXmlContent; 034 035import java.io.ByteArrayInputStream; 036import java.io.ByteArrayOutputStream; 037import java.io.IOException; 038import java.io.InputStream; 039import java.io.OutputStream; 040import java.io.StringReader; 041import java.io.StringWriter; 042import java.io.UnsupportedEncodingException; 043import java.util.Arrays; 044import java.util.List; 045import java.util.stream.Collectors; 046 047import javax.xml.parsers.SAXParserFactory; 048 049import org.apache.commons.logging.Log; 050import org.apache.xerces.parsers.SAXParser; 051 052import org.dom4j.Attribute; 053import org.dom4j.Document; 054import org.dom4j.DocumentException; 055import org.dom4j.Element; 056import org.dom4j.Node; 057import org.dom4j.io.DOMReader; 058import org.dom4j.io.DOMWriter; 059import org.dom4j.io.OutputFormat; 060import org.dom4j.io.SAXReader; 061import org.dom4j.io.XMLWriter; 062import org.xml.sax.EntityResolver; 063import org.xml.sax.InputSource; 064import org.xml.sax.SAXException; 065import org.xml.sax.SAXNotRecognizedException; 066import org.xml.sax.SAXNotSupportedException; 067import org.xml.sax.XMLReader; 068import org.xml.sax.helpers.XMLReaderFactory; 069 070/** 071 * Provides some basic XML handling utilities.<p> 072 * 073 * @since 6.0.0 074 */ 075public final class CmsXmlUtils { 076 077 /** 078 * This class is only used to expose the XML parser configuration implementation name.<p> 079 */ 080 private static class ParserImpl extends SAXParser { 081 082 /** 083 * Constructor.<p> 084 */ 085 ParserImpl() { 086 087 super(); 088 } 089 090 /** 091 * Returns the implementation name of the used XML parser configuration.<p> 092 * 093 * @return the implementation name 094 */ 095 String getConfigImplName() { 096 097 if (fConfiguration != null) { 098 return fConfiguration.getClass().getName(); 099 } else { 100 return null; 101 } 102 } 103 } 104 105 /** The log object for this class. */ 106 private static final Log LOG = CmsLog.getLog(CmsXmlUtils.class); 107 108 /** Key of the SAX parser configuration system property. */ 109 private static final String SAX_PARSER_CONFIG_KEY = "org.apache.xerces.xni.parser.XMLParserConfiguration"; 110 111 /** Key of the SAX parser factory system property. */ 112 private static final String SAX_PARSER_FACTORY_KEY = "javax.xml.parsers.SAXParserFactory"; 113 114 /** Key of the XML reader system property. */ 115 private static final String XML_READER_KEY = "org.xml.sax.driver"; 116 117 /** 118 * Prevents instances of this class from being generated.<p> 119 */ 120 private CmsXmlUtils() { 121 122 // noop 123 } 124 125 /** 126 * Concatenates two Xpath expressions, ensuring that exactly one slash "/" is between them.<p> 127 * 128 * Use this method if it's uncertain if the given arguments are starting or ending with 129 * a slash "/".<p> 130 * 131 * Examples:<br> 132 * <code>"title", "subtitle"</code> becomes <code>title/subtitle</code><br> 133 * <code>"title[1]/", "subtitle"</code> becomes <code>title[1]/subtitle</code><br> 134 * <code>"title[1]/", "/subtitle[1]"</code> becomes <code>title[1]/subtitle[1]</code><p> 135 * 136 * @param prefix the prefix Xpath 137 * @param suffix the suffix Xpath 138 * 139 * @return the concatenated Xpath build from prefix and suffix 140 */ 141 public static String concatXpath(String prefix, String suffix) { 142 143 if (suffix == null) { 144 // ensure suffix is not null 145 suffix = ""; 146 } else { 147 if ((suffix.length() > 0) && (suffix.charAt(0) == '/')) { 148 // remove leading '/' form suffix 149 suffix = suffix.substring(1); 150 } 151 } 152 if (prefix != null) { 153 StringBuffer result = new StringBuffer(32); 154 result.append(prefix); 155 if (!CmsResource.isFolder(prefix) && (suffix.length() > 0)) { 156 result.append('/'); 157 } 158 result.append(suffix); 159 return result.toString(); 160 } 161 return suffix; 162 } 163 164 /** 165 * Converts an org.dom4j.Document to a org.w3c.dom.Document. 166 * 167 * @param doc the document to convert 168 * @return the converted document 169 */ 170 public static org.w3c.dom.Document convertDocumentFromDom4jToW3C(Document doc) throws DocumentException { 171 172 return new DOMWriter().write(doc); 173 } 174 175 /** 176 * Converts an org.w3c.dom.Document to an org.dom4j.Document. 177 * 178 * @param doc the document to convert 179 * @return the converted document 180 */ 181 public static Document convertDocumentFromW3CToDom4j(org.w3c.dom.Document doc) { 182 183 org.dom4j.io.DOMReader reader = new DOMReader(); 184 return reader.read(doc); 185 } 186 187 /** 188 * Translates a simple lookup path to the simplified Xpath format used for 189 * the internal bookmarks.<p> 190 * 191 * Examples:<br> 192 * <code>title</code> becomes <code>title[1]</code><br> 193 * <code>title[1]</code> is left untouched<br> 194 * <code>title/subtitle</code> becomes <code>title[1]/subtitle[1]</code><br> 195 * <code>title/subtitle[1]</code> becomes <code>title[1]/subtitle[1]</code><p> 196 * 197 * Note: If the name already has the format <code>title[1]</code> then provided index parameter 198 * is ignored.<p> 199 * 200 * @param path the path to get the simplified Xpath for 201 * @param index the index to append (if required) 202 * 203 * @return the simplified Xpath for the given name 204 */ 205 public static String createXpath(String path, int index) { 206 207 if (path.indexOf('/') > -1) { 208 // this is a complex path over more then 1 node 209 StringBuffer result = new StringBuffer(path.length() + 32); 210 211 // split the path into sub elements 212 List<String> elements = CmsStringUtil.splitAsList(path, '/'); 213 int end = elements.size() - 1; 214 for (int i = 0; i <= end; i++) { 215 // append [i] to path element if required 216 result.append(createXpathElementCheck(elements.get(i), (i == end) ? index : 1)); 217 if (i < end) { 218 // append path delimiter if not final path element 219 result.append('/'); 220 } 221 } 222 return result.toString(); 223 } 224 225 // this path has only 1 node, append [index] if required 226 return createXpathElementCheck(path, index); 227 } 228 229 /** 230 * Appends the provided index parameter in square brackets to the given name, 231 * like <code>path[index]</code>.<p> 232 * 233 * This method is used if it's clear that some path does not have 234 * a square bracket already appended.<p> 235 * 236 * @param path the path append the index to 237 * @param index the index to append 238 * 239 * @return the simplified Xpath for the given name 240 */ 241 public static String createXpathElement(String path, int index) { 242 243 StringBuffer result = new StringBuffer(path.length() + 5); 244 result.append(path); 245 result.append('['); 246 result.append(index); 247 result.append(']'); 248 return result.toString(); 249 } 250 251 /** 252 * Ensures that a provided simplified Xpath has the format <code>title[1]</code>.<p> 253 * 254 * This method is used if it's uncertain if some path does have 255 * a square bracket already appended or not.<p> 256 * 257 * Note: If the name already has the format <code>title[1]</code>, then provided index parameter 258 * is ignored.<p> 259 * 260 * @param path the path to get the simplified Xpath for 261 * @param index the index to append (if required) 262 * 263 * @return the simplified Xpath for the given name 264 */ 265 public static String createXpathElementCheck(String path, int index) { 266 267 if (path.charAt(path.length() - 1) == ']') { 268 // path is already in the form "title[1]" 269 // ignore provided index and return the path "as is" 270 return path; 271 } 272 273 // append index in square brackets 274 return createXpathElement(path, index); 275 } 276 277 /** 278 * Returns the first Xpath element from the provided path, 279 * without the index value.<p> 280 * 281 * Examples:<br> 282 * <code>title</code> is left untouched<br> 283 * <code>title[1]</code> becomes <code>title</code><br> 284 * <code>title/subtitle</code> becomes <code>title</code><br> 285 * <code>title[1]/subtitle[1]</code> becomes <code>title</code><p> 286 * 287 * @param path the path to get the first Xpath element from 288 * 289 * @return the first Xpath element from the provided path 290 */ 291 public static String getFirstXpathElement(String path) { 292 293 int pos = path.indexOf('/'); 294 if (pos >= 0) { 295 path = path.substring(0, pos); 296 } 297 298 return CmsXmlUtils.removeXpathIndex(path); 299 } 300 301 /** 302 * Returns the last Xpath element from the provided path, 303 * without the index value.<p> 304 * 305 * Examples:<br> 306 * <code>title</code> is left untouched<br> 307 * <code>title[1]</code> becomes <code>title</code><br> 308 * <code>title/subtitle</code> becomes <code>subtitle</code><br> 309 * <code>title[1]/subtitle[1]</code> becomes <code>subtitle</code><p> 310 * 311 * @param path the path to get the last Xpath element from 312 * 313 * @return the last Xpath element from the provided path 314 */ 315 public static String getLastXpathElement(String path) { 316 317 int pos = path.lastIndexOf('/'); 318 if (pos >= 0) { 319 path = path.substring(pos + 1); 320 } 321 322 return CmsXmlUtils.removeXpathIndex(path); 323 } 324 325 /** 326 * Returns the last Xpath element from the provided path. 327 * 328 * 329 * Examples:<br> 330 * <code>title</code> is left untouched<br> 331 * <code>title[1]/subtitle[1]</code> becomes <code>subtitle[1]</code><p> 332 * 333 * @param path the path to get the last Xpath element from 334 * 335 * @return the last Xpath element from the provided path 336 */ 337 public static String getLastXpathElementWithIndex(String path) { 338 339 int pos = path.lastIndexOf('/'); 340 if (pos >= 0) { 341 path = path.substring(pos + 1); 342 } 343 return path; 344 } 345 346 /** 347 * Helper method to get the version number from a schema's/content's XML document. 348 * 349 * @param doc the document 350 * @return the version (returns 0 if no version is set) 351 */ 352 public static int getSchemaVersion(Document doc) { 353 354 if (doc == null) { 355 LOG.info("getSchemaVersion called with null document"); 356 return 0; 357 } 358 Element root = doc.getRootElement(); 359 Attribute versionAttr = root.attribute(CmsXmlContent.A_VERSION); 360 if (versionAttr != null) { 361 try { 362 return Integer.parseInt(versionAttr.getValue()); 363 } catch (Exception e) { 364 LOG.error(e.getLocalizedMessage(), e); 365 } 366 } 367 return 0; 368 } 369 370 /** 371 * Returns the last Xpath index from the given path.<p> 372 * 373 * Examples:<br> 374 * <code>title</code> returns the empty String<p> 375 * <code>title[1]</code> returns <code>[1]</code><p> 376 * <code>title/subtitle</code> returns them empty String<p> 377 * <code>title[1]/subtitle[1]</code> returns <code>[1]</code><p> 378 * 379 * @param path the path to extract the Xpath index from 380 * 381 * @return the last Xpath index from the given path 382 */ 383 public static String getXpathIndex(String path) { 384 385 int pos1 = path.lastIndexOf('/'); 386 int pos2 = path.lastIndexOf('['); 387 if ((pos2 < 0) || (pos1 > pos2)) { 388 return ""; 389 } 390 391 return path.substring(pos2); 392 } 393 394 /** 395 * Returns the last Xpath index from the given path as integer.<p> 396 * 397 * Examples:<br> 398 * <code>title</code> returns 1<p> 399 * <code>title[1]</code> returns 1<p> 400 * <code>title/subtitle</code> returns 1<p> 401 * <code>title[1]/subtitle[2]</code> returns 2<p> 402 * 403 * @param path the path to extract the Xpath index from 404 * 405 * @return the last Xpath index from the given path as integer 406 */ 407 public static int getXpathIndexInt(String path) { 408 409 int pos1 = path.lastIndexOf('/'); 410 int pos2 = path.lastIndexOf('['); 411 if ((pos2 < 0) || (pos1 > pos2)) { 412 return 1; 413 } 414 415 String idxStr = path.substring(pos2 + 1, path.lastIndexOf(']')); 416 try { 417 return Integer.parseInt(idxStr); 418 } catch (NumberFormatException e) { 419 // NOOP 420 } 421 return 1; 422 } 423 424 /** 425 * Initializes XML processing system properties to avoid evaluating the XML parser and reader implementation each time an XML document is read.<p> 426 * This is done for performance improvements only.<p> 427 */ 428 public static void initSystemProperties() { 429 430 String implName; 431 // initialize system properties 432 if (System.getProperty(SAX_PARSER_FACTORY_KEY) == null) { 433 implName = SAXParserFactory.newInstance().getClass().getName(); 434 LOG.info("Setting sax parser factory impl property to " + implName); 435 System.setProperty(SAX_PARSER_FACTORY_KEY, implName); 436 } 437 if (System.getProperty(XML_READER_KEY) == null) { 438 SAXReader reader = new SAXReader(); 439 try { 440 implName = reader.getXMLReader().getClass().getName(); 441 LOG.info("Setting xml reader impl property to " + implName); 442 System.setProperty(XML_READER_KEY, implName); 443 } catch (SAXException e) { 444 LOG.error("Error evaluating XMLReader impl.", e); 445 } 446 } 447 if (System.getProperty(SAX_PARSER_CONFIG_KEY) == null) { 448 ParserImpl saxParser = new ParserImpl(); 449 implName = saxParser.getConfigImplName(); 450 if (implName != null) { 451 LOG.info("Setting xml parser configuration impl property to " + implName); 452 System.setProperty(SAX_PARSER_CONFIG_KEY, implName); 453 } 454 } 455 } 456 457 /** 458 * Returns <code>true</code> if the given path is a Xpath with 459 * at least 2 elements.<p> 460 * 461 * Examples:<br> 462 * <code>title</code> returns <code>false</code><br> 463 * <code>title[1]</code> returns <code>false</code><br> 464 * <code>title/subtitle</code> returns <code>true</code><br> 465 * <code>title[1]/subtitle[1]</code> returns <code>true</code><p> 466 * 467 * @param path the path to check 468 * @return true if the given path is a Xpath with at least 2 elements 469 */ 470 public static boolean isDeepXpath(String path) { 471 472 return path.indexOf('/') > 0; 473 } 474 475 /** 476 * Marshals (writes) an XML document into an output stream using XML pretty-print formatting.<p> 477 * 478 * @param document the XML document to marshal 479 * @param out the output stream to write to 480 * @param encoding the encoding to use 481 * @return the output stream with the xml content 482 * @throws CmsXmlException if something goes wrong 483 */ 484 public static OutputStream marshal(Document document, OutputStream out, String encoding) throws CmsXmlException { 485 486 try { 487 OutputFormat format = OutputFormat.createPrettyPrint(); 488 format.setEncoding(encoding); 489 490 XMLWriter writer = new XMLWriter(out, format); 491 492 writer.write(document); 493 writer.close(); 494 495 } catch (Exception e) { 496 throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e); 497 } 498 499 return out; 500 } 501 502 /** 503 * Marshals (writes) an XML document to a String using XML pretty-print formatting.<p> 504 * 505 * @param document the XML document to marshal 506 * @param encoding the encoding to use 507 * @return the marshalled XML document 508 * @throws CmsXmlException if something goes wrong 509 */ 510 public static String marshal(Document document, String encoding) throws CmsXmlException { 511 512 ByteArrayOutputStream out = new ByteArrayOutputStream(); 513 marshal(document, out, encoding); 514 try { 515 return out.toString(encoding); 516 } catch (UnsupportedEncodingException e) { 517 throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_TO_STRING_0), e); 518 } 519 } 520 521 /** 522 * Marshals (writes) an XML node into an output stream using XML pretty-print formatting.<p> 523 * 524 * @param node the XML node to marshal 525 * @param encoding the encoding to use 526 * 527 * @return the string with the xml content 528 * 529 * @throws CmsXmlException if something goes wrong 530 */ 531 public static String marshal(Node node, String encoding) throws CmsXmlException { 532 533 ByteArrayOutputStream out = new ByteArrayOutputStream(); 534 try { 535 OutputFormat format = OutputFormat.createPrettyPrint(); 536 format.setEncoding(encoding); 537 format.setSuppressDeclaration(true); 538 539 XMLWriter writer = new XMLWriter(out, format); 540 writer.setEscapeText(false); 541 542 writer.write(node); 543 writer.close(); 544 } catch (Exception e) { 545 throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e); 546 } 547 return new String(out.toByteArray()); 548 } 549 550 /** 551 * Removes all Xpath indices from the given path.<p> 552 * 553 * Example:<br> 554 * <code>title</code> is left untouched<br> 555 * <code>title[1]</code> becomes <code>title</code><br> 556 * <code>title/subtitle</code> is left untouched<br> 557 * <code>title[1]/subtitle[1]</code> becomes <code>title/subtitle</code><p> 558 * 559 * @param path the path to remove the Xpath index from 560 * 561 * @return the path with all Xpath indices removed 562 */ 563 public static String removeAllXpathIndices(String path) { 564 565 return path.replaceAll("\\[[0-9]+\\]", ""); 566 } 567 568 /** 569 * Removes the first Xpath element from the path.<p> 570 * 571 * If the provided path does not contain a "/" character, 572 * it is returned unchanged.<p> 573 * 574 * <p>Examples:<br> 575 * <code>title</code> is left untouched<br> 576 * <code>title[1]</code> is left untouched<br> 577 * <code>title/subtitle</code> becomes <code>subtitle</code><br> 578 * <code>title[1]/subtitle[1]</code> becomes <code>subtitle[1]</code><p> 579 * 580 * @param path the Xpath to remove the first element from 581 * 582 * @return the path with the first element removed 583 */ 584 public static String removeFirstXpathElement(String path) { 585 586 int pos = path.indexOf('/'); 587 if (pos < 0) { 588 return path; 589 } 590 591 return path.substring(pos + 1); 592 } 593 594 /** 595 * Removes the last complex Xpath element from the path.<p> 596 * 597 * The same as {@link #removeLastXpathElement(String)} both it works with more complex xpaths. 598 * 599 * <p>Example:<br> 600 * <code>system/backup[@date='23/10/2003']/resource[path='/a/b/c']</code> becomes <code>system/backup[@date='23/10/2003']</code><p> 601 * 602 * @param path the Xpath to remove the last element from 603 * 604 * @return the path with the last element removed 605 */ 606 public static String removeLastComplexXpathElement(String path) { 607 608 int pos = path.lastIndexOf('/'); 609 if (pos < 0) { 610 return path; 611 } 612 // count ' chars 613 int p = pos; 614 int count = -1; 615 while (p > 0) { 616 count++; 617 p = path.indexOf("\'", p + 1); 618 } 619 String parentPath = path.substring(0, pos); 620 if ((count % 2) == 0) { 621 // if substring is complete 622 return parentPath; 623 } 624 // if not complete 625 p = parentPath.lastIndexOf("'"); 626 if (p >= 0) { 627 // complete it if possible 628 return removeLastComplexXpathElement(parentPath.substring(0, p)); 629 } 630 return parentPath; 631 } 632 633 /** 634 * Removes the last Xpath element from the path.<p> 635 * 636 * If the provided path does not contain a "/" character, 637 * it is returned unchanged.<p> 638 * 639 * <p>Examples:<br> 640 * <code>title</code> is left untouched<br> 641 * <code>title[1]</code> is left untouched<br> 642 * <code>title/subtitle</code> becomes <code>title</code><br> 643 * <code>title[1]/subtitle[1]</code> becomes <code>title[1]</code><p> 644 * 645 * @param path the Xpath to remove the last element from 646 * 647 * @return the path with the last element removed 648 */ 649 public static String removeLastXpathElement(String path) { 650 651 int pos = path.lastIndexOf('/'); 652 if (pos < 0) { 653 return path; 654 } 655 656 return path.substring(0, pos); 657 } 658 659 /** 660 * Removes all Xpath index information from the given input path.<p> 661 * 662 * Examples:<br> 663 * <code>title</code> is left untouched<br> 664 * <code>title[1]</code> becomes <code>title</code><br> 665 * <code>title/subtitle</code> is left untouched<br> 666 * <code>title[1]/subtitle[1]</code> becomes <code>title/subtitle</code><p> 667 * 668 * @param path the path to remove the Xpath index information from 669 * 670 * @return the simplified Xpath for the given name 671 */ 672 public static String removeXpath(String path) { 673 674 if (path.indexOf('/') > -1) { 675 // this is a complex path over more then 1 node 676 StringBuffer result = new StringBuffer(path.length() + 32); 677 678 // split the path into sub-elements 679 List<String> elements = CmsStringUtil.splitAsList(path, '/'); 680 int end = elements.size() - 1; 681 for (int i = 0; i <= end; i++) { 682 // remove [i] from path element if required 683 result.append(removeXpathIndex(elements.get(i))); 684 if (i < end) { 685 // append path delimiter if not final path element 686 result.append('/'); 687 } 688 } 689 return result.toString(); 690 } 691 692 // this path has only 1 node, remove last index if required 693 return removeXpathIndex(path); 694 } 695 696 /** 697 * Removes the last Xpath index from the given path.<p> 698 * 699 * Examples:<br> 700 * <code>title</code> is left untouched<br> 701 * <code>title[1]</code> becomes <code>title</code><br> 702 * <code>title/subtitle</code> is left untouched<br> 703 * <code>title[1]/subtitle[1]</code> becomes <code>title[1]/subtitle</code><p> 704 * 705 * @param path the path to remove the Xpath index from 706 * 707 * @return the path with the last Xpath index removed 708 */ 709 public static String removeXpathIndex(String path) { 710 711 int pos1 = path.lastIndexOf('/'); 712 int pos2 = path.lastIndexOf('['); 713 if ((pos2 < 0) || (pos1 > pos2)) { 714 return path; 715 } 716 717 return path.substring(0, pos2); 718 } 719 720 /** 721 * Simplifies an Xpath by removing a leading and a trailing slash from the given path.<p> 722 * 723 * Examples:<br> 724 * <code>title/</code> becomes <code>title</code><br> 725 * <code>/title[1]/</code> becomes <code>title[1]</code><br> 726 * <code>/title/subtitle/</code> becomes <code>title/subtitle</code><br> 727 * <code>/title/subtitle[1]/</code> becomes <code>title/subtitle[1]</code><p> 728 * 729 * @param path the path to process 730 * @return the input with a leading and a trailing slash removed 731 */ 732 public static String simplifyXpath(String path) { 733 734 StringBuffer result = new StringBuffer(path); 735 if (result.charAt(0) == '/') { 736 result.deleteCharAt(0); 737 } 738 int pos = result.length() - 1; 739 if (result.charAt(pos) == '/') { 740 result.deleteCharAt(pos); 741 } 742 return result.toString(); 743 } 744 745 /** 746 * Splits a content value path into its components, ignoring leading or trailing slashes.<p> 747 * 748 * Note: this does not work for XPaths in general, only for the paths used to identify values in OpenCms contents.<p> 749 * 750 * @param xpath the xpath 751 * 752 * @return the path components 753 */ 754 public static List<String> splitXpath(String xpath) { 755 756 return Arrays.stream(xpath.split("/")).filter(s -> !s.isEmpty()).collect(Collectors.toList()); 757 758 } 759 760 /** 761 * Helper to unmarshal (read) xml contents from a byte array into a document.<p> 762 * 763 * Using this method ensures that the OpenCms XML entity resolver is used.<p> 764 * 765 * @param xmlData the XML data in a byte array 766 * @param resolver the XML entity resolver to use 767 * 768 * @return the base object initialized with the unmarshalled XML document 769 * 770 * @throws CmsXmlException if something goes wrong 771 * 772 * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver) 773 */ 774 public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver) throws CmsXmlException { 775 776 return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver); 777 } 778 779 /** 780 * Helper to unmarshal (read) xml contents from a byte array into a document.<p> 781 * 782 * Using this method ensures that the OpenCms XML entity resolver is used.<p> 783 * 784 * @param xmlData the XML data in a byte array 785 * @param resolver the XML entity resolver to use 786 * @param validate if the reader should try to validate the xml code 787 * 788 * @return the base object initialized with the unmarshalled XML document 789 * 790 * @throws CmsXmlException if something goes wrong 791 * 792 * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver) 793 */ 794 public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver, boolean validate) 795 throws CmsXmlException { 796 797 return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver, validate); 798 } 799 800 /** 801 * Helper to unmarshal (read) xml contents from an input source into a document.<p> 802 * 803 * Using this method ensures that the OpenCms XML entity resolver is used.<p> 804 * 805 * Important: The encoding provided will NOT be used during unmarshalling, 806 * the XML parser will do this on the base of the information in the source String. 807 * The encoding is used for initializing the created instance of the document, 808 * which means it will be used when marshalling the document again later.<p> 809 * 810 * @param source the XML input source to use 811 * @param resolver the XML entity resolver to use 812 * 813 * @return the unmarshalled XML document 814 * 815 * @throws CmsXmlException if something goes wrong 816 */ 817 public static Document unmarshalHelper(InputSource source, EntityResolver resolver) throws CmsXmlException { 818 819 return unmarshalHelper(source, resolver, false); 820 } 821 822 /** 823 * Helper to unmarshal (read) xml contents from an input source into a document.<p> 824 * 825 * Using this method ensures that the OpenCms XML entity resolver is used.<p> 826 * 827 * Important: The encoding provided will NOT be used during unmarshalling, 828 * the XML parser will do this on the base of the information in the source String. 829 * The encoding is used for initializing the created instance of the document, 830 * which means it will be used when marshalling the document again later.<p> 831 * 832 * @param source the XML input source to use 833 * @param resolver the XML entity resolver to use 834 * @param validate if the reader should try to validate the xml code 835 * 836 * @return the unmarshalled XML document 837 * 838 * @throws CmsXmlException if something goes wrong 839 */ 840 public static Document unmarshalHelper(InputSource source, EntityResolver resolver, boolean validate) 841 throws CmsXmlException { 842 843 if (null == source) { 844 throw new CmsXmlException(Messages.get().container(Messages.ERR_UNMARSHALLING_XML_DOC_1, "source==null!")); 845 } 846 847 try { 848 SAXReader reader = new SAXReader(); 849 if (resolver != null) { 850 reader.setEntityResolver(resolver); 851 } 852 reader.setMergeAdjacentText(true); 853 reader.setStripWhitespaceText(true); 854 if (!validate) { 855 reader.setValidation(false); 856 reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 857 } else { 858 reader.setValidation(true); 859 } 860 return reader.read(source); 861 } catch (DocumentException e) { 862 String systemId = source != null ? source.getSystemId() : "???"; 863 throw new CmsXmlException( 864 Messages.get().container(Messages.ERR_UNMARSHALLING_XML_DOC_1, "(systemId = " + systemId + ")"), 865 e); 866 } catch (SAXException e) { 867 String systemId = source != null ? source.getSystemId() : "???"; 868 throw new CmsXmlException( 869 Messages.get().container(Messages.ERR_UNMARSHALLING_XML_DOC_1, "(systemId = " + systemId + ")"), 870 e); 871 } 872 } 873 874 /** 875 * Helper to unmarshal (read) xml contents from a String into a document.<p> 876 * 877 * Using this method ensures that the OpenCms XML entitiy resolver is used.<p> 878 * 879 * @param xmlData the xml data in a String 880 * @param resolver the XML entity resolver to use 881 * @return the base object initialized with the unmarshalled XML document 882 * @throws CmsXmlException if something goes wrong 883 * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver) 884 */ 885 public static Document unmarshalHelper(String xmlData, EntityResolver resolver) throws CmsXmlException { 886 887 return CmsXmlUtils.unmarshalHelper(new InputSource(new StringReader(xmlData)), resolver); 888 } 889 890 /** 891 * Validates the structure of a XML document contained in a byte array 892 * with the DTD or XML schema used by the document.<p> 893 * 894 * @param xmlData a byte array containing a XML document that should be validated 895 * @param resolver the XML entity resolver to use 896 * 897 * @throws CmsXmlException if the validation fails 898 */ 899 public static void validateXmlStructure(byte[] xmlData, EntityResolver resolver) throws CmsXmlException { 900 901 validateXmlStructure(new ByteArrayInputStream(xmlData), resolver); 902 } 903 904 /** 905 * Validates the structure of a XML document with the DTD or XML schema used 906 * by the document.<p> 907 * 908 * @param document a XML document that should be validated 909 * @param encoding the encoding to use when marshalling the XML document (required) 910 * @param resolver the XML entity resolver to use 911 * 912 * @throws CmsXmlException if the validation fails 913 */ 914 public static void validateXmlStructure(Document document, String encoding, EntityResolver resolver) 915 throws CmsXmlException { 916 917 // generate bytes from document 918 byte[] xmlData = ((ByteArrayOutputStream)marshal( 919 document, 920 new ByteArrayOutputStream(512), 921 encoding)).toByteArray(); 922 validateXmlStructure(xmlData, resolver); 923 } 924 925 /** 926 * Validates the structure of a XML document contained in a byte array 927 * with the DTD or XML schema used by the document.<p> 928 * 929 * @param xmlStream a source providing a XML document that should be validated 930 * @param resolver the XML entity resolver to use 931 * 932 * @throws CmsXmlException if the validation fails 933 */ 934 public static void validateXmlStructure(InputStream xmlStream, EntityResolver resolver) throws CmsXmlException { 935 936 XMLReader reader; 937 try { 938 reader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser"); 939 } catch (SAXException e) { 940 // xerces parser not available - no schema validation possible 941 if (LOG.isWarnEnabled()) { 942 LOG.warn(Messages.get().getBundle().key(Messages.LOG_VALIDATION_INIT_XERXES_SAX_READER_FAILED_0), e); 943 } 944 // no validation of the content is possible 945 return; 946 } 947 // turn on validation 948 try { 949 reader.setFeature("http://xml.org/sax/features/validation", true); 950 // turn on schema validation 951 reader.setFeature("http://apache.org/xml/features/validation/schema", true); 952 // configure namespace support 953 reader.setFeature("http://xml.org/sax/features/namespaces", true); 954 reader.setFeature("http://xml.org/sax/features/namespace-prefixes", false); 955 } catch (SAXNotRecognizedException e) { 956 // should not happen as Xerces 2 support this feature 957 if (LOG.isWarnEnabled()) { 958 LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_RECOGNIZED_0), e); 959 } 960 // no validation of the content is possible 961 return; 962 } catch (SAXNotSupportedException e) { 963 // should not happen as Xerces 2 support this feature 964 if (LOG.isWarnEnabled()) { 965 LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_SUPPORTED_0), e); 966 } 967 // no validation of the content is possible 968 return; 969 } 970 971 // add an error handler which turns any errors into XML 972 CmsXmlValidationErrorHandler errorHandler = new CmsXmlValidationErrorHandler(); 973 reader.setErrorHandler(errorHandler); 974 975 if (resolver != null) { 976 // set the resolver for the "opencms://" URIs 977 reader.setEntityResolver(resolver); 978 } 979 980 try { 981 reader.parse(new InputSource(xmlStream)); 982 } catch (IOException e) { 983 // should not happen since we read form a byte array 984 if (LOG.isErrorEnabled()) { 985 LOG.error(Messages.get().getBundle().key(Messages.LOG_READ_XML_FROM_BYTE_ARR_FAILED_0), e); 986 } 987 return; 988 } catch (SAXException e) { 989 // some exceptions will still be thrown even if they are sent to the error handler 990 if (LOG.isErrorEnabled()) { 991 LOG.error(Messages.get().getBundle().key(Messages.LOG_PARSE_SAX_EXC_0), e); 992 } 993 } 994 995 if (errorHandler.getErrors().elements().size() > 0) { 996 // there was at last one validation error, so throw an exception 997 StringWriter out = new StringWriter(256); 998 OutputFormat format = OutputFormat.createPrettyPrint(); 999 XMLWriter writer = new XMLWriter(out, format); 1000 try { 1001 writer.write(errorHandler.getErrors()); 1002 writer.write(errorHandler.getWarnings()); 1003 writer.close(); 1004 } catch (IOException e) { 1005 // should not happen since we write to a StringWriter 1006 if (LOG.isErrorEnabled()) { 1007 LOG.error(Messages.get().getBundle().key(Messages.LOG_STRINGWRITER_IO_EXC_0), e); 1008 } 1009 } 1010 // generate String from XML for display of document in error message 1011 throw new CmsXmlException(Messages.get().container(Messages.ERR_XML_VALIDATION_1, out.toString())); 1012 } 1013 } 1014}