001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.xml;
029
030import org.opencms.file.CmsResource;
031import org.opencms.main.CmsLog;
032import org.opencms.util.CmsStringUtil;
033import org.opencms.xml.content.CmsXmlContent;
034
035import java.io.ByteArrayInputStream;
036import java.io.ByteArrayOutputStream;
037import java.io.IOException;
038import java.io.InputStream;
039import java.io.OutputStream;
040import java.io.StringReader;
041import java.io.StringWriter;
042import java.io.UnsupportedEncodingException;
043import java.util.Arrays;
044import java.util.List;
045import java.util.stream.Collectors;
046
047import javax.xml.parsers.SAXParserFactory;
048
049import org.apache.commons.logging.Log;
050import org.apache.xerces.parsers.SAXParser;
051
052import org.dom4j.Attribute;
053import org.dom4j.Document;
054import org.dom4j.DocumentException;
055import org.dom4j.Element;
056import org.dom4j.Node;
057import org.dom4j.io.DOMReader;
058import org.dom4j.io.DOMWriter;
059import org.dom4j.io.OutputFormat;
060import org.dom4j.io.SAXReader;
061import org.dom4j.io.XMLWriter;
062import org.xml.sax.EntityResolver;
063import org.xml.sax.InputSource;
064import org.xml.sax.SAXException;
065import org.xml.sax.SAXNotRecognizedException;
066import org.xml.sax.SAXNotSupportedException;
067import org.xml.sax.XMLReader;
068import org.xml.sax.helpers.XMLReaderFactory;
069
070/**
071 * Provides some basic XML handling utilities.<p>
072 *
073 * @since 6.0.0
074 */
075public final class CmsXmlUtils {
076
077    /**
078     * This class is only used to expose the XML parser configuration implementation name.<p>
079     */
080    private static class ParserImpl extends SAXParser {
081
082        /**
083         * Constructor.<p>
084         */
085        ParserImpl() {
086
087            super();
088        }
089
090        /**
091         * Returns the implementation name of the used XML parser configuration.<p>
092         *
093         * @return the implementation name
094         */
095        String getConfigImplName() {
096
097            if (fConfiguration != null) {
098                return fConfiguration.getClass().getName();
099            } else {
100                return null;
101            }
102        }
103    }
104
    /** The log object for this class, obtained from {@link CmsLog}. */
    private static final Log LOG = CmsLog.getLog(CmsXmlUtils.class);

    /**
     * Name of the system property holding the Xerces XML parser configuration implementation class;
     * set by {@link #initSystemProperties()} if it is not already defined.
     */
    private static final String SAX_PARSER_CONFIG_KEY = "org.apache.xerces.xni.parser.XMLParserConfiguration";

    /**
     * Name of the system property holding the SAX parser factory implementation class;
     * set by {@link #initSystemProperties()} if it is not already defined.
     */
    private static final String SAX_PARSER_FACTORY_KEY = "javax.xml.parsers.SAXParserFactory";

    /**
     * Name of the system property holding the XML reader (SAX driver) implementation class;
     * set by {@link #initSystemProperties()} if it is not already defined.
     */
    private static final String XML_READER_KEY = "org.xml.sax.driver";
116
117    /**
118     * Prevents instances of this class from being generated.<p>
119     */
120    private CmsXmlUtils() {
121
122        // noop
123    }
124
125    /**
126     * Concatenates two Xpath expressions, ensuring that exactly one slash "/" is between them.<p>
127     *
128     * Use this method if it's uncertain if the given arguments are starting or ending with
129     * a slash "/".<p>
130     *
131     * Examples:<br>
132     * <code>"title", "subtitle"</code> becomes <code>title/subtitle</code><br>
133     * <code>"title[1]/", "subtitle"</code> becomes <code>title[1]/subtitle</code><br>
134     * <code>"title[1]/", "/subtitle[1]"</code> becomes <code>title[1]/subtitle[1]</code><p>
135     *
136     * @param prefix the prefix Xpath
137     * @param suffix the suffix Xpath
138     *
139     * @return the concatenated Xpath build from prefix and suffix
140     */
141    public static String concatXpath(String prefix, String suffix) {
142
143        if (suffix == null) {
144            // ensure suffix is not null
145            suffix = "";
146        } else {
147            if ((suffix.length() > 0) && (suffix.charAt(0) == '/')) {
148                // remove leading '/' form suffix
149                suffix = suffix.substring(1);
150            }
151        }
152        if (prefix != null) {
153            StringBuffer result = new StringBuffer(32);
154            result.append(prefix);
155            if (!CmsResource.isFolder(prefix) && (suffix.length() > 0)) {
156                result.append('/');
157            }
158            result.append(suffix);
159            return result.toString();
160        }
161        return suffix;
162    }
163
164    /**
165     * Converts an org.dom4j.Document to a org.w3c.dom.Document.
166     *
167     * @param doc the document to convert
168     * @return the converted document
169     */
170    public static org.w3c.dom.Document convertDocumentFromDom4jToW3C(Document doc) throws DocumentException {
171
172        return new DOMWriter().write(doc);
173    }
174
175    /**
176     * Converts an org.w3c.dom.Document to an org.dom4j.Document.
177     *
178     * @param doc the document to convert
179     * @return the converted document
180     */
181    public static Document convertDocumentFromW3CToDom4j(org.w3c.dom.Document doc) {
182
183        org.dom4j.io.DOMReader reader = new DOMReader();
184        return reader.read(doc);
185    }
186
187    /**
188     * Translates a simple lookup path to the simplified Xpath format used for
189     * the internal bookmarks.<p>
190     *
191     * Examples:<br>
192     * <code>title</code> becomes <code>title[1]</code><br>
193     * <code>title[1]</code> is left untouched<br>
194     * <code>title/subtitle</code> becomes <code>title[1]/subtitle[1]</code><br>
195     * <code>title/subtitle[1]</code> becomes <code>title[1]/subtitle[1]</code><p>
196     *
197     * Note: If the name already has the format <code>title[1]</code> then provided index parameter
198     * is ignored.<p>
199     *
200     * @param path the path to get the simplified Xpath for
201     * @param index the index to append (if required)
202     *
203     * @return the simplified Xpath for the given name
204     */
205    public static String createXpath(String path, int index) {
206
207        if (path.indexOf('/') > -1) {
208            // this is a complex path over more then 1 node
209            StringBuffer result = new StringBuffer(path.length() + 32);
210
211            // split the path into sub elements
212            List<String> elements = CmsStringUtil.splitAsList(path, '/');
213            int end = elements.size() - 1;
214            for (int i = 0; i <= end; i++) {
215                // append [i] to path element if required
216                result.append(createXpathElementCheck(elements.get(i), (i == end) ? index : 1));
217                if (i < end) {
218                    // append path delimiter if not final path element
219                    result.append('/');
220                }
221            }
222            return result.toString();
223        }
224
225        // this path has only 1 node, append [index] if required
226        return createXpathElementCheck(path, index);
227    }
228
229    /**
230     * Appends the provided index parameter in square brackets to the given name,
231     * like <code>path[index]</code>.<p>
232     *
233     * This method is used if it's clear that some path does not have
234     * a square bracket already appended.<p>
235     *
236     * @param path the path append the index to
237     * @param index the index to append
238     *
239     * @return the simplified Xpath for the given name
240     */
241    public static String createXpathElement(String path, int index) {
242
243        StringBuffer result = new StringBuffer(path.length() + 5);
244        result.append(path);
245        result.append('[');
246        result.append(index);
247        result.append(']');
248        return result.toString();
249    }
250
251    /**
252     * Ensures that a provided simplified Xpath has the format <code>title[1]</code>.<p>
253     *
254     * This method is used if it's uncertain if some path does have
255     * a square bracket already appended or not.<p>
256     *
257     * Note: If the name already has the format <code>title[1]</code>, then provided index parameter
258     * is ignored.<p>
259     *
260     * @param path the path to get the simplified Xpath for
261     * @param index the index to append (if required)
262     *
263     * @return the simplified Xpath for the given name
264     */
265    public static String createXpathElementCheck(String path, int index) {
266
267        if (path.charAt(path.length() - 1) == ']') {
268            // path is already in the form "title[1]"
269            // ignore provided index and return the path "as is"
270            return path;
271        }
272
273        // append index in square brackets
274        return createXpathElement(path, index);
275    }
276
277    /**
278     * Returns the first Xpath element from the provided path,
279     * without the index value.<p>
280     *
281     * Examples:<br>
282     * <code>title</code> is left untouched<br>
283     * <code>title[1]</code> becomes <code>title</code><br>
284     * <code>title/subtitle</code> becomes <code>title</code><br>
285     * <code>title[1]/subtitle[1]</code> becomes <code>title</code><p>
286     *
287     * @param path the path to get the first Xpath element from
288     *
289     * @return the first Xpath element from the provided path
290     */
291    public static String getFirstXpathElement(String path) {
292
293        int pos = path.indexOf('/');
294        if (pos >= 0) {
295            path = path.substring(0, pos);
296        }
297
298        return CmsXmlUtils.removeXpathIndex(path);
299    }
300
301    /**
302     * Returns the last Xpath element from the provided path,
303     * without the index value.<p>
304     *
305     * Examples:<br>
306     * <code>title</code> is left untouched<br>
307     * <code>title[1]</code> becomes <code>title</code><br>
308     * <code>title/subtitle</code> becomes <code>subtitle</code><br>
309     * <code>title[1]/subtitle[1]</code> becomes <code>subtitle</code><p>
310     *
311     * @param path the path to get the last Xpath element from
312     *
313     * @return the last Xpath element from the provided path
314     */
315    public static String getLastXpathElement(String path) {
316
317        int pos = path.lastIndexOf('/');
318        if (pos >= 0) {
319            path = path.substring(pos + 1);
320        }
321
322        return CmsXmlUtils.removeXpathIndex(path);
323    }
324
325    /**
326     * Returns the last Xpath element from the provided path.
327     *
328     *
329     * Examples:<br>
330     * <code>title</code> is left untouched<br>
331     * <code>title[1]/subtitle[1]</code> becomes <code>subtitle[1]</code><p>
332     *
333     * @param path the path to get the last Xpath element from
334     *
335     * @return the last Xpath element from the provided path
336     */
337    public static String getLastXpathElementWithIndex(String path) {
338
339        int pos = path.lastIndexOf('/');
340        if (pos >= 0) {
341            path = path.substring(pos + 1);
342        }
343        return path;
344    }
345
346    /**
347     * Helper method to get the version number from a schema's/content's XML document.
348     *
349     * @param doc the document
350     * @return the version (returns 0 if no version is set)
351     */
352    public static int getSchemaVersion(Document doc) {
353
354        if (doc == null) {
355            LOG.info("getSchemaVersion called with null document");
356            return 0;
357        }
358        Element root = doc.getRootElement();
359        Attribute versionAttr = root.attribute(CmsXmlContent.A_VERSION);
360        if (versionAttr != null) {
361            try {
362                return Integer.parseInt(versionAttr.getValue());
363            } catch (Exception e) {
364                LOG.error(e.getLocalizedMessage(), e);
365            }
366        }
367        return 0;
368    }
369
370    /**
371     * Returns the last Xpath index from the given path.<p>
372     *
373     * Examples:<br>
374     * <code>title</code> returns the empty String<p>
375     * <code>title[1]</code> returns <code>[1]</code><p>
376     * <code>title/subtitle</code> returns them empty String<p>
377     * <code>title[1]/subtitle[1]</code> returns <code>[1]</code><p>
378     *
379     * @param path the path to extract the Xpath index from
380     *
381     * @return  the last Xpath index from the given path
382     */
383    public static String getXpathIndex(String path) {
384
385        int pos1 = path.lastIndexOf('/');
386        int pos2 = path.lastIndexOf('[');
387        if ((pos2 < 0) || (pos1 > pos2)) {
388            return "";
389        }
390
391        return path.substring(pos2);
392    }
393
394    /**
395     * Returns the last Xpath index from the given path as integer.<p>
396     *
397     * Examples:<br>
398     * <code>title</code> returns 1<p>
399     * <code>title[1]</code> returns 1<p>
400     * <code>title/subtitle</code> returns 1<p>
401     * <code>title[1]/subtitle[2]</code> returns 2<p>
402     *
403     * @param path the path to extract the Xpath index from
404     *
405     * @return the last Xpath index from the given path as integer
406     */
407    public static int getXpathIndexInt(String path) {
408
409        int pos1 = path.lastIndexOf('/');
410        int pos2 = path.lastIndexOf('[');
411        if ((pos2 < 0) || (pos1 > pos2)) {
412            return 1;
413        }
414
415        String idxStr = path.substring(pos2 + 1, path.lastIndexOf(']'));
416        try {
417            return Integer.parseInt(idxStr);
418        } catch (NumberFormatException e) {
419            // NOOP
420        }
421        return 1;
422    }
423
424    /**
425     * Initializes XML processing system properties to avoid evaluating the XML parser and reader implementation each time an XML document is read.<p>
426     * This is done for performance improvements only.<p>
427     */
428    public static void initSystemProperties() {
429
430        String implName;
431        // initialize system properties
432        if (System.getProperty(SAX_PARSER_FACTORY_KEY) == null) {
433            implName = SAXParserFactory.newInstance().getClass().getName();
434            LOG.info("Setting sax parser factory impl property to " + implName);
435            System.setProperty(SAX_PARSER_FACTORY_KEY, implName);
436        }
437        if (System.getProperty(XML_READER_KEY) == null) {
438            SAXReader reader = new SAXReader();
439            try {
440                implName = reader.getXMLReader().getClass().getName();
441                LOG.info("Setting xml reader impl property to " + implName);
442                System.setProperty(XML_READER_KEY, implName);
443            } catch (SAXException e) {
444                LOG.error("Error evaluating XMLReader impl.", e);
445            }
446        }
447        if (System.getProperty(SAX_PARSER_CONFIG_KEY) == null) {
448            ParserImpl saxParser = new ParserImpl();
449            implName = saxParser.getConfigImplName();
450            if (implName != null) {
451                LOG.info("Setting xml parser configuration impl property to " + implName);
452                System.setProperty(SAX_PARSER_CONFIG_KEY, implName);
453            }
454        }
455    }
456
457    /**
458     * Returns <code>true</code> if the given path is a Xpath with
459     * at least 2 elements.<p>
460     *
461     * Examples:<br>
462     * <code>title</code> returns <code>false</code><br>
463     * <code>title[1]</code> returns <code>false</code><br>
464     * <code>title/subtitle</code> returns <code>true</code><br>
465     * <code>title[1]/subtitle[1]</code> returns <code>true</code><p>
466     *
467     * @param path the path to check
468     * @return true if the given path is a Xpath with at least 2 elements
469     */
470    public static boolean isDeepXpath(String path) {
471
472        return path.indexOf('/') > 0;
473    }
474
475    /**
476     * Marshals (writes) an XML document into an output stream using XML pretty-print formatting.<p>
477     *
478     * @param document the XML document to marshal
479     * @param out the output stream to write to
480     * @param encoding the encoding to use
481     * @return the output stream with the xml content
482     * @throws CmsXmlException if something goes wrong
483     */
484    public static OutputStream marshal(Document document, OutputStream out, String encoding) throws CmsXmlException {
485
486        try {
487            OutputFormat format = OutputFormat.createPrettyPrint();
488            format.setEncoding(encoding);
489
490            XMLWriter writer = new XMLWriter(out, format);
491
492            writer.write(document);
493            writer.close();
494
495        } catch (Exception e) {
496            throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e);
497        }
498
499        return out;
500    }
501
502    /**
503     * Marshals (writes) an XML document to a String using XML pretty-print formatting.<p>
504     *
505     * @param document the XML document to marshal
506     * @param encoding the encoding to use
507     * @return the marshalled XML document
508     * @throws CmsXmlException if something goes wrong
509     */
510    public static String marshal(Document document, String encoding) throws CmsXmlException {
511
512        ByteArrayOutputStream out = new ByteArrayOutputStream();
513        marshal(document, out, encoding);
514        try {
515            return out.toString(encoding);
516        } catch (UnsupportedEncodingException e) {
517            throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_TO_STRING_0), e);
518        }
519    }
520
521    /**
522     * Marshals (writes) an XML node into an output stream using XML pretty-print formatting.<p>
523     *
524     * @param node the XML node to marshal
525     * @param encoding the encoding to use
526     *
527     * @return the string with the xml content
528     *
529     * @throws CmsXmlException if something goes wrong
530     */
531    public static String marshal(Node node, String encoding) throws CmsXmlException {
532
533        ByteArrayOutputStream out = new ByteArrayOutputStream();
534        try {
535            OutputFormat format = OutputFormat.createPrettyPrint();
536            format.setEncoding(encoding);
537            format.setSuppressDeclaration(true);
538
539            XMLWriter writer = new XMLWriter(out, format);
540            writer.setEscapeText(false);
541
542            writer.write(node);
543            writer.close();
544        } catch (Exception e) {
545            throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e);
546        }
547        return new String(out.toByteArray());
548    }
549
550    /**
551     * Removes all Xpath indices from the given path.<p>
552     *
553     * Example:<br>
554     * <code>title</code> is left untouched<br>
555     * <code>title[1]</code> becomes <code>title</code><br>
556     * <code>title/subtitle</code> is left untouched<br>
557     * <code>title[1]/subtitle[1]</code> becomes <code>title/subtitle</code><p>
558     *
559     * @param path the path to remove the Xpath index from
560     *
561     * @return the path with all Xpath indices removed
562     */
563    public static String removeAllXpathIndices(String path) {
564
565        return path.replaceAll("\\[[0-9]+\\]", "");
566    }
567
568    /**
569     * Removes the first Xpath element from the path.<p>
570     *
571     * If the provided path does not contain a "/" character,
572     * it is returned unchanged.<p>
573     *
574     * <p>Examples:<br>
575     * <code>title</code> is left untouched<br>
576     * <code>title[1]</code> is left untouched<br>
577     * <code>title/subtitle</code> becomes <code>subtitle</code><br>
578     * <code>title[1]/subtitle[1]</code> becomes <code>subtitle[1]</code><p>
579     *
580     * @param path the Xpath to remove the first element from
581     *
582     * @return the path with the first element removed
583     */
584    public static String removeFirstXpathElement(String path) {
585
586        int pos = path.indexOf('/');
587        if (pos < 0) {
588            return path;
589        }
590
591        return path.substring(pos + 1);
592    }
593
594    /**
595     * Removes the last complex Xpath element from the path.<p>
596     *
597     * The same as {@link #removeLastXpathElement(String)} both it works with more complex xpaths.
598     *
599     * <p>Example:<br>
600     * <code>system/backup[@date='23/10/2003']/resource[path='/a/b/c']</code> becomes <code>system/backup[@date='23/10/2003']</code><p>
601     *
602     * @param path the Xpath to remove the last element from
603     *
604     * @return the path with the last element removed
605     */
606    public static String removeLastComplexXpathElement(String path) {
607
608        int pos = path.lastIndexOf('/');
609        if (pos < 0) {
610            return path;
611        }
612        // count ' chars
613        int p = pos;
614        int count = -1;
615        while (p > 0) {
616            count++;
617            p = path.indexOf("\'", p + 1);
618        }
619        String parentPath = path.substring(0, pos);
620        if ((count % 2) == 0) {
621            // if substring is complete
622            return parentPath;
623        }
624        // if not complete
625        p = parentPath.lastIndexOf("'");
626        if (p >= 0) {
627            // complete it if possible
628            return removeLastComplexXpathElement(parentPath.substring(0, p));
629        }
630        return parentPath;
631    }
632
633    /**
634     * Removes the last Xpath element from the path.<p>
635     *
636     * If the provided path does not contain a "/" character,
637     * it is returned unchanged.<p>
638     *
639     * <p>Examples:<br>
640     * <code>title</code> is left untouched<br>
641     * <code>title[1]</code> is left untouched<br>
642     * <code>title/subtitle</code> becomes <code>title</code><br>
643     * <code>title[1]/subtitle[1]</code> becomes <code>title[1]</code><p>
644     *
645     * @param path the Xpath to remove the last element from
646     *
647     * @return the path with the last element removed
648     */
649    public static String removeLastXpathElement(String path) {
650
651        int pos = path.lastIndexOf('/');
652        if (pos < 0) {
653            return path;
654        }
655
656        return path.substring(0, pos);
657    }
658
659    /**
660     * Removes all Xpath index information from the given input path.<p>
661     *
662     * Examples:<br>
663     * <code>title</code> is left untouched<br>
664     * <code>title[1]</code> becomes <code>title</code><br>
665     * <code>title/subtitle</code> is left untouched<br>
666     * <code>title[1]/subtitle[1]</code> becomes <code>title/subtitle</code><p>
667     *
668     * @param path the path to remove the Xpath index information from
669     *
670     * @return the simplified Xpath for the given name
671     */
672    public static String removeXpath(String path) {
673
674        if (path.indexOf('/') > -1) {
675            // this is a complex path over more then 1 node
676            StringBuffer result = new StringBuffer(path.length() + 32);
677
678            // split the path into sub-elements
679            List<String> elements = CmsStringUtil.splitAsList(path, '/');
680            int end = elements.size() - 1;
681            for (int i = 0; i <= end; i++) {
682                // remove [i] from path element if required
683                result.append(removeXpathIndex(elements.get(i)));
684                if (i < end) {
685                    // append path delimiter if not final path element
686                    result.append('/');
687                }
688            }
689            return result.toString();
690        }
691
692        // this path has only 1 node, remove last index if required
693        return removeXpathIndex(path);
694    }
695
696    /**
697     * Removes the last Xpath index from the given path.<p>
698     *
699     * Examples:<br>
700     * <code>title</code> is left untouched<br>
701     * <code>title[1]</code> becomes <code>title</code><br>
702     * <code>title/subtitle</code> is left untouched<br>
703     * <code>title[1]/subtitle[1]</code> becomes <code>title[1]/subtitle</code><p>
704     *
705     * @param path the path to remove the Xpath index from
706     *
707     * @return the path with the last Xpath index removed
708     */
709    public static String removeXpathIndex(String path) {
710
711        int pos1 = path.lastIndexOf('/');
712        int pos2 = path.lastIndexOf('[');
713        if ((pos2 < 0) || (pos1 > pos2)) {
714            return path;
715        }
716
717        return path.substring(0, pos2);
718    }
719
720    /**
721     * Simplifies an Xpath by removing a leading and a trailing slash from the given path.<p>
722     *
723     * Examples:<br>
724     * <code>title/</code> becomes <code>title</code><br>
725     * <code>/title[1]/</code> becomes <code>title[1]</code><br>
726     * <code>/title/subtitle/</code> becomes <code>title/subtitle</code><br>
727     * <code>/title/subtitle[1]/</code> becomes <code>title/subtitle[1]</code><p>
728     *
729     * @param path the path to process
730     * @return the input with a leading and a trailing slash removed
731     */
732    public static String simplifyXpath(String path) {
733
734        StringBuffer result = new StringBuffer(path);
735        if (result.charAt(0) == '/') {
736            result.deleteCharAt(0);
737        }
738        int pos = result.length() - 1;
739        if (result.charAt(pos) == '/') {
740            result.deleteCharAt(pos);
741        }
742        return result.toString();
743    }
744
745    /**
746     * Splits a content value path into its components, ignoring leading or trailing slashes.<p>
747     *
748     * Note: this does not work for XPaths in general, only for the paths used to identify values in OpenCms contents.<p>
749     *
750     * @param xpath the xpath
751     *
752     * @return the path components
753     */
754    public static List<String> splitXpath(String xpath) {
755
756        return Arrays.stream(xpath.split("/")).filter(s -> !s.isEmpty()).collect(Collectors.toList());
757
758    }
759
760    /**
761     * Helper to unmarshal (read) xml contents from a byte array into a document.<p>
762     *
763     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
764     *
765     * @param xmlData the XML data in a byte array
766     * @param resolver the XML entity resolver to use
767     *
768     * @return the base object initialized with the unmarshalled XML document
769     *
770     * @throws CmsXmlException if something goes wrong
771     *
772     * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver)
773     */
774    public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver) throws CmsXmlException {
775
776        return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver);
777    }
778
779    /**
780     * Helper to unmarshal (read) xml contents from a byte array into a document.<p>
781     *
782     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
783     *
784     * @param xmlData the XML data in a byte array
785     * @param resolver the XML entity resolver to use
786     * @param validate if the reader should try to validate the xml code
787     *
788     * @return the base object initialized with the unmarshalled XML document
789     *
790     * @throws CmsXmlException if something goes wrong
791     *
792     * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver)
793     */
794    public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver, boolean validate)
795    throws CmsXmlException {
796
797        return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver, validate);
798    }
799
800    /**
801     * Helper to unmarshal (read) xml contents from an input source into a document.<p>
802     *
803     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
804     *
805     * Important: The encoding provided will NOT be used during unmarshalling,
806     * the XML parser will do this on the base of the information in the source String.
807     * The encoding is used for initializing the created instance of the document,
808     * which means it will be used when marshalling the document again later.<p>
809     *
810     * @param source the XML input source to use
811     * @param resolver the XML entity resolver to use
812     *
813     * @return the unmarshalled XML document
814     *
815     * @throws CmsXmlException if something goes wrong
816     */
817    public static Document unmarshalHelper(InputSource source, EntityResolver resolver) throws CmsXmlException {
818
819        return unmarshalHelper(source, resolver, false);
820    }
821
822    /**
823     * Helper to unmarshal (read) xml contents from an input source into a document.<p>
824     *
825     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
826     *
827     * Important: The encoding provided will NOT be used during unmarshalling,
828     * the XML parser will do this on the base of the information in the source String.
829     * The encoding is used for initializing the created instance of the document,
830     * which means it will be used when marshalling the document again later.<p>
831     *
832     * @param source the XML input source to use
833     * @param resolver the XML entity resolver to use
834     * @param validate if the reader should try to validate the xml code
835     *
836     * @return the unmarshalled XML document
837     *
838     * @throws CmsXmlException if something goes wrong
839     */
840    public static Document unmarshalHelper(InputSource source, EntityResolver resolver, boolean validate)
841    throws CmsXmlException {
842
843        if (null == source) {
844            throw new CmsXmlException(Messages.get().container(Messages.ERR_UNMARSHALLING_XML_DOC_1, "source==null!"));
845        }
846
847        try {
848            SAXReader reader = new SAXReader();
849            if (resolver != null) {
850                reader.setEntityResolver(resolver);
851            }
852            reader.setMergeAdjacentText(true);
853            reader.setStripWhitespaceText(true);
854            if (!validate) {
855                reader.setValidation(false);
856                reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
857            } else {
858                reader.setValidation(true);
859            }
860            return reader.read(source);
861        } catch (DocumentException e) {
862            String systemId = source != null ? source.getSystemId() : "???";
863            throw new CmsXmlException(
864                Messages.get().container(Messages.ERR_UNMARSHALLING_XML_DOC_1, "(systemId = " + systemId + ")"),
865                e);
866        } catch (SAXException e) {
867            String systemId = source != null ? source.getSystemId() : "???";
868            throw new CmsXmlException(
869                Messages.get().container(Messages.ERR_UNMARSHALLING_XML_DOC_1, "(systemId = " + systemId + ")"),
870                e);
871        }
872    }
873
874    /**
875     * Helper to unmarshal (read) xml contents from a String into a document.<p>
876     *
877     * Using this method ensures that the OpenCms XML entitiy resolver is used.<p>
878     *
879     * @param xmlData the xml data in a String
880     * @param resolver the XML entity resolver to use
881     * @return the base object initialized with the unmarshalled XML document
882     * @throws CmsXmlException if something goes wrong
883     * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver)
884     */
885    public static Document unmarshalHelper(String xmlData, EntityResolver resolver) throws CmsXmlException {
886
887        return CmsXmlUtils.unmarshalHelper(new InputSource(new StringReader(xmlData)), resolver);
888    }
889
890    /**
891     * Validates the structure of a XML document contained in a byte array
892     * with the DTD or XML schema used by the document.<p>
893     *
894     * @param xmlData a byte array containing a XML document that should be validated
895     * @param resolver the XML entity resolver to use
896     *
897     * @throws CmsXmlException if the validation fails
898     */
899    public static void validateXmlStructure(byte[] xmlData, EntityResolver resolver) throws CmsXmlException {
900
901        validateXmlStructure(new ByteArrayInputStream(xmlData), resolver);
902    }
903
904    /**
905     * Validates the structure of a XML document with the DTD or XML schema used
906     * by the document.<p>
907     *
908     * @param document a XML document that should be validated
909     * @param encoding the encoding to use when marshalling the XML document (required)
910     * @param resolver the XML entity resolver to use
911     *
912     * @throws CmsXmlException if the validation fails
913     */
914    public static void validateXmlStructure(Document document, String encoding, EntityResolver resolver)
915    throws CmsXmlException {
916
917        // generate bytes from document
918        byte[] xmlData = ((ByteArrayOutputStream)marshal(
919            document,
920            new ByteArrayOutputStream(512),
921            encoding)).toByteArray();
922        validateXmlStructure(xmlData, resolver);
923    }
924
925    /**
926     * Validates the structure of a XML document contained in a byte array
927     * with the DTD or XML schema used by the document.<p>
928     *
929     * @param xmlStream a source providing a XML document that should be validated
930     * @param resolver the XML entity resolver to use
931     *
932     * @throws CmsXmlException if the validation fails
933     */
934    public static void validateXmlStructure(InputStream xmlStream, EntityResolver resolver) throws CmsXmlException {
935
936        XMLReader reader;
937        try {
938            reader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
939        } catch (SAXException e) {
940            // xerces parser not available - no schema validation possible
941            if (LOG.isWarnEnabled()) {
942                LOG.warn(Messages.get().getBundle().key(Messages.LOG_VALIDATION_INIT_XERXES_SAX_READER_FAILED_0), e);
943            }
944            // no validation of the content is possible
945            return;
946        }
947        // turn on validation
948        try {
949            reader.setFeature("http://xml.org/sax/features/validation", true);
950            // turn on schema validation
951            reader.setFeature("http://apache.org/xml/features/validation/schema", true);
952            // configure namespace support
953            reader.setFeature("http://xml.org/sax/features/namespaces", true);
954            reader.setFeature("http://xml.org/sax/features/namespace-prefixes", false);
955        } catch (SAXNotRecognizedException e) {
956            // should not happen as Xerces 2 support this feature
957            if (LOG.isWarnEnabled()) {
958                LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_RECOGNIZED_0), e);
959            }
960            // no validation of the content is possible
961            return;
962        } catch (SAXNotSupportedException e) {
963            // should not happen as Xerces 2 support this feature
964            if (LOG.isWarnEnabled()) {
965                LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_SUPPORTED_0), e);
966            }
967            // no validation of the content is possible
968            return;
969        }
970
971        // add an error handler which turns any errors into XML
972        CmsXmlValidationErrorHandler errorHandler = new CmsXmlValidationErrorHandler();
973        reader.setErrorHandler(errorHandler);
974
975        if (resolver != null) {
976            // set the resolver for the "opencms://" URIs
977            reader.setEntityResolver(resolver);
978        }
979
980        try {
981            reader.parse(new InputSource(xmlStream));
982        } catch (IOException e) {
983            // should not happen since we read form a byte array
984            if (LOG.isErrorEnabled()) {
985                LOG.error(Messages.get().getBundle().key(Messages.LOG_READ_XML_FROM_BYTE_ARR_FAILED_0), e);
986            }
987            return;
988        } catch (SAXException e) {
989            // some exceptions will still be thrown even if they are sent to the error handler
990            if (LOG.isErrorEnabled()) {
991                LOG.error(Messages.get().getBundle().key(Messages.LOG_PARSE_SAX_EXC_0), e);
992            }
993        }
994
995        if (errorHandler.getErrors().elements().size() > 0) {
996            // there was at last one validation error, so throw an exception
997            StringWriter out = new StringWriter(256);
998            OutputFormat format = OutputFormat.createPrettyPrint();
999            XMLWriter writer = new XMLWriter(out, format);
1000            try {
1001                writer.write(errorHandler.getErrors());
1002                writer.write(errorHandler.getWarnings());
1003                writer.close();
1004            } catch (IOException e) {
1005                // should not happen since we write to a StringWriter
1006                if (LOG.isErrorEnabled()) {
1007                    LOG.error(Messages.get().getBundle().key(Messages.LOG_STRINGWRITER_IO_EXC_0), e);
1008                }
1009            }
1010            // generate String from XML for display of document in error message
1011            throw new CmsXmlException(Messages.get().container(Messages.ERR_XML_VALIDATION_1, out.toString()));
1012        }
1013    }
1014}