001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.pdftools;
029
030import org.opencms.file.CmsObject;
031import org.opencms.pdftools.dtds.FailingEntityResolver;
032import org.opencms.pdftools.dtds.XhtmlEntityResolver;
033
034import java.io.ByteArrayInputStream;
035import java.io.ByteArrayOutputStream;
036import java.io.IOException;
037
038import javax.xml.parsers.DocumentBuilder;
039import javax.xml.parsers.DocumentBuilderFactory;
040import javax.xml.parsers.ParserConfigurationException;
041
042import org.w3c.dom.Document;
043import org.xhtmlrenderer.pdf.ITextRenderer;
044import org.xhtmlrenderer.util.XRLog;
045import org.xml.sax.EntityResolver;
046import org.xml.sax.SAXException;
047
048/**
049 * This class uses the flying-saucer library to convert an XHTML document to a PDF document.<p>
050 */
051public class CmsPdfConverter {
052
053    static {
054        // send logging from flyingsaucer to opencms log
055        System.getProperties().setProperty("xr.util-logging.loggingEnabled", "true");
056        XRLog.setLoggingEnabled(true);
057        XRLog.setLoggerImpl(new CmsXRLogAdapter());
058    }
059
060    /** Entity resolver which loads cached DTDs instead of fetching DTDs from the web. */
061    private EntityResolver m_entityResolver = new XhtmlEntityResolver(new FailingEntityResolver());
062
063    /**
064     * Creates a new instance.<p>
065     */
066    public CmsPdfConverter() {
067
068        // do nothing
069    }
070
071    /**
072     * Converts XHTML data to a PDF document.<p>
073     *
074     * @param cms the current CMS context
075     * @param xhtmlData the XHTML as a byte array
076     * @param uri the uri to use for error messages in the XML parser
077     *
078     * @return the PDF data as a byte array
079     *
080     * @throws Exception if something goes wrong
081     */
082    public byte[] convertXhtmlToPdf(CmsObject cms, byte[] xhtmlData, String uri) throws Exception {
083
084        Document doc = readDocument(xhtmlData);
085        ITextRenderer renderer = new ITextRenderer();
086        CmsPdfUserAgent userAgent = new CmsPdfUserAgent(cms);
087        userAgent.setSharedContext(renderer.getSharedContext());
088        renderer.getSharedContext().setUserAgentCallback(userAgent);
089        renderer.setDocument(doc, uri);
090        renderer.layout();
091        ByteArrayOutputStream out = new ByteArrayOutputStream();
092        renderer.createPDF(out);
093        return out.toByteArray();
094    }
095
096    /**
097     * Reads an XHTML document from a byte array.<p>
098     *
099     * @param xhtmlData the XHTML data
100     * @return the document which was read from the data
101     *
102     * @throws ParserConfigurationException
103     * @throws SAXException
104     * @throws IOException
105     */
106    private Document readDocument(byte[] xhtmlData) throws ParserConfigurationException, SAXException, IOException {
107
108        DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
109        docBuilderFactory.setValidating(false);
110        docBuilderFactory.setNamespaceAware(true);
111        DocumentBuilder docbuilder = docBuilderFactory.newDocumentBuilder();
112        // use special entity resolver so we don't fetch the DTDs from w3.org, which would be slow
113        docbuilder.setEntityResolver(m_entityResolver);
114        Document doc = docbuilder.parse(new ByteArrayInputStream(xhtmlData));
115        return doc;
116    }
117}