001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.xml.page;
029
030import org.opencms.file.CmsFile;
031import org.opencms.file.CmsObject;
032import org.opencms.file.CmsPropertyDefinition;
033import org.opencms.file.CmsResource;
034import org.opencms.file.CmsResourceFilter;
035import org.opencms.file.types.CmsResourceTypeXmlContent;
036import org.opencms.file.types.CmsResourceTypeXmlPage;
037import org.opencms.i18n.CmsEncoder;
038import org.opencms.main.CmsException;
039import org.opencms.main.CmsLog;
040import org.opencms.main.OpenCms;
041import org.opencms.xml.CmsXmlEntityResolver;
042import org.opencms.xml.CmsXmlException;
043import org.opencms.xml.CmsXmlUtils;
044import org.opencms.xml.I_CmsXmlDocument;
045import org.opencms.xml.content.CmsXmlContentFactory;
046import org.opencms.xml.types.I_CmsXmlSchemaType;
047
048import java.io.UnsupportedEncodingException;
049import java.util.Locale;
050
051import javax.servlet.ServletRequest;
052
053import org.apache.commons.logging.Log;
054
055import org.dom4j.Document;
056import org.dom4j.DocumentHelper;
057import org.dom4j.Element;
058import org.xml.sax.EntityResolver;
059
060/**
061 * Provides factory methods to unmarshal (read) an XML page object.<p>
062 *
063 * @since 6.0.0
064 */
065public final class CmsXmlPageFactory {
066
067    /** The log object for this class. */
068    private static final Log LOG = CmsLog.getLog(CmsXmlPageFactory.class);
069
070    /**
071     * No instances of this class should be created.<p>
072     */
073    private CmsXmlPageFactory() {
074
075        // noop
076    }
077
078    /**
079     * Creates a valid XML page document,
080     * containing one empty element in the given locale.<p>
081     *
082     * @param locale the locale to create the XML page for
083     *
084     * @return a valid XML page document
085     */
086    public static Document createDocument(Locale locale) {
087
088        Document doc = DocumentHelper.createDocument();
089        Element pages = doc.addElement(CmsXmlPage.NODE_PAGES);
090        pages.add(I_CmsXmlSchemaType.XSI_NAMESPACE);
091        pages.addAttribute(
092            I_CmsXmlSchemaType.XSI_NAMESPACE_ATTRIBUTE_NO_SCHEMA_LOCATION,
093            CmsXmlPage.XMLPAGE_XSD_SYSTEM_ID);
094
095        Element page = pages.addElement(CmsXmlPage.NODE_PAGE);
096        page.addAttribute(CmsXmlPage.ATTRIBUTE_LANGUAGE, locale.toString());
097
098        return doc;
099    }
100
101    /**
102     * Creates a valid XML page String representation,
103     * containing one empty element in the given locale.<p>
104     *
105     * @param locale the locale to create the XML page for
106     * @param encoding the encoding to use when creating the String from the XML
107     *
108     * @return a valid XML page document as a String
109     */
110    public static String createDocument(Locale locale, String encoding) {
111
112        try {
113            return CmsXmlUtils.marshal(createDocument(locale), encoding);
114        } catch (CmsXmlException e) {
115            // this should never happen
116            LOG.error(Messages.get().getBundle().key(Messages.ERR_XML_PAGE_FACT_CREATE_DOC_0), e);
117            return null;
118        }
119    }
120
121    /**
122     * Factory method to unmarshal (read) a XML page instance from a byte array
123     * that contains XML data.<p>
124     *
125     * When unmarshalling, the encoding is read directly from the XML header.
126     * The given encoding is used only when marshalling the XML again later.<p>
127     *
128     * @param xmlData the XML data in a byte array
129     * @param encoding the encoding to use when marshalling the XML page later
130     * @param resolver the XML entity resolver to use
131     *
132     * @return a XML page instance unmarshalled from the byte array
133     *
134     * @throws CmsXmlException if something goes wrong
135     */
136    public static CmsXmlPage unmarshal(byte[] xmlData, String encoding, EntityResolver resolver)
137    throws CmsXmlException {
138
139        return new CmsXmlPage(CmsXmlUtils.unmarshalHelper(xmlData, resolver), encoding);
140    }
141
142    /**
143     * Factory method to unmarshal (read) a XML page instance from a OpenCms VFS file
144     * that contains XML data.<p>
145     *
146     * @param cms the current cms object
147     * @param file the file with the XML data to unmarshal
148     *
149     * @return a XML page instance unmarshalled from the provided file
150     *
151     * @throws CmsXmlException if something goes wrong
152     */
153    public static CmsXmlPage unmarshal(CmsObject cms, CmsFile file) throws CmsXmlException {
154
155        return CmsXmlPageFactory.unmarshal(cms, file, true);
156    }
157
158    /**
159     * Factory method to unmarshal (read) a XML page instance from a OpenCms VFS file
160     * that contains XML data, using wither the encoding set
161     * in the XML file header, or the encoding set in the VFS file property.<p>
162     *
163     * If you are not sure about the implications of the encoding issues,
164     * use {@link #unmarshal(CmsObject, CmsFile)} instead.<p>
165     *
166     * @param cms the current OpenCms user context
167     * @param file the file with the XML data to unmarshal
168     * @param keepEncoding if true, the encoding spefified in the XML header is used,
169     *    otherwise the encoding from the VFS file property is used
170     *
171     * @return a XML page instance unmarshalled from the provided file
172     *
173     * @throws CmsXmlException if something goes wrong
174     */
175    public static CmsXmlPage unmarshal(CmsObject cms, CmsFile file, boolean keepEncoding) throws CmsXmlException {
176
177        byte[] content = file.getContents();
178
179        String fileName = cms.getSitePath(file);
180        boolean allowRelative = false;
181        try {
182            allowRelative = Boolean.valueOf(
183                cms.readPropertyObject(fileName, CmsXmlPage.PROPERTY_ALLOW_RELATIVE, false).getValue()).booleanValue();
184        } catch (CmsException e) {
185            // allowRelative will be false
186        }
187
188        String encoding = null;
189        try {
190            encoding = cms.readPropertyObject(
191                fileName,
192                CmsPropertyDefinition.PROPERTY_CONTENT_ENCODING,
193                true).getValue();
194        } catch (CmsException e) {
195            // encoding will be null
196        }
197        if (encoding == null) {
198            encoding = OpenCms.getSystemInfo().getDefaultEncoding();
199        } else {
200            encoding = CmsEncoder.lookupEncoding(encoding, null);
201            if (encoding == null) {
202                throw new CmsXmlException(Messages.get().container(Messages.ERR_XML_PAGE_FACT_INVALID_ENC_1, fileName));
203            }
204        }
205
206        CmsXmlPage newPage;
207        if (content.length > 0) {
208            // content is initialized
209            if (keepEncoding) {
210                // use the encoding from the content
211                newPage = unmarshal(content, encoding, new CmsXmlEntityResolver(cms));
212            } else {
213                // use the encoding from the file property
214                // this usually only triggered by a save operation
215                try {
216                    String contentStr = new String(content, encoding);
217                    newPage = unmarshal(contentStr, encoding, new CmsXmlEntityResolver(cms));
218                } catch (UnsupportedEncodingException e) {
219                    // this will not happen since the encodig has already been validated
220                    throw new CmsXmlException(
221                        Messages.get().container(Messages.ERR_XML_PAGE_FACT_INVALID_ENC_1, fileName),
222                        e);
223                }
224            }
225        } else {
226            // content is empty
227            newPage = new CmsXmlPage(cms.getRequestContext().getLocale(), encoding);
228        }
229
230        newPage.setFile(file);
231        newPage.setAllowRelativeLinks(allowRelative);
232
233        return newPage;
234    }
235
236    /**
237     * Factory method to unmarshal (read) a XML page instance from
238     * a resource, using the request attributes as cache.<p>
239     *
240     * @param cms the current OpenCms user context
241     * @param resource the resource to unmarshal
242     * @param req the current request
243     *
244     * @return the unmarshaled XML page, or null if the given resource was not of type {@link CmsResourceTypeXmlPage}
245     *
246     * @throws CmsException in something goes wrong
247     */
248    public static CmsXmlPage unmarshal(CmsObject cms, CmsResource resource, ServletRequest req) throws CmsException {
249
250        String rootPath = resource.getRootPath();
251
252        if (!CmsResourceTypeXmlPage.isXmlPage(resource)) {
253            // sanity check: resource must be of type XML page
254            throw new CmsXmlException(
255                Messages.get().container(Messages.ERR_XML_PAGE_FACT_NO_XMLPAGE_TYPE_1, cms.getSitePath(resource)));
256        }
257
258        // try to get the requested page form the current request attributes
259        CmsXmlPage page = (CmsXmlPage)req.getAttribute(rootPath);
260
261        if (page == null) {
262            // unmarshal XML structure from the file content
263            page = unmarshal(cms, cms.readFile(resource));
264            // store the page that was read as request attribute for future read requests
265            req.setAttribute(rootPath, page);
266        }
267
268        return page;
269    }
270
271    /**
272     * Factory method to unmarshal (read) a XML document instance from
273     * a filename in the VFS, using the request attributes as cache.<p>
274     *
275     * @param cms the current OpenCms user context
276     * @param filename the filename of the resource to unmarshal
277     * @param req the current request
278     *
279     * @return the unmarshaled XML document, or <code>null</code> if the given resource was not of type {@link I_CmsXmlDocument}
280     *
281     * @throws CmsException in something goes wrong
282     */
283    public static I_CmsXmlDocument unmarshal(CmsObject cms, String filename, ServletRequest req) throws CmsException {
284
285        // add site root to filename
286        String rootPath = cms.getRequestContext().addSiteRoot(filename);
287
288        // try to get the requested page form the current request attributes
289        I_CmsXmlDocument doc = (I_CmsXmlDocument)req.getAttribute(rootPath);
290
291        if (doc != null) {
292            return doc;
293        }
294
295        // always use "ignore expiration" filter, date validity must be checked before calling this if required
296        CmsFile file = cms.readFile(filename, CmsResourceFilter.IGNORE_EXPIRATION);
297
298        if (CmsResourceTypeXmlPage.isXmlPage(file)) {
299            // file is of type XML page
300            doc = CmsXmlPageFactory.unmarshal(cms, file);
301        } else if (CmsResourceTypeXmlContent.isXmlContent(file)) {
302            // file is of type XML content
303            doc = CmsXmlContentFactory.unmarshal(cms, file);
304        } else {
305            // sanity check: file type not an A_CmsXmlDocument
306            throw new CmsXmlException(Messages.get().container(Messages.ERR_XML_PAGE_FACT_NO_XML_DOCUMENT_1, file));
307        }
308
309        // store the page that was read as request attribute for future read requests
310        req.setAttribute(rootPath, doc);
311
312        return doc;
313    }
314
315    /**
316     * Factory method to unmarshal (read) a XML page instance from a String
317     * that contains XML data.<p>
318     *
319     * When unmarshalling, the encoding is read directly from the XML header.
320     * The given encoding is used only when marshalling the XML again later.<p>
321     *
322     * @param xmlData the XML data in a String
323     * @param encoding the encoding to use when marshalling the XML page later
324     * @param resolver the XML entity resolver to use
325     *
326     * @return a XML page instance unmarshalled from the String
327     *
328     * @throws CmsXmlException if something goes wrong
329     */
330    public static CmsXmlPage unmarshal(String xmlData, String encoding, EntityResolver resolver)
331    throws CmsXmlException {
332
333        return new CmsXmlPage(CmsXmlUtils.unmarshalHelper(xmlData, resolver), encoding);
334    }
335}