001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.xml;
029
030import org.opencms.db.CmsPublishList;
031import org.opencms.db.CmsResourceState;
032import org.opencms.db.I_CmsProjectDriver;
033import org.opencms.file.CmsFile;
034import org.opencms.file.CmsObject;
035import org.opencms.file.CmsProject;
036import org.opencms.file.CmsResource;
037import org.opencms.file.CmsResourceFilter;
038import org.opencms.i18n.CmsMessageContainer;
039import org.opencms.lock.CmsLockUtil;
040import org.opencms.main.CmsException;
041import org.opencms.main.CmsLog;
042import org.opencms.main.I_CmsEventListener;
043import org.opencms.main.OpenCms;
044import org.opencms.report.I_CmsReport;
045import org.opencms.security.CmsRole;
046import org.opencms.util.CmsFileUtil;
047
048import java.io.ByteArrayInputStream;
049import java.io.ByteArrayOutputStream;
050import java.io.IOException;
051import java.io.InputStream;
052import java.util.ArrayList;
053import java.util.List;
054
055import javax.xml.parsers.DocumentBuilder;
056import javax.xml.parsers.DocumentBuilderFactory;
057import javax.xml.transform.Transformer;
058import javax.xml.transform.TransformerException;
059import javax.xml.transform.TransformerFactory;
060import javax.xml.transform.stream.StreamResult;
061import javax.xml.transform.stream.StreamSource;
062
063import org.apache.commons.logging.Log;
064
065import org.w3c.dom.Document;
066
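/**
 * Transforms all resources of a given type below a given folder by applying an XSL transformation
 * to their contents, writing the result back and, depending on the resource state, publishing it.
 */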
070public class CmsXmlFileTransformer {
071
072    /** Logger instance for this class. */
073    private static final Log LOG = CmsLog.getLog(CmsXmlFileTransformer.class);
074
075    /** The CmsObject for working on offline resources. */
076    private CmsObject m_offlineCms;
077
078    /** The CmsObject for working on online resources. */
079    private CmsObject m_onlineCms;
080
081    /** The path. */
082    private String m_path;
083
084    /** The type name. */
085    private String m_type;
086
087    /** The bytes of the XSL transformation. */
088    private byte[] m_xslt;
089
090    /** The transformer factory. */
091    private TransformerFactory m_transformerFactory;
092
093    /** The report to write to. */
094    private I_CmsReport m_report;
095
096    /** The origin of the XSL transform. */
097    private String m_xslName;
098
099    /**
100     * Creates a new instance.
101     *
102     * @param cms the CMS context
103     * @param path the ancestor folder under which files should be processed
104     * @param type the resource type which should be processed
105     * @param xslName a string containing information about where the XSL transform is coming from
106     * @param xslStream the stream to read the XSL transformation from
107     * @param report the report to write to
108     *
109     * @throws CmsException if something goes wrong
110     * @throws IOException if an IO error occurs
111     */
112    public CmsXmlFileTransformer(
113        CmsObject cms,
114        String path,
115        String type,
116        String xslName,
117        InputStream xslStream,
118        I_CmsReport report)
119    throws CmsException, IOException {
120
121        OpenCms.getRoleManager().checkRole(cms, CmsRole.ROOT_ADMIN);
122        m_xslName = xslName;
123        m_transformerFactory = TransformerFactory.newInstance();
124        m_offlineCms = OpenCms.initCmsObject(cms);
125        m_offlineCms.getRequestContext().setSiteRoot("");
126        m_onlineCms = OpenCms.initCmsObject(cms);
127        m_onlineCms.getRequestContext().setSiteRoot("");
128        m_offlineCms.getRequestContext().setCurrentProject(getTempfileProject(cms));
129        m_onlineCms.getRequestContext().setCurrentProject(cms.readProject(CmsProject.ONLINE_PROJECT_NAME));
130        m_path = OpenCms.getSystemInfo().getAbsoluteRfsPathRelativeToWebInf(path);
131        m_type = type;
132        m_xslt = CmsFileUtil.readFully(xslStream);
133        m_report = report;
134    }
135
136    /**
137     * Performs the transformation on all resources of the configured type.
138     *
139     * @throws CmsException if something goes wrong
140     */
141    public void run() throws CmsException {
142
143        m_report.println(message("XSL transform: " + m_xslName));
144        m_report.println(message("Path: " + m_path));
145        m_report.println(message("Type: " + m_type));
146        try {
147            List<CmsResource> resources = m_offlineCms.readResources(
148                m_path,
149                CmsResourceFilter.ALL.addRequireType(OpenCms.getResourceManager().getResourceType(m_type)),
150                true);
151            processResources(resources);
152            OpenCms.getEventManager().fireEvent(I_CmsEventListener.EVENT_CLEAR_CACHES);
153        } catch (CmsException e) {
154            m_report.println(e);
155            throw e;
156        }
157    }
158
159    /**
160     * Gets the online path for the resource.
161     *
162     * @param res the resource
163     * @return the online path
164     * @throws CmsException if something goes wrong
165     */
166    private String getOnlinePath(CmsResource res) throws CmsException {
167
168        return m_onlineCms.readResource(res.getStructureId(), CmsResourceFilter.ALL).getRootPath();
169    }
170
171    /**
172     * Gets the temporary project.
173     *
174     * @param cms the current CMS context
175     * @return the temporary project
176     * @throws CmsException if something goes wrong
177     */
178    private CmsProject getTempfileProject(CmsObject cms) throws CmsException {
179
180        try {
181            return cms.readProject(I_CmsProjectDriver.TEMP_FILE_PROJECT_NAME);
182        } catch (CmsException e) {
183            return cms.createTempfileProject();
184        }
185    }
186
187    /**
188     * Helper for creating a message container from a literal string message string.
189     *
190     * @param content the message string
191     * @return the message container
192     */
193    private CmsMessageContainer message(String content) {
194
195        content = CmsXmlFileTransformer.class.getSimpleName() + ": " + content;
196        return org.opencms.report.Messages.get().container(org.opencms.report.Messages.RPT_GENERIC_1, content);
197    }
198
199    /**
200     * Checks if we need to update the content.
201     *
202     * @param oldContent the old content
203     * @param content the new content
204     *
205     * @return true if we need to update the content
206     */
207    private boolean needToUpdate(byte[] oldContent, byte[] content) {
208
209        if (content == null) {
210            return false;
211        }
212        if (sameXml(oldContent, content)) {
213            return false;
214        }
215        return true;
216
217    }
218
219    /**
220     * Processes the list of resources.
221     *
222     * @param resources the resources to process
223     */
224    private void processResources(List<CmsResource> resources) {
225
226        List<String> changedPaths = new ArrayList<>();
227        for (CmsResource resource : resources) {
228            boolean changed = false;
229            CmsResourceState state = resource.getState();
230            m_report.println(message("Processing " + resource.getRootPath()));
231            try (AutoCloseable c = CmsLockUtil.withLockedResources(m_offlineCms, resource)) {
232                if (state.isNew()) {
233                    byte[] content = readOfflineContent(resource);
234                    byte[] newContent = transformContent(content);
235                    if (needToUpdate(content, newContent)) {
236                        changed = true;
237                        writeContent(resource, newContent);
238                    }
239                } else if (state.isUnchanged()) {
240                    if (!resource.getRootPath().equals(getOnlinePath(resource))) {
241                        m_report.println(
242                            message("Warning: Skipping " + resource.getRootPath() + " because of path inconsistency."));
243                        continue;
244                    }
245                    byte[] content = readOfflineContent(resource);
246                    byte[] newContent = transformContent(content);
247                    if (needToUpdate(content, newContent)) {
248                        changed = true;
249                        writeContent(resource, newContent);
250                        publishFile(resource);
251                    }
252                } else if (state.isDeleted()) {
253                    m_report.println(message("Skipping " + resource.getRootPath() + " because it is deleted."));
254                } else if (state.isChanged()) {
255                    if (!resource.getRootPath().equals(getOnlinePath(resource))) {
256                        byte[] content = readOfflineContent(resource);
257                        byte[] newContent = transformContent(content);
258                        if (needToUpdate(content, newContent)) {
259                            changed = true;
260                            writeContent(resource, newContent);
261                        }
262                        m_report.println(
263                            message("Warning: Not publishing " + resource.getRootPath() + " because it is moved."));
264                    } else {
265                        byte[] offlineContent = readOfflineContent(resource);
266                        byte[] onlineContent = readOnlineContent(resource);
267                        byte[] newOfflineContent = transformContent(offlineContent);
268                        byte[] newOnlineContent = transformContent(onlineContent);
269                        if (needToUpdate(offlineContent, newOfflineContent)
270                            || needToUpdate(onlineContent, newOnlineContent)) {
271                            changed = true;
272                            if (newOfflineContent == null) {
273                                // the case where the onlne transformation works and actually changes something,
274                                // but transforming the offline content fails for some reason
275                                newOfflineContent = offlineContent;
276                            }
277                            try {
278                                writeContent(resource, newOnlineContent);
279                                publishFile(resource);
280                            } finally {
281                                // Put this in a finally block so we write back the offline content even if the preceding step fails
282                                if (m_offlineCms.getLock(resource).isUnlocked()) {
283                                    m_offlineCms.lockResourceTemporary(resource);
284                                }
285                                writeContent(resource, newOfflineContent);
286                            }
287                        }
288                    }
289                }
290            } catch (Exception e) {
291                LOG.error(e.getLocalizedMessage(), e);
292                m_report.println(e);
293            } finally {
294                if (changed) {
295                    changedPaths.add(resource.getRootPath());
296                }
297            }
298        }
299        m_report.println();
300        m_report.println(message("Summary of changed resources: "));
301        m_report.println();
302        for (String path : changedPaths) {
303            m_report.println(message(path));
304        }
305    }
306
307    /**
308     * Publishes a single file.
309     *
310     * @param resource the resource to publish
311     * @throws CmsException if something goes wrong
312     */
313    private void publishFile(CmsResource resource) throws CmsException {
314
315        CmsPublishList pubList = OpenCms.getPublishManager().getPublishList(
316            m_offlineCms,
317            m_offlineCms.readResource(resource.getStructureId(), CmsResourceFilter.ALL),
318            false);
319        OpenCms.getPublishManager().publishProject(m_offlineCms, m_report, pubList);
320        OpenCms.getPublishManager().waitWhileRunning();
321
322    }
323
324    /**
325     * Reads the offline contents of a resource.
326     *
327     * @param res the resource
328     * @return the offline contents
329     *
330     * @throws CmsException if something goes wrong
331     */
332    private byte[] readOfflineContent(CmsResource res) throws CmsException {
333
334        return m_offlineCms.readFile(res).getContents();
335    }
336
337    /**
338     * Reads the online contents of a resource.
339     *
340     * @param res the resource
341     * @return the online contents
342     *
343     * @throws CmsException if something goes wrong
344     */
345    private byte[] readOnlineContent(CmsResource res) throws CmsException {
346
347        return m_onlineCms.readFile(
348            m_onlineCms.readResource(res.getStructureId(), CmsResourceFilter.ALL)).getContents();
349    }
350
351    /**
352     * Lenient XML comparison that ignores distinctions like CDATA vs normal text nodes and superfluous whitespace.
353     *
354     * @param xml1 the bytes of the first XML document
355     * @param xml2 the bytes of the second XML document
356     * @return true if the XML is equivalent
357     *
358     */
359    private boolean sameXml(byte[] xml1, byte[] xml2) {
360
361        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
362        dbf.setNamespaceAware(true);
363        dbf.setCoalescing(true);
364        dbf.setIgnoringElementContentWhitespace(true);
365        dbf.setIgnoringComments(true);
366        try {
367            DocumentBuilder db = dbf.newDocumentBuilder();
368            db.setEntityResolver(new CmsXmlEntityResolver(m_offlineCms));
369            Document doc1 = db.parse(new ByteArrayInputStream(xml1));
370            doc1.normalizeDocument();
371            Document doc2 = db.parse(new ByteArrayInputStream(xml2));
372            doc2.normalizeDocument();
373            return doc1.isEqualNode(doc2);
374        } catch (Exception e) {
375            LOG.error(e.getLocalizedMessage(), e);
376            m_report.println(e);
377            return false;
378        }
379
380    }
381
382    /**
383     * Transforms the content using hte XSL transformation.
384     *
385     * @param content the content bytes
386     * @return the transformed contents
387     *
388     * @throws TransformerException if something goes wrong with the XSL transformation
389     */
390    private byte[] transformContent(byte[] content) throws TransformerException {
391
392        Transformer transformer = m_transformerFactory.newTransformer(
393            new StreamSource(new ByteArrayInputStream(m_xslt)));
394        ByteArrayOutputStream baos = new ByteArrayOutputStream();
395        transformer.transform(new StreamSource(new ByteArrayInputStream(content)), new StreamResult(baos));
396        byte[] result = baos.toByteArray();
397        return result;
398
399    }
400
401    /**
402     * Writes the content back to the given file.
403     *
404     * @param res the resource to write
405     * @param content the content to write to the resource
406     *
407     * @return true if the content was updated
408     *
409     * @throws CmsException if something goes wrong
410     */
411    private boolean writeContent(CmsResource res, byte[] content) throws CmsException {
412
413        CmsFile file = m_offlineCms.readFile(res);
414        file.setContents(content);
415        m_offlineCms.writeFile(file);
416        return true;
417
418    }
419
420}