001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.xml;
029
030import org.opencms.configuration.CmsConfigurationManager;
031import org.opencms.db.CmsDriverManager;
032import org.opencms.db.CmsPublishedResource;
033import org.opencms.file.CmsFile;
034import org.opencms.file.CmsObject;
035import org.opencms.file.CmsResource;
036import org.opencms.file.CmsResourceFilter;
037import org.opencms.main.CmsEvent;
038import org.opencms.main.CmsException;
039import org.opencms.main.CmsLog;
040import org.opencms.main.I_CmsEventListener;
041import org.opencms.main.OpenCms;
042import org.opencms.monitor.CmsMemoryMonitor;
043import org.opencms.util.CmsCollectionsGenericWrapper;
044import org.opencms.util.CmsFileUtil;
045import org.opencms.util.CmsUUID;
046import org.opencms.xml.page.CmsXmlPage;
047
048import java.io.ByteArrayInputStream;
049import java.io.IOException;
050import java.io.InputStream;
051import java.util.List;
052import java.util.Map;
053import java.util.concurrent.ConcurrentHashMap;
054
055import org.apache.commons.logging.Log;
056
057import org.xml.sax.EntityResolver;
058import org.xml.sax.InputSource;
059
060/**
061 * Resolves XML entities (e.g. external DTDs) in the OpenCms VFS.<p>
062 *
063 * Also provides a cache for XML content schema definitions.<p>
064 *
065 * @since 6.0.0
066 */
067public class CmsXmlEntityResolver implements EntityResolver, I_CmsEventListener {
068
069    /** Size used when creating the XML content definition cache map. */
070    public static final int CONTENT_DEFINITION_CACHE_SIZE = 2048;
071
072    /** Scheme prefix of system ids which are retrieved from the classpath. */
073    public static final String INTERNAL_SCHEME = "internal://";
074
075    /** The scheme to identify a file in the OpenCms VFS. */
076    public static final String OPENCMS_SCHEME = "opencms://";
077
078    /**
079     * A list of string pairs used to translate legacy system ids to a new form. The first component of each pair
080     * is the prefix which is replaced by the second component of that pair; only the first matching pair is applied.
081     */
082    private static final String[][] LEGACY_TRANSLATIONS = {
083        {"opencms://system/modules/org.opencms.ade.config/schemas/", "internal://org/opencms/xml/adeconfig/"},
084        {
085            "opencms://system/modules/org.opencms.ade.containerpage/schemas/",
086            "internal://org/opencms/xml/containerpage/"},
087        {"opencms://system/modules/org.opencms.ade.sitemap/schemas/", "internal://org/opencms/xml/adeconfig/sitemap/"},
088        {"opencms://system/modules/org.opencms.ugc/schemas/", "internal://org/opencms/ugc/"},
089        {"opencms://system/modules/org.opencms.jsp.search/schemas/", "internal://org/opencms/jsp/search/"}
090
091    };
092
093    /** The log object for this class. */
094    private static final Log LOG = CmsLog.getLog(CmsXmlEntityResolver.class);
095
096    /** A temporary cache for XML content definitions, cleared by the event handling of this class. */
097    private static Map<String, CmsXmlContentDefinition> m_cacheContentDefinitions;
098
099    /** A permanent cache to avoid multiple readings of often used files; this class never removes entries from it. */
100    private static Map<String, byte[]> m_cachePermanent;
101
102    /** A temporary cache to avoid multiple readings of often used files from the VFS, cleared by the event handling of this class. */
103    private static Map<String, byte[]> m_cacheTemporary;
104
105    /** The classpath location of the (old) XML page DTD. */
106    private static final String XMLPAGE_OLD_DTD_LOCATION = "org/opencms/xml/page/xmlpage.dtd";
107
108    /** The (old) DTD address of the OpenCms xmlpage (used in 5.3.5). */
109    private static final String XMLPAGE_OLD_DTD_SYSTEM_ID_1 = "http://www.opencms.org/dtd/6.0/xmlpage.dtd";
110
111    /** The (old) DTD address of the OpenCms xmlpage (used until 5.3.5). */
112    private static final String XMLPAGE_OLD_DTD_SYSTEM_ID_2 = "/system/shared/page.dtd";
113
114    /** The classpath location of the xmlpage XSD. */
115    private static final String XMLPAGE_XSD_LOCATION = "org/opencms/xml/page/xmlpage.xsd";
116
117    /** The cms object to use for VFS access, may be <code>null</code> in which case the VFS is not searched. */
118    private CmsObject m_cms;
119
120    /**
121     * Creates a new XML entity resolver based on the provided CmsObject.<p>
122     *
123     * If the provided CmsObject is null, then the OpenCms VFS is not
124     * searched for XML entities, however the internal cache and
125     * other OpenCms internal entities not in the VFS are still resolved.<p>
126     *
127     * @param cms the cms context to use for resolving XML files from the OpenCms VFS
128     */
129    public CmsXmlEntityResolver(CmsObject cms) {
130
131        initCaches();
132        m_cms = cms;
133    }
134
135    /**
136     * Adds a system ID URL to to internal permanent cache.<p>
137     *
138     * This cache will NOT be cleared automatically.<p>
139     *
140     * @param systemId the system ID to add
141     * @param content the content of the system id
142     */
143    public static void cacheSystemId(String systemId, byte[] content) {
144
145        initCaches();
146        m_cachePermanent.put(systemId, content);
147    }
148
149    /**
150     * Checks if a given system ID URL is in the internal permanent cache.<p>
151     *
152     * This check is required to see if a XML content is based on a file that actually exists in the OpenCms VFS,
153     * or if the schema has been just cached without a VFS file.<p>
154     *
155     * @param systemId the system id ID check
156     *
157     * @return <code>true</code> if the system ID is in the internal permanent cache, <code>false</code> otherwise
158     */
159    public static boolean isCachedSystemId(String systemId) {
160
161        if (m_cachePermanent != null) {
162            return m_cachePermanent.containsKey(systemId);
163        }
164        return false;
165    }
166
167    /**
168     * Checks whether the given schema id is an internal schema id or is translated to an internal schema id.<p>
169     * @param schema the schema id
170     * @return true if the given schema id is an internal schema id or translated to an internal schema id
171     */
172    public static boolean isInternalId(String schema) {
173
174        String translatedId = translateLegacySystemId(schema);
175        if (translatedId.startsWith(INTERNAL_SCHEME)) {
176            return true;
177        }
178        return false;
179    }
180
181    /**
182     * Initialize the OpenCms XML entity resolver.<p>
183     *
184     * @param adminCms an initialized OpenCms user context with "Administrator" role permissions
185     * @param typeSchemaBytes the base widget type XML schema definitions
186     *
187     * @see CmsXmlContentTypeManager#initialize(CmsObject)
188     */
189    protected static void initialize(CmsObject adminCms, byte[] typeSchemaBytes) {
190
191        // create the resolver to register as event listener
192        CmsXmlEntityResolver resolver = new CmsXmlEntityResolver(adminCms);
193
194        // register this object as event listener
195        OpenCms.addCmsEventListener(
196            resolver,
197            new int[] {
198                I_CmsEventListener.EVENT_CLEAR_CACHES,
199                I_CmsEventListener.EVENT_PUBLISH_PROJECT,
200                I_CmsEventListener.EVENT_RESOURCE_MODIFIED,
201                I_CmsEventListener.EVENT_RESOURCE_MOVED,
202                I_CmsEventListener.EVENT_RESOURCE_DELETED});
203
204        // cache the base widget type XML schema definitions
205        cacheSystemId(CmsXmlContentDefinition.XSD_INCLUDE_OPENCMS, typeSchemaBytes);
206    }
207
208    /**
209     * Initializes the internal caches for permanent and temporary system IDs.<p>
210     */
211    private static void initCaches() {
212
213        if (m_cacheTemporary == null) {
214            m_cacheTemporary = CmsMemoryMonitor.createLRUCacheMap(1024);
215
216            m_cachePermanent = new ConcurrentHashMap<String, byte[]>(32);
217
218            m_cacheContentDefinitions = CmsMemoryMonitor.createLRUCacheMap(CONTENT_DEFINITION_CACHE_SIZE);
219        }
220        if (OpenCms.getRunLevel() > OpenCms.RUNLEVEL_1_CORE_OBJECT) {
221            if ((OpenCms.getMemoryMonitor() != null)
222                && !OpenCms.getMemoryMonitor().isMonitoring(CmsXmlEntityResolver.class.getName() + ".cacheTemporary")) {
223                // reinitialize the caches after the memory monitor is set up
224                Map<String, byte[]> cacheTemporary = CmsMemoryMonitor.createLRUCacheMap(128);
225                cacheTemporary.putAll(m_cacheTemporary);
226                m_cacheTemporary = cacheTemporary;
227                OpenCms.getMemoryMonitor().register(
228                    CmsXmlEntityResolver.class.getName() + ".cacheTemporary",
229                    cacheTemporary);
230
231                Map<String, byte[]> cachePermanent = new ConcurrentHashMap<String, byte[]>(32);
232                cachePermanent.putAll(m_cachePermanent);
233                m_cachePermanent = cachePermanent;
234                OpenCms.getMemoryMonitor().register(
235                    CmsXmlEntityResolver.class.getName() + ".cachePermanent",
236                    cachePermanent);
237
238                Map<String, CmsXmlContentDefinition> cacheContentDefinitions = CmsMemoryMonitor.createLRUCacheMap(
239                    CONTENT_DEFINITION_CACHE_SIZE);
240                cacheContentDefinitions.putAll(m_cacheContentDefinitions);
241                m_cacheContentDefinitions = cacheContentDefinitions;
242                OpenCms.getMemoryMonitor().register(
243                    CmsXmlEntityResolver.class.getName() + ".cacheContentDefinitions",
244                    cacheContentDefinitions);
245            }
246        }
247    }
248
249    /**
250     * Translates a legacy system id to a new form.<p>
251     *
252     * @param systemId the original system id
253     * @return the new system id
254     */
255    private static String translateLegacySystemId(String systemId) {
256
257        String result = systemId;
258        for (String[] translation : LEGACY_TRANSLATIONS) {
259            if (systemId.startsWith(translation[0])) {
260                // replace prefix with second component if it matches the first component
261                result = translation[1] + systemId.substring(translation[0].length());
262                break;
263            }
264        }
265        if (OpenCms.getRepositoryManager() != null) {
266            result = OpenCms.getResourceManager().getXsdTranslator().translateResource(result);
267        }
268        return result;
269    }
270
271    /**
272     * Caches an XML content definition based on the given system id and the online / offline status
273     * of this entity resolver instance.<p>
274     *
275     * @param systemId the system id to use as cache key
276     * @param contentDefinition the content definition to cache
277     */
278    public void cacheContentDefinition(String systemId, CmsXmlContentDefinition contentDefinition) {
279
280        String cacheKey = getCacheKeyForCurrentProject(systemId);
281        m_cacheContentDefinitions.put(cacheKey, contentDefinition);
282        if (LOG.isDebugEnabled()) {
283            LOG.debug(Messages.get().getBundle().key(Messages.LOG_ERR_CACHED_SYSTEM_ID_1, cacheKey));
284        }
285    }
286
287    /**
288     * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent)
289     */
290    public void cmsEvent(CmsEvent event) {
291
292        CmsResource resource;
293        switch (event.getType()) {
294            case I_CmsEventListener.EVENT_PUBLISH_PROJECT:
295                // only flush cache if a schema definition where published
296                CmsUUID publishHistoryId = new CmsUUID((String)event.getData().get(I_CmsEventListener.KEY_PUBLISHID));
297                if (isSchemaDefinitionInPublishList(publishHistoryId)) {
298                    m_cacheTemporary.clear();
299                    m_cacheContentDefinitions.clear();
300                    if (LOG.isDebugEnabled()) {
301                        LOG.debug(Messages.get().getBundle().key(Messages.LOG_ERR_FLUSHED_CACHES_0));
302                    }
303                }
304                break;
305            case I_CmsEventListener.EVENT_CLEAR_CACHES:
306                // flush cache
307                m_cacheTemporary.clear();
308                m_cacheContentDefinitions.clear();
309                if (LOG.isDebugEnabled()) {
310                    LOG.debug(Messages.get().getBundle().key(Messages.LOG_ERR_FLUSHED_CACHES_0));
311                }
312                break;
313            case I_CmsEventListener.EVENT_RESOURCE_MODIFIED:
314                Object change = event.getData().get(I_CmsEventListener.KEY_CHANGE);
315                if ((change != null) && change.equals(Integer.valueOf(CmsDriverManager.NOTHING_CHANGED))) {
316                    // skip lock & unlock
317                    return;
318                }
319                resource = (CmsResource)event.getData().get(I_CmsEventListener.KEY_RESOURCE);
320                uncacheSystemId(resource.getRootPath());
321                break;
322            case I_CmsEventListener.EVENT_RESOURCE_DELETED:
323            case I_CmsEventListener.EVENT_RESOURCE_MOVED:
324                List<CmsResource> resources = CmsCollectionsGenericWrapper.list(
325                    event.getData().get(I_CmsEventListener.KEY_RESOURCES));
326                for (int i = 0; i < resources.size(); i++) {
327                    resource = resources.get(i);
328                    uncacheSystemId(resource.getRootPath());
329                }
330                break;
331            default:
332                // no operation
333        }
334    }
335
336    /**
337     * Looks up the given XML content definition system id in the internal content definition cache.<p>
338     *
339     * @param systemId the system id of the XML content definition to look up
340     *
341     * @return the XML content definition found, or null if no definition is cached for the given system id
342     */
343    public CmsXmlContentDefinition getCachedContentDefinition(String systemId) {
344
345        String cacheKey = getCacheKeyForCurrentProject(systemId);
346        CmsXmlContentDefinition result = m_cacheContentDefinitions.get(cacheKey);
347        if ((result != null) && LOG.isDebugEnabled()) {
348            LOG.debug(Messages.get().getBundle().key(Messages.LOG_CACHE_LOOKUP_SUCCEEDED_1, cacheKey));
349        }
350        return result;
351    }
352
353    /**
354     * @see org.xml.sax.EntityResolver#resolveEntity(java.lang.String, java.lang.String)
355     */
356    public InputSource resolveEntity(String publicId, String systemId) throws IOException {
357
358        // lookup the system id caches first
359        byte[] content;
360        systemId = translateLegacySystemId(systemId);
361        content = m_cachePermanent.get(systemId);
362        if (content != null) {
363            // permanent cache contains system id
364            return createInputSource(content, systemId);
365        } else if (systemId.equals(CmsXmlPage.XMLPAGE_XSD_SYSTEM_ID)) {
366
367            // XML page XSD reference
368            try (InputStream stream = getClass().getClassLoader().getResourceAsStream(XMLPAGE_XSD_LOCATION)) {
369                content = CmsFileUtil.readFully(stream);
370                // cache the XML page DTD
371                m_cachePermanent.put(systemId, content);
372                return createInputSource(content, systemId);
373            } catch (Throwable t) {
374                LOG.error(
375                    Messages.get().getBundle().key(Messages.LOG_XMLPAGE_XSD_NOT_FOUND_1, XMLPAGE_XSD_LOCATION),
376                    t);
377            }
378
379        } else if (systemId.equals(XMLPAGE_OLD_DTD_SYSTEM_ID_1) || systemId.endsWith(XMLPAGE_OLD_DTD_SYSTEM_ID_2)) {
380
381            // XML page DTD reference
382            try (InputStream stream = getClass().getClassLoader().getResourceAsStream(XMLPAGE_OLD_DTD_LOCATION)) {
383                // cache the XML page DTD
384                content = CmsFileUtil.readFully(stream);
385                m_cachePermanent.put(systemId, content);
386                return createInputSource(content, systemId);
387            } catch (Throwable t) {
388                LOG.error(
389                    Messages.get().getBundle().key(Messages.LOG_XMLPAGE_DTD_NOT_FOUND_1, XMLPAGE_OLD_DTD_LOCATION),
390                    t);
391            }
392        } else if ((m_cms != null) && systemId.startsWith(OPENCMS_SCHEME)) {
393
394            // opencms:// VFS reference
395            String cacheSystemId = systemId.substring(OPENCMS_SCHEME.length() - 1);
396            String cacheKey = getCacheKey(
397                cacheSystemId,
398                m_cms.getRequestContext().getCurrentProject().isOnlineProject());
399            // look up temporary cache
400            content = m_cacheTemporary.get(cacheKey);
401            if (content != null) {
402                return createInputSource(content, systemId);
403            }
404            String storedSiteRoot = m_cms.getRequestContext().getSiteRoot();
405            try {
406                // content not cached, read from VFS
407                m_cms.getRequestContext().setSiteRoot("/");
408                CmsFile file = m_cms.readFile(cacheSystemId, CmsResourceFilter.IGNORE_EXPIRATION);
409                content = file.getContents();
410                // store content in cache
411                m_cacheTemporary.put(cacheKey, content);
412                if (LOG.isDebugEnabled()) {
413                    LOG.debug(Messages.get().getBundle().key(Messages.LOG_ERR_CACHED_SYS_ID_1, cacheKey));
414                }
415                return createInputSource(content, systemId);
416            } catch (CmsException e) {
417                throw new IOException(
418                    Messages.get().getBundle().key(Messages.LOG_ENTITY_RESOLVE_FAILED_1, systemId),
419                    e);
420            } finally {
421                m_cms.getRequestContext().setSiteRoot(storedSiteRoot);
422            }
423
424        } else if (systemId.startsWith(INTERNAL_SCHEME)) {
425            String location = systemId.substring(INTERNAL_SCHEME.length());
426            try (InputStream stream = getClass().getClassLoader().getResourceAsStream(location)) {
427                content = CmsFileUtil.readFully(stream);
428                m_cachePermanent.put(systemId, content);
429                return createInputSource(content, systemId);
430            } catch (Throwable t) {
431                LOG.error(t.getLocalizedMessage(), t);
432            }
433
434        } else if (systemId.substring(0, systemId.lastIndexOf("/") + 1).equalsIgnoreCase(
435            CmsConfigurationManager.DEFAULT_DTD_PREFIX)//
436        ) {
437            // default DTD location in the org.opencms.configuration package
438            String location = null;
439            try {
440                String dtdFilename = systemId.substring(systemId.lastIndexOf("/") + 1);
441                location = CmsConfigurationManager.DEFAULT_DTD_LOCATION + dtdFilename;
442                InputStream stream = getClass().getClassLoader().getResourceAsStream(location);
443                content = CmsFileUtil.readFully(stream);
444                // cache the DTD
445                m_cachePermanent.put(systemId, content);
446                return createInputSource(content, systemId);
447            } catch (Throwable t) {
448                LOG.error(Messages.get().getBundle().key(Messages.LOG_DTD_NOT_FOUND_1, location), t);
449            }
450        }
451        LOG.error("Entity reference not allowed: " + systemId, new IOException());
452        throw new IOException("Entity reference not allowed (see log for details)");
453    }
454
455    /**
456     * Removes a cached entry for a system id (filename) from the internal offline temporary and content definition caches.<p>
457     *
458     * The online resources cached for the online project are only flushed when a project is published.<p>
459     *
460     * @param systemId the system id (filename) to remove from the cache
461     */
462    public void uncacheSystemId(String systemId) {
463
464        Object o;
465        o = m_cacheTemporary.remove(getCacheKey(systemId, false));
466        if (null != o) {
467            // if an object was removed from the temporary cache, all XML content definitions must be cleared
468            // because this may be a nested subschema
469            m_cacheContentDefinitions.clear();
470            if (LOG.isDebugEnabled()) {
471                LOG.debug(
472                    Messages.get().getBundle().key(Messages.LOG_ERR_UNCACHED_SYS_ID_1, getCacheKey(systemId, false)));
473            }
474        } else {
475            // check if a cached content definition has to be removed based on the system id
476            o = m_cacheContentDefinitions.remove(getCacheKey(systemId, false));
477            if ((null != o) && LOG.isDebugEnabled()) {
478                LOG.debug(
479                    Messages.get().getBundle().key(
480                        Messages.LOG_ERR_UNCACHED_CONTENT_DEF_1,
481                        getCacheKey(systemId, false)));
482            }
483        }
484    }
485
486    /**
487     * Creates an input source for the given byte data and system id.<p>
488     *
489     * @param data the data which the input source should return
490     * @param systemId the system id for the input source
491     *
492     * @return the input source
493     */
494    InputSource createInputSource(byte[] data, String systemId) {
495
496        InputSource result = new InputSource(new ByteArrayInputStream(data));
497        result.setSystemId(systemId);
498        return result;
499    }
500
501    /**
502     * Returns a cache key for the given system id (filename) based on the status
503     * of the given project flag.<p>
504     *
505     * @param systemId the system id (filename) to get the cache key for
506     * @param online indicates if this key is generated for the online project
507     *
508     * @return the cache key for the system id
509     */
510    private String getCacheKey(String systemId, boolean online) {
511
512        if (online) {
513            return "online_".concat(systemId);
514        }
515        return "offline_".concat(systemId);
516    }
517
518    /**
519     * Returns a cache key for the given system id (filename) based on the status
520     * of the internal CmsObject.<p>
521     *
522     * @param systemId the system id (filename) to get the cache key for
523     *
524     * @return the cache key for the system id
525     */
526    private String getCacheKeyForCurrentProject(String systemId) {
527
528        // check the project
529        boolean project = (m_cms != null) ? m_cms.getRequestContext().getCurrentProject().isOnlineProject() : false;
530
531        // remove opencms:// prefix
532        if (systemId.startsWith(OPENCMS_SCHEME)) {
533            systemId = systemId.substring(OPENCMS_SCHEME.length() - 1);
534        }
535
536        return getCacheKey(systemId, project);
537    }
538
539    /**
540     * Proves if there is at least one xsd or dtd file in the list of resources to publish.<p>
541     *
542     * @param publishHistoryId the publish history id
543     *
544     * @return true, if there is at least one xsd or dtd file in the list of resources to publish, otherwise false
545     */
546    private boolean isSchemaDefinitionInPublishList(CmsUUID publishHistoryId) {
547
548        if (m_cms == null) {
549            // CmsObject not available, assume there may be a schema definition in the publish history
550            return true;
551        }
552        try {
553            List<CmsPublishedResource> publishedResources = m_cms.readPublishedResources(publishHistoryId);
554            for (CmsPublishedResource cmsPublishedResource : publishedResources) {
555                String resourceRootPath = cmsPublishedResource.getRootPath();
556                String resourceRootPathLowerCase = resourceRootPath.toLowerCase();
557                if (resourceRootPathLowerCase.endsWith(".xsd")
558                    || resourceRootPathLowerCase.endsWith(".dtd")
559                    || m_cacheTemporary.containsKey(getCacheKey(resourceRootPath, true))) {
560                    return true;
561                }
562            }
563        } catch (CmsException e) {
564            // error reading published Resources.
565            LOG.warn(e.getMessage(), e);
566        }
567        return false;
568    }
569}