001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software GmbH & Co. KG, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.xml; 029 030import org.opencms.configuration.CmsConfigurationManager; 031import org.opencms.db.CmsDriverManager; 032import org.opencms.db.CmsPublishedResource; 033import org.opencms.file.CmsFile; 034import org.opencms.file.CmsObject; 035import org.opencms.file.CmsResource; 036import org.opencms.file.CmsResourceFilter; 037import org.opencms.main.CmsEvent; 038import org.opencms.main.CmsException; 039import org.opencms.main.CmsLog; 040import org.opencms.main.I_CmsEventListener; 041import org.opencms.main.OpenCms; 042import org.opencms.monitor.CmsMemoryMonitor; 043import org.opencms.util.CmsCollectionsGenericWrapper; 044import org.opencms.util.CmsFileUtil; 045import org.opencms.util.CmsUUID; 046import org.opencms.xml.page.CmsXmlPage; 047 048import java.io.ByteArrayInputStream; 049import java.io.IOException; 050import java.io.InputStream; 051import java.util.List; 052import java.util.Map; 053import java.util.concurrent.ConcurrentHashMap; 054 055import org.apache.commons.logging.Log; 056 057import org.xml.sax.EntityResolver; 058import org.xml.sax.InputSource; 059 060/** 061 * Resolves XML entities (e.g. external DTDs) in the OpenCms VFS.<p> 062 * 063 * Also provides a cache for XML content schema definitions.<p> 064 * 065 * @since 6.0.0 066 */ 067public class CmsXmlEntityResolver implements EntityResolver, I_CmsEventListener { 068 069 /** Maximum size of the content definition cache. */ 070 public static final int CONTENT_DEFINITION_CACHE_SIZE = 2048; 071 072 /** Scheme for files which should be retrieved from the classpath. */ 073 public static final String INTERNAL_SCHEME = "internal://"; 074 075 /** The scheme to identify a file in the OpenCms VFS. */ 076 public static final String OPENCMS_SCHEME = "opencms://"; 077 078 /** 079 * A list of string pairs used to translate legacy system ids to a new form. The first component of each pair 080 * is the prefix which should be replaced by the second component of that pair. 081 */ 082 private static final String[][] LEGACY_TRANSLATIONS = { 083 {"opencms://system/modules/org.opencms.ade.config/schemas/", "internal://org/opencms/xml/adeconfig/"}, 084 { 085 "opencms://system/modules/org.opencms.ade.containerpage/schemas/", 086 "internal://org/opencms/xml/containerpage/"}, 087 {"opencms://system/modules/org.opencms.ade.sitemap/schemas/", "internal://org/opencms/xml/adeconfig/sitemap/"}, 088 {"opencms://system/modules/org.opencms.ugc/schemas/", "internal://org/opencms/ugc/"}, 089 {"opencms://system/modules/org.opencms.jsp.search/schemas/", "internal://org/opencms/jsp/search/"} 090 091 }; 092 093 /** The log object for this class. */ 094 private static final Log LOG = CmsLog.getLog(CmsXmlEntityResolver.class); 095 096 /** A temporary cache for XML content definitions. */ 097 private static Map<String, CmsXmlContentDefinition> m_cacheContentDefinitions; 098 099 /** A permanent cache to avoid multiple readings of often used files from the VFS. */ 100 private static Map<String, byte[]> m_cachePermanent; 101 102 /** A temporary cache to avoid multiple readings of often used files from the VFS. */ 103 private static Map<String, byte[]> m_cacheTemporary; 104 105 /** The location of the XML page XML schema. */ 106 private static final String XMLPAGE_OLD_DTD_LOCATION = "org/opencms/xml/page/xmlpage.dtd"; 107 108 /** The (old) DTD address of the OpenCms xmlpage (used in 5.3.5). */ 109 private static final String XMLPAGE_OLD_DTD_SYSTEM_ID_1 = "http://www.opencms.org/dtd/6.0/xmlpage.dtd"; 110 111 /** The (old) DTD address of the OpenCms xmlpage (used until 5.3.5). */ 112 private static final String XMLPAGE_OLD_DTD_SYSTEM_ID_2 = "/system/shared/page.dtd"; 113 114 /** The location of the xmlpage XSD. */ 115 private static final String XMLPAGE_XSD_LOCATION = "org/opencms/xml/page/xmlpage.xsd"; 116 117 /** The cms object to use for VFS access (will be initialized with "Guest" permissions). */ 118 private CmsObject m_cms; 119 120 /** 121 * Creates a new XML entity resolver based on the provided CmsObject.<p> 122 * 123 * If the provided CmsObject is null, then the OpenCms VFS is not 124 * searched for XML entities, however the internal cache and 125 * other OpenCms internal entities not in the VFS are still resolved.<p> 126 * 127 * @param cms the cms context to use for resolving XML files from the OpenCms VFS 128 */ 129 public CmsXmlEntityResolver(CmsObject cms) { 130 131 initCaches(); 132 m_cms = cms; 133 } 134 135 /** 136 * Adds a system ID URL to to internal permanent cache.<p> 137 * 138 * This cache will NOT be cleared automatically.<p> 139 * 140 * @param systemId the system ID to add 141 * @param content the content of the system id 142 */ 143 public static void cacheSystemId(String systemId, byte[] content) { 144 145 initCaches(); 146 m_cachePermanent.put(systemId, content); 147 } 148 149 /** 150 * Checks if a given system ID URL is in the internal permanent cache.<p> 151 * 152 * This check is required to see if a XML content is based on a file that actually exists in the OpenCms VFS, 153 * or if the schema has been just cached without a VFS file.<p> 154 * 155 * @param systemId the system id ID check 156 * 157 * @return <code>true</code> if the system ID is in the internal permanent cache, <code>false</code> otherwise 158 */ 159 public static boolean isCachedSystemId(String systemId) { 160 161 if (m_cachePermanent != null) { 162 return m_cachePermanent.containsKey(systemId); 163 } 164 return false; 165 } 166 167 /** 168 * Checks whether the given schema id is an internal schema id or is translated to an internal schema id.<p> 169 * @param schema the schema id 170 * @return true if the given schema id is an internal schema id or translated to an internal schema id 171 */ 172 public static boolean isInternalId(String schema) { 173 174 String translatedId = translateLegacySystemId(schema); 175 if (translatedId.startsWith(INTERNAL_SCHEME)) { 176 return true; 177 } 178 return false; 179 } 180 181 /** 182 * Initialize the OpenCms XML entity resolver.<p> 183 * 184 * @param adminCms an initialized OpenCms user context with "Administrator" role permissions 185 * @param typeSchemaBytes the base widget type XML schema definitions 186 * 187 * @see CmsXmlContentTypeManager#initialize(CmsObject) 188 */ 189 protected static void initialize(CmsObject adminCms, byte[] typeSchemaBytes) { 190 191 // create the resolver to register as event listener 192 CmsXmlEntityResolver resolver = new CmsXmlEntityResolver(adminCms); 193 194 // register this object as event listener 195 OpenCms.addCmsEventListener( 196 resolver, 197 new int[] { 198 I_CmsEventListener.EVENT_CLEAR_CACHES, 199 I_CmsEventListener.EVENT_PUBLISH_PROJECT, 200 I_CmsEventListener.EVENT_RESOURCE_MODIFIED, 201 I_CmsEventListener.EVENT_RESOURCE_MOVED, 202 I_CmsEventListener.EVENT_RESOURCE_DELETED}); 203 204 // cache the base widget type XML schema definitions 205 cacheSystemId(CmsXmlContentDefinition.XSD_INCLUDE_OPENCMS, typeSchemaBytes); 206 } 207 208 /** 209 * Initializes the internal caches for permanent and temporary system IDs.<p> 210 */ 211 private static void initCaches() { 212 213 if (m_cacheTemporary == null) { 214 m_cacheTemporary = CmsMemoryMonitor.createLRUCacheMap(1024); 215 216 m_cachePermanent = new ConcurrentHashMap<String, byte[]>(32); 217 218 m_cacheContentDefinitions = CmsMemoryMonitor.createLRUCacheMap(CONTENT_DEFINITION_CACHE_SIZE); 219 } 220 if (OpenCms.getRunLevel() > OpenCms.RUNLEVEL_1_CORE_OBJECT) { 221 if ((OpenCms.getMemoryMonitor() != null) 222 && !OpenCms.getMemoryMonitor().isMonitoring(CmsXmlEntityResolver.class.getName() + ".cacheTemporary")) { 223 // reinitialize the caches after the memory monitor is set up 224 Map<String, byte[]> cacheTemporary = CmsMemoryMonitor.createLRUCacheMap(128); 225 cacheTemporary.putAll(m_cacheTemporary); 226 m_cacheTemporary = cacheTemporary; 227 OpenCms.getMemoryMonitor().register( 228 CmsXmlEntityResolver.class.getName() + ".cacheTemporary", 229 cacheTemporary); 230 231 Map<String, byte[]> cachePermanent = new ConcurrentHashMap<String, byte[]>(32); 232 cachePermanent.putAll(m_cachePermanent); 233 m_cachePermanent = cachePermanent; 234 OpenCms.getMemoryMonitor().register( 235 CmsXmlEntityResolver.class.getName() + ".cachePermanent", 236 cachePermanent); 237 238 Map<String, CmsXmlContentDefinition> cacheContentDefinitions = CmsMemoryMonitor.createLRUCacheMap( 239 CONTENT_DEFINITION_CACHE_SIZE); 240 cacheContentDefinitions.putAll(m_cacheContentDefinitions); 241 m_cacheContentDefinitions = cacheContentDefinitions; 242 OpenCms.getMemoryMonitor().register( 243 CmsXmlEntityResolver.class.getName() + ".cacheContentDefinitions", 244 cacheContentDefinitions); 245 } 246 } 247 } 248 249 /** 250 * Translates a legacy system id to a new form.<p> 251 * 252 * @param systemId the original system id 253 * @return the new system id 254 */ 255 private static String translateLegacySystemId(String systemId) { 256 257 String result = systemId; 258 for (String[] translation : LEGACY_TRANSLATIONS) { 259 if (systemId.startsWith(translation[0])) { 260 // replace prefix with second component if it matches the first component 261 result = translation[1] + systemId.substring(translation[0].length()); 262 break; 263 } 264 } 265 if (OpenCms.getRepositoryManager() != null) { 266 result = OpenCms.getResourceManager().getXsdTranslator().translateResource(result); 267 } 268 return result; 269 } 270 271 /** 272 * Caches an XML content definition based on the given system id and the online / offline status 273 * of this entity resolver instance.<p> 274 * 275 * @param systemId the system id to use as cache key 276 * @param contentDefinition the content definition to cache 277 */ 278 public void cacheContentDefinition(String systemId, CmsXmlContentDefinition contentDefinition) { 279 280 String cacheKey = getCacheKeyForCurrentProject(systemId); 281 m_cacheContentDefinitions.put(cacheKey, contentDefinition); 282 if (LOG.isDebugEnabled()) { 283 LOG.debug(Messages.get().getBundle().key(Messages.LOG_ERR_CACHED_SYSTEM_ID_1, cacheKey)); 284 } 285 } 286 287 /** 288 * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent) 289 */ 290 public void cmsEvent(CmsEvent event) { 291 292 CmsResource resource; 293 switch (event.getType()) { 294 case I_CmsEventListener.EVENT_PUBLISH_PROJECT: 295 // only flush cache if a schema definition where published 296 CmsUUID publishHistoryId = new CmsUUID((String)event.getData().get(I_CmsEventListener.KEY_PUBLISHID)); 297 if (isSchemaDefinitionInPublishList(publishHistoryId)) { 298 m_cacheTemporary.clear(); 299 m_cacheContentDefinitions.clear(); 300 if (LOG.isDebugEnabled()) { 301 LOG.debug(Messages.get().getBundle().key(Messages.LOG_ERR_FLUSHED_CACHES_0)); 302 } 303 } 304 break; 305 case I_CmsEventListener.EVENT_CLEAR_CACHES: 306 // flush cache 307 m_cacheTemporary.clear(); 308 m_cacheContentDefinitions.clear(); 309 if (LOG.isDebugEnabled()) { 310 LOG.debug(Messages.get().getBundle().key(Messages.LOG_ERR_FLUSHED_CACHES_0)); 311 } 312 break; 313 case I_CmsEventListener.EVENT_RESOURCE_MODIFIED: 314 Object change = event.getData().get(I_CmsEventListener.KEY_CHANGE); 315 if ((change != null) && change.equals(Integer.valueOf(CmsDriverManager.NOTHING_CHANGED))) { 316 // skip lock & unlock 317 return; 318 } 319 resource = (CmsResource)event.getData().get(I_CmsEventListener.KEY_RESOURCE); 320 uncacheSystemId(resource.getRootPath()); 321 break; 322 case I_CmsEventListener.EVENT_RESOURCE_DELETED: 323 case I_CmsEventListener.EVENT_RESOURCE_MOVED: 324 List<CmsResource> resources = CmsCollectionsGenericWrapper.list( 325 event.getData().get(I_CmsEventListener.KEY_RESOURCES)); 326 for (int i = 0; i < resources.size(); i++) { 327 resource = resources.get(i); 328 uncacheSystemId(resource.getRootPath()); 329 } 330 break; 331 default: 332 // no operation 333 } 334 } 335 336 /** 337 * Looks up the given XML content definition system id in the internal content definition cache.<p> 338 * 339 * @param systemId the system id of the XML content definition to look up 340 * 341 * @return the XML content definition found, or null if no definition is cached for the given system id 342 */ 343 public CmsXmlContentDefinition getCachedContentDefinition(String systemId) { 344 345 String cacheKey = getCacheKeyForCurrentProject(systemId); 346 CmsXmlContentDefinition result = m_cacheContentDefinitions.get(cacheKey); 347 if ((result != null) && LOG.isDebugEnabled()) { 348 LOG.debug(Messages.get().getBundle().key(Messages.LOG_CACHE_LOOKUP_SUCCEEDED_1, cacheKey)); 349 } 350 return result; 351 } 352 353 /** 354 * @see org.xml.sax.EntityResolver#resolveEntity(java.lang.String, java.lang.String) 355 */ 356 public InputSource resolveEntity(String publicId, String systemId) throws IOException { 357 358 // lookup the system id caches first 359 byte[] content; 360 systemId = translateLegacySystemId(systemId); 361 content = m_cachePermanent.get(systemId); 362 if (content != null) { 363 // permanent cache contains system id 364 return createInputSource(content, systemId); 365 } else if (systemId.equals(CmsXmlPage.XMLPAGE_XSD_SYSTEM_ID)) { 366 367 // XML page XSD reference 368 try (InputStream stream = getClass().getClassLoader().getResourceAsStream(XMLPAGE_XSD_LOCATION)) { 369 content = CmsFileUtil.readFully(stream); 370 // cache the XML page DTD 371 m_cachePermanent.put(systemId, content); 372 return createInputSource(content, systemId); 373 } catch (Throwable t) { 374 LOG.error( 375 Messages.get().getBundle().key(Messages.LOG_XMLPAGE_XSD_NOT_FOUND_1, XMLPAGE_XSD_LOCATION), 376 t); 377 } 378 379 } else if (systemId.equals(XMLPAGE_OLD_DTD_SYSTEM_ID_1) || systemId.endsWith(XMLPAGE_OLD_DTD_SYSTEM_ID_2)) { 380 381 // XML page DTD reference 382 try (InputStream stream = getClass().getClassLoader().getResourceAsStream(XMLPAGE_OLD_DTD_LOCATION)) { 383 // cache the XML page DTD 384 content = CmsFileUtil.readFully(stream); 385 m_cachePermanent.put(systemId, content); 386 return createInputSource(content, systemId); 387 } catch (Throwable t) { 388 LOG.error( 389 Messages.get().getBundle().key(Messages.LOG_XMLPAGE_DTD_NOT_FOUND_1, XMLPAGE_OLD_DTD_LOCATION), 390 t); 391 } 392 } else if ((m_cms != null) && systemId.startsWith(OPENCMS_SCHEME)) { 393 394 // opencms:// VFS reference 395 String cacheSystemId = systemId.substring(OPENCMS_SCHEME.length() - 1); 396 String cacheKey = getCacheKey( 397 cacheSystemId, 398 m_cms.getRequestContext().getCurrentProject().isOnlineProject()); 399 // look up temporary cache 400 content = m_cacheTemporary.get(cacheKey); 401 if (content != null) { 402 return createInputSource(content, systemId); 403 } 404 String storedSiteRoot = m_cms.getRequestContext().getSiteRoot(); 405 try { 406 // content not cached, read from VFS 407 m_cms.getRequestContext().setSiteRoot("/"); 408 CmsFile file = m_cms.readFile(cacheSystemId, CmsResourceFilter.IGNORE_EXPIRATION); 409 content = file.getContents(); 410 // store content in cache 411 m_cacheTemporary.put(cacheKey, content); 412 if (LOG.isDebugEnabled()) { 413 LOG.debug(Messages.get().getBundle().key(Messages.LOG_ERR_CACHED_SYS_ID_1, cacheKey)); 414 } 415 return createInputSource(content, systemId); 416 } catch (CmsException e) { 417 throw new IOException( 418 Messages.get().getBundle().key(Messages.LOG_ENTITY_RESOLVE_FAILED_1, systemId), 419 e); 420 } finally { 421 m_cms.getRequestContext().setSiteRoot(storedSiteRoot); 422 } 423 424 } else if (systemId.startsWith(INTERNAL_SCHEME)) { 425 String location = systemId.substring(INTERNAL_SCHEME.length()); 426 try (InputStream stream = getClass().getClassLoader().getResourceAsStream(location)) { 427 content = CmsFileUtil.readFully(stream); 428 m_cachePermanent.put(systemId, content); 429 return createInputSource(content, systemId); 430 } catch (Throwable t) { 431 LOG.error(t.getLocalizedMessage(), t); 432 } 433 434 } else if (systemId.substring(0, systemId.lastIndexOf("/") + 1).equalsIgnoreCase( 435 CmsConfigurationManager.DEFAULT_DTD_PREFIX)// 436 ) { 437 // default DTD location in the org.opencms.configuration package 438 String location = null; 439 try { 440 String dtdFilename = systemId.substring(systemId.lastIndexOf("/") + 1); 441 location = CmsConfigurationManager.DEFAULT_DTD_LOCATION + dtdFilename; 442 InputStream stream = getClass().getClassLoader().getResourceAsStream(location); 443 content = CmsFileUtil.readFully(stream); 444 // cache the DTD 445 m_cachePermanent.put(systemId, content); 446 return createInputSource(content, systemId); 447 } catch (Throwable t) { 448 LOG.error(Messages.get().getBundle().key(Messages.LOG_DTD_NOT_FOUND_1, location), t); 449 } 450 } 451 LOG.error("Entity reference not allowed: " + systemId, new IOException()); 452 throw new IOException("Entity reference not allowed (see log for details)"); 453 } 454 455 /** 456 * Removes a cached entry for a system id (filename) from the internal offline temporary and content definition caches.<p> 457 * 458 * The online resources cached for the online project are only flushed when a project is published.<p> 459 * 460 * @param systemId the system id (filename) to remove from the cache 461 */ 462 public void uncacheSystemId(String systemId) { 463 464 Object o; 465 o = m_cacheTemporary.remove(getCacheKey(systemId, false)); 466 if (null != o) { 467 // if an object was removed from the temporary cache, all XML content definitions must be cleared 468 // because this may be a nested subschema 469 m_cacheContentDefinitions.clear(); 470 if (LOG.isDebugEnabled()) { 471 LOG.debug( 472 Messages.get().getBundle().key(Messages.LOG_ERR_UNCACHED_SYS_ID_1, getCacheKey(systemId, false))); 473 } 474 } else { 475 // check if a cached content definition has to be removed based on the system id 476 o = m_cacheContentDefinitions.remove(getCacheKey(systemId, false)); 477 if ((null != o) && LOG.isDebugEnabled()) { 478 LOG.debug( 479 Messages.get().getBundle().key( 480 Messages.LOG_ERR_UNCACHED_CONTENT_DEF_1, 481 getCacheKey(systemId, false))); 482 } 483 } 484 } 485 486 /** 487 * Creates an input source for the given byte data and system id.<p> 488 * 489 * @param data the data which the input source should return 490 * @param systemId the system id for the input source 491 * 492 * @return the input source 493 */ 494 InputSource createInputSource(byte[] data, String systemId) { 495 496 InputSource result = new InputSource(new ByteArrayInputStream(data)); 497 result.setSystemId(systemId); 498 return result; 499 } 500 501 /** 502 * Returns a cache key for the given system id (filename) based on the status 503 * of the given project flag.<p> 504 * 505 * @param systemId the system id (filename) to get the cache key for 506 * @param online indicates if this key is generated for the online project 507 * 508 * @return the cache key for the system id 509 */ 510 private String getCacheKey(String systemId, boolean online) { 511 512 if (online) { 513 return "online_".concat(systemId); 514 } 515 return "offline_".concat(systemId); 516 } 517 518 /** 519 * Returns a cache key for the given system id (filename) based on the status 520 * of the internal CmsObject.<p> 521 * 522 * @param systemId the system id (filename) to get the cache key for 523 * 524 * @return the cache key for the system id 525 */ 526 private String getCacheKeyForCurrentProject(String systemId) { 527 528 // check the project 529 boolean project = (m_cms != null) ? m_cms.getRequestContext().getCurrentProject().isOnlineProject() : false; 530 531 // remove opencms:// prefix 532 if (systemId.startsWith(OPENCMS_SCHEME)) { 533 systemId = systemId.substring(OPENCMS_SCHEME.length() - 1); 534 } 535 536 return getCacheKey(systemId, project); 537 } 538 539 /** 540 * Proves if there is at least one xsd or dtd file in the list of resources to publish.<p> 541 * 542 * @param publishHistoryId the publish history id 543 * 544 * @return true, if there is at least one xsd or dtd file in the list of resources to publish, otherwise false 545 */ 546 private boolean isSchemaDefinitionInPublishList(CmsUUID publishHistoryId) { 547 548 if (m_cms == null) { 549 // CmsObject not available, assume there may be a schema definition in the publish history 550 return true; 551 } 552 try { 553 List<CmsPublishedResource> publishedResources = m_cms.readPublishedResources(publishHistoryId); 554 for (CmsPublishedResource cmsPublishedResource : publishedResources) { 555 String resourceRootPath = cmsPublishedResource.getRootPath(); 556 String resourceRootPathLowerCase = resourceRootPath.toLowerCase(); 557 if (resourceRootPathLowerCase.endsWith(".xsd") 558 || resourceRootPathLowerCase.endsWith(".dtd") 559 || m_cacheTemporary.containsKey(getCacheKey(resourceRootPath, true))) { 560 return true; 561 } 562 } 563 } catch (CmsException e) { 564 // error reading published Resources. 565 LOG.warn(e.getMessage(), e); 566 } 567 return false; 568 } 569}