001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software GmbH, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.xml; 029 030import org.opencms.configuration.CmsConfigurationManager; 031import org.opencms.db.CmsDriverManager; 032import org.opencms.db.CmsPublishedResource; 033import org.opencms.file.CmsFile; 034import org.opencms.file.CmsObject; 035import org.opencms.file.CmsResource; 036import org.opencms.file.CmsResourceFilter; 037import org.opencms.main.CmsEvent; 038import org.opencms.main.CmsException; 039import org.opencms.main.CmsLog; 040import org.opencms.main.I_CmsEventListener; 041import org.opencms.main.OpenCms; 042import org.opencms.util.CmsCollectionsGenericWrapper; 043import org.opencms.util.CmsFileUtil; 044import org.opencms.util.CmsUUID; 045import org.opencms.xml.page.CmsXmlPage; 046 047import java.io.ByteArrayInputStream; 048import java.io.InputStream; 049import java.util.Collections; 050import java.util.HashMap; 051import java.util.List; 052import java.util.Map; 053 054import org.apache.commons.logging.Log; 055 056import org.xml.sax.EntityResolver; 057import org.xml.sax.InputSource; 058 059/** 060 * Resolves XML entities (e.g. external DTDs) in the OpenCms VFS.<p> 061 * 062 * Also provides a cache for XML content schema definitions.<p> 063 * 064 * @since 6.0.0 065 */ 066public class CmsXmlEntityResolver implements EntityResolver, I_CmsEventListener { 067 068 /** Maximum size of the content definition cache. */ 069 public static final int CONTENT_DEFINITION_CACHE_SIZE = 2048; 070 071 /** Scheme for files which should be retrieved from the classpath. */ 072 public static final String INTERNAL_SCHEME = "internal://"; 073 074 /** The scheme to identify a file in the OpenCms VFS. */ 075 public static final String OPENCMS_SCHEME = "opencms://"; 076 077 /** 078 * A list of string pairs used to translate legacy system ids to a new form. The first component of each pair 079 * is the prefix which should be replaced by the second component of that pair. 080 */ 081 private static final String[][] LEGACY_TRANSLATIONS = { 082 {"opencms://system/modules/org.opencms.ade.config/schemas/", "internal://org/opencms/xml/adeconfig/"}, 083 { 084 "opencms://system/modules/org.opencms.ade.containerpage/schemas/", 085 "internal://org/opencms/xml/containerpage/"}, 086 {"opencms://system/modules/org.opencms.ade.sitemap/schemas/", "internal://org/opencms/xml/adeconfig/sitemap/"}}; 087 088 /** The log object for this class. */ 089 private static final Log LOG = CmsLog.getLog(CmsXmlEntityResolver.class); 090 091 /** A temporary cache for XML content definitions. */ 092 private static Map<String, CmsXmlContentDefinition> m_cacheContentDefinitions; 093 094 /** A permanent cache to avoid multiple readings of often used files from the VFS. */ 095 private static Map<String, byte[]> m_cachePermanent; 096 097 /** A temporary cache to avoid multiple readings of often used files from the VFS. */ 098 private static Map<String, byte[]> m_cacheTemporary; 099 100 /** The location of the XML page XML schema. */ 101 private static final String XMLPAGE_OLD_DTD_LOCATION = "org/opencms/xml/page/xmlpage.dtd"; 102 103 /** The (old) DTD address of the OpenCms xmlpage (used in 5.3.5). */ 104 private static final String XMLPAGE_OLD_DTD_SYSTEM_ID_1 = "http://www.opencms.org/dtd/6.0/xmlpage.dtd"; 105 106 /** The (old) DTD address of the OpenCms xmlpage (used until 5.3.5). */ 107 private static final String XMLPAGE_OLD_DTD_SYSTEM_ID_2 = "/system/shared/page.dtd"; 108 109 /** The location of the xmlpage XSD. */ 110 private static final String XMLPAGE_XSD_LOCATION = "org/opencms/xml/page/xmlpage.xsd"; 111 112 /** The cms object to use for VFS access (will be initialized with "Guest" permissions). */ 113 private CmsObject m_cms; 114 115 /** 116 * Creates a new XML entity resolver based on the provided CmsObject.<p> 117 * 118 * If the provided CmsObject is null, then the OpenCms VFS is not 119 * searched for XML entities, however the internal cache and 120 * other OpenCms internal entities not in the VFS are still resolved.<p> 121 * 122 * @param cms the cms context to use for resolving XML files from the OpenCms VFS 123 */ 124 public CmsXmlEntityResolver(CmsObject cms) { 125 126 initCaches(); 127 m_cms = cms; 128 } 129 130 /** 131 * Adds a system ID URL to to internal permanent cache.<p> 132 * 133 * This cache will NOT be cleared automatically.<p> 134 * 135 * @param systemId the system ID to add 136 * @param content the content of the system id 137 */ 138 public static void cacheSystemId(String systemId, byte[] content) { 139 140 initCaches(); 141 m_cachePermanent.put(systemId, content); 142 } 143 144 /** 145 * Checks if a given system ID URL is in the internal permanent cache.<p> 146 * 147 * This check is required to see if a XML content is based on a file that actually exists in the OpenCms VFS, 148 * or if the schema has been just cached without a VFS file.<p> 149 * 150 * @param systemId the system id ID check 151 * 152 * @return <code>true</code> if the system ID is in the internal permanent cache, <code>false</code> otherwise 153 */ 154 public static boolean isCachedSystemId(String systemId) { 155 156 if (m_cachePermanent != null) { 157 return m_cachePermanent.containsKey(systemId); 158 } 159 return false; 160 } 161 162 /** 163 * Checks whether the given schema id is an internal schema id or is translated to an internal schema id.<p> 164 * @param schema the schema id 165 * @return true if the given schema id is an internal schema id or translated to an internal schema id 166 */ 167 public static boolean isInternalId(String schema) { 168 169 String translatedId = translateLegacySystemId(schema); 170 if (translatedId.startsWith(INTERNAL_SCHEME)) { 171 return true; 172 } 173 return false; 174 } 175 176 /** 177 * Initialize the OpenCms XML entity resolver.<p> 178 * 179 * @param adminCms an initialized OpenCms user context with "Administrator" role permissions 180 * @param typeSchemaBytes the base widget type XML schema definitions 181 * 182 * @see CmsXmlContentTypeManager#initialize(CmsObject) 183 */ 184 protected static void initialize(CmsObject adminCms, byte[] typeSchemaBytes) { 185 186 // create the resolver to register as event listener 187 CmsXmlEntityResolver resolver = new CmsXmlEntityResolver(adminCms); 188 189 // register this object as event listener 190 OpenCms.addCmsEventListener( 191 resolver, 192 new int[] { 193 I_CmsEventListener.EVENT_CLEAR_CACHES, 194 I_CmsEventListener.EVENT_PUBLISH_PROJECT, 195 I_CmsEventListener.EVENT_RESOURCE_MODIFIED, 196 I_CmsEventListener.EVENT_RESOURCE_MOVED, 197 I_CmsEventListener.EVENT_RESOURCE_DELETED}); 198 199 // cache the base widget type XML schema definitions 200 cacheSystemId(CmsXmlContentDefinition.XSD_INCLUDE_OPENCMS, typeSchemaBytes); 201 } 202 203 /** 204 * Initializes the internal caches for permanent and temporary system IDs.<p> 205 */ 206 private static void initCaches() { 207 208 if (m_cacheTemporary == null) { 209 Map<String, byte[]> cacheTemporary = CmsCollectionsGenericWrapper.createLRUMap(1024); 210 m_cacheTemporary = Collections.synchronizedMap(cacheTemporary); 211 212 Map<String, byte[]> cachePermanent = new HashMap<String, byte[]>(32); 213 m_cachePermanent = Collections.synchronizedMap(cachePermanent); 214 215 Map<String, CmsXmlContentDefinition> cacheContentDefinitions = CmsCollectionsGenericWrapper.createLRUMap( 216 CONTENT_DEFINITION_CACHE_SIZE); 217 m_cacheContentDefinitions = Collections.synchronizedMap(cacheContentDefinitions); 218 } 219 if (OpenCms.getRunLevel() > OpenCms.RUNLEVEL_1_CORE_OBJECT) { 220 if ((OpenCms.getMemoryMonitor() != null) 221 && !OpenCms.getMemoryMonitor().isMonitoring(CmsXmlEntityResolver.class.getName() + ".cacheTemporary")) { 222 // reinitialize the caches after the memory monitor is set up 223 Map<String, byte[]> cacheTemporary = CmsCollectionsGenericWrapper.createLRUMap(128); 224 cacheTemporary.putAll(m_cacheTemporary); 225 m_cacheTemporary = Collections.synchronizedMap(cacheTemporary); 226 // map must be of type "LRUMap" so that memory monitor can access all information 227 OpenCms.getMemoryMonitor().register( 228 CmsXmlEntityResolver.class.getName() + ".cacheTemporary", 229 cacheTemporary); 230 231 Map<String, byte[]> cachePermanent = new HashMap<String, byte[]>(32); 232 cachePermanent.putAll(m_cachePermanent); 233 m_cachePermanent = Collections.synchronizedMap(cachePermanent); 234 // map must be of type "HashMap" so that memory monitor can access all information 235 OpenCms.getMemoryMonitor().register( 236 CmsXmlEntityResolver.class.getName() + ".cachePermanent", 237 cachePermanent); 238 239 Map<String, CmsXmlContentDefinition> cacheContentDefinitions = CmsCollectionsGenericWrapper.createLRUMap( 240 CONTENT_DEFINITION_CACHE_SIZE); 241 cacheContentDefinitions.putAll(m_cacheContentDefinitions); 242 m_cacheContentDefinitions = Collections.synchronizedMap(cacheContentDefinitions); 243 // map must be of type "LRUMap" so that memory monitor can access all information 244 OpenCms.getMemoryMonitor().register( 245 CmsXmlEntityResolver.class.getName() + ".cacheContentDefinitions", 246 cacheContentDefinitions); 247 } 248 } 249 } 250 251 /** 252 * Translates a legacy system id to a new form.<p> 253 * 254 * @param systemId the original system id 255 * @return the new system id 256 */ 257 private static String translateLegacySystemId(String systemId) { 258 259 String result = systemId; 260 for (String[] translation : LEGACY_TRANSLATIONS) { 261 if (systemId.startsWith(translation[0])) { 262 // replace prefix with second component if it matches the first component 263 result = translation[1] + systemId.substring(translation[0].length()); 264 break; 265 } 266 } 267 if (OpenCms.getRepositoryManager() != null) { 268 result = OpenCms.getResourceManager().getXsdTranslator().translateResource(result); 269 } 270 return result; 271 } 272 273 /** 274 * Caches an XML content definition based on the given system id and the online / offline status 275 * of this entity resolver instance.<p> 276 * 277 * @param systemId the system id to use as cache key 278 * @param contentDefinition the content definition to cache 279 */ 280 public void cacheContentDefinition(String systemId, CmsXmlContentDefinition contentDefinition) { 281 282 String cacheKey = getCacheKeyForCurrentProject(systemId); 283 m_cacheContentDefinitions.put(cacheKey, contentDefinition); 284 if (LOG.isDebugEnabled()) { 285 LOG.debug(Messages.get().getBundle().key(Messages.LOG_ERR_CACHED_SYSTEM_ID_1, cacheKey)); 286 } 287 } 288 289 /** 290 * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent) 291 */ 292 public void cmsEvent(CmsEvent event) { 293 294 CmsResource resource; 295 switch (event.getType()) { 296 case I_CmsEventListener.EVENT_PUBLISH_PROJECT: 297 // only flush cache if a schema definition where published 298 CmsUUID publishHistoryId = new CmsUUID((String)event.getData().get(I_CmsEventListener.KEY_PUBLISHID)); 299 if (isSchemaDefinitionInPublishList(publishHistoryId)) { 300 m_cacheTemporary.clear(); 301 m_cacheContentDefinitions.clear(); 302 if (LOG.isDebugEnabled()) { 303 LOG.debug(Messages.get().getBundle().key(Messages.LOG_ERR_FLUSHED_CACHES_0)); 304 } 305 } 306 break; 307 case I_CmsEventListener.EVENT_CLEAR_CACHES: 308 // flush cache 309 m_cacheTemporary.clear(); 310 m_cacheContentDefinitions.clear(); 311 if (LOG.isDebugEnabled()) { 312 LOG.debug(Messages.get().getBundle().key(Messages.LOG_ERR_FLUSHED_CACHES_0)); 313 } 314 break; 315 case I_CmsEventListener.EVENT_RESOURCE_MODIFIED: 316 Object change = event.getData().get(I_CmsEventListener.KEY_CHANGE); 317 if ((change != null) && change.equals(new Integer(CmsDriverManager.NOTHING_CHANGED))) { 318 // skip lock & unlock 319 return; 320 } 321 resource = (CmsResource)event.getData().get(I_CmsEventListener.KEY_RESOURCE); 322 uncacheSystemId(resource.getRootPath()); 323 break; 324 case I_CmsEventListener.EVENT_RESOURCE_DELETED: 325 case I_CmsEventListener.EVENT_RESOURCE_MOVED: 326 List<CmsResource> resources = CmsCollectionsGenericWrapper.list( 327 event.getData().get(I_CmsEventListener.KEY_RESOURCES)); 328 for (int i = 0; i < resources.size(); i++) { 329 resource = resources.get(i); 330 uncacheSystemId(resource.getRootPath()); 331 } 332 break; 333 default: 334 // no operation 335 } 336 } 337 338 /** 339 * Looks up the given XML content definition system id in the internal content definition cache.<p> 340 * 341 * @param systemId the system id of the XML content definition to look up 342 * 343 * @return the XML content definition found, or null if no definition is cached for the given system id 344 */ 345 public CmsXmlContentDefinition getCachedContentDefinition(String systemId) { 346 347 String cacheKey = getCacheKeyForCurrentProject(systemId); 348 CmsXmlContentDefinition result = m_cacheContentDefinitions.get(cacheKey); 349 if ((result != null) && LOG.isDebugEnabled()) { 350 LOG.debug(Messages.get().getBundle().key(Messages.LOG_CACHE_LOOKUP_SUCCEEDED_1, cacheKey)); 351 } 352 return result; 353 } 354 355 /** 356 * @see org.xml.sax.EntityResolver#resolveEntity(java.lang.String, java.lang.String) 357 */ 358 public InputSource resolveEntity(String publicId, String systemId) { 359 360 // lookup the system id caches first 361 byte[] content; 362 systemId = translateLegacySystemId(systemId); 363 content = m_cachePermanent.get(systemId); 364 if (content != null) { 365 // permanent cache contains system id 366 return createInputSource(content, systemId); 367 } else if (systemId.equals(CmsXmlPage.XMLPAGE_XSD_SYSTEM_ID)) { 368 369 // XML page XSD reference 370 try { 371 InputStream stream = getClass().getClassLoader().getResourceAsStream(XMLPAGE_XSD_LOCATION); 372 content = CmsFileUtil.readFully(stream); 373 // cache the XML page DTD 374 m_cachePermanent.put(systemId, content); 375 return createInputSource(content, systemId); 376 } catch (Throwable t) { 377 LOG.error( 378 Messages.get().getBundle().key(Messages.LOG_XMLPAGE_XSD_NOT_FOUND_1, XMLPAGE_XSD_LOCATION), 379 t); 380 } 381 382 } else if (systemId.equals(XMLPAGE_OLD_DTD_SYSTEM_ID_1) || systemId.endsWith(XMLPAGE_OLD_DTD_SYSTEM_ID_2)) { 383 384 // XML page DTD reference 385 try { 386 InputStream stream = getClass().getClassLoader().getResourceAsStream(XMLPAGE_OLD_DTD_LOCATION); 387 // cache the XML page DTD 388 content = CmsFileUtil.readFully(stream); 389 m_cachePermanent.put(systemId, content); 390 return createInputSource(content, systemId); 391 } catch (Throwable t) { 392 LOG.error( 393 Messages.get().getBundle().key(Messages.LOG_XMLPAGE_DTD_NOT_FOUND_1, XMLPAGE_OLD_DTD_LOCATION), 394 t); 395 } 396 } else if ((m_cms != null) && systemId.startsWith(OPENCMS_SCHEME)) { 397 398 // opencms:// VFS reference 399 String cacheSystemId = systemId.substring(OPENCMS_SCHEME.length() - 1); 400 String cacheKey = getCacheKey( 401 cacheSystemId, 402 m_cms.getRequestContext().getCurrentProject().isOnlineProject()); 403 // look up temporary cache 404 content = m_cacheTemporary.get(cacheKey); 405 if (content != null) { 406 return createInputSource(content, systemId); 407 } 408 String storedSiteRoot = m_cms.getRequestContext().getSiteRoot(); 409 try { 410 // content not cached, read from VFS 411 m_cms.getRequestContext().setSiteRoot("/"); 412 CmsFile file = m_cms.readFile(cacheSystemId, CmsResourceFilter.IGNORE_EXPIRATION); 413 content = file.getContents(); 414 // store content in cache 415 m_cacheTemporary.put(cacheKey, content); 416 if (LOG.isDebugEnabled()) { 417 LOG.debug(Messages.get().getBundle().key(Messages.LOG_ERR_CACHED_SYS_ID_1, cacheKey)); 418 } 419 return createInputSource(content, systemId); 420 } catch (Throwable t) { 421 LOG.error(Messages.get().getBundle().key(Messages.LOG_ENTITY_RESOLVE_FAILED_1, systemId), t); 422 } finally { 423 m_cms.getRequestContext().setSiteRoot(storedSiteRoot); 424 } 425 426 } else if (systemId.startsWith(INTERNAL_SCHEME)) { 427 String location = systemId.substring(INTERNAL_SCHEME.length()); 428 try { 429 InputStream stream = getClass().getClassLoader().getResourceAsStream(location); 430 content = CmsFileUtil.readFully(stream); 431 m_cachePermanent.put(systemId, content); 432 return createInputSource(content, systemId); 433 } catch (Throwable t) { 434 LOG.error(t.getLocalizedMessage(), t); 435 } 436 437 } else if (systemId.substring(0, systemId.lastIndexOf("/") + 1).equalsIgnoreCase( 438 CmsConfigurationManager.DEFAULT_DTD_PREFIX)) { 439 // default DTD location in the org.opencms.configuration package 440 String location = null; 441 try { 442 String dtdFilename = systemId.substring(systemId.lastIndexOf("/") + 1); 443 location = CmsConfigurationManager.DEFAULT_DTD_LOCATION + dtdFilename; 444 InputStream stream = getClass().getClassLoader().getResourceAsStream(location); 445 content = CmsFileUtil.readFully(stream); 446 // cache the DTD 447 m_cachePermanent.put(systemId, content); 448 return createInputSource(content, systemId); 449 } catch (Throwable t) { 450 LOG.error(Messages.get().getBundle().key(Messages.LOG_DTD_NOT_FOUND_1, location), t); 451 } 452 } 453 // use the default behaviour (i.e. resolve through external URL) 454 return null; 455 } 456 457 /** 458 * Removes a cached entry for a system id (filename) from the internal offline temporary and content definition caches.<p> 459 * 460 * The online resources cached for the online project are only flushed when a project is published.<p> 461 * 462 * @param systemId the system id (filename) to remove from the cache 463 */ 464 public void uncacheSystemId(String systemId) { 465 466 Object o; 467 o = m_cacheTemporary.remove(getCacheKey(systemId, false)); 468 if (null != o) { 469 // if an object was removed from the temporary cache, all XML content definitions must be cleared 470 // because this may be a nested subschema 471 m_cacheContentDefinitions.clear(); 472 if (LOG.isDebugEnabled()) { 473 LOG.debug( 474 Messages.get().getBundle().key(Messages.LOG_ERR_UNCACHED_SYS_ID_1, getCacheKey(systemId, false))); 475 } 476 } else { 477 // check if a cached content definition has to be removed based on the system id 478 o = m_cacheContentDefinitions.remove(getCacheKey(systemId, false)); 479 if ((null != o) && LOG.isDebugEnabled()) { 480 LOG.debug( 481 Messages.get().getBundle().key( 482 Messages.LOG_ERR_UNCACHED_CONTENT_DEF_1, 483 getCacheKey(systemId, false))); 484 } 485 } 486 } 487 488 /** 489 * Creates an input source for the given byte data and system id.<p> 490 * 491 * @param data the data which the input source should return 492 * @param systemId the system id for the input source 493 * 494 * @return the input source 495 */ 496 InputSource createInputSource(byte[] data, String systemId) { 497 498 InputSource result = new InputSource(new ByteArrayInputStream(data)); 499 result.setSystemId(systemId); 500 return result; 501 } 502 503 /** 504 * Returns a cache key for the given system id (filename) based on the status 505 * of the given project flag.<p> 506 * 507 * @param systemId the system id (filename) to get the cache key for 508 * @param online indicates if this key is generated for the online project 509 * 510 * @return the cache key for the system id 511 */ 512 private String getCacheKey(String systemId, boolean online) { 513 514 if (online) { 515 return "online_".concat(systemId); 516 } 517 return "offline_".concat(systemId); 518 } 519 520 /** 521 * Returns a cache key for the given system id (filename) based on the status 522 * of the internal CmsObject.<p> 523 * 524 * @param systemId the system id (filename) to get the cache key for 525 * 526 * @return the cache key for the system id 527 */ 528 private String getCacheKeyForCurrentProject(String systemId) { 529 530 // check the project 531 boolean project = (m_cms != null) ? m_cms.getRequestContext().getCurrentProject().isOnlineProject() : false; 532 533 // remove opencms:// prefix 534 if (systemId.startsWith(OPENCMS_SCHEME)) { 535 systemId = systemId.substring(OPENCMS_SCHEME.length() - 1); 536 } 537 538 return getCacheKey(systemId, project); 539 } 540 541 /** 542 * Proves if there is at least one xsd or dtd file in the list of resources to publish.<p> 543 * 544 * @param publishHistoryId the publish history id 545 * 546 * @return true, if there is at least one xsd or dtd file in the list of resources to publish, otherwise false 547 */ 548 private boolean isSchemaDefinitionInPublishList(CmsUUID publishHistoryId) { 549 550 if (m_cms == null) { 551 // CmsObject not available, assume there may be a schema definition in the publish history 552 return true; 553 } 554 try { 555 List<CmsPublishedResource> publishedResources = m_cms.readPublishedResources(publishHistoryId); 556 for (CmsPublishedResource cmsPublishedResource : publishedResources) { 557 String resourceRootPath = cmsPublishedResource.getRootPath(); 558 String resourceRootPathLowerCase = resourceRootPath.toLowerCase(); 559 if (resourceRootPathLowerCase.endsWith(".xsd") 560 || resourceRootPathLowerCase.endsWith(".dtd") 561 || m_cacheTemporary.containsKey(getCacheKey(resourceRootPath, true))) { 562 return true; 563 } 564 } 565 } catch (CmsException e) { 566 // error reading published Resources. 567 LOG.warn(e.getMessage(), e); 568 } 569 return false; 570 } 571}