001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software GmbH, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.xml; 029 030import org.opencms.configuration.CmsConfigurationManager; 031import org.opencms.db.CmsDriverManager; 032import org.opencms.db.CmsPublishedResource; 033import org.opencms.file.CmsFile; 034import org.opencms.file.CmsObject; 035import org.opencms.file.CmsResource; 036import org.opencms.file.CmsResourceFilter; 037import org.opencms.main.CmsEvent; 038import org.opencms.main.CmsException; 039import org.opencms.main.CmsLog; 040import org.opencms.main.I_CmsEventListener; 041import org.opencms.main.OpenCms; 042import org.opencms.monitor.CmsMemoryMonitor; 043import org.opencms.util.CmsCollectionsGenericWrapper; 044import org.opencms.util.CmsFileUtil; 045import org.opencms.util.CmsUUID; 046import org.opencms.xml.page.CmsXmlPage; 047 048import java.io.ByteArrayInputStream; 049import java.io.InputStream; 050import java.util.List; 051import java.util.Map; 052import java.util.concurrent.ConcurrentHashMap; 053 054import org.apache.commons.logging.Log; 055 056import org.xml.sax.EntityResolver; 057import org.xml.sax.InputSource; 058 059/** 060 * Resolves XML entities (e.g. external DTDs) in the OpenCms VFS.<p> 061 * 062 * Also provides a cache for XML content schema definitions.<p> 063 * 064 * @since 6.0.0 065 */ 066public class CmsXmlEntityResolver implements EntityResolver, I_CmsEventListener { 067 068 /** Maximum size of the content definition cache. */ 069 public static final int CONTENT_DEFINITION_CACHE_SIZE = 2048; 070 071 /** Scheme for files which should be retrieved from the classpath. */ 072 public static final String INTERNAL_SCHEME = "internal://"; 073 074 /** The scheme to identify a file in the OpenCms VFS. */ 075 public static final String OPENCMS_SCHEME = "opencms://"; 076 077 /** 078 * A list of string pairs used to translate legacy system ids to a new form. The first component of each pair 079 * is the prefix which should be replaced by the second component of that pair. 080 */ 081 private static final String[][] LEGACY_TRANSLATIONS = { 082 {"opencms://system/modules/org.opencms.ade.config/schemas/", "internal://org/opencms/xml/adeconfig/"}, 083 { 084 "opencms://system/modules/org.opencms.ade.containerpage/schemas/", 085 "internal://org/opencms/xml/containerpage/"}, 086 {"opencms://system/modules/org.opencms.ade.sitemap/schemas/", "internal://org/opencms/xml/adeconfig/sitemap/"}}; 087 088 /** The log object for this class. */ 089 private static final Log LOG = CmsLog.getLog(CmsXmlEntityResolver.class); 090 091 /** A temporary cache for XML content definitions. */ 092 private static Map<String, CmsXmlContentDefinition> m_cacheContentDefinitions; 093 094 /** A permanent cache to avoid multiple readings of often used files from the VFS. */ 095 private static Map<String, byte[]> m_cachePermanent; 096 097 /** A temporary cache to avoid multiple readings of often used files from the VFS. */ 098 private static Map<String, byte[]> m_cacheTemporary; 099 100 /** The location of the XML page XML schema. */ 101 private static final String XMLPAGE_OLD_DTD_LOCATION = "org/opencms/xml/page/xmlpage.dtd"; 102 103 /** The (old) DTD address of the OpenCms xmlpage (used in 5.3.5). */ 104 private static final String XMLPAGE_OLD_DTD_SYSTEM_ID_1 = "http://www.opencms.org/dtd/6.0/xmlpage.dtd"; 105 106 /** The (old) DTD address of the OpenCms xmlpage (used until 5.3.5). */ 107 private static final String XMLPAGE_OLD_DTD_SYSTEM_ID_2 = "/system/shared/page.dtd"; 108 109 /** The location of the xmlpage XSD. */ 110 private static final String XMLPAGE_XSD_LOCATION = "org/opencms/xml/page/xmlpage.xsd"; 111 112 /** The cms object to use for VFS access (will be initialized with "Guest" permissions). */ 113 private CmsObject m_cms; 114 115 /** 116 * Creates a new XML entity resolver based on the provided CmsObject.<p> 117 * 118 * If the provided CmsObject is null, then the OpenCms VFS is not 119 * searched for XML entities, however the internal cache and 120 * other OpenCms internal entities not in the VFS are still resolved.<p> 121 * 122 * @param cms the cms context to use for resolving XML files from the OpenCms VFS 123 */ 124 public CmsXmlEntityResolver(CmsObject cms) { 125 126 initCaches(); 127 m_cms = cms; 128 } 129 130 /** 131 * Adds a system ID URL to to internal permanent cache.<p> 132 * 133 * This cache will NOT be cleared automatically.<p> 134 * 135 * @param systemId the system ID to add 136 * @param content the content of the system id 137 */ 138 public static void cacheSystemId(String systemId, byte[] content) { 139 140 initCaches(); 141 m_cachePermanent.put(systemId, content); 142 } 143 144 /** 145 * Checks if a given system ID URL is in the internal permanent cache.<p> 146 * 147 * This check is required to see if a XML content is based on a file that actually exists in the OpenCms VFS, 148 * or if the schema has been just cached without a VFS file.<p> 149 * 150 * @param systemId the system id ID check 151 * 152 * @return <code>true</code> if the system ID is in the internal permanent cache, <code>false</code> otherwise 153 */ 154 public static boolean isCachedSystemId(String systemId) { 155 156 if (m_cachePermanent != null) { 157 return m_cachePermanent.containsKey(systemId); 158 } 159 return false; 160 } 161 162 /** 163 * Checks whether the given schema id is an internal schema id or is translated to an internal schema id.<p> 164 * @param schema the schema id 165 * @return true if the given schema id is an internal schema id or translated to an internal schema id 166 */ 167 public static boolean isInternalId(String schema) { 168 169 String translatedId = translateLegacySystemId(schema); 170 if (translatedId.startsWith(INTERNAL_SCHEME)) { 171 return true; 172 } 173 return false; 174 } 175 176 /** 177 * Initialize the OpenCms XML entity resolver.<p> 178 * 179 * @param adminCms an initialized OpenCms user context with "Administrator" role permissions 180 * @param typeSchemaBytes the base widget type XML schema definitions 181 * 182 * @see CmsXmlContentTypeManager#initialize(CmsObject) 183 */ 184 protected static void initialize(CmsObject adminCms, byte[] typeSchemaBytes) { 185 186 // create the resolver to register as event listener 187 CmsXmlEntityResolver resolver = new CmsXmlEntityResolver(adminCms); 188 189 // register this object as event listener 190 OpenCms.addCmsEventListener( 191 resolver, 192 new int[] { 193 I_CmsEventListener.EVENT_CLEAR_CACHES, 194 I_CmsEventListener.EVENT_PUBLISH_PROJECT, 195 I_CmsEventListener.EVENT_RESOURCE_MODIFIED, 196 I_CmsEventListener.EVENT_RESOURCE_MOVED, 197 I_CmsEventListener.EVENT_RESOURCE_DELETED}); 198 199 // cache the base widget type XML schema definitions 200 cacheSystemId(CmsXmlContentDefinition.XSD_INCLUDE_OPENCMS, typeSchemaBytes); 201 } 202 203 /** 204 * Initializes the internal caches for permanent and temporary system IDs.<p> 205 */ 206 private static void initCaches() { 207 208 if (m_cacheTemporary == null) { 209 m_cacheTemporary = CmsMemoryMonitor.createLRUCacheMap(1024); 210 211 m_cachePermanent = new ConcurrentHashMap<String, byte[]>(32); 212 213 m_cacheContentDefinitions = CmsMemoryMonitor.createLRUCacheMap(CONTENT_DEFINITION_CACHE_SIZE); 214 } 215 if (OpenCms.getRunLevel() > OpenCms.RUNLEVEL_1_CORE_OBJECT) { 216 if ((OpenCms.getMemoryMonitor() != null) 217 && !OpenCms.getMemoryMonitor().isMonitoring(CmsXmlEntityResolver.class.getName() + ".cacheTemporary")) { 218 // reinitialize the caches after the memory monitor is set up 219 Map<String, byte[]> cacheTemporary = CmsMemoryMonitor.createLRUCacheMap(128); 220 cacheTemporary.putAll(m_cacheTemporary); 221 m_cacheTemporary = cacheTemporary; 222 OpenCms.getMemoryMonitor().register( 223 CmsXmlEntityResolver.class.getName() + ".cacheTemporary", 224 cacheTemporary); 225 226 Map<String, byte[]> cachePermanent = new ConcurrentHashMap<String, byte[]>(32); 227 cachePermanent.putAll(m_cachePermanent); 228 m_cachePermanent = cachePermanent; 229 OpenCms.getMemoryMonitor().register( 230 CmsXmlEntityResolver.class.getName() + ".cachePermanent", 231 cachePermanent); 232 233 Map<String, CmsXmlContentDefinition> cacheContentDefinitions = CmsMemoryMonitor.createLRUCacheMap( 234 CONTENT_DEFINITION_CACHE_SIZE); 235 cacheContentDefinitions.putAll(m_cacheContentDefinitions); 236 m_cacheContentDefinitions = cacheContentDefinitions; 237 OpenCms.getMemoryMonitor().register( 238 CmsXmlEntityResolver.class.getName() + ".cacheContentDefinitions", 239 cacheContentDefinitions); 240 } 241 } 242 } 243 244 /** 245 * Translates a legacy system id to a new form.<p> 246 * 247 * @param systemId the original system id 248 * @return the new system id 249 */ 250 private static String translateLegacySystemId(String systemId) { 251 252 String result = systemId; 253 for (String[] translation : LEGACY_TRANSLATIONS) { 254 if (systemId.startsWith(translation[0])) { 255 // replace prefix with second component if it matches the first component 256 result = translation[1] + systemId.substring(translation[0].length()); 257 break; 258 } 259 } 260 if (OpenCms.getRepositoryManager() != null) { 261 result = OpenCms.getResourceManager().getXsdTranslator().translateResource(result); 262 } 263 return result; 264 } 265 266 /** 267 * Caches an XML content definition based on the given system id and the online / offline status 268 * of this entity resolver instance.<p> 269 * 270 * @param systemId the system id to use as cache key 271 * @param contentDefinition the content definition to cache 272 */ 273 public void cacheContentDefinition(String systemId, CmsXmlContentDefinition contentDefinition) { 274 275 String cacheKey = getCacheKeyForCurrentProject(systemId); 276 m_cacheContentDefinitions.put(cacheKey, contentDefinition); 277 if (LOG.isDebugEnabled()) { 278 LOG.debug(Messages.get().getBundle().key(Messages.LOG_ERR_CACHED_SYSTEM_ID_1, cacheKey)); 279 } 280 } 281 282 /** 283 * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent) 284 */ 285 public void cmsEvent(CmsEvent event) { 286 287 CmsResource resource; 288 switch (event.getType()) { 289 case I_CmsEventListener.EVENT_PUBLISH_PROJECT: 290 // only flush cache if a schema definition where published 291 CmsUUID publishHistoryId = new CmsUUID((String)event.getData().get(I_CmsEventListener.KEY_PUBLISHID)); 292 if (isSchemaDefinitionInPublishList(publishHistoryId)) { 293 m_cacheTemporary.clear(); 294 m_cacheContentDefinitions.clear(); 295 if (LOG.isDebugEnabled()) { 296 LOG.debug(Messages.get().getBundle().key(Messages.LOG_ERR_FLUSHED_CACHES_0)); 297 } 298 } 299 break; 300 case I_CmsEventListener.EVENT_CLEAR_CACHES: 301 // flush cache 302 m_cacheTemporary.clear(); 303 m_cacheContentDefinitions.clear(); 304 if (LOG.isDebugEnabled()) { 305 LOG.debug(Messages.get().getBundle().key(Messages.LOG_ERR_FLUSHED_CACHES_0)); 306 } 307 break; 308 case I_CmsEventListener.EVENT_RESOURCE_MODIFIED: 309 Object change = event.getData().get(I_CmsEventListener.KEY_CHANGE); 310 if ((change != null) && change.equals(new Integer(CmsDriverManager.NOTHING_CHANGED))) { 311 // skip lock & unlock 312 return; 313 } 314 resource = (CmsResource)event.getData().get(I_CmsEventListener.KEY_RESOURCE); 315 uncacheSystemId(resource.getRootPath()); 316 break; 317 case I_CmsEventListener.EVENT_RESOURCE_DELETED: 318 case I_CmsEventListener.EVENT_RESOURCE_MOVED: 319 List<CmsResource> resources = CmsCollectionsGenericWrapper.list( 320 event.getData().get(I_CmsEventListener.KEY_RESOURCES)); 321 for (int i = 0; i < resources.size(); i++) { 322 resource = resources.get(i); 323 uncacheSystemId(resource.getRootPath()); 324 } 325 break; 326 default: 327 // no operation 328 } 329 } 330 331 /** 332 * Looks up the given XML content definition system id in the internal content definition cache.<p> 333 * 334 * @param systemId the system id of the XML content definition to look up 335 * 336 * @return the XML content definition found, or null if no definition is cached for the given system id 337 */ 338 public CmsXmlContentDefinition getCachedContentDefinition(String systemId) { 339 340 String cacheKey = getCacheKeyForCurrentProject(systemId); 341 CmsXmlContentDefinition result = m_cacheContentDefinitions.get(cacheKey); 342 if ((result != null) && LOG.isDebugEnabled()) { 343 LOG.debug(Messages.get().getBundle().key(Messages.LOG_CACHE_LOOKUP_SUCCEEDED_1, cacheKey)); 344 } 345 return result; 346 } 347 348 /** 349 * @see org.xml.sax.EntityResolver#resolveEntity(java.lang.String, java.lang.String) 350 */ 351 public InputSource resolveEntity(String publicId, String systemId) { 352 353 // lookup the system id caches first 354 byte[] content; 355 systemId = translateLegacySystemId(systemId); 356 content = m_cachePermanent.get(systemId); 357 if (content != null) { 358 // permanent cache contains system id 359 return createInputSource(content, systemId); 360 } else if (systemId.equals(CmsXmlPage.XMLPAGE_XSD_SYSTEM_ID)) { 361 362 // XML page XSD reference 363 try { 364 InputStream stream = getClass().getClassLoader().getResourceAsStream(XMLPAGE_XSD_LOCATION); 365 content = CmsFileUtil.readFully(stream); 366 // cache the XML page DTD 367 m_cachePermanent.put(systemId, content); 368 return createInputSource(content, systemId); 369 } catch (Throwable t) { 370 LOG.error( 371 Messages.get().getBundle().key(Messages.LOG_XMLPAGE_XSD_NOT_FOUND_1, XMLPAGE_XSD_LOCATION), 372 t); 373 } 374 375 } else if (systemId.equals(XMLPAGE_OLD_DTD_SYSTEM_ID_1) || systemId.endsWith(XMLPAGE_OLD_DTD_SYSTEM_ID_2)) { 376 377 // XML page DTD reference 378 try { 379 InputStream stream = getClass().getClassLoader().getResourceAsStream(XMLPAGE_OLD_DTD_LOCATION); 380 // cache the XML page DTD 381 content = CmsFileUtil.readFully(stream); 382 m_cachePermanent.put(systemId, content); 383 return createInputSource(content, systemId); 384 } catch (Throwable t) { 385 LOG.error( 386 Messages.get().getBundle().key(Messages.LOG_XMLPAGE_DTD_NOT_FOUND_1, XMLPAGE_OLD_DTD_LOCATION), 387 t); 388 } 389 } else if ((m_cms != null) && systemId.startsWith(OPENCMS_SCHEME)) { 390 391 // opencms:// VFS reference 392 String cacheSystemId = systemId.substring(OPENCMS_SCHEME.length() - 1); 393 String cacheKey = getCacheKey( 394 cacheSystemId, 395 m_cms.getRequestContext().getCurrentProject().isOnlineProject()); 396 // look up temporary cache 397 content = m_cacheTemporary.get(cacheKey); 398 if (content != null) { 399 return createInputSource(content, systemId); 400 } 401 String storedSiteRoot = m_cms.getRequestContext().getSiteRoot(); 402 try { 403 // content not cached, read from VFS 404 m_cms.getRequestContext().setSiteRoot("/"); 405 CmsFile file = m_cms.readFile(cacheSystemId, CmsResourceFilter.IGNORE_EXPIRATION); 406 content = file.getContents(); 407 // store content in cache 408 m_cacheTemporary.put(cacheKey, content); 409 if (LOG.isDebugEnabled()) { 410 LOG.debug(Messages.get().getBundle().key(Messages.LOG_ERR_CACHED_SYS_ID_1, cacheKey)); 411 } 412 return createInputSource(content, systemId); 413 } catch (Throwable t) { 414 LOG.error(Messages.get().getBundle().key(Messages.LOG_ENTITY_RESOLVE_FAILED_1, systemId), t); 415 } finally { 416 m_cms.getRequestContext().setSiteRoot(storedSiteRoot); 417 } 418 419 } else if (systemId.startsWith(INTERNAL_SCHEME)) { 420 String location = systemId.substring(INTERNAL_SCHEME.length()); 421 try { 422 InputStream stream = getClass().getClassLoader().getResourceAsStream(location); 423 content = CmsFileUtil.readFully(stream); 424 m_cachePermanent.put(systemId, content); 425 return createInputSource(content, systemId); 426 } catch (Throwable t) { 427 LOG.error(t.getLocalizedMessage(), t); 428 } 429 430 } else if (systemId.substring(0, systemId.lastIndexOf("/") + 1).equalsIgnoreCase( 431 CmsConfigurationManager.DEFAULT_DTD_PREFIX)) { 432 // default DTD location in the org.opencms.configuration package 433 String location = null; 434 try { 435 String dtdFilename = systemId.substring(systemId.lastIndexOf("/") + 1); 436 location = CmsConfigurationManager.DEFAULT_DTD_LOCATION + dtdFilename; 437 InputStream stream = getClass().getClassLoader().getResourceAsStream(location); 438 content = CmsFileUtil.readFully(stream); 439 // cache the DTD 440 m_cachePermanent.put(systemId, content); 441 return createInputSource(content, systemId); 442 } catch (Throwable t) { 443 LOG.error(Messages.get().getBundle().key(Messages.LOG_DTD_NOT_FOUND_1, location), t); 444 } 445 } 446 // use the default behaviour (i.e. resolve through external URL) 447 return null; 448 } 449 450 /** 451 * Removes a cached entry for a system id (filename) from the internal offline temporary and content definition caches.<p> 452 * 453 * The online resources cached for the online project are only flushed when a project is published.<p> 454 * 455 * @param systemId the system id (filename) to remove from the cache 456 */ 457 public void uncacheSystemId(String systemId) { 458 459 Object o; 460 o = m_cacheTemporary.remove(getCacheKey(systemId, false)); 461 if (null != o) { 462 // if an object was removed from the temporary cache, all XML content definitions must be cleared 463 // because this may be a nested subschema 464 m_cacheContentDefinitions.clear(); 465 if (LOG.isDebugEnabled()) { 466 LOG.debug( 467 Messages.get().getBundle().key(Messages.LOG_ERR_UNCACHED_SYS_ID_1, getCacheKey(systemId, false))); 468 } 469 } else { 470 // check if a cached content definition has to be removed based on the system id 471 o = m_cacheContentDefinitions.remove(getCacheKey(systemId, false)); 472 if ((null != o) && LOG.isDebugEnabled()) { 473 LOG.debug( 474 Messages.get().getBundle().key( 475 Messages.LOG_ERR_UNCACHED_CONTENT_DEF_1, 476 getCacheKey(systemId, false))); 477 } 478 } 479 } 480 481 /** 482 * Creates an input source for the given byte data and system id.<p> 483 * 484 * @param data the data which the input source should return 485 * @param systemId the system id for the input source 486 * 487 * @return the input source 488 */ 489 InputSource createInputSource(byte[] data, String systemId) { 490 491 InputSource result = new InputSource(new ByteArrayInputStream(data)); 492 result.setSystemId(systemId); 493 return result; 494 } 495 496 /** 497 * Returns a cache key for the given system id (filename) based on the status 498 * of the given project flag.<p> 499 * 500 * @param systemId the system id (filename) to get the cache key for 501 * @param online indicates if this key is generated for the online project 502 * 503 * @return the cache key for the system id 504 */ 505 private String getCacheKey(String systemId, boolean online) { 506 507 if (online) { 508 return "online_".concat(systemId); 509 } 510 return "offline_".concat(systemId); 511 } 512 513 /** 514 * Returns a cache key for the given system id (filename) based on the status 515 * of the internal CmsObject.<p> 516 * 517 * @param systemId the system id (filename) to get the cache key for 518 * 519 * @return the cache key for the system id 520 */ 521 private String getCacheKeyForCurrentProject(String systemId) { 522 523 // check the project 524 boolean project = (m_cms != null) ? m_cms.getRequestContext().getCurrentProject().isOnlineProject() : false; 525 526 // remove opencms:// prefix 527 if (systemId.startsWith(OPENCMS_SCHEME)) { 528 systemId = systemId.substring(OPENCMS_SCHEME.length() - 1); 529 } 530 531 return getCacheKey(systemId, project); 532 } 533 534 /** 535 * Proves if there is at least one xsd or dtd file in the list of resources to publish.<p> 536 * 537 * @param publishHistoryId the publish history id 538 * 539 * @return true, if there is at least one xsd or dtd file in the list of resources to publish, otherwise false 540 */ 541 private boolean isSchemaDefinitionInPublishList(CmsUUID publishHistoryId) { 542 543 if (m_cms == null) { 544 // CmsObject not available, assume there may be a schema definition in the publish history 545 return true; 546 } 547 try { 548 List<CmsPublishedResource> publishedResources = m_cms.readPublishedResources(publishHistoryId); 549 for (CmsPublishedResource cmsPublishedResource : publishedResources) { 550 String resourceRootPath = cmsPublishedResource.getRootPath(); 551 String resourceRootPathLowerCase = resourceRootPath.toLowerCase(); 552 if (resourceRootPathLowerCase.endsWith(".xsd") 553 || resourceRootPathLowerCase.endsWith(".dtd") 554 || m_cacheTemporary.containsKey(getCacheKey(resourceRootPath, true))) { 555 return true; 556 } 557 } 558 } catch (CmsException e) { 559 // error reading published Resources. 560 LOG.warn(e.getMessage(), e); 561 } 562 return false; 563 } 564}