001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.xml;
029
030import org.opencms.configuration.CmsConfigurationManager;
031import org.opencms.db.CmsDriverManager;
032import org.opencms.db.CmsPublishedResource;
033import org.opencms.file.CmsFile;
034import org.opencms.file.CmsObject;
035import org.opencms.file.CmsResource;
036import org.opencms.file.CmsResourceFilter;
037import org.opencms.main.CmsEvent;
038import org.opencms.main.CmsException;
039import org.opencms.main.CmsLog;
040import org.opencms.main.I_CmsEventListener;
041import org.opencms.main.OpenCms;
042import org.opencms.util.CmsCollectionsGenericWrapper;
043import org.opencms.util.CmsFileUtil;
044import org.opencms.util.CmsUUID;
045import org.opencms.xml.page.CmsXmlPage;
046
047import java.io.ByteArrayInputStream;
048import java.io.InputStream;
049import java.util.Collections;
050import java.util.HashMap;
051import java.util.List;
052import java.util.Map;
053
054import org.apache.commons.logging.Log;
055
056import org.xml.sax.EntityResolver;
057import org.xml.sax.InputSource;
058
059/**
060 * Resolves XML entities (e.g. external DTDs) in the OpenCms VFS.<p>
061 *
062 * Also provides a cache for XML content schema definitions.<p>
063 *
064 * @since 6.0.0
065 */
066public class CmsXmlEntityResolver implements EntityResolver, I_CmsEventListener {
067
068    /** Maximum size of the content definition cache. */
069    public static final int CONTENT_DEFINITION_CACHE_SIZE = 2048;
070
071    /** Scheme for files which should be retrieved from the classpath. */
072    public static final String INTERNAL_SCHEME = "internal://";
073
074    /** The scheme to identify a file in the OpenCms VFS. */
075    public static final String OPENCMS_SCHEME = "opencms://";
076
077    /**
078     * A list of string pairs used to translate legacy system ids to a new form. The first component of each pair
079     * is the prefix which should be replaced by the second component of that pair.
080     */
081    private static final String[][] LEGACY_TRANSLATIONS = {
082        {"opencms://system/modules/org.opencms.ade.config/schemas/", "internal://org/opencms/xml/adeconfig/"},
083        {
084            "opencms://system/modules/org.opencms.ade.containerpage/schemas/",
085            "internal://org/opencms/xml/containerpage/"},
086        {"opencms://system/modules/org.opencms.ade.sitemap/schemas/", "internal://org/opencms/xml/adeconfig/sitemap/"}};
087
088    /** The log object for this class. */
089    private static final Log LOG = CmsLog.getLog(CmsXmlEntityResolver.class);
090
091    /** A temporary cache for XML content definitions. */
092    private static Map<String, CmsXmlContentDefinition> m_cacheContentDefinitions;
093
094    /** A permanent cache to avoid multiple readings of often used files from the VFS. */
095    private static Map<String, byte[]> m_cachePermanent;
096
097    /** A temporary cache to avoid multiple readings of often used files from the VFS. */
098    private static Map<String, byte[]> m_cacheTemporary;
099
100    /** The location of the XML page XML schema. */
101    private static final String XMLPAGE_OLD_DTD_LOCATION = "org/opencms/xml/page/xmlpage.dtd";
102
103    /** The (old) DTD address of the OpenCms xmlpage (used in 5.3.5). */
104    private static final String XMLPAGE_OLD_DTD_SYSTEM_ID_1 = "http://www.opencms.org/dtd/6.0/xmlpage.dtd";
105
106    /** The (old) DTD address of the OpenCms xmlpage (used until 5.3.5). */
107    private static final String XMLPAGE_OLD_DTD_SYSTEM_ID_2 = "/system/shared/page.dtd";
108
109    /** The location of the xmlpage XSD. */
110    private static final String XMLPAGE_XSD_LOCATION = "org/opencms/xml/page/xmlpage.xsd";
111
112    /** The cms object to use for VFS access (will be initialized with "Guest" permissions). */
113    private CmsObject m_cms;
114
115    /**
116     * Creates a new XML entity resolver based on the provided CmsObject.<p>
117     *
118     * If the provided CmsObject is null, then the OpenCms VFS is not
119     * searched for XML entities, however the internal cache and
120     * other OpenCms internal entities not in the VFS are still resolved.<p>
121     *
122     * @param cms the cms context to use for resolving XML files from the OpenCms VFS
123     */
124    public CmsXmlEntityResolver(CmsObject cms) {
125
126        initCaches();
127        m_cms = cms;
128    }
129
130    /**
131     * Adds a system ID URL to to internal permanent cache.<p>
132     *
133     * This cache will NOT be cleared automatically.<p>
134     *
135     * @param systemId the system ID to add
136     * @param content the content of the system id
137     */
138    public static void cacheSystemId(String systemId, byte[] content) {
139
140        initCaches();
141        m_cachePermanent.put(systemId, content);
142    }
143
144    /**
145     * Checks if a given system ID URL is in the internal permanent cache.<p>
146     *
147     * This check is required to see if a XML content is based on a file that actually exists in the OpenCms VFS,
148     * or if the schema has been just cached without a VFS file.<p>
149     *
150     * @param systemId the system id ID check
151     *
152     * @return <code>true</code> if the system ID is in the internal permanent cache, <code>false</code> otherwise
153     */
154    public static boolean isCachedSystemId(String systemId) {
155
156        if (m_cachePermanent != null) {
157            return m_cachePermanent.containsKey(systemId);
158        }
159        return false;
160    }
161
162    /**
163     * Checks whether the given schema id is an internal schema id or is translated to an internal schema id.<p>
164     * @param schema the schema id
165     * @return true if the given schema id is an internal schema id or translated to an internal schema id
166     */
167    public static boolean isInternalId(String schema) {
168
169        String translatedId = translateLegacySystemId(schema);
170        if (translatedId.startsWith(INTERNAL_SCHEME)) {
171            return true;
172        }
173        return false;
174    }
175
176    /**
177     * Initialize the OpenCms XML entity resolver.<p>
178     *
179     * @param adminCms an initialized OpenCms user context with "Administrator" role permissions
180     * @param typeSchemaBytes the base widget type XML schema definitions
181     *
182     * @see CmsXmlContentTypeManager#initialize(CmsObject)
183     */
184    protected static void initialize(CmsObject adminCms, byte[] typeSchemaBytes) {
185
186        // create the resolver to register as event listener
187        CmsXmlEntityResolver resolver = new CmsXmlEntityResolver(adminCms);
188
189        // register this object as event listener
190        OpenCms.addCmsEventListener(
191            resolver,
192            new int[] {
193                I_CmsEventListener.EVENT_CLEAR_CACHES,
194                I_CmsEventListener.EVENT_PUBLISH_PROJECT,
195                I_CmsEventListener.EVENT_RESOURCE_MODIFIED,
196                I_CmsEventListener.EVENT_RESOURCE_MOVED,
197                I_CmsEventListener.EVENT_RESOURCE_DELETED});
198
199        // cache the base widget type XML schema definitions
200        cacheSystemId(CmsXmlContentDefinition.XSD_INCLUDE_OPENCMS, typeSchemaBytes);
201    }
202
203    /**
204     * Initializes the internal caches for permanent and temporary system IDs.<p>
205     */
206    private static void initCaches() {
207
208        if (m_cacheTemporary == null) {
209            Map<String, byte[]> cacheTemporary = CmsCollectionsGenericWrapper.createLRUMap(1024);
210            m_cacheTemporary = Collections.synchronizedMap(cacheTemporary);
211
212            Map<String, byte[]> cachePermanent = new HashMap<String, byte[]>(32);
213            m_cachePermanent = Collections.synchronizedMap(cachePermanent);
214
215            Map<String, CmsXmlContentDefinition> cacheContentDefinitions = CmsCollectionsGenericWrapper.createLRUMap(
216                CONTENT_DEFINITION_CACHE_SIZE);
217            m_cacheContentDefinitions = Collections.synchronizedMap(cacheContentDefinitions);
218        }
219        if (OpenCms.getRunLevel() > OpenCms.RUNLEVEL_1_CORE_OBJECT) {
220            if ((OpenCms.getMemoryMonitor() != null)
221                && !OpenCms.getMemoryMonitor().isMonitoring(CmsXmlEntityResolver.class.getName() + ".cacheTemporary")) {
222                // reinitialize the caches after the memory monitor is set up
223                Map<String, byte[]> cacheTemporary = CmsCollectionsGenericWrapper.createLRUMap(128);
224                cacheTemporary.putAll(m_cacheTemporary);
225                m_cacheTemporary = Collections.synchronizedMap(cacheTemporary);
226                // map must be of type "LRUMap" so that memory monitor can access all information
227                OpenCms.getMemoryMonitor().register(
228                    CmsXmlEntityResolver.class.getName() + ".cacheTemporary",
229                    cacheTemporary);
230
231                Map<String, byte[]> cachePermanent = new HashMap<String, byte[]>(32);
232                cachePermanent.putAll(m_cachePermanent);
233                m_cachePermanent = Collections.synchronizedMap(cachePermanent);
234                // map must be of type "HashMap" so that memory monitor can access all information
235                OpenCms.getMemoryMonitor().register(
236                    CmsXmlEntityResolver.class.getName() + ".cachePermanent",
237                    cachePermanent);
238
239                Map<String, CmsXmlContentDefinition> cacheContentDefinitions = CmsCollectionsGenericWrapper.createLRUMap(
240                    CONTENT_DEFINITION_CACHE_SIZE);
241                cacheContentDefinitions.putAll(m_cacheContentDefinitions);
242                m_cacheContentDefinitions = Collections.synchronizedMap(cacheContentDefinitions);
243                // map must be of type "LRUMap" so that memory monitor can access all information
244                OpenCms.getMemoryMonitor().register(
245                    CmsXmlEntityResolver.class.getName() + ".cacheContentDefinitions",
246                    cacheContentDefinitions);
247            }
248        }
249    }
250
251    /**
252     * Translates a legacy system id to a new form.<p>
253     *
254     * @param systemId the original system id
255     * @return the new system id
256     */
257    private static String translateLegacySystemId(String systemId) {
258
259        String result = systemId;
260        for (String[] translation : LEGACY_TRANSLATIONS) {
261            if (systemId.startsWith(translation[0])) {
262                // replace prefix with second component if it matches the first component
263                result = translation[1] + systemId.substring(translation[0].length());
264                break;
265            }
266        }
267        if (OpenCms.getRepositoryManager() != null) {
268            result = OpenCms.getResourceManager().getXsdTranslator().translateResource(result);
269        }
270        return result;
271    }
272
273    /**
274     * Caches an XML content definition based on the given system id and the online / offline status
275     * of this entity resolver instance.<p>
276     *
277     * @param systemId the system id to use as cache key
278     * @param contentDefinition the content definition to cache
279     */
280    public void cacheContentDefinition(String systemId, CmsXmlContentDefinition contentDefinition) {
281
282        String cacheKey = getCacheKeyForCurrentProject(systemId);
283        m_cacheContentDefinitions.put(cacheKey, contentDefinition);
284        if (LOG.isDebugEnabled()) {
285            LOG.debug(Messages.get().getBundle().key(Messages.LOG_ERR_CACHED_SYSTEM_ID_1, cacheKey));
286        }
287    }
288
289    /**
290     * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent)
291     */
292    public void cmsEvent(CmsEvent event) {
293
294        CmsResource resource;
295        switch (event.getType()) {
296            case I_CmsEventListener.EVENT_PUBLISH_PROJECT:
297                // only flush cache if a schema definition where published
298                CmsUUID publishHistoryId = new CmsUUID((String)event.getData().get(I_CmsEventListener.KEY_PUBLISHID));
299                if (isSchemaDefinitionInPublishList(publishHistoryId)) {
300                    m_cacheTemporary.clear();
301                    m_cacheContentDefinitions.clear();
302                    if (LOG.isDebugEnabled()) {
303                        LOG.debug(Messages.get().getBundle().key(Messages.LOG_ERR_FLUSHED_CACHES_0));
304                    }
305                }
306                break;
307            case I_CmsEventListener.EVENT_CLEAR_CACHES:
308                // flush cache
309                m_cacheTemporary.clear();
310                m_cacheContentDefinitions.clear();
311                if (LOG.isDebugEnabled()) {
312                    LOG.debug(Messages.get().getBundle().key(Messages.LOG_ERR_FLUSHED_CACHES_0));
313                }
314                break;
315            case I_CmsEventListener.EVENT_RESOURCE_MODIFIED:
316                Object change = event.getData().get(I_CmsEventListener.KEY_CHANGE);
317                if ((change != null) && change.equals(new Integer(CmsDriverManager.NOTHING_CHANGED))) {
318                    // skip lock & unlock
319                    return;
320                }
321                resource = (CmsResource)event.getData().get(I_CmsEventListener.KEY_RESOURCE);
322                uncacheSystemId(resource.getRootPath());
323                break;
324            case I_CmsEventListener.EVENT_RESOURCE_DELETED:
325            case I_CmsEventListener.EVENT_RESOURCE_MOVED:
326                List<CmsResource> resources = CmsCollectionsGenericWrapper.list(
327                    event.getData().get(I_CmsEventListener.KEY_RESOURCES));
328                for (int i = 0; i < resources.size(); i++) {
329                    resource = resources.get(i);
330                    uncacheSystemId(resource.getRootPath());
331                }
332                break;
333            default:
334                // no operation
335        }
336    }
337
338    /**
339     * Looks up the given XML content definition system id in the internal content definition cache.<p>
340     *
341     * @param systemId the system id of the XML content definition to look up
342     *
343     * @return the XML content definition found, or null if no definition is cached for the given system id
344     */
345    public CmsXmlContentDefinition getCachedContentDefinition(String systemId) {
346
347        String cacheKey = getCacheKeyForCurrentProject(systemId);
348        CmsXmlContentDefinition result = m_cacheContentDefinitions.get(cacheKey);
349        if ((result != null) && LOG.isDebugEnabled()) {
350            LOG.debug(Messages.get().getBundle().key(Messages.LOG_CACHE_LOOKUP_SUCCEEDED_1, cacheKey));
351        }
352        return result;
353    }
354
355    /**
356     * @see org.xml.sax.EntityResolver#resolveEntity(java.lang.String, java.lang.String)
357     */
358    public InputSource resolveEntity(String publicId, String systemId) {
359
360        // lookup the system id caches first
361        byte[] content;
362        systemId = translateLegacySystemId(systemId);
363        content = m_cachePermanent.get(systemId);
364        if (content != null) {
365            // permanent cache contains system id
366            return createInputSource(content, systemId);
367        } else if (systemId.equals(CmsXmlPage.XMLPAGE_XSD_SYSTEM_ID)) {
368
369            // XML page XSD reference
370            try {
371                InputStream stream = getClass().getClassLoader().getResourceAsStream(XMLPAGE_XSD_LOCATION);
372                content = CmsFileUtil.readFully(stream);
373                // cache the XML page DTD
374                m_cachePermanent.put(systemId, content);
375                return createInputSource(content, systemId);
376            } catch (Throwable t) {
377                LOG.error(
378                    Messages.get().getBundle().key(Messages.LOG_XMLPAGE_XSD_NOT_FOUND_1, XMLPAGE_XSD_LOCATION),
379                    t);
380            }
381
382        } else if (systemId.equals(XMLPAGE_OLD_DTD_SYSTEM_ID_1) || systemId.endsWith(XMLPAGE_OLD_DTD_SYSTEM_ID_2)) {
383
384            // XML page DTD reference
385            try {
386                InputStream stream = getClass().getClassLoader().getResourceAsStream(XMLPAGE_OLD_DTD_LOCATION);
387                // cache the XML page DTD
388                content = CmsFileUtil.readFully(stream);
389                m_cachePermanent.put(systemId, content);
390                return createInputSource(content, systemId);
391            } catch (Throwable t) {
392                LOG.error(
393                    Messages.get().getBundle().key(Messages.LOG_XMLPAGE_DTD_NOT_FOUND_1, XMLPAGE_OLD_DTD_LOCATION),
394                    t);
395            }
396        } else if ((m_cms != null) && systemId.startsWith(OPENCMS_SCHEME)) {
397
398            // opencms:// VFS reference
399            String cacheSystemId = systemId.substring(OPENCMS_SCHEME.length() - 1);
400            String cacheKey = getCacheKey(
401                cacheSystemId,
402                m_cms.getRequestContext().getCurrentProject().isOnlineProject());
403            // look up temporary cache
404            content = m_cacheTemporary.get(cacheKey);
405            if (content != null) {
406                return createInputSource(content, systemId);
407            }
408            String storedSiteRoot = m_cms.getRequestContext().getSiteRoot();
409            try {
410                // content not cached, read from VFS
411                m_cms.getRequestContext().setSiteRoot("/");
412                CmsFile file = m_cms.readFile(cacheSystemId, CmsResourceFilter.IGNORE_EXPIRATION);
413                content = file.getContents();
414                // store content in cache
415                m_cacheTemporary.put(cacheKey, content);
416                if (LOG.isDebugEnabled()) {
417                    LOG.debug(Messages.get().getBundle().key(Messages.LOG_ERR_CACHED_SYS_ID_1, cacheKey));
418                }
419                return createInputSource(content, systemId);
420            } catch (Throwable t) {
421                LOG.error(Messages.get().getBundle().key(Messages.LOG_ENTITY_RESOLVE_FAILED_1, systemId), t);
422            } finally {
423                m_cms.getRequestContext().setSiteRoot(storedSiteRoot);
424            }
425
426        } else if (systemId.startsWith(INTERNAL_SCHEME)) {
427            String location = systemId.substring(INTERNAL_SCHEME.length());
428            try {
429                InputStream stream = getClass().getClassLoader().getResourceAsStream(location);
430                content = CmsFileUtil.readFully(stream);
431                m_cachePermanent.put(systemId, content);
432                return createInputSource(content, systemId);
433            } catch (Throwable t) {
434                LOG.error(t.getLocalizedMessage(), t);
435            }
436
437        } else if (systemId.substring(0, systemId.lastIndexOf("/") + 1).equalsIgnoreCase(
438            CmsConfigurationManager.DEFAULT_DTD_PREFIX)) {
439            // default DTD location in the org.opencms.configuration package
440            String location = null;
441            try {
442                String dtdFilename = systemId.substring(systemId.lastIndexOf("/") + 1);
443                location = CmsConfigurationManager.DEFAULT_DTD_LOCATION + dtdFilename;
444                InputStream stream = getClass().getClassLoader().getResourceAsStream(location);
445                content = CmsFileUtil.readFully(stream);
446                // cache the DTD
447                m_cachePermanent.put(systemId, content);
448                return createInputSource(content, systemId);
449            } catch (Throwable t) {
450                LOG.error(Messages.get().getBundle().key(Messages.LOG_DTD_NOT_FOUND_1, location), t);
451            }
452        }
453        // use the default behaviour (i.e. resolve through external URL)
454        return null;
455    }
456
457    /**
458     * Removes a cached entry for a system id (filename) from the internal offline temporary and content definition caches.<p>
459     *
460     * The online resources cached for the online project are only flushed when a project is published.<p>
461     *
462     * @param systemId the system id (filename) to remove from the cache
463     */
464    public void uncacheSystemId(String systemId) {
465
466        Object o;
467        o = m_cacheTemporary.remove(getCacheKey(systemId, false));
468        if (null != o) {
469            // if an object was removed from the temporary cache, all XML content definitions must be cleared
470            // because this may be a nested subschema
471            m_cacheContentDefinitions.clear();
472            if (LOG.isDebugEnabled()) {
473                LOG.debug(
474                    Messages.get().getBundle().key(Messages.LOG_ERR_UNCACHED_SYS_ID_1, getCacheKey(systemId, false)));
475            }
476        } else {
477            // check if a cached content definition has to be removed based on the system id
478            o = m_cacheContentDefinitions.remove(getCacheKey(systemId, false));
479            if ((null != o) && LOG.isDebugEnabled()) {
480                LOG.debug(
481                    Messages.get().getBundle().key(
482                        Messages.LOG_ERR_UNCACHED_CONTENT_DEF_1,
483                        getCacheKey(systemId, false)));
484            }
485        }
486    }
487
488    /**
489     * Creates an input source for the given byte data and system id.<p>
490     *
491     * @param data the data which the input source should return
492     * @param systemId the system id for the input source
493     *
494     * @return the input source
495     */
496    InputSource createInputSource(byte[] data, String systemId) {
497
498        InputSource result = new InputSource(new ByteArrayInputStream(data));
499        result.setSystemId(systemId);
500        return result;
501    }
502
503    /**
504     * Returns a cache key for the given system id (filename) based on the status
505     * of the given project flag.<p>
506     *
507     * @param systemId the system id (filename) to get the cache key for
508     * @param online indicates if this key is generated for the online project
509     *
510     * @return the cache key for the system id
511     */
512    private String getCacheKey(String systemId, boolean online) {
513
514        if (online) {
515            return "online_".concat(systemId);
516        }
517        return "offline_".concat(systemId);
518    }
519
520    /**
521     * Returns a cache key for the given system id (filename) based on the status
522     * of the internal CmsObject.<p>
523     *
524     * @param systemId the system id (filename) to get the cache key for
525     *
526     * @return the cache key for the system id
527     */
528    private String getCacheKeyForCurrentProject(String systemId) {
529
530        // check the project
531        boolean project = (m_cms != null) ? m_cms.getRequestContext().getCurrentProject().isOnlineProject() : false;
532
533        // remove opencms:// prefix
534        if (systemId.startsWith(OPENCMS_SCHEME)) {
535            systemId = systemId.substring(OPENCMS_SCHEME.length() - 1);
536        }
537
538        return getCacheKey(systemId, project);
539    }
540
541    /**
542     * Proves if there is at least one xsd or dtd file in the list of resources to publish.<p>
543     *
544     * @param publishHistoryId the publish history id
545     *
546     * @return true, if there is at least one xsd or dtd file in the list of resources to publish, otherwise false
547     */
548    private boolean isSchemaDefinitionInPublishList(CmsUUID publishHistoryId) {
549
550        if (m_cms == null) {
551            // CmsObject not available, assume there may be a schema definition in the publish history
552            return true;
553        }
554        try {
555            List<CmsPublishedResource> publishedResources = m_cms.readPublishedResources(publishHistoryId);
556            for (CmsPublishedResource cmsPublishedResource : publishedResources) {
557                String resourceRootPath = cmsPublishedResource.getRootPath();
558                String resourceRootPathLowerCase = resourceRootPath.toLowerCase();
559                if (resourceRootPathLowerCase.endsWith(".xsd")
560                    || resourceRootPathLowerCase.endsWith(".dtd")
561                    || m_cacheTemporary.containsKey(getCacheKey(resourceRootPath, true))) {
562                    return true;
563                }
564            }
565        } catch (CmsException e) {
566            // error reading published Resources.
567            LOG.warn(e.getMessage(), e);
568        }
569        return false;
570    }
571}