001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.xml;
029
030import org.opencms.configuration.CmsConfigurationManager;
031import org.opencms.db.CmsDriverManager;
032import org.opencms.db.CmsPublishedResource;
033import org.opencms.file.CmsFile;
034import org.opencms.file.CmsObject;
035import org.opencms.file.CmsResource;
036import org.opencms.file.CmsResourceFilter;
037import org.opencms.main.CmsEvent;
038import org.opencms.main.CmsException;
039import org.opencms.main.CmsLog;
040import org.opencms.main.I_CmsEventListener;
041import org.opencms.main.OpenCms;
042import org.opencms.monitor.CmsMemoryMonitor;
043import org.opencms.util.CmsCollectionsGenericWrapper;
044import org.opencms.util.CmsFileUtil;
045import org.opencms.util.CmsUUID;
046import org.opencms.xml.page.CmsXmlPage;
047
048import java.io.ByteArrayInputStream;
049import java.io.InputStream;
050import java.util.List;
051import java.util.Map;
052import java.util.concurrent.ConcurrentHashMap;
053
054import org.apache.commons.logging.Log;
055
056import org.xml.sax.EntityResolver;
057import org.xml.sax.InputSource;
058
059/**
060 * Resolves XML entities (e.g. external DTDs) in the OpenCms VFS.<p>
061 *
062 * Also provides a cache for XML content schema definitions.<p>
063 *
064 * @since 6.0.0
065 */
066public class CmsXmlEntityResolver implements EntityResolver, I_CmsEventListener {
067
068    /** Maximum size of the content definition cache. */
069    public static final int CONTENT_DEFINITION_CACHE_SIZE = 2048;
070
071    /** Scheme for files which should be retrieved from the classpath. */
072    public static final String INTERNAL_SCHEME = "internal://";
073
074    /** The scheme to identify a file in the OpenCms VFS. */
075    public static final String OPENCMS_SCHEME = "opencms://";
076
077    /**
078     * A list of string pairs used to translate legacy system ids to a new form. The first component of each pair
079     * is the prefix which should be replaced by the second component of that pair.
080     */
081    private static final String[][] LEGACY_TRANSLATIONS = {
082        {"opencms://system/modules/org.opencms.ade.config/schemas/", "internal://org/opencms/xml/adeconfig/"},
083        {
084            "opencms://system/modules/org.opencms.ade.containerpage/schemas/",
085            "internal://org/opencms/xml/containerpage/"},
086        {"opencms://system/modules/org.opencms.ade.sitemap/schemas/", "internal://org/opencms/xml/adeconfig/sitemap/"}};
087
088    /** The log object for this class. */
089    private static final Log LOG = CmsLog.getLog(CmsXmlEntityResolver.class);
090
091    /** A temporary cache for XML content definitions. */
092    private static Map<String, CmsXmlContentDefinition> m_cacheContentDefinitions;
093
094    /** A permanent cache to avoid multiple readings of often used files from the VFS. */
095    private static Map<String, byte[]> m_cachePermanent;
096
097    /** A temporary cache to avoid multiple readings of often used files from the VFS. */
098    private static Map<String, byte[]> m_cacheTemporary;
099
    /** The classpath location of the (old) XML page DTD. */
101    private static final String XMLPAGE_OLD_DTD_LOCATION = "org/opencms/xml/page/xmlpage.dtd";
102
103    /** The (old) DTD address of the OpenCms xmlpage (used in 5.3.5). */
104    private static final String XMLPAGE_OLD_DTD_SYSTEM_ID_1 = "http://www.opencms.org/dtd/6.0/xmlpage.dtd";
105
106    /** The (old) DTD address of the OpenCms xmlpage (used until 5.3.5). */
107    private static final String XMLPAGE_OLD_DTD_SYSTEM_ID_2 = "/system/shared/page.dtd";
108
109    /** The location of the xmlpage XSD. */
110    private static final String XMLPAGE_XSD_LOCATION = "org/opencms/xml/page/xmlpage.xsd";
111
112    /** The cms object to use for VFS access (will be initialized with "Guest" permissions). */
113    private CmsObject m_cms;
114
115    /**
116     * Creates a new XML entity resolver based on the provided CmsObject.<p>
117     *
118     * If the provided CmsObject is null, then the OpenCms VFS is not
119     * searched for XML entities, however the internal cache and
120     * other OpenCms internal entities not in the VFS are still resolved.<p>
121     *
122     * @param cms the cms context to use for resolving XML files from the OpenCms VFS
123     */
124    public CmsXmlEntityResolver(CmsObject cms) {
125
126        initCaches();
127        m_cms = cms;
128    }
129
130    /**
131     * Adds a system ID URL to to internal permanent cache.<p>
132     *
133     * This cache will NOT be cleared automatically.<p>
134     *
135     * @param systemId the system ID to add
136     * @param content the content of the system id
137     */
138    public static void cacheSystemId(String systemId, byte[] content) {
139
140        initCaches();
141        m_cachePermanent.put(systemId, content);
142    }
143
144    /**
145     * Checks if a given system ID URL is in the internal permanent cache.<p>
146     *
147     * This check is required to see if a XML content is based on a file that actually exists in the OpenCms VFS,
148     * or if the schema has been just cached without a VFS file.<p>
149     *
150     * @param systemId the system id ID check
151     *
152     * @return <code>true</code> if the system ID is in the internal permanent cache, <code>false</code> otherwise
153     */
154    public static boolean isCachedSystemId(String systemId) {
155
156        if (m_cachePermanent != null) {
157            return m_cachePermanent.containsKey(systemId);
158        }
159        return false;
160    }
161
162    /**
163     * Checks whether the given schema id is an internal schema id or is translated to an internal schema id.<p>
164     * @param schema the schema id
165     * @return true if the given schema id is an internal schema id or translated to an internal schema id
166     */
167    public static boolean isInternalId(String schema) {
168
169        String translatedId = translateLegacySystemId(schema);
170        if (translatedId.startsWith(INTERNAL_SCHEME)) {
171            return true;
172        }
173        return false;
174    }
175
176    /**
177     * Initialize the OpenCms XML entity resolver.<p>
178     *
179     * @param adminCms an initialized OpenCms user context with "Administrator" role permissions
180     * @param typeSchemaBytes the base widget type XML schema definitions
181     *
182     * @see CmsXmlContentTypeManager#initialize(CmsObject)
183     */
184    protected static void initialize(CmsObject adminCms, byte[] typeSchemaBytes) {
185
186        // create the resolver to register as event listener
187        CmsXmlEntityResolver resolver = new CmsXmlEntityResolver(adminCms);
188
189        // register this object as event listener
190        OpenCms.addCmsEventListener(
191            resolver,
192            new int[] {
193                I_CmsEventListener.EVENT_CLEAR_CACHES,
194                I_CmsEventListener.EVENT_PUBLISH_PROJECT,
195                I_CmsEventListener.EVENT_RESOURCE_MODIFIED,
196                I_CmsEventListener.EVENT_RESOURCE_MOVED,
197                I_CmsEventListener.EVENT_RESOURCE_DELETED});
198
199        // cache the base widget type XML schema definitions
200        cacheSystemId(CmsXmlContentDefinition.XSD_INCLUDE_OPENCMS, typeSchemaBytes);
201    }
202
203    /**
204     * Initializes the internal caches for permanent and temporary system IDs.<p>
205     */
206    private static void initCaches() {
207
208        if (m_cacheTemporary == null) {
209            m_cacheTemporary = CmsMemoryMonitor.createLRUCacheMap(1024);
210
211            m_cachePermanent = new ConcurrentHashMap<String, byte[]>(32);
212
213            m_cacheContentDefinitions = CmsMemoryMonitor.createLRUCacheMap(CONTENT_DEFINITION_CACHE_SIZE);
214        }
215        if (OpenCms.getRunLevel() > OpenCms.RUNLEVEL_1_CORE_OBJECT) {
216            if ((OpenCms.getMemoryMonitor() != null)
217                && !OpenCms.getMemoryMonitor().isMonitoring(CmsXmlEntityResolver.class.getName() + ".cacheTemporary")) {
218                // reinitialize the caches after the memory monitor is set up
219                Map<String, byte[]> cacheTemporary = CmsMemoryMonitor.createLRUCacheMap(128);
220                cacheTemporary.putAll(m_cacheTemporary);
221                m_cacheTemporary = cacheTemporary;
222                OpenCms.getMemoryMonitor().register(
223                    CmsXmlEntityResolver.class.getName() + ".cacheTemporary",
224                    cacheTemporary);
225
226                Map<String, byte[]> cachePermanent = new ConcurrentHashMap<String, byte[]>(32);
227                cachePermanent.putAll(m_cachePermanent);
228                m_cachePermanent = cachePermanent;
229                OpenCms.getMemoryMonitor().register(
230                    CmsXmlEntityResolver.class.getName() + ".cachePermanent",
231                    cachePermanent);
232
233                Map<String, CmsXmlContentDefinition> cacheContentDefinitions = CmsMemoryMonitor.createLRUCacheMap(
234                    CONTENT_DEFINITION_CACHE_SIZE);
235                cacheContentDefinitions.putAll(m_cacheContentDefinitions);
236                m_cacheContentDefinitions = cacheContentDefinitions;
237                OpenCms.getMemoryMonitor().register(
238                    CmsXmlEntityResolver.class.getName() + ".cacheContentDefinitions",
239                    cacheContentDefinitions);
240            }
241        }
242    }
243
244    /**
245     * Translates a legacy system id to a new form.<p>
246     *
247     * @param systemId the original system id
248     * @return the new system id
249     */
250    private static String translateLegacySystemId(String systemId) {
251
252        String result = systemId;
253        for (String[] translation : LEGACY_TRANSLATIONS) {
254            if (systemId.startsWith(translation[0])) {
255                // replace prefix with second component if it matches the first component
256                result = translation[1] + systemId.substring(translation[0].length());
257                break;
258            }
259        }
260        if (OpenCms.getRepositoryManager() != null) {
261            result = OpenCms.getResourceManager().getXsdTranslator().translateResource(result);
262        }
263        return result;
264    }
265
266    /**
267     * Caches an XML content definition based on the given system id and the online / offline status
268     * of this entity resolver instance.<p>
269     *
270     * @param systemId the system id to use as cache key
271     * @param contentDefinition the content definition to cache
272     */
273    public void cacheContentDefinition(String systemId, CmsXmlContentDefinition contentDefinition) {
274
275        String cacheKey = getCacheKeyForCurrentProject(systemId);
276        m_cacheContentDefinitions.put(cacheKey, contentDefinition);
277        if (LOG.isDebugEnabled()) {
278            LOG.debug(Messages.get().getBundle().key(Messages.LOG_ERR_CACHED_SYSTEM_ID_1, cacheKey));
279        }
280    }
281
282    /**
283     * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent)
284     */
285    public void cmsEvent(CmsEvent event) {
286
287        CmsResource resource;
288        switch (event.getType()) {
289            case I_CmsEventListener.EVENT_PUBLISH_PROJECT:
290                // only flush cache if a schema definition where published
291                CmsUUID publishHistoryId = new CmsUUID((String)event.getData().get(I_CmsEventListener.KEY_PUBLISHID));
292                if (isSchemaDefinitionInPublishList(publishHistoryId)) {
293                    m_cacheTemporary.clear();
294                    m_cacheContentDefinitions.clear();
295                    if (LOG.isDebugEnabled()) {
296                        LOG.debug(Messages.get().getBundle().key(Messages.LOG_ERR_FLUSHED_CACHES_0));
297                    }
298                }
299                break;
300            case I_CmsEventListener.EVENT_CLEAR_CACHES:
301                // flush cache
302                m_cacheTemporary.clear();
303                m_cacheContentDefinitions.clear();
304                if (LOG.isDebugEnabled()) {
305                    LOG.debug(Messages.get().getBundle().key(Messages.LOG_ERR_FLUSHED_CACHES_0));
306                }
307                break;
308            case I_CmsEventListener.EVENT_RESOURCE_MODIFIED:
309                Object change = event.getData().get(I_CmsEventListener.KEY_CHANGE);
310                if ((change != null) && change.equals(new Integer(CmsDriverManager.NOTHING_CHANGED))) {
311                    // skip lock & unlock
312                    return;
313                }
314                resource = (CmsResource)event.getData().get(I_CmsEventListener.KEY_RESOURCE);
315                uncacheSystemId(resource.getRootPath());
316                break;
317            case I_CmsEventListener.EVENT_RESOURCE_DELETED:
318            case I_CmsEventListener.EVENT_RESOURCE_MOVED:
319                List<CmsResource> resources = CmsCollectionsGenericWrapper.list(
320                    event.getData().get(I_CmsEventListener.KEY_RESOURCES));
321                for (int i = 0; i < resources.size(); i++) {
322                    resource = resources.get(i);
323                    uncacheSystemId(resource.getRootPath());
324                }
325                break;
326            default:
327                // no operation
328        }
329    }
330
331    /**
332     * Looks up the given XML content definition system id in the internal content definition cache.<p>
333     *
334     * @param systemId the system id of the XML content definition to look up
335     *
336     * @return the XML content definition found, or null if no definition is cached for the given system id
337     */
338    public CmsXmlContentDefinition getCachedContentDefinition(String systemId) {
339
340        String cacheKey = getCacheKeyForCurrentProject(systemId);
341        CmsXmlContentDefinition result = m_cacheContentDefinitions.get(cacheKey);
342        if ((result != null) && LOG.isDebugEnabled()) {
343            LOG.debug(Messages.get().getBundle().key(Messages.LOG_CACHE_LOOKUP_SUCCEEDED_1, cacheKey));
344        }
345        return result;
346    }
347
348    /**
349     * @see org.xml.sax.EntityResolver#resolveEntity(java.lang.String, java.lang.String)
350     */
351    public InputSource resolveEntity(String publicId, String systemId) {
352
353        // lookup the system id caches first
354        byte[] content;
355        systemId = translateLegacySystemId(systemId);
356        content = m_cachePermanent.get(systemId);
357        if (content != null) {
358            // permanent cache contains system id
359            return createInputSource(content, systemId);
360        } else if (systemId.equals(CmsXmlPage.XMLPAGE_XSD_SYSTEM_ID)) {
361
362            // XML page XSD reference
363            try {
364                InputStream stream = getClass().getClassLoader().getResourceAsStream(XMLPAGE_XSD_LOCATION);
365                content = CmsFileUtil.readFully(stream);
366                // cache the XML page DTD
367                m_cachePermanent.put(systemId, content);
368                return createInputSource(content, systemId);
369            } catch (Throwable t) {
370                LOG.error(
371                    Messages.get().getBundle().key(Messages.LOG_XMLPAGE_XSD_NOT_FOUND_1, XMLPAGE_XSD_LOCATION),
372                    t);
373            }
374
375        } else if (systemId.equals(XMLPAGE_OLD_DTD_SYSTEM_ID_1) || systemId.endsWith(XMLPAGE_OLD_DTD_SYSTEM_ID_2)) {
376
377            // XML page DTD reference
378            try {
379                InputStream stream = getClass().getClassLoader().getResourceAsStream(XMLPAGE_OLD_DTD_LOCATION);
380                // cache the XML page DTD
381                content = CmsFileUtil.readFully(stream);
382                m_cachePermanent.put(systemId, content);
383                return createInputSource(content, systemId);
384            } catch (Throwable t) {
385                LOG.error(
386                    Messages.get().getBundle().key(Messages.LOG_XMLPAGE_DTD_NOT_FOUND_1, XMLPAGE_OLD_DTD_LOCATION),
387                    t);
388            }
389        } else if ((m_cms != null) && systemId.startsWith(OPENCMS_SCHEME)) {
390
391            // opencms:// VFS reference
392            String cacheSystemId = systemId.substring(OPENCMS_SCHEME.length() - 1);
393            String cacheKey = getCacheKey(
394                cacheSystemId,
395                m_cms.getRequestContext().getCurrentProject().isOnlineProject());
396            // look up temporary cache
397            content = m_cacheTemporary.get(cacheKey);
398            if (content != null) {
399                return createInputSource(content, systemId);
400            }
401            String storedSiteRoot = m_cms.getRequestContext().getSiteRoot();
402            try {
403                // content not cached, read from VFS
404                m_cms.getRequestContext().setSiteRoot("/");
405                CmsFile file = m_cms.readFile(cacheSystemId, CmsResourceFilter.IGNORE_EXPIRATION);
406                content = file.getContents();
407                // store content in cache
408                m_cacheTemporary.put(cacheKey, content);
409                if (LOG.isDebugEnabled()) {
410                    LOG.debug(Messages.get().getBundle().key(Messages.LOG_ERR_CACHED_SYS_ID_1, cacheKey));
411                }
412                return createInputSource(content, systemId);
413            } catch (Throwable t) {
414                LOG.error(Messages.get().getBundle().key(Messages.LOG_ENTITY_RESOLVE_FAILED_1, systemId), t);
415            } finally {
416                m_cms.getRequestContext().setSiteRoot(storedSiteRoot);
417            }
418
419        } else if (systemId.startsWith(INTERNAL_SCHEME)) {
420            String location = systemId.substring(INTERNAL_SCHEME.length());
421            try {
422                InputStream stream = getClass().getClassLoader().getResourceAsStream(location);
423                content = CmsFileUtil.readFully(stream);
424                m_cachePermanent.put(systemId, content);
425                return createInputSource(content, systemId);
426            } catch (Throwable t) {
427                LOG.error(t.getLocalizedMessage(), t);
428            }
429
430        } else if (systemId.substring(0, systemId.lastIndexOf("/") + 1).equalsIgnoreCase(
431            CmsConfigurationManager.DEFAULT_DTD_PREFIX)) {
432            // default DTD location in the org.opencms.configuration package
433            String location = null;
434            try {
435                String dtdFilename = systemId.substring(systemId.lastIndexOf("/") + 1);
436                location = CmsConfigurationManager.DEFAULT_DTD_LOCATION + dtdFilename;
437                InputStream stream = getClass().getClassLoader().getResourceAsStream(location);
438                content = CmsFileUtil.readFully(stream);
439                // cache the DTD
440                m_cachePermanent.put(systemId, content);
441                return createInputSource(content, systemId);
442            } catch (Throwable t) {
443                LOG.error(Messages.get().getBundle().key(Messages.LOG_DTD_NOT_FOUND_1, location), t);
444            }
445        }
446        // use the default behaviour (i.e. resolve through external URL)
447        return null;
448    }
449
450    /**
451     * Removes a cached entry for a system id (filename) from the internal offline temporary and content definition caches.<p>
452     *
453     * The online resources cached for the online project are only flushed when a project is published.<p>
454     *
455     * @param systemId the system id (filename) to remove from the cache
456     */
457    public void uncacheSystemId(String systemId) {
458
459        Object o;
460        o = m_cacheTemporary.remove(getCacheKey(systemId, false));
461        if (null != o) {
462            // if an object was removed from the temporary cache, all XML content definitions must be cleared
463            // because this may be a nested subschema
464            m_cacheContentDefinitions.clear();
465            if (LOG.isDebugEnabled()) {
466                LOG.debug(
467                    Messages.get().getBundle().key(Messages.LOG_ERR_UNCACHED_SYS_ID_1, getCacheKey(systemId, false)));
468            }
469        } else {
470            // check if a cached content definition has to be removed based on the system id
471            o = m_cacheContentDefinitions.remove(getCacheKey(systemId, false));
472            if ((null != o) && LOG.isDebugEnabled()) {
473                LOG.debug(
474                    Messages.get().getBundle().key(
475                        Messages.LOG_ERR_UNCACHED_CONTENT_DEF_1,
476                        getCacheKey(systemId, false)));
477            }
478        }
479    }
480
481    /**
482     * Creates an input source for the given byte data and system id.<p>
483     *
484     * @param data the data which the input source should return
485     * @param systemId the system id for the input source
486     *
487     * @return the input source
488     */
489    InputSource createInputSource(byte[] data, String systemId) {
490
491        InputSource result = new InputSource(new ByteArrayInputStream(data));
492        result.setSystemId(systemId);
493        return result;
494    }
495
496    /**
497     * Returns a cache key for the given system id (filename) based on the status
498     * of the given project flag.<p>
499     *
500     * @param systemId the system id (filename) to get the cache key for
501     * @param online indicates if this key is generated for the online project
502     *
503     * @return the cache key for the system id
504     */
505    private String getCacheKey(String systemId, boolean online) {
506
507        if (online) {
508            return "online_".concat(systemId);
509        }
510        return "offline_".concat(systemId);
511    }
512
513    /**
514     * Returns a cache key for the given system id (filename) based on the status
515     * of the internal CmsObject.<p>
516     *
517     * @param systemId the system id (filename) to get the cache key for
518     *
519     * @return the cache key for the system id
520     */
521    private String getCacheKeyForCurrentProject(String systemId) {
522
523        // check the project
524        boolean project = (m_cms != null) ? m_cms.getRequestContext().getCurrentProject().isOnlineProject() : false;
525
526        // remove opencms:// prefix
527        if (systemId.startsWith(OPENCMS_SCHEME)) {
528            systemId = systemId.substring(OPENCMS_SCHEME.length() - 1);
529        }
530
531        return getCacheKey(systemId, project);
532    }
533
534    /**
535     * Proves if there is at least one xsd or dtd file in the list of resources to publish.<p>
536     *
537     * @param publishHistoryId the publish history id
538     *
539     * @return true, if there is at least one xsd or dtd file in the list of resources to publish, otherwise false
540     */
541    private boolean isSchemaDefinitionInPublishList(CmsUUID publishHistoryId) {
542
543        if (m_cms == null) {
544            // CmsObject not available, assume there may be a schema definition in the publish history
545            return true;
546        }
547        try {
548            List<CmsPublishedResource> publishedResources = m_cms.readPublishedResources(publishHistoryId);
549            for (CmsPublishedResource cmsPublishedResource : publishedResources) {
550                String resourceRootPath = cmsPublishedResource.getRootPath();
551                String resourceRootPathLowerCase = resourceRootPath.toLowerCase();
552                if (resourceRootPathLowerCase.endsWith(".xsd")
553                    || resourceRootPathLowerCase.endsWith(".dtd")
554                    || m_cacheTemporary.containsKey(getCacheKey(resourceRootPath, true))) {
555                    return true;
556                }
557            }
558        } catch (CmsException e) {
559            // error reading published Resources.
560            LOG.warn(e.getMessage(), e);
561        }
562        return false;
563    }
564}