001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (C) Alkacon Software (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.file;
029
030import org.opencms.file.types.A_CmsResourceTypeLinkParseable;
031import org.opencms.file.types.CmsResourceTypeJsp;
032import org.opencms.file.types.I_CmsResourceType;
033import org.opencms.i18n.CmsEncoder;
034import org.opencms.loader.CmsLoaderException;
035import org.opencms.lock.CmsLock;
036import org.opencms.main.CmsException;
037import org.opencms.main.CmsIllegalArgumentException;
038import org.opencms.main.CmsLog;
039import org.opencms.main.OpenCms;
040import org.opencms.relations.CmsRelation;
041import org.opencms.relations.CmsRelationFilter;
042import org.opencms.relations.CmsRelationType;
043import org.opencms.relations.I_CmsLinkParseable;
044import org.opencms.util.CmsFileUtil;
045import org.opencms.util.CmsPair;
046import org.opencms.util.CmsStringUtil;
047import org.opencms.util.CmsUUID;
048import org.opencms.util.I_CmsRegexSubstitution;
049import org.opencms.xml.CmsXmlEntityResolver;
050import org.opencms.xml.CmsXmlException;
051import org.opencms.xml.CmsXmlUtils;
052import org.opencms.xml.content.Messages;
053
054import java.io.UnsupportedEncodingException;
055import java.util.ArrayList;
056import java.util.Collection;
057import java.util.HashMap;
058import java.util.HashSet;
059import java.util.List;
060import java.util.Map;
061import java.util.Set;
062import java.util.regex.Matcher;
063import java.util.regex.Pattern;
064
065import org.apache.commons.logging.Log;
066
067import org.dom4j.Document;
068
069import com.google.common.collect.ArrayListMultimap;
070import com.google.common.collect.Lists;
071import com.google.common.collect.Multimap;
072
073/**
074 * A class used to rewrite links and relations in one subtree such that relations from that subtree to another given subtree
075 * replaced with relations to the first subtree.<p>
076 */
077public class CmsLinkRewriter {
078
079    /** The logger instance for this class. */
080    private static final Log LOG = CmsLog.getLog(CmsLinkRewriter.class);
081
082    /** A map from source folder structure ids to corresponding target folder resources. */
083    protected Map<CmsUUID, CmsResource> m_translationsById = new HashMap<CmsUUID, CmsResource>();
084
085    /** A map from source folder root paths to the corresponding target folder resources. */
086    protected Map<String, CmsResource> m_translationsByPath = new HashMap<String, CmsResource>();
087
088    /** A map of resources which have been cached by structure id. */
089    private Map<CmsUUID, CmsResource> m_cachedResources = new HashMap<CmsUUID, CmsResource>();
090
091    /** The CMS object used for file operations. */
092    private CmsObject m_cms;
093
094    /** If true, all XML contents will be rewritten instead of just those containing links to correct. */
095    private boolean m_rewriteAllXmlContents = true;
096
097    /** The set of structure ids of resources whose content has been rewritten. */
098    private Set<CmsUUID> m_rewrittenContent = new HashSet<CmsUUID>();
099
100    /** A list of path pairs, each containing a source and a target of a copy operation. */
101    private List<CmsPair<String, String>> m_sourceTargetPairs = new ArrayList<CmsPair<String, String>>();
102
103    /** The target folder root path. */
104    private String m_targetPath;
105
106    /**
107     * Creates a link rewriter for use after a multi-copy operation.<p>
108     *
109     * @param cms the current CMS context
110     * @param sources the list of source root paths
111     * @param target the target parent folder root path
112     */
113    public CmsLinkRewriter(CmsObject cms, List<String> sources, String target) {
114
115        m_sourceTargetPairs = new ArrayList<CmsPair<String, String>>();
116        for (String source : sources) {
117            checkNotSubPath(source, target);
118            String targetSub = CmsStringUtil.joinPaths(target, CmsResource.getName(source));
119            m_sourceTargetPairs.add(CmsPair.create(source, targetSub));
120        }
121        m_targetPath = target;
122        m_cms = cms;
123    }
124
125    /**
126     * Creates a new link rewriter for a list of sources and corresponding targets.<p>
127     *
128     * @param cms the current CMS context
129     * @param targetPath the target root path
130     * @param sourceTargetPairs the list of source-target pairs
131     */
132    public CmsLinkRewriter(CmsObject cms, String targetPath, List<CmsPair<String, String>> sourceTargetPairs) {
133
134        m_cms = cms;
135        m_targetPath = targetPath;
136        m_sourceTargetPairs = sourceTargetPairs;
137    }
138
139    /**
140     * Creates a link rewriter for use after a single copy operation.<p>
141     *
142     * @param cms the current CMS context
143     * @param source the source folder root path
144     * @param target the target folder root path
145     */
146    public CmsLinkRewriter(CmsObject cms, String source, String target) {
147
148        m_sourceTargetPairs = new ArrayList<CmsPair<String, String>>();
149        checkNotSubPath(source, target);
150
151        m_sourceTargetPairs.add(CmsPair.create(source, target));
152        m_targetPath = target;
153        m_cms = cms;
154    }
155
156    /**
157     * Checks whether a given resource is a folder and throws an exception otherwise.<p>
158     *
159     * @param resource the resource to check
160     * @throws CmsException if something goes wrong
161     */
162    protected static void checkIsFolder(CmsResource resource) throws CmsException {
163
164        if (!isFolder(resource)) {
165            throw new CmsIllegalArgumentException(Messages.get().container(
166                org.opencms.file.Messages.ERR_REWRITE_LINKS_ROOT_NOT_FOLDER_1,
167                resource.getRootPath()));
168        }
169    }
170
171    /**
172     * Helper method to check whether a given resource is a folder.<p>
173     *
174     * @param resource the resouce to check
175     * @return true if the resource is a folder
176     *
177     * @throws CmsLoaderException if the resource type couldn't be found
178     */
179    protected static boolean isFolder(CmsResource resource) throws CmsLoaderException {
180
181        I_CmsResourceType resourceType = OpenCms.getResourceManager().getResourceType(resource.getTypeId());
182        return resourceType.isFolder();
183    }
184
185    /**
186     * Starts the link rewriting process.<p>
187     *
188     * @throws CmsException if something goes wrong
189     */
190    public void rewriteLinks() throws CmsException {
191
192        init();
193        List<CmsRelation> relationsToCorrect = findRelationsFromTargetToSource();
194        // group relations by the structure id of their source
195        Multimap<CmsUUID, CmsRelation> relationsBySourceId = ArrayListMultimap.create();
196        for (CmsRelation relation : relationsToCorrect) {
197            LOG.info(
198                "Found relation which needs to be corrected: "
199                    + relation.getSourcePath()
200                    + " -> "
201                    + relation.getTargetPath()
202                    + " ["
203                    + relation.getType().getName()
204                    + "]");
205            relationsBySourceId.put(relation.getSourceId(), relation);
206        }
207
208        // make sure we have a lock on the target folder before doing any write operations
209        CmsLock lock = m_cms.getLock(m_targetPath);
210        if (lock.isUnlocked() || !lock.isOwnedBy(m_cms.getRequestContext().getCurrentUser())) {
211            // fail if locked by another user
212            m_cms.lockResource(m_targetPath);
213        }
214
215        for (CmsUUID structureId : relationsBySourceId.keySet()) {
216
217            Collection<CmsRelation> relationsForResource = relationsBySourceId.get(structureId);
218            CmsResource resource = null;
219            try {
220                resource = getResource(structureId);
221                rewriteLinks(resource, relationsForResource);
222            } catch (CmsException e) {
223                LOG.error(e.getLocalizedMessage(), e);
224            }
225        }
226        if (!m_rewriteAllXmlContents) {
227            return;
228        }
229        for (Map.Entry<CmsUUID, CmsResource> entry : m_cachedResources.entrySet()) {
230            CmsUUID key = entry.getKey();
231            CmsResource resource = entry.getValue();
232            if (isInTargets(resource.getRootPath()) && !m_rewrittenContent.contains(key)) {
233                I_CmsResourceType resType = OpenCms.getResourceManager().getResourceType(resource.getTypeId());
234                // rewrite content for other files so
235                if (resType instanceof A_CmsResourceTypeLinkParseable) {
236                    try {
237                        CmsFile file = m_cms.readFile(resource);
238                        m_cms.writeFile(file);
239                    } catch (CmsException e) {
240                        LOG.error(e.getLocalizedMessage(), e);
241                    }
242                }
243
244            }
245        }
246    }
247
248    /**
249     * Sets the 'rewriteAllContents' flag, which controls whether all XML contents will be rewritten
250     * or just those whose links need to be corrected.<p>
251     *
252     * @param rewriteAllContents if true, all contents will be rewritten
253     */
254    public void setRewriteAllContents(boolean rewriteAllContents) {
255
256        m_rewriteAllXmlContents = rewriteAllContents;
257    }
258
259    /**
260     * Checks that the target path is not a subfolder of the source path.<p>
261     *
262     * @param source the source path
263     * @param target the target path
264     */
265    protected void checkNotSubPath(String source, String target) {
266
267        source = CmsStringUtil.joinPaths("/", source, "/");
268        target = CmsStringUtil.joinPaths("/", target, "/");
269        if (target.startsWith(source)) {
270            throw new CmsIllegalArgumentException(
271                org.opencms.file.Messages.get().container(
272                    org.opencms.file.Messages.ERR_REWRITE_LINKS_ROOTS_DEPENDENT_2,
273                    source,
274                    target));
275        }
276    }
277
278    /**
279     * Decodes a byte array into a string with a given encoding, or the default encoding if that fails.<p>
280     *
281     * @param bytes the byte array
282     * @param encoding the encoding to use
283     *
284     * @return the decoded string
285     */
286    protected String decode(byte[] bytes, String encoding) {
287
288        try {
289            return new String(bytes, encoding);
290        } catch (UnsupportedEncodingException e) {
291            return new String(bytes);
292        }
293    }
294
295    /**
296     * Decodes a file's contents and return the content string and the encoding to use for writing the file
297     * back to the VFS.<p>
298     *
299     * @param file the file to decode
300     * @return a pair (content, encoding)
301     * @throws CmsException if something goes wrong
302     */
303    protected CmsPair<String, String> decode(CmsFile file) throws CmsException {
304
305        String content = null;
306        String encoding = getConfiguredEncoding(m_cms, file);
307        I_CmsResourceType resType = OpenCms.getResourceManager().getResourceType(file.getTypeId());
308        if (resType instanceof CmsResourceTypeJsp) {
309            content = decode(file.getContents(), encoding);
310        } else {
311            try {
312                CmsXmlEntityResolver resolver = new CmsXmlEntityResolver(m_cms);
313                // parse the XML and serialize it back to  a string with the configured encoding
314                Document doc = CmsXmlUtils.unmarshalHelper(file.getContents(), resolver);
315                content = CmsXmlUtils.marshal(doc, encoding);
316            } catch (Exception e) {
317                // invalid xml structure, just use the configured encoding
318                content = decode(file.getContents(), encoding);
319            }
320        }
321        return CmsPair.create(content, encoding);
322    }
323
324    /**
325     * Finds relations from the target root folder or its children to the source root folder or its children.<p>
326     *
327     * @return the list of relations from the target to the source
328     *
329     * @throws CmsException if something goes wrong
330     */
331    protected List<CmsRelation> findRelationsFromTargetToSource() throws CmsException {
332
333        List<CmsRelation> relations = m_cms.readRelations(
334            CmsRelationFilter.SOURCES.filterPath(m_targetPath).filterIncludeChildren());
335        List<CmsRelation> result = new ArrayList<CmsRelation>();
336        for (CmsRelation rel : relations) {
337            if (isInTargets(rel.getSourcePath()) && isInSources(rel.getTargetPath())) {
338                result.add(rel);
339            }
340        }
341        return result;
342    }
343
344    /**
345     * Gets the encoding which is configured at the location of a given resource.<p>
346     *
347     * @param cms the current CMS context
348     * @param resource the resource for which the configured encoding should be retrieved
349     * @return the configured encoding for the resource
350     *
351     * @throws CmsException if something goes wrong
352     */
353    protected String getConfiguredEncoding(CmsObject cms, CmsResource resource) throws CmsException {
354
355        String encoding = null;
356        try {
357            encoding = cms.readPropertyObject(
358                resource.getRootPath(),
359                CmsPropertyDefinition.PROPERTY_CONTENT_ENCODING,
360                true).getValue();
361        } catch (CmsException e) {
362            // encoding will be null
363        }
364        if (encoding == null) {
365            encoding = OpenCms.getSystemInfo().getDefaultEncoding();
366        } else {
367            encoding = CmsEncoder.lookupEncoding(encoding, null);
368            if (encoding == null) {
369                throw new CmsXmlException(
370                    Messages.get().container(Messages.ERR_XMLCONTENT_INVALID_ENC_1, resource.getRootPath()));
371            }
372        }
373        return encoding;
374    }
375
376    /**
377     * Gets a list of resource pairs whose paths relative to the source/target roots passed match.<p>
378     *
379     * @param source the source root
380     * @param target the target root
381     *
382     * @return the list of matching resources
383     *
384     * @throws CmsException if something goes wrong
385     */
386    protected List<CmsPair<CmsResource, CmsResource>> getMatchingResources(String source, String target)
387    throws CmsException {
388
389        List<CmsResource> sourceResources = readTree(source);
390        Map<String, CmsResource> sourceRelative = getResourcesByRelativePath(sourceResources, source);
391
392        List<CmsResource> targetResources = readTree(target);
393        Map<String, CmsResource> targetRelative = getResourcesByRelativePath(targetResources, target);
394
395        List<CmsPair<CmsResource, CmsResource>> result = new ArrayList<CmsPair<CmsResource, CmsResource>>();
396        sourceRelative.keySet().retainAll(targetRelative.keySet());
397        for (Map.Entry<String, CmsResource> entry : sourceRelative.entrySet()) {
398            String key = entry.getKey();
399            CmsResource sourceRes = entry.getValue();
400            CmsResource targetRes = targetRelative.get(key);
401            result.add(CmsPair.create(sourceRes, targetRes));
402        }
403        return result;
404    }
405
406    /**
407     * Computes the relative path given an ancestor folder path.<p>
408     *
409     * @param ancestor the ancestor folder
410     * @param rootPath the path for which the relative path should be computed
411     *
412     * @return the relative path
413     */
414    protected String getRelativePath(String ancestor, String rootPath) {
415
416        String result = rootPath.substring(ancestor.length());
417        result = CmsStringUtil.joinPaths("/", result, "/");
418        return result;
419    }
420
421    /**
422     * Accesses a resource by structure id.<p>
423     *
424     * @param structureId the structure id of the resource
425     * @return the resource with the given structure id
426     *
427     * @throws CmsException if the resource couldn't be read
428     */
429    protected CmsResource getResource(CmsUUID structureId) throws CmsException {
430
431        if (m_cachedResources.containsKey(structureId)) {
432            return m_cachedResources.get(structureId);
433        }
434        return m_cms.readResource(structureId);
435    }
436
437    /**
438     * Collects a list of resources in a map where the key for each resource is the path relative to a given folder.<p>
439     *
440     * @param resources the resources to put in the map
441     * @param basePath the path relative to which the keys of the resulting map should be computed
442     *
443     * @return a map from relative paths to resources
444     */
445    protected Map<String, CmsResource> getResourcesByRelativePath(List<CmsResource> resources, String basePath) {
446
447        Map<String, CmsResource> result = new HashMap<String, CmsResource>();
448        for (CmsResource resource : resources) {
449            String relativeSubPath = CmsStringUtil.getRelativeSubPath(basePath, resource.getRootPath());
450            if (relativeSubPath != null) {
451                result.put(relativeSubPath, resource);
452            }
453        }
454        return result;
455    }
456
457    /**
458     * Reads the data needed for rewriting the relations from the VFS.<p>
459     *
460     * @throws CmsException if something goes wrong
461     */
462    protected void init() throws CmsException {
463
464        m_cms = OpenCms.initCmsObject(m_cms);
465        // we want to use autocorrection when writing XML contents back
466        //m_cms.getRequestContext().setAttribute(CmsXmlContent.AUTO_CORRECTION_ATTRIBUTE, Boolean.TRUE);
467        m_cms.getRequestContext().setSiteRoot("");
468        List<CmsPair<CmsResource, CmsResource>> allMatchingResources = Lists.newArrayList();
469        for (CmsPair<String, String> pair : m_sourceTargetPairs) {
470            List<CmsPair<CmsResource, CmsResource>> matchingResources = getMatchingResources(
471                pair.getFirst(),
472                pair.getSecond());
473            allMatchingResources.addAll(matchingResources);
474        }
475        for (CmsPair<CmsResource, CmsResource> resPair : allMatchingResources) {
476            CmsResource source = resPair.getFirst();
477            CmsResource target = resPair.getSecond();
478            m_translationsById.put(source.getStructureId(), target);
479            m_translationsByPath.put(source.getRootPath(), target);
480        }
481    }
482
483    /**
484     * Checks if a path belongs to one of the sources.<p>
485     *
486     * @param path a root path
487     *
488     * @return true if the path belongs to the sources
489     */
490    protected boolean isInSources(String path) {
491
492        for (CmsPair<String, String> sourceTargetPair : m_sourceTargetPairs) {
493            String source = sourceTargetPair.getFirst();
494            if (CmsStringUtil.joinPaths(path, "/").startsWith(CmsStringUtil.joinPaths(source, "/"))) {
495                return true;
496            }
497        }
498        return false;
499    }
500
501    /**
502     * Checks if a path belongs to one of the targets.<p>
503     *
504     * @param path a root path
505     *
506     * @return true if the path belongs to the targets
507     */
508    protected boolean isInTargets(String path) {
509
510        for (CmsPair<String, String> sourceTargetPair : m_sourceTargetPairs) {
511            String target = sourceTargetPair.getSecond();
512            if (CmsStringUtil.joinPaths(path, "/").startsWith(CmsStringUtil.joinPaths(target, "/"))) {
513                return true;
514            }
515        }
516        return false;
517    }
518
519    /**
520     * Reads the resources in a subtree.<p>
521     *
522     * @param rootPath the root of the subtree
523     *
524     * @return the list of resources from the subtree
525     *
526     * @throws CmsException if something goes wrong
527     */
528    protected List<CmsResource> readTree(String rootPath) throws CmsException {
529
530        rootPath = CmsFileUtil.removeTrailingSeparator(rootPath);
531        CmsResource base = m_cms.readResource(rootPath);
532
533        I_CmsResourceType resType = OpenCms.getResourceManager().getResourceType(base);
534        List<CmsResource> result = new ArrayList<CmsResource>();
535        if (resType.isFolder()) {
536            rootPath = CmsStringUtil.joinPaths(rootPath, "/");
537            List<CmsResource> subResources = m_cms.readResources(rootPath, CmsResourceFilter.ALL, true);
538            result.add(base);
539            result.addAll(subResources);
540        } else {
541            result.add(base);
542        }
543        for (CmsResource resource : result) {
544            m_cachedResources.put(resource.getStructureId(), resource);
545        }
546
547        return result;
548    }
549
550    /**
551     * Rewrites the links included in the content itself.<p>
552     *
553     * @param file the file for which the links should be replaced
554     * @param relations the original relations
555     *
556     * @throws CmsException if something goes wrong
557     */
558    protected void rewriteContent(CmsFile file, Collection<CmsRelation> relations) throws CmsException {
559
560        LOG.info("Rewriting in-content links for " + file.getRootPath());
561        CmsPair<String, String> contentAndEncoding = decode(file);
562        String content = contentAndEncoding.getFirst();
563        String encodingForSave = contentAndEncoding.getSecond();
564        String newContent = rewriteContentString(content);
565        byte[] newContentBytes;
566        try {
567            newContentBytes = newContent.getBytes(encodingForSave);
568        } catch (UnsupportedEncodingException e) {
569            newContentBytes = newContent.getBytes();
570        }
571        file.setContents(newContentBytes);
572        m_cms.writeFile(file);
573    }
574
575    /**
576     * Replaces structure ids of resources in the source subtree with the structure ids of the corresponding
577     * resources in the target subtree inside a content string.<p>
578     *
579     * @param originalContent the original content
580     *
581     * @return the content with the new structure ids
582     */
583    protected String rewriteContentString(String originalContent) {
584
585        Pattern uuidPattern = Pattern.compile(CmsUUID.UUID_REGEX);
586        I_CmsRegexSubstitution substitution = new I_CmsRegexSubstitution() {
587
588            public String substituteMatch(String text, Matcher matcher) {
589
590                String uuidString = text.substring(matcher.start(), matcher.end());
591                CmsUUID uuid = new CmsUUID(uuidString);
592                String result = uuidString;
593                if (m_translationsById.containsKey(uuid)) {
594                    result = m_translationsById.get(uuid).getStructureId().toString();
595                }
596                return result;
597            }
598        };
599        return CmsStringUtil.substitute(uuidPattern, originalContent, substitution);
600    }
601
602    /**
603     * Rewrites the links for a single resource.<p>
604     *
605     * @param resource the resource for which the links should be rewritten
606     * @param relations the relations to the source folder which have this resource as its source
607     *
608     * @throws CmsException if something goes wrong
609     */
610    protected void rewriteLinks(CmsResource resource, Collection<CmsRelation> relations) throws CmsException {
611
612        LOG.info("Rewriting relations for resource " + resource.getRootPath());
613        I_CmsResourceType resourceType = OpenCms.getResourceManager().getResourceType(resource.getTypeId());
614        boolean hasContentLinks = false;
615        boolean hasOtherLinks = false;
616
617        for (CmsRelation relation : relations) {
618            if (relation.getType().isDefinedInContent()) {
619                hasContentLinks = true;
620            } else {
621                hasOtherLinks = true;
622            }
623        }
624        if (hasContentLinks) {
625            LOG.info("The resource " + resource.getRootPath() + " has links in the content.");
626        }
627        if (hasOtherLinks) {
628            LOG.info("The resource " + resource.getRootPath() + " has non-content links.");
629        }
630
631        if (hasContentLinks) {
632            if (resourceType instanceof I_CmsLinkParseable) {
633                CmsFile file = m_cms.readFile(resource);
634                rewriteContent(file, relations);
635                m_rewrittenContent.add(file.getStructureId());
636            }
637        }
638        if (hasOtherLinks) {
639            rewriteOtherRelations(resource, relations);
640        }
641    }
642
643    /**
644     * Rewrites relations which are not derived from links in the content itself.<p>
645     *
646     * @param res the resource for which to rewrite the relations
647     * @param relations the original relations
648     *
649     * @throws CmsException if something goes wrong
650     */
651    protected void rewriteOtherRelations(CmsResource res, Collection<CmsRelation> relations) throws CmsException {
652
653        LOG.info("Rewriting non-content links for " + res.getRootPath());
654        for (CmsRelation rel : relations) {
655            CmsUUID targetId = rel.getTargetId();
656            CmsResource newTargetResource = m_translationsById.get(targetId);
657            CmsRelationType relType = rel.getType();
658            if (!relType.isDefinedInContent()) {
659                if (newTargetResource != null) {
660                    m_cms.deleteRelationsFromResource(
661                        rel.getSourcePath(),
662                        CmsRelationFilter.TARGETS.filterStructureId(rel.getTargetId()).filterType(relType));
663                    m_cms.addRelationToResource(
664                        rel.getSourcePath(),
665                        newTargetResource.getRootPath(),
666                        relType.getName());
667                }
668            }
669        }
670    }
671}