001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (C) Alkacon Software (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 * 
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.file;
029
030import org.opencms.file.types.A_CmsResourceTypeLinkParseable;
031import org.opencms.file.types.CmsResourceTypeJsp;
032import org.opencms.file.types.I_CmsResourceType;
033import org.opencms.i18n.CmsEncoder;
034import org.opencms.loader.CmsLoaderException;
035import org.opencms.lock.CmsLock;
036import org.opencms.main.CmsException;
037import org.opencms.main.CmsIllegalArgumentException;
038import org.opencms.main.CmsLog;
039import org.opencms.main.OpenCms;
040import org.opencms.relations.CmsRelation;
041import org.opencms.relations.CmsRelationFilter;
042import org.opencms.relations.CmsRelationType;
043import org.opencms.relations.I_CmsLinkParseable;
044import org.opencms.util.CmsFileUtil;
045import org.opencms.util.CmsPair;
046import org.opencms.util.CmsStringUtil;
047import org.opencms.util.CmsUUID;
048import org.opencms.util.I_CmsRegexSubstitution;
049import org.opencms.xml.CmsXmlEntityResolver;
050import org.opencms.xml.CmsXmlException;
051import org.opencms.xml.CmsXmlUtils;
052import org.opencms.xml.content.Messages;
053
054import java.io.UnsupportedEncodingException;
055import java.util.ArrayList;
056import java.util.Collection;
057import java.util.HashMap;
058import java.util.HashSet;
059import java.util.List;
060import java.util.Map;
061import java.util.Set;
062import java.util.regex.Matcher;
063import java.util.regex.Pattern;
064
065import org.apache.commons.logging.Log;
066
067import org.dom4j.Document;
068
069import com.google.common.collect.ArrayListMultimap;
070import com.google.common.collect.Lists;
071import com.google.common.collect.Multimap;
072
073/**
 * A class used to rewrite links and relations in one subtree such that relations from that subtree to another given subtree
 * are replaced with relations to the first subtree.<p>
076 */
077public class CmsLinkRewriter {
078
079    /** The logger instance for this class. */
080    private static final Log LOG = CmsLog.getLog(CmsLinkRewriter.class);
081
082    /** A map from source folder structure ids to corresponding target folder resources. */
083    protected Map<CmsUUID, CmsResource> m_translationsById = new HashMap<CmsUUID, CmsResource>();
084
085    /** A map from source folder root paths to the corresponding target folder resources. */
086    protected Map<String, CmsResource> m_translationsByPath = new HashMap<String, CmsResource>();
087
088    /** A map of resources which have been cached by structure id. */
089    private Map<CmsUUID, CmsResource> m_cachedResources = new HashMap<CmsUUID, CmsResource>();
090
091    /** The CMS object used for file operations. */
092    private CmsObject m_cms;
093
094    /** If true, all XML contents will be rewritten instead of just those containing links to correct. */
095    private boolean m_rewriteAllXmlContents = true;
096
097    /** The set of structure ids of resources whose content has been rewritten. */
098    private Set<CmsUUID> m_rewrittenContent = new HashSet<CmsUUID>();
099
100    /** A list of path pairs, each containing a source and a target of a copy operation. */
101    private List<CmsPair<String, String>> m_sourceTargetPairs = new ArrayList<CmsPair<String, String>>();
102
103    /** The target folder root path. */
104    private String m_targetPath;
105
106    /**
107     * Creates a link rewriter for use after a multi-copy operation.<p>
108     * 
109     * @param cms the current CMS context 
110     * @param sources the list of source root paths 
111     * @param target the target parent folder root path 
112     */
113    public CmsLinkRewriter(CmsObject cms, List<String> sources, String target) {
114
115        m_sourceTargetPairs = new ArrayList<CmsPair<String, String>>();
116        for (String source : sources) {
117            checkNotSubPath(source, target);
118            String targetSub = CmsStringUtil.joinPaths(target, CmsResource.getName(source));
119            m_sourceTargetPairs.add(CmsPair.create(source, targetSub));
120        }
121        m_targetPath = target;
122        m_cms = cms;
123    }
124
125    /**
126     * Creates a new link rewriter for a list of sources and corresponding targets.<p>
127     * 
128     * @param cms the current CMS context 
129     * @param targetPath the target root path 
130     * @param sourceTargetPairs the list of source-target pairs
131     */
132    public CmsLinkRewriter(CmsObject cms, String targetPath, List<CmsPair<String, String>> sourceTargetPairs) {
133
134        m_cms = cms;
135        m_targetPath = targetPath;
136        m_sourceTargetPairs = sourceTargetPairs;
137    }
138
139    /**
140     * Creates a link rewriter for use after a single copy operation.<p>
141     * 
142     * @param cms the current CMS context 
143     * @param source the source folder root path 
144     * @param target the target folder root path
145     */
146    public CmsLinkRewriter(CmsObject cms, String source, String target) {
147
148        m_sourceTargetPairs = new ArrayList<CmsPair<String, String>>();
149        checkNotSubPath(source, target);
150
151        m_sourceTargetPairs.add(CmsPair.create(source, target));
152        m_targetPath = target;
153        m_cms = cms;
154    }
155
156    /**
157     * Checks whether a given resource is a folder and throws an exception otherwise.<p>
158     * 
159     * @param resource the resource to check 
160     * @throws CmsException if something goes wrong 
161     */
162    protected static void checkIsFolder(CmsResource resource) throws CmsException {
163
164        if (!isFolder(resource)) {
165            throw new CmsIllegalArgumentException(Messages.get().container(
166                org.opencms.file.Messages.ERR_REWRITE_LINKS_ROOT_NOT_FOLDER_1,
167                resource.getRootPath()));
168        }
169    }
170
171    /**
172     * Helper method to check whether a given resource is a folder.<p>
173     * 
174     * @param resource the resouce to check
175     * @return true if the resource is a folder
176     * 
177     * @throws CmsLoaderException if the resource type couldn't be found 
178     */
179    protected static boolean isFolder(CmsResource resource) throws CmsLoaderException {
180
181        I_CmsResourceType resourceType = OpenCms.getResourceManager().getResourceType(resource.getTypeId());
182        return resourceType.isFolder();
183    }
184
185    /**
186     * Starts the link rewriting process.<p>
187     * 
188     * @throws CmsException if something goes wrong 
189     */
190    public void rewriteLinks() throws CmsException {
191
192        init();
193        List<CmsRelation> relationsToCorrect = findRelationsFromTargetToSource();
194        // group relations by the structure id of their source 
195        Multimap<CmsUUID, CmsRelation> relationsBySourceId = ArrayListMultimap.create();
196        for (CmsRelation relation : relationsToCorrect) {
197            LOG.info("Found relation which needs to be corrected: "
198                + relation.getSourcePath()
199                + " -> "
200                + relation.getTargetPath()
201                + " ["
202                + relation.getType().getName()
203                + "]");
204            relationsBySourceId.put(relation.getSourceId(), relation);
205        }
206
207        // make sure we have a lock on the target folder before doing any write operations
208        CmsLock lock = m_cms.getLock(m_targetPath);
209        if (lock.isUnlocked() || !lock.isOwnedBy(m_cms.getRequestContext().getCurrentUser())) {
210            // fail if locked by another user 
211            m_cms.lockResource(m_targetPath);
212        }
213
214        for (CmsUUID structureId : relationsBySourceId.keySet()) {
215
216            Collection<CmsRelation> relationsForResource = relationsBySourceId.get(structureId);
217            CmsResource resource = null;
218            try {
219                resource = getResource(structureId);
220                rewriteLinks(resource, relationsForResource);
221            } catch (CmsException e) {
222                LOG.error(e.getLocalizedMessage(), e);
223            }
224        }
225        if (!m_rewriteAllXmlContents) {
226            return;
227        }
228        for (Map.Entry<CmsUUID, CmsResource> entry : m_cachedResources.entrySet()) {
229            CmsUUID key = entry.getKey();
230            CmsResource resource = entry.getValue();
231            if (isInTargets(resource.getRootPath()) && !m_rewrittenContent.contains(key)) {
232                I_CmsResourceType resType = OpenCms.getResourceManager().getResourceType(resource.getTypeId());
233                // rewrite content for other files so 
234                if (resType instanceof A_CmsResourceTypeLinkParseable) {
235                    try {
236                        CmsFile file = m_cms.readFile(resource);
237                        m_cms.writeFile(file);
238                    } catch (CmsException e) {
239                        LOG.error(e.getLocalizedMessage(), e);
240                    }
241                }
242
243            }
244        }
245    }
246
247    /**
248     * Sets the 'rewriteAllContents' flag, which controls whether all XML contents will be rewritten
249     * or just those whose links need to be corrected.<p>
250     * 
251     * @param rewriteAllContents if true, all contents will be rewritten 
252     */
253    public void setRewriteAllContents(boolean rewriteAllContents) {
254
255        m_rewriteAllXmlContents = rewriteAllContents;
256    }
257
258    /**
259     * Checks that the target path is not a subfolder of the source path.<p>
260     * 
261     * @param source the source path 
262     * @param target the target path 
263     */
264    protected void checkNotSubPath(String source, String target) {
265
266        source = CmsStringUtil.joinPaths("/", source, "/");
267        target = CmsStringUtil.joinPaths("/", target, "/");
268        if (target.startsWith(source)) {
269            throw new CmsIllegalArgumentException(org.opencms.file.Messages.get().container(
270                org.opencms.file.Messages.ERR_REWRITE_LINKS_ROOTS_DEPENDENT_2,
271                source,
272                target));
273        }
274    }
275
276    /**
277     * Decodes a byte array into a string with a given encoding, or the default encoding if that fails.<p>
278     * 
279     * @param bytes the byte array
280     * @param encoding the encoding to use
281     *  
282     * @return the decoded string 
283     */
284    protected String decode(byte[] bytes, String encoding) {
285
286        try {
287            return new String(bytes, encoding);
288        } catch (UnsupportedEncodingException e) {
289            return new String(bytes);
290        }
291    }
292
293    /**
294     * Decodes a file's contents and return the content string and the encoding to use for writing the file 
295     * back to the VFS.<p>
296     * 
297     * @param file the file to decode
298     * @return a pair (content, encoding)
299     * @throws CmsException if something goes wrong 
300     */
301    protected CmsPair<String, String> decode(CmsFile file) throws CmsException {
302
303        String content = null;
304        String encoding = getConfiguredEncoding(m_cms, file);
305        I_CmsResourceType resType = OpenCms.getResourceManager().getResourceType(file.getTypeId());
306        if (resType instanceof CmsResourceTypeJsp) {
307            content = decode(file.getContents(), encoding);
308        } else {
309            try {
310                CmsXmlEntityResolver resolver = new CmsXmlEntityResolver(m_cms);
311                // parse the XML and serialize it back to  a string with the configured encoding 
312                Document doc = CmsXmlUtils.unmarshalHelper(file.getContents(), resolver);
313                content = CmsXmlUtils.marshal(doc, encoding);
314            } catch (Exception e) {
315                // invalid xml structure, just use the configured encoding 
316                content = decode(file.getContents(), encoding);
317            }
318        }
319        return CmsPair.create(content, encoding);
320    }
321
322    /**
323     * Finds relations from the target root folder or its children to the source root folder or its children.<p>
324     * 
325     * @return the list of relations from the target to the source 
326     * 
327     * @throws CmsException if something goes wrong 
328     */
329    protected List<CmsRelation> findRelationsFromTargetToSource() throws CmsException {
330
331        List<CmsRelation> relations = m_cms.readRelations(CmsRelationFilter.SOURCES.filterPath(m_targetPath).filterIncludeChildren());
332        List<CmsRelation> result = new ArrayList<CmsRelation>();
333        for (CmsRelation rel : relations) {
334            if (isInTargets(rel.getSourcePath()) && isInSources(rel.getTargetPath())) {
335                result.add(rel);
336            }
337        }
338        return result;
339    }
340
341    /**
342     * Gets the encoding which is configured at the location of a given resource.<p>
343     * 
344     * @param cms the current CMS context 
345     * @param resource the resource for which the configured encoding should be retrieved 
346     * @return the configured encoding for the resource
347     *  
348     * @throws CmsException if something goes wrong 
349     */
350    protected String getConfiguredEncoding(CmsObject cms, CmsResource resource) throws CmsException {
351
352        String encoding = null;
353        try {
354            encoding = cms.readPropertyObject(
355                resource.getRootPath(),
356                CmsPropertyDefinition.PROPERTY_CONTENT_ENCODING,
357                true).getValue();
358        } catch (CmsException e) {
359            // encoding will be null 
360        }
361        if (encoding == null) {
362            encoding = OpenCms.getSystemInfo().getDefaultEncoding();
363        } else {
364            encoding = CmsEncoder.lookupEncoding(encoding, null);
365            if (encoding == null) {
366                throw new CmsXmlException(Messages.get().container(
367                    Messages.ERR_XMLCONTENT_INVALID_ENC_1,
368                    resource.getRootPath()));
369            }
370        }
371        return encoding;
372    }
373
374    /**
375     * Gets a list of resource pairs whose paths relative to the source/target roots passed match.<p>
376     * 
377     * @param source the source root 
378     * @param target the target root 
379     * 
380     * @return the list of matching resources 
381     *  
382     * @throws CmsException if something goes wrong 
383     */
384    protected List<CmsPair<CmsResource, CmsResource>> getMatchingResources(String source, String target)
385    throws CmsException {
386
387        List<CmsResource> sourceResources = readTree(source);
388        Map<String, CmsResource> sourceRelative = getResourcesByRelativePath(sourceResources, source);
389
390        List<CmsResource> targetResources = readTree(target);
391        Map<String, CmsResource> targetRelative = getResourcesByRelativePath(targetResources, target);
392
393        List<CmsPair<CmsResource, CmsResource>> result = new ArrayList<CmsPair<CmsResource, CmsResource>>();
394        sourceRelative.keySet().retainAll(targetRelative.keySet());
395        for (Map.Entry<String, CmsResource> entry : sourceRelative.entrySet()) {
396            String key = entry.getKey();
397            CmsResource sourceRes = entry.getValue();
398            CmsResource targetRes = targetRelative.get(key);
399            result.add(CmsPair.create(sourceRes, targetRes));
400        }
401        return result;
402    }
403
404    /**
405     * Computes the relative path given an ancestor folder path.<p>
406     *  
407     * @param ancestor the ancestor folder 
408     * @param rootPath the path for which the relative path should be computed 
409     * 
410     * @return the relative path 
411     */
412    protected String getRelativePath(String ancestor, String rootPath) {
413
414        String result = rootPath.substring(ancestor.length());
415        result = CmsStringUtil.joinPaths("/", result, "/");
416        return result;
417    }
418
419    /**
420     * Accesses a resource by structure id.<p>
421     * 
422     * @param structureId the structure id of the resource  
423     * @return the resource with the given structure id 
424     * 
425     * @throws CmsException if the resource couldn't be read 
426     */
427    protected CmsResource getResource(CmsUUID structureId) throws CmsException {
428
429        if (m_cachedResources.containsKey(structureId)) {
430            return m_cachedResources.get(structureId);
431        }
432        return m_cms.readResource(structureId);
433    }
434
435    /**
436     * Collects a list of resources in a map where the key for each resource is the path relative to a given folder.<p>
437     * 
438     * @param resources the resources to put in the map 
439     * @param basePath the path relative to which the keys of the resulting map should be computed
440     *  
441     * @return a map from relative paths to resources 
442     */
443    protected Map<String, CmsResource> getResourcesByRelativePath(List<CmsResource> resources, String basePath) {
444
445        Map<String, CmsResource> result = new HashMap<String, CmsResource>();
446        for (CmsResource resource : resources) {
447            String relativeSubPath = CmsStringUtil.getRelativeSubPath(basePath, resource.getRootPath());
448            if (relativeSubPath != null) {
449                result.put(relativeSubPath, resource);
450            }
451        }
452        return result;
453    }
454
455    /**
456     * Reads the data needed for rewriting the relations from the VFS.<p>
457     * 
458     * @throws CmsException if something goes wrong 
459     */
460    protected void init() throws CmsException {
461
462        m_cms = OpenCms.initCmsObject(m_cms);
463        // we want to use autocorrection when writing XML contents back 
464        //m_cms.getRequestContext().setAttribute(CmsXmlContent.AUTO_CORRECTION_ATTRIBUTE, Boolean.TRUE);
465        m_cms.getRequestContext().setSiteRoot("");
466        List<CmsPair<CmsResource, CmsResource>> allMatchingResources = Lists.newArrayList();
467        for (CmsPair<String, String> pair : m_sourceTargetPairs) {
468            List<CmsPair<CmsResource, CmsResource>> matchingResources = getMatchingResources(
469                pair.getFirst(),
470                pair.getSecond());
471            allMatchingResources.addAll(matchingResources);
472        }
473        for (CmsPair<CmsResource, CmsResource> resPair : allMatchingResources) {
474            CmsResource source = resPair.getFirst();
475            CmsResource target = resPair.getSecond();
476            m_translationsById.put(source.getStructureId(), target);
477            m_translationsByPath.put(source.getRootPath(), target);
478        }
479    }
480
481    /**
482     * Checks if a path belongs to one of the sources.<p>
483     * 
484     * @param path a root path 
485     * 
486     * @return true if the path belongs to the sources 
487     */
488    protected boolean isInSources(String path) {
489
490        for (CmsPair<String, String> sourceTargetPair : m_sourceTargetPairs) {
491            String source = sourceTargetPair.getFirst();
492            if (CmsStringUtil.joinPaths(path, "/").startsWith(CmsStringUtil.joinPaths(source, "/"))) {
493                return true;
494            }
495        }
496        return false;
497    }
498
499    /**
500     * Checks if a path belongs to one of the targets.<p>
501     * 
502     * @param path a root path 
503     * 
504     * @return true if the path belongs to the targets  
505     */
506    protected boolean isInTargets(String path) {
507
508        for (CmsPair<String, String> sourceTargetPair : m_sourceTargetPairs) {
509            String target = sourceTargetPair.getSecond();
510            if (CmsStringUtil.joinPaths(path, "/").startsWith(CmsStringUtil.joinPaths(target, "/"))) {
511                return true;
512            }
513        }
514        return false;
515    }
516
517    /**
518     * Reads the resources in a subtree.<p>
519     *  
520     * @param rootPath the root of the subtree
521     *  
522     * @return the list of resources from the subtree
523     *  
524     * @throws CmsException if something goes wrong 
525     */
526    protected List<CmsResource> readTree(String rootPath) throws CmsException {
527
528        rootPath = CmsFileUtil.removeTrailingSeparator(rootPath);
529        CmsResource base = m_cms.readResource(rootPath);
530
531        I_CmsResourceType resType = OpenCms.getResourceManager().getResourceType(base);
532        List<CmsResource> result = new ArrayList<CmsResource>();
533        if (resType.isFolder()) {
534            rootPath = CmsStringUtil.joinPaths(rootPath, "/");
535            List<CmsResource> subResources = m_cms.readResources(rootPath, CmsResourceFilter.ALL, true);
536            result.add(base);
537            result.addAll(subResources);
538        } else {
539            result.add(base);
540        }
541        for (CmsResource resource : result) {
542            m_cachedResources.put(resource.getStructureId(), resource);
543        }
544
545        return result;
546    }
547
548    /**
549     * Rewrites the links included in the content itself.<p>
550     * 
551     * @param file the file for which the links should be replaced 
552     * @param relations the original relations
553     * 
554     * @throws CmsException if something goes wrong 
555     */
556    protected void rewriteContent(CmsFile file, Collection<CmsRelation> relations) throws CmsException {
557
558        LOG.info("Rewriting in-content links for " + file.getRootPath());
559        CmsPair<String, String> contentAndEncoding = decode(file);
560        String content = contentAndEncoding.getFirst();
561        String encodingForSave = contentAndEncoding.getSecond();
562        String newContent = rewriteContentString(content);
563        byte[] newContentBytes;
564        try {
565            newContentBytes = newContent.getBytes(encodingForSave);
566        } catch (UnsupportedEncodingException e) {
567            newContentBytes = newContent.getBytes();
568        }
569        file.setContents(newContentBytes);
570        m_cms.writeFile(file);
571    }
572
573    /**
574     * Replaces structure ids of resources in the source subtree with the structure ids of the corresponding 
575     * resources in the target subtree inside a content string.<p>
576     *  
577     * @param originalContent the original content 
578     * 
579     * @return the content with the new structure ids 
580     */
581    protected String rewriteContentString(String originalContent) {
582
583        Pattern uuidPattern = Pattern.compile(CmsUUID.UUID_REGEX);
584        I_CmsRegexSubstitution substitution = new I_CmsRegexSubstitution() {
585
586            public String substituteMatch(String text, Matcher matcher) {
587
588                String uuidString = text.substring(matcher.start(), matcher.end());
589                CmsUUID uuid = new CmsUUID(uuidString);
590                String result = uuidString;
591                if (m_translationsById.containsKey(uuid)) {
592                    result = m_translationsById.get(uuid).getStructureId().toString();
593                }
594                return result;
595            }
596        };
597        return CmsStringUtil.substitute(uuidPattern, originalContent, substitution);
598    }
599
600    /** 
601     * Rewrites the links for a single resource.<p>
602     * 
603     * @param resource the resource for which the links should be rewritten 
604     * @param relations the relations to the source folder which have this resource as its source 
605     * 
606     * @throws CmsException if something goes wrong 
607     */
608    protected void rewriteLinks(CmsResource resource, Collection<CmsRelation> relations) throws CmsException {
609
610        LOG.info("Rewriting relations for resource " + resource.getRootPath());
611        I_CmsResourceType resourceType = OpenCms.getResourceManager().getResourceType(resource.getTypeId());
612        boolean hasContentLinks = false;
613        boolean hasOtherLinks = false;
614
615        for (CmsRelation relation : relations) {
616            if (relation.getType().isDefinedInContent()) {
617                hasContentLinks = true;
618            } else {
619                hasOtherLinks = true;
620            }
621        }
622        if (hasContentLinks) {
623            LOG.info("The resource " + resource.getRootPath() + " has links in the content.");
624        }
625        if (hasOtherLinks) {
626            LOG.info("The resource " + resource.getRootPath() + " has non-content links.");
627        }
628
629        if (hasContentLinks) {
630            if (resourceType instanceof I_CmsLinkParseable) {
631                CmsFile file = m_cms.readFile(resource);
632                rewriteContent(file, relations);
633                m_rewrittenContent.add(file.getStructureId());
634            }
635        }
636        if (hasOtherLinks) {
637            rewriteOtherRelations(resource, relations);
638        }
639    }
640
641    /**
642     * Rewrites relations which are not derived from links in the content itself.<p>
643     * 
644     * @param res the resource for which to rewrite the relations 
645     * @param relations the original relations 
646     * 
647     * @throws CmsException if something goes wrong 
648     */
649    protected void rewriteOtherRelations(CmsResource res, Collection<CmsRelation> relations) throws CmsException {
650
651        LOG.info("Rewriting non-content links for " + res.getRootPath());
652        for (CmsRelation rel : relations) {
653            CmsUUID targetId = rel.getTargetId();
654            CmsResource newTargetResource = m_translationsById.get(targetId);
655            CmsRelationType relType = rel.getType();
656            if (!relType.isDefinedInContent()) {
657                if (newTargetResource != null) {
658                    m_cms.deleteRelationsFromResource(
659                        rel.getSourcePath(),
660                        CmsRelationFilter.TARGETS.filterStructureId(rel.getTargetId()).filterType(relType));
661                    m_cms.addRelationToResource(rel.getSourcePath(), newTargetResource.getRootPath(), relType.getName());
662                }
663            }
664        }
665    }
666}