001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (C) Alkacon Software (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.file; 029 030import org.opencms.file.types.A_CmsResourceTypeLinkParseable; 031import org.opencms.file.types.CmsResourceTypeJsp; 032import org.opencms.file.types.I_CmsResourceType; 033import org.opencms.i18n.CmsEncoder; 034import org.opencms.loader.CmsLoaderException; 035import org.opencms.lock.CmsLock; 036import org.opencms.main.CmsException; 037import org.opencms.main.CmsIllegalArgumentException; 038import org.opencms.main.CmsLog; 039import org.opencms.main.OpenCms; 040import org.opencms.relations.CmsRelation; 041import org.opencms.relations.CmsRelationFilter; 042import org.opencms.relations.CmsRelationType; 043import org.opencms.relations.I_CmsLinkParseable; 044import org.opencms.util.CmsFileUtil; 045import org.opencms.util.CmsPair; 046import org.opencms.util.CmsStringUtil; 047import org.opencms.util.CmsUUID; 048import org.opencms.util.I_CmsRegexSubstitution; 049import org.opencms.xml.CmsXmlEntityResolver; 050import org.opencms.xml.CmsXmlException; 051import org.opencms.xml.CmsXmlUtils; 052import org.opencms.xml.content.Messages; 053 054import java.io.UnsupportedEncodingException; 055import java.util.ArrayList; 056import java.util.Collection; 057import java.util.HashMap; 058import java.util.HashSet; 059import java.util.List; 060import java.util.Map; 061import java.util.Set; 062import java.util.regex.Matcher; 063import java.util.regex.Pattern; 064 065import org.apache.commons.logging.Log; 066 067import org.dom4j.Document; 068 069import com.google.common.collect.ArrayListMultimap; 070import com.google.common.collect.Lists; 071import com.google.common.collect.Multimap; 072 073/** 074 * A class used to rewrite links and relations in one subtree such that relations from that subtree to another given subtree 075 * replaced with relations to the first subtree.<p> 076 */ 077public class CmsLinkRewriter { 078 079 /** The logger instance for this class. */ 080 private static final Log LOG = CmsLog.getLog(CmsLinkRewriter.class); 081 082 /** A map from source folder structure ids to corresponding target folder resources. */ 083 protected Map<CmsUUID, CmsResource> m_translationsById = new HashMap<CmsUUID, CmsResource>(); 084 085 /** A map from source folder root paths to the corresponding target folder resources. */ 086 protected Map<String, CmsResource> m_translationsByPath = new HashMap<String, CmsResource>(); 087 088 /** A map of resources which have been cached by structure id. */ 089 private Map<CmsUUID, CmsResource> m_cachedResources = new HashMap<CmsUUID, CmsResource>(); 090 091 /** The CMS object used for file operations. */ 092 private CmsObject m_cms; 093 094 /** If true, all XML contents will be rewritten instead of just those containing links to correct. */ 095 private boolean m_rewriteAllXmlContents = true; 096 097 /** The set of structure ids of resources whose content has been rewritten. */ 098 private Set<CmsUUID> m_rewrittenContent = new HashSet<CmsUUID>(); 099 100 /** A list of path pairs, each containing a source and a target of a copy operation. */ 101 private List<CmsPair<String, String>> m_sourceTargetPairs = new ArrayList<CmsPair<String, String>>(); 102 103 /** The target folder root path. */ 104 private String m_targetPath; 105 106 /** 107 * Creates a link rewriter for use after a multi-copy operation.<p> 108 * 109 * @param cms the current CMS context 110 * @param sources the list of source root paths 111 * @param target the target parent folder root path 112 */ 113 public CmsLinkRewriter(CmsObject cms, List<String> sources, String target) { 114 115 m_sourceTargetPairs = new ArrayList<CmsPair<String, String>>(); 116 for (String source : sources) { 117 checkNotSubPath(source, target); 118 String targetSub = CmsStringUtil.joinPaths(target, CmsResource.getName(source)); 119 m_sourceTargetPairs.add(CmsPair.create(source, targetSub)); 120 } 121 m_targetPath = target; 122 m_cms = cms; 123 } 124 125 /** 126 * Creates a new link rewriter for a list of sources and corresponding targets.<p> 127 * 128 * @param cms the current CMS context 129 * @param targetPath the target root path 130 * @param sourceTargetPairs the list of source-target pairs 131 */ 132 public CmsLinkRewriter(CmsObject cms, String targetPath, List<CmsPair<String, String>> sourceTargetPairs) { 133 134 m_cms = cms; 135 m_targetPath = targetPath; 136 m_sourceTargetPairs = sourceTargetPairs; 137 } 138 139 /** 140 * Creates a link rewriter for use after a single copy operation.<p> 141 * 142 * @param cms the current CMS context 143 * @param source the source folder root path 144 * @param target the target folder root path 145 */ 146 public CmsLinkRewriter(CmsObject cms, String source, String target) { 147 148 m_sourceTargetPairs = new ArrayList<CmsPair<String, String>>(); 149 checkNotSubPath(source, target); 150 151 m_sourceTargetPairs.add(CmsPair.create(source, target)); 152 m_targetPath = target; 153 m_cms = cms; 154 } 155 156 /** 157 * Checks whether a given resource is a folder and throws an exception otherwise.<p> 158 * 159 * @param resource the resource to check 160 * @throws CmsException if something goes wrong 161 */ 162 protected static void checkIsFolder(CmsResource resource) throws CmsException { 163 164 if (!isFolder(resource)) { 165 throw new CmsIllegalArgumentException(Messages.get().container( 166 org.opencms.file.Messages.ERR_REWRITE_LINKS_ROOT_NOT_FOLDER_1, 167 resource.getRootPath())); 168 } 169 } 170 171 /** 172 * Helper method to check whether a given resource is a folder.<p> 173 * 174 * @param resource the resouce to check 175 * @return true if the resource is a folder 176 * 177 * @throws CmsLoaderException if the resource type couldn't be found 178 */ 179 protected static boolean isFolder(CmsResource resource) throws CmsLoaderException { 180 181 I_CmsResourceType resourceType = OpenCms.getResourceManager().getResourceType(resource.getTypeId()); 182 return resourceType.isFolder(); 183 } 184 185 /** 186 * Starts the link rewriting process.<p> 187 * 188 * @throws CmsException if something goes wrong 189 */ 190 public void rewriteLinks() throws CmsException { 191 192 init(); 193 List<CmsRelation> relationsToCorrect = findRelationsFromTargetToSource(); 194 // group relations by the structure id of their source 195 Multimap<CmsUUID, CmsRelation> relationsBySourceId = ArrayListMultimap.create(); 196 for (CmsRelation relation : relationsToCorrect) { 197 LOG.info("Found relation which needs to be corrected: " 198 + relation.getSourcePath() 199 + " -> " 200 + relation.getTargetPath() 201 + " [" 202 + relation.getType().getName() 203 + "]"); 204 relationsBySourceId.put(relation.getSourceId(), relation); 205 } 206 207 // make sure we have a lock on the target folder before doing any write operations 208 CmsLock lock = m_cms.getLock(m_targetPath); 209 if (lock.isUnlocked() || !lock.isOwnedBy(m_cms.getRequestContext().getCurrentUser())) { 210 // fail if locked by another user 211 m_cms.lockResource(m_targetPath); 212 } 213 214 for (CmsUUID structureId : relationsBySourceId.keySet()) { 215 216 Collection<CmsRelation> relationsForResource = relationsBySourceId.get(structureId); 217 CmsResource resource = null; 218 try { 219 resource = getResource(structureId); 220 rewriteLinks(resource, relationsForResource); 221 } catch (CmsException e) { 222 LOG.error(e.getLocalizedMessage(), e); 223 } 224 } 225 if (!m_rewriteAllXmlContents) { 226 return; 227 } 228 for (Map.Entry<CmsUUID, CmsResource> entry : m_cachedResources.entrySet()) { 229 CmsUUID key = entry.getKey(); 230 CmsResource resource = entry.getValue(); 231 if (isInTargets(resource.getRootPath()) && !m_rewrittenContent.contains(key)) { 232 I_CmsResourceType resType = OpenCms.getResourceManager().getResourceType(resource.getTypeId()); 233 // rewrite content for other files so 234 if (resType instanceof A_CmsResourceTypeLinkParseable) { 235 try { 236 CmsFile file = m_cms.readFile(resource); 237 m_cms.writeFile(file); 238 } catch (CmsException e) { 239 LOG.error(e.getLocalizedMessage(), e); 240 } 241 } 242 243 } 244 } 245 } 246 247 /** 248 * Sets the 'rewriteAllContents' flag, which controls whether all XML contents will be rewritten 249 * or just those whose links need to be corrected.<p> 250 * 251 * @param rewriteAllContents if true, all contents will be rewritten 252 */ 253 public void setRewriteAllContents(boolean rewriteAllContents) { 254 255 m_rewriteAllXmlContents = rewriteAllContents; 256 } 257 258 /** 259 * Checks that the target path is not a subfolder of the source path.<p> 260 * 261 * @param source the source path 262 * @param target the target path 263 */ 264 protected void checkNotSubPath(String source, String target) { 265 266 source = CmsStringUtil.joinPaths("/", source, "/"); 267 target = CmsStringUtil.joinPaths("/", target, "/"); 268 if (target.startsWith(source)) { 269 throw new CmsIllegalArgumentException(org.opencms.file.Messages.get().container( 270 org.opencms.file.Messages.ERR_REWRITE_LINKS_ROOTS_DEPENDENT_2, 271 source, 272 target)); 273 } 274 } 275 276 /** 277 * Decodes a byte array into a string with a given encoding, or the default encoding if that fails.<p> 278 * 279 * @param bytes the byte array 280 * @param encoding the encoding to use 281 * 282 * @return the decoded string 283 */ 284 protected String decode(byte[] bytes, String encoding) { 285 286 try { 287 return new String(bytes, encoding); 288 } catch (UnsupportedEncodingException e) { 289 return new String(bytes); 290 } 291 } 292 293 /** 294 * Decodes a file's contents and return the content string and the encoding to use for writing the file 295 * back to the VFS.<p> 296 * 297 * @param file the file to decode 298 * @return a pair (content, encoding) 299 * @throws CmsException if something goes wrong 300 */ 301 protected CmsPair<String, String> decode(CmsFile file) throws CmsException { 302 303 String content = null; 304 String encoding = getConfiguredEncoding(m_cms, file); 305 I_CmsResourceType resType = OpenCms.getResourceManager().getResourceType(file.getTypeId()); 306 if (resType instanceof CmsResourceTypeJsp) { 307 content = decode(file.getContents(), encoding); 308 } else { 309 try { 310 CmsXmlEntityResolver resolver = new CmsXmlEntityResolver(m_cms); 311 // parse the XML and serialize it back to a string with the configured encoding 312 Document doc = CmsXmlUtils.unmarshalHelper(file.getContents(), resolver); 313 content = CmsXmlUtils.marshal(doc, encoding); 314 } catch (Exception e) { 315 // invalid xml structure, just use the configured encoding 316 content = decode(file.getContents(), encoding); 317 } 318 } 319 return CmsPair.create(content, encoding); 320 } 321 322 /** 323 * Finds relations from the target root folder or its children to the source root folder or its children.<p> 324 * 325 * @return the list of relations from the target to the source 326 * 327 * @throws CmsException if something goes wrong 328 */ 329 protected List<CmsRelation> findRelationsFromTargetToSource() throws CmsException { 330 331 List<CmsRelation> relations = m_cms.readRelations(CmsRelationFilter.SOURCES.filterPath(m_targetPath).filterIncludeChildren()); 332 List<CmsRelation> result = new ArrayList<CmsRelation>(); 333 for (CmsRelation rel : relations) { 334 if (isInTargets(rel.getSourcePath()) && isInSources(rel.getTargetPath())) { 335 result.add(rel); 336 } 337 } 338 return result; 339 } 340 341 /** 342 * Gets the encoding which is configured at the location of a given resource.<p> 343 * 344 * @param cms the current CMS context 345 * @param resource the resource for which the configured encoding should be retrieved 346 * @return the configured encoding for the resource 347 * 348 * @throws CmsException if something goes wrong 349 */ 350 protected String getConfiguredEncoding(CmsObject cms, CmsResource resource) throws CmsException { 351 352 String encoding = null; 353 try { 354 encoding = cms.readPropertyObject( 355 resource.getRootPath(), 356 CmsPropertyDefinition.PROPERTY_CONTENT_ENCODING, 357 true).getValue(); 358 } catch (CmsException e) { 359 // encoding will be null 360 } 361 if (encoding == null) { 362 encoding = OpenCms.getSystemInfo().getDefaultEncoding(); 363 } else { 364 encoding = CmsEncoder.lookupEncoding(encoding, null); 365 if (encoding == null) { 366 throw new CmsXmlException(Messages.get().container( 367 Messages.ERR_XMLCONTENT_INVALID_ENC_1, 368 resource.getRootPath())); 369 } 370 } 371 return encoding; 372 } 373 374 /** 375 * Gets a list of resource pairs whose paths relative to the source/target roots passed match.<p> 376 * 377 * @param source the source root 378 * @param target the target root 379 * 380 * @return the list of matching resources 381 * 382 * @throws CmsException if something goes wrong 383 */ 384 protected List<CmsPair<CmsResource, CmsResource>> getMatchingResources(String source, String target) 385 throws CmsException { 386 387 List<CmsResource> sourceResources = readTree(source); 388 Map<String, CmsResource> sourceRelative = getResourcesByRelativePath(sourceResources, source); 389 390 List<CmsResource> targetResources = readTree(target); 391 Map<String, CmsResource> targetRelative = getResourcesByRelativePath(targetResources, target); 392 393 List<CmsPair<CmsResource, CmsResource>> result = new ArrayList<CmsPair<CmsResource, CmsResource>>(); 394 sourceRelative.keySet().retainAll(targetRelative.keySet()); 395 for (Map.Entry<String, CmsResource> entry : sourceRelative.entrySet()) { 396 String key = entry.getKey(); 397 CmsResource sourceRes = entry.getValue(); 398 CmsResource targetRes = targetRelative.get(key); 399 result.add(CmsPair.create(sourceRes, targetRes)); 400 } 401 return result; 402 } 403 404 /** 405 * Computes the relative path given an ancestor folder path.<p> 406 * 407 * @param ancestor the ancestor folder 408 * @param rootPath the path for which the relative path should be computed 409 * 410 * @return the relative path 411 */ 412 protected String getRelativePath(String ancestor, String rootPath) { 413 414 String result = rootPath.substring(ancestor.length()); 415 result = CmsStringUtil.joinPaths("/", result, "/"); 416 return result; 417 } 418 419 /** 420 * Accesses a resource by structure id.<p> 421 * 422 * @param structureId the structure id of the resource 423 * @return the resource with the given structure id 424 * 425 * @throws CmsException if the resource couldn't be read 426 */ 427 protected CmsResource getResource(CmsUUID structureId) throws CmsException { 428 429 if (m_cachedResources.containsKey(structureId)) { 430 return m_cachedResources.get(structureId); 431 } 432 return m_cms.readResource(structureId); 433 } 434 435 /** 436 * Collects a list of resources in a map where the key for each resource is the path relative to a given folder.<p> 437 * 438 * @param resources the resources to put in the map 439 * @param basePath the path relative to which the keys of the resulting map should be computed 440 * 441 * @return a map from relative paths to resources 442 */ 443 protected Map<String, CmsResource> getResourcesByRelativePath(List<CmsResource> resources, String basePath) { 444 445 Map<String, CmsResource> result = new HashMap<String, CmsResource>(); 446 for (CmsResource resource : resources) { 447 String relativeSubPath = CmsStringUtil.getRelativeSubPath(basePath, resource.getRootPath()); 448 if (relativeSubPath != null) { 449 result.put(relativeSubPath, resource); 450 } 451 } 452 return result; 453 } 454 455 /** 456 * Reads the data needed for rewriting the relations from the VFS.<p> 457 * 458 * @throws CmsException if something goes wrong 459 */ 460 protected void init() throws CmsException { 461 462 m_cms = OpenCms.initCmsObject(m_cms); 463 // we want to use autocorrection when writing XML contents back 464 //m_cms.getRequestContext().setAttribute(CmsXmlContent.AUTO_CORRECTION_ATTRIBUTE, Boolean.TRUE); 465 m_cms.getRequestContext().setSiteRoot(""); 466 List<CmsPair<CmsResource, CmsResource>> allMatchingResources = Lists.newArrayList(); 467 for (CmsPair<String, String> pair : m_sourceTargetPairs) { 468 List<CmsPair<CmsResource, CmsResource>> matchingResources = getMatchingResources( 469 pair.getFirst(), 470 pair.getSecond()); 471 allMatchingResources.addAll(matchingResources); 472 } 473 for (CmsPair<CmsResource, CmsResource> resPair : allMatchingResources) { 474 CmsResource source = resPair.getFirst(); 475 CmsResource target = resPair.getSecond(); 476 m_translationsById.put(source.getStructureId(), target); 477 m_translationsByPath.put(source.getRootPath(), target); 478 } 479 } 480 481 /** 482 * Checks if a path belongs to one of the sources.<p> 483 * 484 * @param path a root path 485 * 486 * @return true if the path belongs to the sources 487 */ 488 protected boolean isInSources(String path) { 489 490 for (CmsPair<String, String> sourceTargetPair : m_sourceTargetPairs) { 491 String source = sourceTargetPair.getFirst(); 492 if (CmsStringUtil.joinPaths(path, "/").startsWith(CmsStringUtil.joinPaths(source, "/"))) { 493 return true; 494 } 495 } 496 return false; 497 } 498 499 /** 500 * Checks if a path belongs to one of the targets.<p> 501 * 502 * @param path a root path 503 * 504 * @return true if the path belongs to the targets 505 */ 506 protected boolean isInTargets(String path) { 507 508 for (CmsPair<String, String> sourceTargetPair : m_sourceTargetPairs) { 509 String target = sourceTargetPair.getSecond(); 510 if (CmsStringUtil.joinPaths(path, "/").startsWith(CmsStringUtil.joinPaths(target, "/"))) { 511 return true; 512 } 513 } 514 return false; 515 } 516 517 /** 518 * Reads the resources in a subtree.<p> 519 * 520 * @param rootPath the root of the subtree 521 * 522 * @return the list of resources from the subtree 523 * 524 * @throws CmsException if something goes wrong 525 */ 526 protected List<CmsResource> readTree(String rootPath) throws CmsException { 527 528 rootPath = CmsFileUtil.removeTrailingSeparator(rootPath); 529 CmsResource base = m_cms.readResource(rootPath); 530 531 I_CmsResourceType resType = OpenCms.getResourceManager().getResourceType(base); 532 List<CmsResource> result = new ArrayList<CmsResource>(); 533 if (resType.isFolder()) { 534 rootPath = CmsStringUtil.joinPaths(rootPath, "/"); 535 List<CmsResource> subResources = m_cms.readResources(rootPath, CmsResourceFilter.ALL, true); 536 result.add(base); 537 result.addAll(subResources); 538 } else { 539 result.add(base); 540 } 541 for (CmsResource resource : result) { 542 m_cachedResources.put(resource.getStructureId(), resource); 543 } 544 545 return result; 546 } 547 548 /** 549 * Rewrites the links included in the content itself.<p> 550 * 551 * @param file the file for which the links should be replaced 552 * @param relations the original relations 553 * 554 * @throws CmsException if something goes wrong 555 */ 556 protected void rewriteContent(CmsFile file, Collection<CmsRelation> relations) throws CmsException { 557 558 LOG.info("Rewriting in-content links for " + file.getRootPath()); 559 CmsPair<String, String> contentAndEncoding = decode(file); 560 String content = contentAndEncoding.getFirst(); 561 String encodingForSave = contentAndEncoding.getSecond(); 562 String newContent = rewriteContentString(content); 563 byte[] newContentBytes; 564 try { 565 newContentBytes = newContent.getBytes(encodingForSave); 566 } catch (UnsupportedEncodingException e) { 567 newContentBytes = newContent.getBytes(); 568 } 569 file.setContents(newContentBytes); 570 m_cms.writeFile(file); 571 } 572 573 /** 574 * Replaces structure ids of resources in the source subtree with the structure ids of the corresponding 575 * resources in the target subtree inside a content string.<p> 576 * 577 * @param originalContent the original content 578 * 579 * @return the content with the new structure ids 580 */ 581 protected String rewriteContentString(String originalContent) { 582 583 Pattern uuidPattern = Pattern.compile(CmsUUID.UUID_REGEX); 584 I_CmsRegexSubstitution substitution = new I_CmsRegexSubstitution() { 585 586 public String substituteMatch(String text, Matcher matcher) { 587 588 String uuidString = text.substring(matcher.start(), matcher.end()); 589 CmsUUID uuid = new CmsUUID(uuidString); 590 String result = uuidString; 591 if (m_translationsById.containsKey(uuid)) { 592 result = m_translationsById.get(uuid).getStructureId().toString(); 593 } 594 return result; 595 } 596 }; 597 return CmsStringUtil.substitute(uuidPattern, originalContent, substitution); 598 } 599 600 /** 601 * Rewrites the links for a single resource.<p> 602 * 603 * @param resource the resource for which the links should be rewritten 604 * @param relations the relations to the source folder which have this resource as its source 605 * 606 * @throws CmsException if something goes wrong 607 */ 608 protected void rewriteLinks(CmsResource resource, Collection<CmsRelation> relations) throws CmsException { 609 610 LOG.info("Rewriting relations for resource " + resource.getRootPath()); 611 I_CmsResourceType resourceType = OpenCms.getResourceManager().getResourceType(resource.getTypeId()); 612 boolean hasContentLinks = false; 613 boolean hasOtherLinks = false; 614 615 for (CmsRelation relation : relations) { 616 if (relation.getType().isDefinedInContent()) { 617 hasContentLinks = true; 618 } else { 619 hasOtherLinks = true; 620 } 621 } 622 if (hasContentLinks) { 623 LOG.info("The resource " + resource.getRootPath() + " has links in the content."); 624 } 625 if (hasOtherLinks) { 626 LOG.info("The resource " + resource.getRootPath() + " has non-content links."); 627 } 628 629 if (hasContentLinks) { 630 if (resourceType instanceof I_CmsLinkParseable) { 631 CmsFile file = m_cms.readFile(resource); 632 rewriteContent(file, relations); 633 m_rewrittenContent.add(file.getStructureId()); 634 } 635 } 636 if (hasOtherLinks) { 637 rewriteOtherRelations(resource, relations); 638 } 639 } 640 641 /** 642 * Rewrites relations which are not derived from links in the content itself.<p> 643 * 644 * @param res the resource for which to rewrite the relations 645 * @param relations the original relations 646 * 647 * @throws CmsException if something goes wrong 648 */ 649 protected void rewriteOtherRelations(CmsResource res, Collection<CmsRelation> relations) throws CmsException { 650 651 LOG.info("Rewriting non-content links for " + res.getRootPath()); 652 for (CmsRelation rel : relations) { 653 CmsUUID targetId = rel.getTargetId(); 654 CmsResource newTargetResource = m_translationsById.get(targetId); 655 CmsRelationType relType = rel.getType(); 656 if (!relType.isDefinedInContent()) { 657 if (newTargetResource != null) { 658 m_cms.deleteRelationsFromResource( 659 rel.getSourcePath(), 660 CmsRelationFilter.TARGETS.filterStructureId(rel.getTargetId()).filterType(relType)); 661 m_cms.addRelationToResource(rel.getSourcePath(), newTargetResource.getRootPath(), relType.getName()); 662 } 663 } 664 } 665 } 666}