001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (C) Alkacon Software (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.file; 029 030import org.opencms.file.types.A_CmsResourceTypeLinkParseable; 031import org.opencms.file.types.CmsResourceTypeJsp; 032import org.opencms.file.types.I_CmsResourceType; 033import org.opencms.i18n.CmsEncoder; 034import org.opencms.loader.CmsLoaderException; 035import org.opencms.lock.CmsLock; 036import org.opencms.main.CmsException; 037import org.opencms.main.CmsIllegalArgumentException; 038import org.opencms.main.CmsLog; 039import org.opencms.main.OpenCms; 040import org.opencms.relations.CmsRelation; 041import org.opencms.relations.CmsRelationFilter; 042import org.opencms.relations.CmsRelationType; 043import org.opencms.relations.I_CmsLinkParseable; 044import org.opencms.util.CmsFileUtil; 045import org.opencms.util.CmsPair; 046import org.opencms.util.CmsStringUtil; 047import org.opencms.util.CmsUUID; 048import org.opencms.util.I_CmsRegexSubstitution; 049import org.opencms.xml.CmsXmlEntityResolver; 050import org.opencms.xml.CmsXmlException; 051import org.opencms.xml.CmsXmlUtils; 052import org.opencms.xml.content.Messages; 053 054import java.io.UnsupportedEncodingException; 055import java.util.ArrayList; 056import java.util.Collection; 057import java.util.HashMap; 058import java.util.HashSet; 059import java.util.List; 060import java.util.Map; 061import java.util.Set; 062import java.util.regex.Matcher; 063import java.util.regex.Pattern; 064 065import org.apache.commons.logging.Log; 066 067import org.dom4j.Document; 068 069import com.google.common.collect.ArrayListMultimap; 070import com.google.common.collect.Lists; 071import com.google.common.collect.Multimap; 072 073/** 074 * A class used to rewrite links and relations in one subtree such that relations from that subtree to another given subtree 075 * replaced with relations to the first subtree.<p> 076 */ 077public class CmsLinkRewriter { 078 079 /** The logger instance for this class. */ 080 private static final Log LOG = CmsLog.getLog(CmsLinkRewriter.class); 081 082 /** A map from source folder structure ids to corresponding target folder resources. */ 083 protected Map<CmsUUID, CmsResource> m_translationsById = new HashMap<CmsUUID, CmsResource>(); 084 085 /** A map from source folder root paths to the corresponding target folder resources. */ 086 protected Map<String, CmsResource> m_translationsByPath = new HashMap<String, CmsResource>(); 087 088 /** A map of resources which have been cached by structure id. */ 089 private Map<CmsUUID, CmsResource> m_cachedResources = new HashMap<CmsUUID, CmsResource>(); 090 091 /** The CMS object used for file operations. */ 092 private CmsObject m_cms; 093 094 /** If true, all XML contents will be rewritten instead of just those containing links to correct. */ 095 private boolean m_rewriteAllXmlContents = true; 096 097 /** The set of structure ids of resources whose content has been rewritten. */ 098 private Set<CmsUUID> m_rewrittenContent = new HashSet<CmsUUID>(); 099 100 /** A list of path pairs, each containing a source and a target of a copy operation. */ 101 private List<CmsPair<String, String>> m_sourceTargetPairs = new ArrayList<CmsPair<String, String>>(); 102 103 /** The target folder root path. */ 104 private String m_targetPath; 105 106 /** 107 * Creates a link rewriter for use after a multi-copy operation.<p> 108 * 109 * @param cms the current CMS context 110 * @param sources the list of source root paths 111 * @param target the target parent folder root path 112 */ 113 public CmsLinkRewriter(CmsObject cms, List<String> sources, String target) { 114 115 m_sourceTargetPairs = new ArrayList<CmsPair<String, String>>(); 116 for (String source : sources) { 117 checkNotSubPath(source, target); 118 String targetSub = CmsStringUtil.joinPaths(target, CmsResource.getName(source)); 119 m_sourceTargetPairs.add(CmsPair.create(source, targetSub)); 120 } 121 m_targetPath = target; 122 m_cms = cms; 123 } 124 125 /** 126 * Creates a new link rewriter for a list of sources and corresponding targets.<p> 127 * 128 * @param cms the current CMS context 129 * @param targetPath the target root path 130 * @param sourceTargetPairs the list of source-target pairs 131 */ 132 public CmsLinkRewriter(CmsObject cms, String targetPath, List<CmsPair<String, String>> sourceTargetPairs) { 133 134 m_cms = cms; 135 m_targetPath = targetPath; 136 m_sourceTargetPairs = sourceTargetPairs; 137 } 138 139 /** 140 * Creates a link rewriter for use after a single copy operation.<p> 141 * 142 * @param cms the current CMS context 143 * @param source the source folder root path 144 * @param target the target folder root path 145 */ 146 public CmsLinkRewriter(CmsObject cms, String source, String target) { 147 148 m_sourceTargetPairs = new ArrayList<CmsPair<String, String>>(); 149 checkNotSubPath(source, target); 150 151 m_sourceTargetPairs.add(CmsPair.create(source, target)); 152 m_targetPath = target; 153 m_cms = cms; 154 } 155 156 /** 157 * Checks whether a given resource is a folder and throws an exception otherwise.<p> 158 * 159 * @param resource the resource to check 160 * @throws CmsException if something goes wrong 161 */ 162 protected static void checkIsFolder(CmsResource resource) throws CmsException { 163 164 if (!isFolder(resource)) { 165 throw new CmsIllegalArgumentException(Messages.get().container( 166 org.opencms.file.Messages.ERR_REWRITE_LINKS_ROOT_NOT_FOLDER_1, 167 resource.getRootPath())); 168 } 169 } 170 171 /** 172 * Helper method to check whether a given resource is a folder.<p> 173 * 174 * @param resource the resouce to check 175 * @return true if the resource is a folder 176 * 177 * @throws CmsLoaderException if the resource type couldn't be found 178 */ 179 protected static boolean isFolder(CmsResource resource) throws CmsLoaderException { 180 181 I_CmsResourceType resourceType = OpenCms.getResourceManager().getResourceType(resource.getTypeId()); 182 return resourceType.isFolder(); 183 } 184 185 /** 186 * Starts the link rewriting process.<p> 187 * 188 * @throws CmsException if something goes wrong 189 */ 190 public void rewriteLinks() throws CmsException { 191 192 init(); 193 List<CmsRelation> relationsToCorrect = findRelationsFromTargetToSource(); 194 // group relations by the structure id of their source 195 Multimap<CmsUUID, CmsRelation> relationsBySourceId = ArrayListMultimap.create(); 196 for (CmsRelation relation : relationsToCorrect) { 197 LOG.info( 198 "Found relation which needs to be corrected: " 199 + relation.getSourcePath() 200 + " -> " 201 + relation.getTargetPath() 202 + " [" 203 + relation.getType().getName() 204 + "]"); 205 relationsBySourceId.put(relation.getSourceId(), relation); 206 } 207 208 // make sure we have a lock on the target folder before doing any write operations 209 CmsLock lock = m_cms.getLock(m_targetPath); 210 if (lock.isUnlocked() || !lock.isOwnedBy(m_cms.getRequestContext().getCurrentUser())) { 211 // fail if locked by another user 212 m_cms.lockResource(m_targetPath); 213 } 214 215 for (CmsUUID structureId : relationsBySourceId.keySet()) { 216 217 Collection<CmsRelation> relationsForResource = relationsBySourceId.get(structureId); 218 CmsResource resource = null; 219 try { 220 resource = getResource(structureId); 221 rewriteLinks(resource, relationsForResource); 222 } catch (CmsException e) { 223 LOG.error(e.getLocalizedMessage(), e); 224 } 225 } 226 if (!m_rewriteAllXmlContents) { 227 return; 228 } 229 for (Map.Entry<CmsUUID, CmsResource> entry : m_cachedResources.entrySet()) { 230 CmsUUID key = entry.getKey(); 231 CmsResource resource = entry.getValue(); 232 if (isInTargets(resource.getRootPath()) && !m_rewrittenContent.contains(key)) { 233 I_CmsResourceType resType = OpenCms.getResourceManager().getResourceType(resource.getTypeId()); 234 // rewrite content for other files so 235 if (resType instanceof A_CmsResourceTypeLinkParseable) { 236 try { 237 CmsFile file = m_cms.readFile(resource); 238 m_cms.writeFile(file); 239 } catch (CmsException e) { 240 LOG.error(e.getLocalizedMessage(), e); 241 } 242 } 243 244 } 245 } 246 } 247 248 /** 249 * Sets the 'rewriteAllContents' flag, which controls whether all XML contents will be rewritten 250 * or just those whose links need to be corrected.<p> 251 * 252 * @param rewriteAllContents if true, all contents will be rewritten 253 */ 254 public void setRewriteAllContents(boolean rewriteAllContents) { 255 256 m_rewriteAllXmlContents = rewriteAllContents; 257 } 258 259 /** 260 * Checks that the target path is not a subfolder of the source path.<p> 261 * 262 * @param source the source path 263 * @param target the target path 264 */ 265 protected void checkNotSubPath(String source, String target) { 266 267 source = CmsStringUtil.joinPaths("/", source, "/"); 268 target = CmsStringUtil.joinPaths("/", target, "/"); 269 if (target.startsWith(source)) { 270 throw new CmsIllegalArgumentException( 271 org.opencms.file.Messages.get().container( 272 org.opencms.file.Messages.ERR_REWRITE_LINKS_ROOTS_DEPENDENT_2, 273 source, 274 target)); 275 } 276 } 277 278 /** 279 * Decodes a byte array into a string with a given encoding, or the default encoding if that fails.<p> 280 * 281 * @param bytes the byte array 282 * @param encoding the encoding to use 283 * 284 * @return the decoded string 285 */ 286 protected String decode(byte[] bytes, String encoding) { 287 288 try { 289 return new String(bytes, encoding); 290 } catch (UnsupportedEncodingException e) { 291 return new String(bytes); 292 } 293 } 294 295 /** 296 * Decodes a file's contents and return the content string and the encoding to use for writing the file 297 * back to the VFS.<p> 298 * 299 * @param file the file to decode 300 * @return a pair (content, encoding) 301 * @throws CmsException if something goes wrong 302 */ 303 protected CmsPair<String, String> decode(CmsFile file) throws CmsException { 304 305 String content = null; 306 String encoding = getConfiguredEncoding(m_cms, file); 307 I_CmsResourceType resType = OpenCms.getResourceManager().getResourceType(file.getTypeId()); 308 if (resType instanceof CmsResourceTypeJsp) { 309 content = decode(file.getContents(), encoding); 310 } else { 311 try { 312 CmsXmlEntityResolver resolver = new CmsXmlEntityResolver(m_cms); 313 // parse the XML and serialize it back to a string with the configured encoding 314 Document doc = CmsXmlUtils.unmarshalHelper(file.getContents(), resolver); 315 content = CmsXmlUtils.marshal(doc, encoding); 316 } catch (Exception e) { 317 // invalid xml structure, just use the configured encoding 318 content = decode(file.getContents(), encoding); 319 } 320 } 321 return CmsPair.create(content, encoding); 322 } 323 324 /** 325 * Finds relations from the target root folder or its children to the source root folder or its children.<p> 326 * 327 * @return the list of relations from the target to the source 328 * 329 * @throws CmsException if something goes wrong 330 */ 331 protected List<CmsRelation> findRelationsFromTargetToSource() throws CmsException { 332 333 List<CmsRelation> relations = m_cms.readRelations( 334 CmsRelationFilter.SOURCES.filterPath(m_targetPath).filterIncludeChildren()); 335 List<CmsRelation> result = new ArrayList<CmsRelation>(); 336 for (CmsRelation rel : relations) { 337 if (isInTargets(rel.getSourcePath()) && isInSources(rel.getTargetPath())) { 338 result.add(rel); 339 } 340 } 341 return result; 342 } 343 344 /** 345 * Gets the encoding which is configured at the location of a given resource.<p> 346 * 347 * @param cms the current CMS context 348 * @param resource the resource for which the configured encoding should be retrieved 349 * @return the configured encoding for the resource 350 * 351 * @throws CmsException if something goes wrong 352 */ 353 protected String getConfiguredEncoding(CmsObject cms, CmsResource resource) throws CmsException { 354 355 String encoding = null; 356 try { 357 encoding = cms.readPropertyObject( 358 resource.getRootPath(), 359 CmsPropertyDefinition.PROPERTY_CONTENT_ENCODING, 360 true).getValue(); 361 } catch (CmsException e) { 362 // encoding will be null 363 } 364 if (encoding == null) { 365 encoding = OpenCms.getSystemInfo().getDefaultEncoding(); 366 } else { 367 encoding = CmsEncoder.lookupEncoding(encoding, null); 368 if (encoding == null) { 369 throw new CmsXmlException( 370 Messages.get().container(Messages.ERR_XMLCONTENT_INVALID_ENC_1, resource.getRootPath())); 371 } 372 } 373 return encoding; 374 } 375 376 /** 377 * Gets a list of resource pairs whose paths relative to the source/target roots passed match.<p> 378 * 379 * @param source the source root 380 * @param target the target root 381 * 382 * @return the list of matching resources 383 * 384 * @throws CmsException if something goes wrong 385 */ 386 protected List<CmsPair<CmsResource, CmsResource>> getMatchingResources(String source, String target) 387 throws CmsException { 388 389 List<CmsResource> sourceResources = readTree(source); 390 Map<String, CmsResource> sourceRelative = getResourcesByRelativePath(sourceResources, source); 391 392 List<CmsResource> targetResources = readTree(target); 393 Map<String, CmsResource> targetRelative = getResourcesByRelativePath(targetResources, target); 394 395 List<CmsPair<CmsResource, CmsResource>> result = new ArrayList<CmsPair<CmsResource, CmsResource>>(); 396 sourceRelative.keySet().retainAll(targetRelative.keySet()); 397 for (Map.Entry<String, CmsResource> entry : sourceRelative.entrySet()) { 398 String key = entry.getKey(); 399 CmsResource sourceRes = entry.getValue(); 400 CmsResource targetRes = targetRelative.get(key); 401 result.add(CmsPair.create(sourceRes, targetRes)); 402 } 403 return result; 404 } 405 406 /** 407 * Computes the relative path given an ancestor folder path.<p> 408 * 409 * @param ancestor the ancestor folder 410 * @param rootPath the path for which the relative path should be computed 411 * 412 * @return the relative path 413 */ 414 protected String getRelativePath(String ancestor, String rootPath) { 415 416 String result = rootPath.substring(ancestor.length()); 417 result = CmsStringUtil.joinPaths("/", result, "/"); 418 return result; 419 } 420 421 /** 422 * Accesses a resource by structure id.<p> 423 * 424 * @param structureId the structure id of the resource 425 * @return the resource with the given structure id 426 * 427 * @throws CmsException if the resource couldn't be read 428 */ 429 protected CmsResource getResource(CmsUUID structureId) throws CmsException { 430 431 if (m_cachedResources.containsKey(structureId)) { 432 return m_cachedResources.get(structureId); 433 } 434 return m_cms.readResource(structureId); 435 } 436 437 /** 438 * Collects a list of resources in a map where the key for each resource is the path relative to a given folder.<p> 439 * 440 * @param resources the resources to put in the map 441 * @param basePath the path relative to which the keys of the resulting map should be computed 442 * 443 * @return a map from relative paths to resources 444 */ 445 protected Map<String, CmsResource> getResourcesByRelativePath(List<CmsResource> resources, String basePath) { 446 447 Map<String, CmsResource> result = new HashMap<String, CmsResource>(); 448 for (CmsResource resource : resources) { 449 String relativeSubPath = CmsStringUtil.getRelativeSubPath(basePath, resource.getRootPath()); 450 if (relativeSubPath != null) { 451 result.put(relativeSubPath, resource); 452 } 453 } 454 return result; 455 } 456 457 /** 458 * Reads the data needed for rewriting the relations from the VFS.<p> 459 * 460 * @throws CmsException if something goes wrong 461 */ 462 protected void init() throws CmsException { 463 464 m_cms = OpenCms.initCmsObject(m_cms); 465 // we want to use autocorrection when writing XML contents back 466 //m_cms.getRequestContext().setAttribute(CmsXmlContent.AUTO_CORRECTION_ATTRIBUTE, Boolean.TRUE); 467 m_cms.getRequestContext().setSiteRoot(""); 468 List<CmsPair<CmsResource, CmsResource>> allMatchingResources = Lists.newArrayList(); 469 for (CmsPair<String, String> pair : m_sourceTargetPairs) { 470 List<CmsPair<CmsResource, CmsResource>> matchingResources = getMatchingResources( 471 pair.getFirst(), 472 pair.getSecond()); 473 allMatchingResources.addAll(matchingResources); 474 } 475 for (CmsPair<CmsResource, CmsResource> resPair : allMatchingResources) { 476 CmsResource source = resPair.getFirst(); 477 CmsResource target = resPair.getSecond(); 478 m_translationsById.put(source.getStructureId(), target); 479 m_translationsByPath.put(source.getRootPath(), target); 480 } 481 } 482 483 /** 484 * Checks if a path belongs to one of the sources.<p> 485 * 486 * @param path a root path 487 * 488 * @return true if the path belongs to the sources 489 */ 490 protected boolean isInSources(String path) { 491 492 for (CmsPair<String, String> sourceTargetPair : m_sourceTargetPairs) { 493 String source = sourceTargetPair.getFirst(); 494 if (CmsStringUtil.joinPaths(path, "/").startsWith(CmsStringUtil.joinPaths(source, "/"))) { 495 return true; 496 } 497 } 498 return false; 499 } 500 501 /** 502 * Checks if a path belongs to one of the targets.<p> 503 * 504 * @param path a root path 505 * 506 * @return true if the path belongs to the targets 507 */ 508 protected boolean isInTargets(String path) { 509 510 for (CmsPair<String, String> sourceTargetPair : m_sourceTargetPairs) { 511 String target = sourceTargetPair.getSecond(); 512 if (CmsStringUtil.joinPaths(path, "/").startsWith(CmsStringUtil.joinPaths(target, "/"))) { 513 return true; 514 } 515 } 516 return false; 517 } 518 519 /** 520 * Reads the resources in a subtree.<p> 521 * 522 * @param rootPath the root of the subtree 523 * 524 * @return the list of resources from the subtree 525 * 526 * @throws CmsException if something goes wrong 527 */ 528 protected List<CmsResource> readTree(String rootPath) throws CmsException { 529 530 rootPath = CmsFileUtil.removeTrailingSeparator(rootPath); 531 CmsResource base = m_cms.readResource(rootPath); 532 533 I_CmsResourceType resType = OpenCms.getResourceManager().getResourceType(base); 534 List<CmsResource> result = new ArrayList<CmsResource>(); 535 if (resType.isFolder()) { 536 rootPath = CmsStringUtil.joinPaths(rootPath, "/"); 537 List<CmsResource> subResources = m_cms.readResources(rootPath, CmsResourceFilter.ALL, true); 538 result.add(base); 539 result.addAll(subResources); 540 } else { 541 result.add(base); 542 } 543 for (CmsResource resource : result) { 544 m_cachedResources.put(resource.getStructureId(), resource); 545 } 546 547 return result; 548 } 549 550 /** 551 * Rewrites the links included in the content itself.<p> 552 * 553 * @param file the file for which the links should be replaced 554 * @param relations the original relations 555 * 556 * @throws CmsException if something goes wrong 557 */ 558 protected void rewriteContent(CmsFile file, Collection<CmsRelation> relations) throws CmsException { 559 560 LOG.info("Rewriting in-content links for " + file.getRootPath()); 561 CmsPair<String, String> contentAndEncoding = decode(file); 562 String content = contentAndEncoding.getFirst(); 563 String encodingForSave = contentAndEncoding.getSecond(); 564 String newContent = rewriteContentString(content); 565 byte[] newContentBytes; 566 try { 567 newContentBytes = newContent.getBytes(encodingForSave); 568 } catch (UnsupportedEncodingException e) { 569 newContentBytes = newContent.getBytes(); 570 } 571 file.setContents(newContentBytes); 572 m_cms.writeFile(file); 573 } 574 575 /** 576 * Replaces structure ids of resources in the source subtree with the structure ids of the corresponding 577 * resources in the target subtree inside a content string.<p> 578 * 579 * @param originalContent the original content 580 * 581 * @return the content with the new structure ids 582 */ 583 protected String rewriteContentString(String originalContent) { 584 585 Pattern uuidPattern = Pattern.compile(CmsUUID.UUID_REGEX); 586 I_CmsRegexSubstitution substitution = new I_CmsRegexSubstitution() { 587 588 public String substituteMatch(String text, Matcher matcher) { 589 590 String uuidString = text.substring(matcher.start(), matcher.end()); 591 CmsUUID uuid = new CmsUUID(uuidString); 592 String result = uuidString; 593 if (m_translationsById.containsKey(uuid)) { 594 result = m_translationsById.get(uuid).getStructureId().toString(); 595 } 596 return result; 597 } 598 }; 599 return CmsStringUtil.substitute(uuidPattern, originalContent, substitution); 600 } 601 602 /** 603 * Rewrites the links for a single resource.<p> 604 * 605 * @param resource the resource for which the links should be rewritten 606 * @param relations the relations to the source folder which have this resource as its source 607 * 608 * @throws CmsException if something goes wrong 609 */ 610 protected void rewriteLinks(CmsResource resource, Collection<CmsRelation> relations) throws CmsException { 611 612 LOG.info("Rewriting relations for resource " + resource.getRootPath()); 613 I_CmsResourceType resourceType = OpenCms.getResourceManager().getResourceType(resource.getTypeId()); 614 boolean hasContentLinks = false; 615 boolean hasOtherLinks = false; 616 617 for (CmsRelation relation : relations) { 618 if (relation.getType().isDefinedInContent()) { 619 hasContentLinks = true; 620 } else { 621 hasOtherLinks = true; 622 } 623 } 624 if (hasContentLinks) { 625 LOG.info("The resource " + resource.getRootPath() + " has links in the content."); 626 } 627 if (hasOtherLinks) { 628 LOG.info("The resource " + resource.getRootPath() + " has non-content links."); 629 } 630 631 if (hasContentLinks) { 632 if (resourceType instanceof I_CmsLinkParseable) { 633 CmsFile file = m_cms.readFile(resource); 634 rewriteContent(file, relations); 635 m_rewrittenContent.add(file.getStructureId()); 636 } 637 } 638 if (hasOtherLinks) { 639 rewriteOtherRelations(resource, relations); 640 } 641 } 642 643 /** 644 * Rewrites relations which are not derived from links in the content itself.<p> 645 * 646 * @param res the resource for which to rewrite the relations 647 * @param relations the original relations 648 * 649 * @throws CmsException if something goes wrong 650 */ 651 protected void rewriteOtherRelations(CmsResource res, Collection<CmsRelation> relations) throws CmsException { 652 653 LOG.info("Rewriting non-content links for " + res.getRootPath()); 654 for (CmsRelation rel : relations) { 655 CmsUUID targetId = rel.getTargetId(); 656 CmsResource newTargetResource = m_translationsById.get(targetId); 657 CmsRelationType relType = rel.getType(); 658 if (!relType.isDefinedInContent()) { 659 if (newTargetResource != null) { 660 m_cms.deleteRelationsFromResource( 661 rel.getSourcePath(), 662 CmsRelationFilter.TARGETS.filterStructureId(rel.getTargetId()).filterType(relType)); 663 m_cms.addRelationToResource( 664 rel.getSourcePath(), 665 newTargetResource.getRootPath(), 666 relType.getName()); 667 } 668 } 669 } 670 } 671}