/*
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software GmbH & Co. KG, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package org.opencms.xml;

import org.opencms.file.CmsResource;
import org.opencms.main.CmsLog;
import org.opencms.util.CmsStringUtil;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

import javax.xml.parsers.SAXParserFactory;

import org.apache.commons.logging.Log;
import org.apache.xerces.parsers.SAXParser;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;

/**
 * Provides some basic XML handling utilities.<p>
 *
 * All methods are static; the class cannot be instantiated.<p>
 *
 * @since 6.0.0
 */
public final class CmsXmlUtils {

    /**
     * This class is only used to expose the XML parser configuration implementation name.<p>
     */
    private static class ParserImpl extends SAXParser {

        /**
         * Constructor.<p>
         */
        ParserImpl() {

            super();
        }

        /**
         * Returns the implementation name of the used XML parser configuration.<p>
         *
         * @return the implementation name, or <code>null</code> if no configuration is set
         */
        String getConfigImplName() {

            if (fConfiguration != null) {
                return fConfiguration.getClass().getName();
            } else {
                return null;
            }
        }
    }

    /** The log object for this class. */
    private static final Log LOG = CmsLog.getLog(CmsXmlUtils.class);

    /** Key of the SAX parser configuration system property. */
    private static final String SAX_PARSER_CONFIG_KEY = "org.apache.xerces.xni.parser.XMLParserConfiguration";

    /** Key of the SAX parser factory system property. */
    private static final String SAX_PARSER_FACTORY_KEY = "javax.xml.parsers.SAXParserFactory";

    /** Key of the XML reader system property. */
    private static final String XML_READER_KEY = "org.xml.sax.driver";

    /**
     * Prevents instances of this class from being generated.<p>
     */
    private CmsXmlUtils() {

        // noop
    }

    /**
     * Concatenates two Xpath expressions, ensuring that exactly one slash "/" is between them.<p>
     *
     * Use this method if it's uncertain if the given arguments are starting or ending with
     * a slash "/".<p>
     *
     * Examples:<br>
     * <code>"title", "subtitle"</code> becomes <code>title/subtitle</code><br>
     * <code>"title[1]/", "subtitle"</code> becomes <code>title[1]/subtitle</code><br>
     * <code>"title[1]/", "/subtitle[1]"</code> becomes <code>title[1]/subtitle[1]</code><p>
     *
     * A <code>null</code> suffix is treated like the empty String; if the prefix is <code>null</code>,
     * only the suffix (with one leading slash removed) is returned.<p>
     *
     * @param prefix the prefix Xpath
     * @param suffix the suffix Xpath
     *
     * @return the concatenated Xpath build from prefix and suffix
     */
    public static String concatXpath(String prefix, String suffix) {

        if (suffix == null) {
            // ensure suffix is not null
            suffix = "";
        } else {
            if ((suffix.length() > 0) && (suffix.charAt(0) == '/')) {
                // remove leading '/' from suffix
                suffix = suffix.substring(1);
            }
        }
        if (prefix != null) {
            StringBuilder result = new StringBuilder(32);
            result.append(prefix);
            // only add a separator if the prefix does not already provide one and there is something to append
            if (!CmsResource.isFolder(prefix) && (suffix.length() > 0)) {
                result.append('/');
            }
            result.append(suffix);
            return result.toString();
        }
        return suffix;
    }

    /**
     * Translates a simple lookup path to the simplified Xpath format used for
     * the internal bookmarks.<p>
     *
     * Examples:<br>
     * <code>title</code> becomes <code>title[1]</code><br>
     * <code>title[1]</code> is left untouched<br>
     * <code>title/subtitle</code> becomes <code>title[1]/subtitle[1]</code><br>
     * <code>title/subtitle[1]</code> becomes <code>title[1]/subtitle[1]</code><p>
     *
     * Note: If the name already has the format <code>title[1]</code> then provided index parameter
     * is ignored.<p>
     *
     * @param path the path to get the simplified Xpath for
     * @param index the index to append (if required)
     *
     * @return the simplified Xpath for the given name
     */
    public static String createXpath(String path, int index) {

        if (path.indexOf('/') > -1) {
            // this is a complex path over more than 1 node
            StringBuilder result = new StringBuilder(path.length() + 32);

            // split the path into sub elements
            List<String> elements = CmsStringUtil.splitAsList(path, '/');
            int end = elements.size() - 1;
            for (int i = 0; i <= end; i++) {
                // the provided index applies to the last element only, all parent elements default to [1]
                result.append(createXpathElementCheck(elements.get(i), (i == end) ? index : 1));
                if (i < end) {
                    // append path delimiter if not final path element
                    result.append('/');
                }
            }
            return result.toString();
        }

        // this path has only 1 node, append [index] if required
        return createXpathElementCheck(path, index);
    }

    /**
     * Appends the provided index parameter in square brackets to the given name,
     * like <code>path[index]</code>.<p>
     *
     * This method is used if it's clear that some path does not have
     * a square bracket already appended.<p>
     *
     * @param path the path to append the index to
     * @param index the index to append
     *
     * @return the simplified Xpath for the given name
     */
    public static String createXpathElement(String path, int index) {

        StringBuilder result = new StringBuilder(path.length() + 5);
        result.append(path);
        result.append('[');
        result.append(index);
        result.append(']');
        return result.toString();
    }

    /**
     * Ensures that a provided simplified Xpath has the format <code>title[1]</code>.<p>
     *
     * This method is used if it's uncertain if some path does have
     * a square bracket already appended or not.<p>
     *
     * Note: If the name already has the format <code>title[1]</code>, then provided index parameter
     * is ignored.<p>
     *
     * @param path the path to get the simplified Xpath for (must not be empty)
     * @param index the index to append (if required)
     *
     * @return the simplified Xpath for the given name
     */
    public static String createXpathElementCheck(String path, int index) {

        if (path.charAt(path.length() - 1) == ']') {
            // path is already in the form "title[1]"
            // ignore provided index and return the path "as is"
            return path;
        }

        // append index in square brackets
        return createXpathElement(path, index);
    }

    /**
     * Returns the first Xpath element from the provided path,
     * without the index value.<p>
     *
     * Examples:<br>
     * <code>title</code> is left untouched<br>
     * <code>title[1]</code> becomes <code>title</code><br>
     * <code>title/subtitle</code> becomes <code>title</code><br>
     * <code>title[1]/subtitle[1]</code> becomes <code>title</code><p>
     *
     * @param path the path to get the first Xpath element from
     *
     * @return the first Xpath element from the provided path
     */
    public static String getFirstXpathElement(String path) {

        int pos = path.indexOf('/');
        if (pos >= 0) {
            path = path.substring(0, pos);
        }

        return CmsXmlUtils.removeXpathIndex(path);
    }

    /**
     * Returns the last Xpath element from the provided path,
     * without the index value.<p>
     *
     * Examples:<br>
     * <code>title</code> is left untouched<br>
     * <code>title[1]</code> becomes <code>title</code><br>
     * <code>title/subtitle</code> becomes <code>subtitle</code><br>
     * <code>title[1]/subtitle[1]</code> becomes <code>subtitle</code><p>
     *
     * @param path the path to get the last Xpath element from
     *
     * @return the last Xpath element from the provided path
     */
    public static String getLastXpathElement(String path) {

        int pos = path.lastIndexOf('/');
        if (pos >= 0) {
            path = path.substring(pos + 1);
        }

        return CmsXmlUtils.removeXpathIndex(path);
    }

    /**
     * Returns the last Xpath element from the provided path, including its index (if present).<p>
     *
     * Examples:<br>
     * <code>title</code> is left untouched<br>
     * <code>title[1]/subtitle[1]</code> becomes <code>subtitle[1]</code><p>
     *
     * @param path the path to get the last Xpath element from
     *
     * @return the last Xpath element from the provided path
     */
    public static String getLastXpathElementWithIndex(String path) {

        int pos = path.lastIndexOf('/');
        if (pos >= 0) {
            path = path.substring(pos + 1);
        }
        return path;
    }

    /**
     * Returns the last Xpath index from the given path.<p>
     *
     * Examples:<br>
     * <code>title</code> returns the empty String<p>
     * <code>title[1]</code> returns <code>[1]</code><p>
     * <code>title/subtitle</code> returns the empty String<p>
     * <code>title[1]/subtitle[1]</code> returns <code>[1]</code><p>
     *
     * @param path the path to extract the Xpath index from
     *
     * @return the last Xpath index from the given path
     */
    public static String getXpathIndex(String path) {

        int pos1 = path.lastIndexOf('/');
        int pos2 = path.lastIndexOf('[');
        if ((pos2 < 0) || (pos1 > pos2)) {
            // no bracket at all, or the last bracket belongs to a parent element
            return "";
        }

        return path.substring(pos2);
    }

    /**
     * Returns the last Xpath index from the given path as integer.<p>
     *
     * Examples:<br>
     * <code>title</code> returns 1<p>
     * <code>title[1]</code> returns 1<p>
     * <code>title/subtitle</code> returns 1<p>
     * <code>title[1]/subtitle[2]</code> returns 2<p>
     *
     * If the index of the last element is missing, not closed by a "]" or not a valid integer,
     * the default value 1 is returned.<p>
     *
     * @param path the path to extract the Xpath index from
     *
     * @return the last Xpath index from the given path as integer
     */
    public static int getXpathIndexInt(String path) {

        int slashPos = path.lastIndexOf('/');
        int openPos = path.lastIndexOf('[');
        if ((openPos < 0) || (slashPos > openPos)) {
            // no bracket at all, or the last bracket belongs to a parent element
            return 1;
        }

        int closePos = path.lastIndexOf(']');
        if (closePos <= openPos) {
            // malformed index without a closing bracket, fall back to the default
            // instead of failing with a StringIndexOutOfBoundsException
            return 1;
        }

        try {
            return Integer.parseInt(path.substring(openPos + 1, closePos));
        } catch (NumberFormatException e) {
            // index is not a number, fall back to the default
            return 1;
        }
    }

    /**
     * Initializes XML processing system properties to avoid evaluating the XML parser and reader implementation each time an XML document is read.<p>
     * This is done for performance improvements only.<p>
     *
     * A system property that is already set is never overwritten.<p>
     */
    public static void initSystemProperties() {

        String implName;
        // initialize system properties
        if (System.getProperty(SAX_PARSER_FACTORY_KEY) == null) {
            implName = SAXParserFactory.newInstance().getClass().getName();
            LOG.info("Setting sax parser factory impl property to " + implName);
            System.setProperty(SAX_PARSER_FACTORY_KEY, implName);
        }
        if (System.getProperty(XML_READER_KEY) == null) {
            SAXReader reader = new SAXReader();
            try {
                implName = reader.getXMLReader().getClass().getName();
                LOG.info("Setting xml reader impl property to " + implName);
                System.setProperty(XML_READER_KEY, implName);
            } catch (SAXException e) {
                LOG.error("Error evaluating XMLReader impl.", e);
            }
        }
        if (System.getProperty(SAX_PARSER_CONFIG_KEY) == null) {
            ParserImpl saxParser = new ParserImpl();
            implName = saxParser.getConfigImplName();
            if (implName != null) {
                LOG.info("Setting xml parser configuration impl property to " + implName);
                System.setProperty(SAX_PARSER_CONFIG_KEY, implName);
            }
        }
    }

    /**
     * Returns <code>true</code> if the given path is a Xpath with
     * at least 2 elements.<p>
     *
     * Examples:<br>
     * <code>title</code> returns <code>false</code><br>
     * <code>title[1]</code> returns <code>false</code><br>
     * <code>title/subtitle</code> returns <code>true</code><br>
     * <code>title[1]/subtitle[1]</code> returns <code>true</code><p>
     *
     * Note: only the first "/" in the path is considered, and a "/" at the very first
     * position is not counted as an element separator.<p>
     *
     * @param path the path to check
     * @return true if the given path is a Xpath with at least 2 elements
     */
    public static boolean isDeepXpath(String path) {

        return path.indexOf('/') > 0;
    }

    /**
     * Marshals (writes) an XML document into an output stream using XML pretty-print formatting.<p>
     *
     * @param document the XML document to marshal
     * @param out the output stream to write to
     * @param encoding the encoding to use
     * @return the output stream with the xml content
     * @throws CmsXmlException if something goes wrong
     */
    public static OutputStream marshal(Document document, OutputStream out, String encoding) throws CmsXmlException {

        try {
            OutputFormat format = OutputFormat.createPrettyPrint();
            format.setEncoding(encoding);

            XMLWriter writer = new XMLWriter(out, format);
            writer.setEscapeText(false);

            writer.write(document);
            writer.close();

        } catch (Exception e) {
            throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e);
        }

        return out;
    }

    /**
     * Marshals (writes) an XML document to a String using XML pretty-print formatting.<p>
     *
     * @param document the XML document to marshal
     * @param encoding the encoding to use
     * @return the marshalled XML document
     * @throws CmsXmlException if something goes wrong
     */
    public static String marshal(Document document, String encoding) throws CmsXmlException {

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        marshal(document, out, encoding);
        try {
            return out.toString(encoding);
        } catch (UnsupportedEncodingException e) {
            throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_TO_STRING_0), e);
        }
    }

    /**
     * Marshals (writes) an XML node to a String using XML pretty-print formatting,
     * without an XML declaration.<p>
     *
     * @param node the XML node to marshal
     * @param encoding the encoding to use
     *
     * @return the string with the xml content
     *
     * @throws CmsXmlException if something goes wrong
     */
    public static String marshal(Node node, String encoding) throws CmsXmlException {

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try {
            OutputFormat format = OutputFormat.createPrettyPrint();
            format.setEncoding(encoding);
            format.setSuppressDeclaration(true);

            XMLWriter writer = new XMLWriter(out, format);
            writer.setEscapeText(false);

            writer.write(node);
            writer.close();

            // decode the bytes with the encoding configured on the output format
            // rather than with the platform default charset
            return out.toString(format.getEncoding());
        } catch (Exception e) {
            throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e);
        }
    }

    /**
     * Removes all Xpath indices from the given path.<p>
     *
     * Example:<br>
     * <code>title</code> is left untouched<br>
     * <code>title[1]</code> becomes <code>title</code><br>
     * <code>title/subtitle</code> is left untouched<br>
     * <code>title[1]/subtitle[1]</code> becomes <code>title/subtitle</code><p>
     *
     * Only purely numeric indices like <code>[12]</code> are removed.<p>
     *
     * @param path the path to remove the Xpath index from
     *
     * @return the path with all Xpath indices removed
     */
    public static String removeAllXpathIndices(String path) {

        return path.replaceAll("\\[[0-9]+\\]", "");
    }

    /**
     * Removes the first Xpath element from the path.<p>
     *
     * If the provided path does not contain a "/" character,
     * it is returned unchanged.<p>
     *
     * <p>Examples:<br>
     * <code>title</code> is left untouched<br>
     * <code>title[1]</code> is left untouched<br>
     * <code>title/subtitle</code> becomes <code>subtitle</code><br>
     * <code>title[1]/subtitle[1]</code> becomes <code>subtitle[1]</code><p>
     *
     * @param path the Xpath to remove the first element from
     *
     * @return the path with the first element removed
     */
    public static String removeFirstXpathElement(String path) {

        int pos = path.indexOf('/');
        if (pos < 0) {
            return path;
        }

        return path.substring(pos + 1);
    }

    /**
     * Removes the last complex Xpath element from the path.<p>
     *
     * The same as {@link #removeLastXpathElement(String)} but it works with more complex xpaths,
     * where a "/" may also occur inside a String literal enclosed in single quotes.<p>
     *
     * <p>Example:<br>
     * <code>system/backup[@date='23/10/2003']/resource[path='/a/b/c']</code> becomes <code>system/backup[@date='23/10/2003']</code><p>
     *
     * @param path the Xpath to remove the last element from
     *
     * @return the path with the last element removed
     */
    public static String removeLastComplexXpathElement(String path) {

        int pos = path.lastIndexOf('/');
        if (pos < 0) {
            return path;
        }
        // count ' chars that occur after the last slash
        // (the counter starts at -1 since the first loop iteration increments it before any quote is searched)
        int p = pos;
        int count = -1;
        while (p > 0) {
            count++;
            p = path.indexOf("\'", p + 1);
        }
        String parentPath = path.substring(0, pos);
        if ((count % 2) == 0) {
            // even number of quotes after the slash: the slash is not inside a quoted String
            return parentPath;
        }
        // odd number of quotes: the slash is part of a quoted String
        p = parentPath.lastIndexOf("'");
        if (p >= 0) {
            // cut off the path before the opening quote and try again
            return removeLastComplexXpathElement(parentPath.substring(0, p));
        }
        return parentPath;
    }

    /**
     * Removes the last Xpath element from the path.<p>
     *
     * If the provided path does not contain a "/" character,
     * it is returned unchanged.<p>
     *
     * <p>Examples:<br>
     * <code>title</code> is left untouched<br>
     * <code>title[1]</code> is left untouched<br>
     * <code>title/subtitle</code> becomes <code>title</code><br>
     * <code>title[1]/subtitle[1]</code> becomes <code>title[1]</code><p>
     *
     * @param path the Xpath to remove the last element from
     *
     * @return the path with the last element removed
     */
    public static String removeLastXpathElement(String path) {

        int pos = path.lastIndexOf('/');
        if (pos < 0) {
            return path;
        }

        return path.substring(0, pos);
    }

    /**
     * Removes all Xpath index information from the given input path.<p>
     *
     * Examples:<br>
     * <code>title</code> is left untouched<br>
     * <code>title[1]</code> becomes <code>title</code><br>
     * <code>title/subtitle</code> is left untouched<br>
     * <code>title[1]/subtitle[1]</code> becomes <code>title/subtitle</code><p>
     *
     * @param path the path to remove the Xpath index information from
     *
     * @return the simplified Xpath for the given name
     */
    public static String removeXpath(String path) {

        if (path.indexOf('/') > -1) {
            // this is a complex path over more than 1 node
            StringBuilder result = new StringBuilder(path.length() + 32);

            // split the path into sub-elements
            List<String> elements = CmsStringUtil.splitAsList(path, '/');
            int end = elements.size() - 1;
            for (int i = 0; i <= end; i++) {
                // remove [i] from path element if required
                result.append(removeXpathIndex(elements.get(i)));
                if (i < end) {
                    // append path delimiter if not final path element
                    result.append('/');
                }
            }
            return result.toString();
        }

        // this path has only 1 node, remove last index if required
        return removeXpathIndex(path);
    }

    /**
     * Removes the last Xpath index from the given path.<p>
     *
     * Examples:<br>
     * <code>title</code> is left untouched<br>
     * <code>title[1]</code> becomes <code>title</code><br>
     * <code>title/subtitle</code> is left untouched<br>
     * <code>title[1]/subtitle[1]</code> becomes <code>title[1]/subtitle</code><p>
     *
     * @param path the path to remove the Xpath index from
     *
     * @return the path with the last Xpath index removed
     */
    public static String removeXpathIndex(String path) {

        int pos1 = path.lastIndexOf('/');
        int pos2 = path.lastIndexOf('[');
        if ((pos2 < 0) || (pos1 > pos2)) {
            // no bracket at all, or the last bracket belongs to a parent element
            return path;
        }

        return path.substring(0, pos2);
    }

    /**
     * Simplifies an Xpath by removing a leading and a trailing slash from the given path.<p>
     *
     * Examples:<br>
     * <code>title/</code> becomes <code>title</code><br>
     * <code>/title[1]/</code> becomes <code>title[1]</code><br>
     * <code>/title/subtitle/</code> becomes <code>title/subtitle</code><br>
     * <code>/title/subtitle[1]/</code> becomes <code>title/subtitle[1]</code><p>
     *
     * The empty String and a path consisting of a single slash both result in the empty String.<p>
     *
     * @param path the path to process
     * @return the input with a leading and a trailing slash removed
     */
    public static String simplifyXpath(String path) {

        int start = 0;
        int end = path.length();
        if ((end > start) && (path.charAt(start) == '/')) {
            // skip one leading slash
            start++;
        }
        // the length check prevents reading outside of the remaining range for "" and "/"
        if ((end > start) && (path.charAt(end - 1) == '/')) {
            // skip one trailing slash
            end--;
        }
        return path.substring(start, end);
    }

    /**
     * Splits a content value path into its components, ignoring leading or trailing slashes.<p>
     *
     * Note: this does not work for XPaths in general, only for the paths used to identify values in OpenCms contents.<p>
     *
     * @param xpath the xpath
     *
     * @return the path components
     */
    public static List<String> splitXpath(String xpath) {

        return Arrays.stream(xpath.split("/")).filter(s -> !s.isEmpty()).collect(Collectors.toList());

    }

    /**
     * Helper to unmarshal (read) xml contents from a byte array into a document.<p>
     *
     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
     *
     * @param xmlData the XML data in a byte array
     * @param resolver the XML entity resolver to use
     *
     * @return the base object initialized with the unmarshalled XML document
     *
     * @throws CmsXmlException if something goes wrong
     *
     * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver)
     */
    public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver) throws CmsXmlException {

        return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver);
    }

    /**
     * Helper to unmarshal (read) xml contents from a byte array into a document.<p>
     *
     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
     *
     * @param xmlData the XML data in a byte array
     * @param resolver the XML entity resolver to use
     * @param validate if the reader should try to validate the xml code
     *
     * @return the base object initialized with the unmarshalled XML document
     *
     * @throws CmsXmlException if something goes wrong
     *
     * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver)
     */
    public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver, boolean validate)
    throws CmsXmlException {

        return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver, validate);
    }

    /**
     * Helper to unmarshal (read) xml contents from an input source into a document,
     * without validation.<p>
     *
     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
     *
     * Important: The encoding used during unmarshalling is determined by the XML parser
     * on the base of the information in the source.<p>
     *
     * @param source the XML input source to use
     * @param resolver the XML entity resolver to use
     *
     * @return the unmarshalled XML document
     *
     * @throws CmsXmlException if something goes wrong
     */
    public static Document unmarshalHelper(InputSource source, EntityResolver resolver) throws CmsXmlException {

        return unmarshalHelper(source, resolver, false);
    }

    /**
     * Helper to unmarshal (read) xml contents from an input source into a document.<p>
     *
     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
     *
     * Important: The encoding used during unmarshalling is determined by the XML parser
     * on the base of the information in the source.<p>
     *
     * @param source the XML input source to use (must not be <code>null</code>)
     * @param resolver the XML entity resolver to use (may be <code>null</code>)
     * @param validate if the reader should try to validate the xml code
     *
     * @return the unmarshalled XML document
     *
     * @throws CmsXmlException if the source is <code>null</code> or the document can not be read
     */
    public static Document unmarshalHelper(InputSource source, EntityResolver resolver, boolean validate)
    throws CmsXmlException {

        if (null == source) {
            throw new CmsXmlException(Messages.get().container(Messages.ERR_UNMARSHALLING_XML_DOC_1, "source==null!"));
        }

        try {
            SAXReader reader = new SAXReader();
            if (resolver != null) {
                reader.setEntityResolver(resolver);
            }
            reader.setMergeAdjacentText(true);
            reader.setStripWhitespaceText(true);
            if (!validate) {
                reader.setValidation(false);
                // without validation there is no need to load the external DTD
                reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
            } else {
                reader.setValidation(true);
            }
            return reader.read(source);
        } catch (DocumentException | SAXException e) {
            // the source has been checked for null above, so the system id can be read directly
            throw new CmsXmlException(
                Messages.get().container(
                    Messages.ERR_UNMARSHALLING_XML_DOC_1,
                    "(systemId = " + source.getSystemId() + ")"),
                e);
        }
    }

    /**
     * Helper to unmarshal (read) xml contents from a String into a document.<p>
     *
     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
     *
     * @param xmlData the xml data in a String
     * @param resolver the XML entity resolver to use
     * @return the base object initialized with the unmarshalled XML document
     * @throws CmsXmlException if something goes wrong
     * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver)
     */
    public static Document unmarshalHelper(String xmlData, EntityResolver resolver) throws CmsXmlException {

        return CmsXmlUtils.unmarshalHelper(new InputSource(new StringReader(xmlData)), resolver);
    }

    /**
     * Validates the structure of a XML document contained in a byte array
     * with the DTD or XML schema used by the document.<p>
     *
     * @param xmlData a byte array containing a XML document that should be validated
     * @param resolver the XML entity resolver to use
     *
     * @throws CmsXmlException if the validation fails
     */
    public static void validateXmlStructure(byte[] xmlData, EntityResolver resolver) throws CmsXmlException {

        validateXmlStructure(new ByteArrayInputStream(xmlData), resolver);
    }

    /**
     * Validates the structure of a XML document with the DTD or XML schema used
     * by the document.<p>
     *
     * @param document a XML document that should be validated
     * @param encoding the encoding to use when marshalling the XML document (required)
     * @param resolver the XML entity resolver to use
     *
     * @throws CmsXmlException if the validation fails
     */
    public static void validateXmlStructure(Document document, String encoding, EntityResolver resolver)
    throws CmsXmlException {

        // generate bytes from document
        byte[] xmlData = ((ByteArrayOutputStream)marshal(
            document,
            new ByteArrayOutputStream(512),
            encoding)).toByteArray();
        validateXmlStructure(xmlData, resolver);
    }

    /**
     * Validates the structure of a XML document provided by an input stream
     * with the DTD or XML schema used by the document.<p>
     *
     * If the Xerces SAX parser can not be created or configured, or if reading / parsing the stream
     * fails with an exception, the problem is only logged and no validation takes place.<p>
     *
     * @param xmlStream a source providing a XML document that should be validated
     * @param resolver the XML entity resolver to use
     *
     * @throws CmsXmlException if the validation fails
     */
    public static void validateXmlStructure(InputStream xmlStream, EntityResolver resolver) throws CmsXmlException {

        XMLReader reader;
        try {
            reader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
        } catch (SAXException e) {
            // xerces parser not available - no schema validation possible
            if (LOG.isWarnEnabled()) {
                LOG.warn(Messages.get().getBundle().key(Messages.LOG_VALIDATION_INIT_XERXES_SAX_READER_FAILED_0), e);
            }
            // no validation of the content is possible
            return;
        }
        // turn on validation
        try {
            reader.setFeature("http://xml.org/sax/features/validation", true);
            // turn on schema validation
            reader.setFeature("http://apache.org/xml/features/validation/schema", true);
            // configure namespace support
            reader.setFeature("http://xml.org/sax/features/namespaces", true);
            reader.setFeature("http://xml.org/sax/features/namespace-prefixes", false);
        } catch (SAXNotRecognizedException e) {
            // should not happen as Xerces 2 supports this feature
            if (LOG.isWarnEnabled()) {
                LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_RECOGNIZED_0), e);
            }
            // no validation of the content is possible
            return;
        } catch (SAXNotSupportedException e) {
            // should not happen as Xerces 2 supports this feature
            if (LOG.isWarnEnabled()) {
                LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_SUPPORTED_0), e);
            }
            // no validation of the content is possible
            return;
        }

        // add an error handler which turns any errors into XML
        CmsXmlValidationErrorHandler errorHandler = new CmsXmlValidationErrorHandler();
        reader.setErrorHandler(errorHandler);

        if (resolver != null) {
            // set the resolver for the "opencms://" URIs
            reader.setEntityResolver(resolver);
        }

        try {
            reader.parse(new InputSource(xmlStream));
        } catch (IOException e) {
            // should not happen since we read from a byte array
            if (LOG.isErrorEnabled()) {
                LOG.error(Messages.get().getBundle().key(Messages.LOG_READ_XML_FROM_BYTE_ARR_FAILED_0), e);
            }
            return;
        } catch (SAXException e) {
            // should not happen since all errors are handled in the XML error handler
            if (LOG.isErrorEnabled()) {
                LOG.error(Messages.get().getBundle().key(Messages.LOG_PARSE_SAX_EXC_0), e);
            }
            return;
        }

        if (!errorHandler.getErrors().elements().isEmpty()) {
            // there was at least one validation error, so throw an exception
            StringWriter out = new StringWriter(256);
            OutputFormat format = OutputFormat.createPrettyPrint();
            XMLWriter writer = new XMLWriter(out, format);
            try {
                writer.write(errorHandler.getErrors());
                writer.write(errorHandler.getWarnings());
                writer.close();
            } catch (IOException e) {
                // should not happen since we write to a StringWriter
                if (LOG.isErrorEnabled()) {
                    LOG.error(Messages.get().getBundle().key(Messages.LOG_STRINGWRITER_IO_EXC_0), e);
                }
            }
            // generate String from XML for display of document in error message
            throw new CmsXmlException(Messages.get().container(Messages.ERR_XML_VALIDATION_1, out.toString()));
        }
    }
}