001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software GmbH & Co. KG, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.xml; 029 030import org.opencms.file.CmsResource; 031import org.opencms.main.CmsLog; 032import org.opencms.util.CmsStringUtil; 033 034import java.io.ByteArrayInputStream; 035import java.io.ByteArrayOutputStream; 036import java.io.IOException; 037import java.io.InputStream; 038import java.io.OutputStream; 039import java.io.StringReader; 040import java.io.StringWriter; 041import java.io.UnsupportedEncodingException; 042import java.util.Arrays; 043import java.util.List; 044import java.util.stream.Collectors; 045 046import javax.xml.parsers.SAXParserFactory; 047 048import org.apache.commons.logging.Log; 049import org.apache.xerces.parsers.SAXParser; 050 051import org.dom4j.Document; 052import org.dom4j.DocumentException; 053import org.dom4j.Node; 054import org.dom4j.io.OutputFormat; 055import org.dom4j.io.SAXReader; 056import org.dom4j.io.XMLWriter; 057import org.xml.sax.EntityResolver; 058import org.xml.sax.InputSource; 059import org.xml.sax.SAXException; 060import org.xml.sax.SAXNotRecognizedException; 061import org.xml.sax.SAXNotSupportedException; 062import org.xml.sax.XMLReader; 063import org.xml.sax.helpers.XMLReaderFactory; 064 065/** 066 * Provides some basic XML handling utilities.<p> 067 * 068 * @since 6.0.0 069 */ 070public final class CmsXmlUtils { 071 072 /** 073 * This class is only used to expose the XML parser configuration implementation name.<p> 074 */ 075 private static class ParserImpl extends SAXParser { 076 077 /** 078 * Constructor.<p> 079 */ 080 ParserImpl() { 081 082 super(); 083 } 084 085 /** 086 * Returns the implementation name of the used XML parser configuration.<p> 087 * 088 * @return the implementation name 089 */ 090 String getConfigImplName() { 091 092 if (fConfiguration != null) { 093 return fConfiguration.getClass().getName(); 094 } else { 095 return null; 096 } 097 } 098 } 099 100 /** The log object for this class. */ 101 private static final Log LOG = CmsLog.getLog(CmsXmlUtils.class); 102 103 /** Key of the SAX parser configuration system property. */ 104 private static final String SAX_PARSER_CONFIG_KEY = "org.apache.xerces.xni.parser.XMLParserConfiguration"; 105 106 /** Key of the SAX parser factory system property. */ 107 private static final String SAX_PARSER_FACTORY_KEY = "javax.xml.parsers.SAXParserFactory"; 108 109 /** Key of the XML reader system property. */ 110 private static final String XML_READER_KEY = "org.xml.sax.driver"; 111 112 /** 113 * Prevents instances of this class from being generated.<p> 114 */ 115 private CmsXmlUtils() { 116 117 // noop 118 } 119 120 /** 121 * Concatenates two Xpath expressions, ensuring that exactly one slash "/" is between them.<p> 122 * 123 * Use this method if it's uncertain if the given arguments are starting or ending with 124 * a slash "/".<p> 125 * 126 * Examples:<br> 127 * <code>"title", "subtitle"</code> becomes <code>title/subtitle</code><br> 128 * <code>"title[1]/", "subtitle"</code> becomes <code>title[1]/subtitle</code><br> 129 * <code>"title[1]/", "/subtitle[1]"</code> becomes <code>title[1]/subtitle[1]</code><p> 130 * 131 * @param prefix the prefix Xpath 132 * @param suffix the suffix Xpath 133 * 134 * @return the concatenated Xpath build from prefix and suffix 135 */ 136 public static String concatXpath(String prefix, String suffix) { 137 138 if (suffix == null) { 139 // ensure suffix is not null 140 suffix = ""; 141 } else { 142 if ((suffix.length() > 0) && (suffix.charAt(0) == '/')) { 143 // remove leading '/' form suffix 144 suffix = suffix.substring(1); 145 } 146 } 147 if (prefix != null) { 148 StringBuffer result = new StringBuffer(32); 149 result.append(prefix); 150 if (!CmsResource.isFolder(prefix) && (suffix.length() > 0)) { 151 result.append('/'); 152 } 153 result.append(suffix); 154 return result.toString(); 155 } 156 return suffix; 157 } 158 159 /** 160 * Translates a simple lookup path to the simplified Xpath format used for 161 * the internal bookmarks.<p> 162 * 163 * Examples:<br> 164 * <code>title</code> becomes <code>title[1]</code><br> 165 * <code>title[1]</code> is left untouched<br> 166 * <code>title/subtitle</code> becomes <code>title[1]/subtitle[1]</code><br> 167 * <code>title/subtitle[1]</code> becomes <code>title[1]/subtitle[1]</code><p> 168 * 169 * Note: If the name already has the format <code>title[1]</code> then provided index parameter 170 * is ignored.<p> 171 * 172 * @param path the path to get the simplified Xpath for 173 * @param index the index to append (if required) 174 * 175 * @return the simplified Xpath for the given name 176 */ 177 public static String createXpath(String path, int index) { 178 179 if (path.indexOf('/') > -1) { 180 // this is a complex path over more then 1 node 181 StringBuffer result = new StringBuffer(path.length() + 32); 182 183 // split the path into sub elements 184 List<String> elements = CmsStringUtil.splitAsList(path, '/'); 185 int end = elements.size() - 1; 186 for (int i = 0; i <= end; i++) { 187 // append [i] to path element if required 188 result.append(createXpathElementCheck(elements.get(i), (i == end) ? index : 1)); 189 if (i < end) { 190 // append path delimiter if not final path element 191 result.append('/'); 192 } 193 } 194 return result.toString(); 195 } 196 197 // this path has only 1 node, append [index] if required 198 return createXpathElementCheck(path, index); 199 } 200 201 /** 202 * Appends the provided index parameter in square brackets to the given name, 203 * like <code>path[index]</code>.<p> 204 * 205 * This method is used if it's clear that some path does not have 206 * a square bracket already appended.<p> 207 * 208 * @param path the path append the index to 209 * @param index the index to append 210 * 211 * @return the simplified Xpath for the given name 212 */ 213 public static String createXpathElement(String path, int index) { 214 215 StringBuffer result = new StringBuffer(path.length() + 5); 216 result.append(path); 217 result.append('['); 218 result.append(index); 219 result.append(']'); 220 return result.toString(); 221 } 222 223 /** 224 * Ensures that a provided simplified Xpath has the format <code>title[1]</code>.<p> 225 * 226 * This method is used if it's uncertain if some path does have 227 * a square bracket already appended or not.<p> 228 * 229 * Note: If the name already has the format <code>title[1]</code>, then provided index parameter 230 * is ignored.<p> 231 * 232 * @param path the path to get the simplified Xpath for 233 * @param index the index to append (if required) 234 * 235 * @return the simplified Xpath for the given name 236 */ 237 public static String createXpathElementCheck(String path, int index) { 238 239 if (path.charAt(path.length() - 1) == ']') { 240 // path is already in the form "title[1]" 241 // ignore provided index and return the path "as is" 242 return path; 243 } 244 245 // append index in square brackets 246 return createXpathElement(path, index); 247 } 248 249 /** 250 * Returns the first Xpath element from the provided path, 251 * without the index value.<p> 252 * 253 * Examples:<br> 254 * <code>title</code> is left untouched<br> 255 * <code>title[1]</code> becomes <code>title</code><br> 256 * <code>title/subtitle</code> becomes <code>title</code><br> 257 * <code>title[1]/subtitle[1]</code> becomes <code>title</code><p> 258 * 259 * @param path the path to get the first Xpath element from 260 * 261 * @return the first Xpath element from the provided path 262 */ 263 public static String getFirstXpathElement(String path) { 264 265 int pos = path.indexOf('/'); 266 if (pos >= 0) { 267 path = path.substring(0, pos); 268 } 269 270 return CmsXmlUtils.removeXpathIndex(path); 271 } 272 273 /** 274 * Returns the last Xpath element from the provided path, 275 * without the index value.<p> 276 * 277 * Examples:<br> 278 * <code>title</code> is left untouched<br> 279 * <code>title[1]</code> becomes <code>title</code><br> 280 * <code>title/subtitle</code> becomes <code>subtitle</code><br> 281 * <code>title[1]/subtitle[1]</code> becomes <code>subtitle</code><p> 282 * 283 * @param path the path to get the last Xpath element from 284 * 285 * @return the last Xpath element from the provided path 286 */ 287 public static String getLastXpathElement(String path) { 288 289 int pos = path.lastIndexOf('/'); 290 if (pos >= 0) { 291 path = path.substring(pos + 1); 292 } 293 294 return CmsXmlUtils.removeXpathIndex(path); 295 } 296 297 /** 298 * Returns the last Xpath index from the given path.<p> 299 * 300 * Examples:<br> 301 * <code>title</code> returns the empty String<p> 302 * <code>title[1]</code> returns <code>[1]</code><p> 303 * <code>title/subtitle</code> returns them empty String<p> 304 * <code>title[1]/subtitle[1]</code> returns <code>[1]</code><p> 305 * 306 * @param path the path to extract the Xpath index from 307 * 308 * @return the last Xpath index from the given path 309 */ 310 public static String getXpathIndex(String path) { 311 312 int pos1 = path.lastIndexOf('/'); 313 int pos2 = path.lastIndexOf('['); 314 if ((pos2 < 0) || (pos1 > pos2)) { 315 return ""; 316 } 317 318 return path.substring(pos2); 319 } 320 321 /** 322 * Returns the last Xpath index from the given path as integer.<p> 323 * 324 * Examples:<br> 325 * <code>title</code> returns 1<p> 326 * <code>title[1]</code> returns 1<p> 327 * <code>title/subtitle</code> returns 1<p> 328 * <code>title[1]/subtitle[2]</code> returns 2<p> 329 * 330 * @param path the path to extract the Xpath index from 331 * 332 * @return the last Xpath index from the given path as integer 333 */ 334 public static int getXpathIndexInt(String path) { 335 336 int pos1 = path.lastIndexOf('/'); 337 int pos2 = path.lastIndexOf('['); 338 if ((pos2 < 0) || (pos1 > pos2)) { 339 return 1; 340 } 341 342 String idxStr = path.substring(pos2 + 1, path.lastIndexOf(']')); 343 try { 344 return Integer.parseInt(idxStr); 345 } catch (NumberFormatException e) { 346 // NOOP 347 } 348 return 1; 349 } 350 351 /** 352 * Initializes XML processing system properties to avoid evaluating the XML parser and reader implementation each time an XML document is read.<p> 353 * This is done for performance improvements only.<p> 354 */ 355 public static void initSystemProperties() { 356 357 String implName; 358 // initialize system properties 359 if (System.getProperty(SAX_PARSER_FACTORY_KEY) == null) { 360 implName = SAXParserFactory.newInstance().getClass().getName(); 361 LOG.info("Setting sax parser factory impl property to " + implName); 362 System.setProperty(SAX_PARSER_FACTORY_KEY, implName); 363 } 364 if (System.getProperty(XML_READER_KEY) == null) { 365 SAXReader reader = new SAXReader(); 366 try { 367 implName = reader.getXMLReader().getClass().getName(); 368 LOG.info("Setting xml reader impl property to " + implName); 369 System.setProperty(XML_READER_KEY, implName); 370 } catch (SAXException e) { 371 LOG.error("Error evaluating XMLReader impl.", e); 372 } 373 } 374 if (System.getProperty(SAX_PARSER_CONFIG_KEY) == null) { 375 ParserImpl saxParser = new ParserImpl(); 376 implName = saxParser.getConfigImplName(); 377 if (implName != null) { 378 LOG.info("Setting xml parser configuration impl property to " + implName); 379 System.setProperty(SAX_PARSER_CONFIG_KEY, implName); 380 } 381 } 382 } 383 384 /** 385 * Returns <code>true</code> if the given path is a Xpath with 386 * at least 2 elements.<p> 387 * 388 * Examples:<br> 389 * <code>title</code> returns <code>false</code><br> 390 * <code>title[1]</code> returns <code>false</code><br> 391 * <code>title/subtitle</code> returns <code>true</code><br> 392 * <code>title[1]/subtitle[1]</code> returns <code>true</code><p> 393 * 394 * @param path the path to check 395 * @return true if the given path is a Xpath with at least 2 elements 396 */ 397 public static boolean isDeepXpath(String path) { 398 399 return path.indexOf('/') > 0; 400 } 401 402 /** 403 * Marshals (writes) an XML document into an output stream using XML pretty-print formatting.<p> 404 * 405 * @param document the XML document to marshal 406 * @param out the output stream to write to 407 * @param encoding the encoding to use 408 * @return the output stream with the xml content 409 * @throws CmsXmlException if something goes wrong 410 */ 411 public static OutputStream marshal(Document document, OutputStream out, String encoding) throws CmsXmlException { 412 413 try { 414 OutputFormat format = OutputFormat.createPrettyPrint(); 415 format.setEncoding(encoding); 416 417 XMLWriter writer = new XMLWriter(out, format); 418 writer.setEscapeText(false); 419 420 writer.write(document); 421 writer.close(); 422 423 } catch (Exception e) { 424 throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e); 425 } 426 427 return out; 428 } 429 430 /** 431 * Marshals (writes) an XML document to a String using XML pretty-print formatting.<p> 432 * 433 * @param document the XML document to marshal 434 * @param encoding the encoding to use 435 * @return the marshalled XML document 436 * @throws CmsXmlException if something goes wrong 437 */ 438 public static String marshal(Document document, String encoding) throws CmsXmlException { 439 440 ByteArrayOutputStream out = new ByteArrayOutputStream(); 441 marshal(document, out, encoding); 442 try { 443 return out.toString(encoding); 444 } catch (UnsupportedEncodingException e) { 445 throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_TO_STRING_0), e); 446 } 447 } 448 449 /** 450 * Marshals (writes) an XML node into an output stream using XML pretty-print formatting.<p> 451 * 452 * @param node the XML node to marshal 453 * @param encoding the encoding to use 454 * 455 * @return the string with the xml content 456 * 457 * @throws CmsXmlException if something goes wrong 458 */ 459 public static String marshal(Node node, String encoding) throws CmsXmlException { 460 461 ByteArrayOutputStream out = new ByteArrayOutputStream(); 462 try { 463 OutputFormat format = OutputFormat.createPrettyPrint(); 464 format.setEncoding(encoding); 465 format.setSuppressDeclaration(true); 466 467 XMLWriter writer = new XMLWriter(out, format); 468 writer.setEscapeText(false); 469 470 writer.write(node); 471 writer.close(); 472 } catch (Exception e) { 473 throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e); 474 } 475 return new String(out.toByteArray()); 476 } 477 478 /** 479 * Removes all Xpath indices from the given path.<p> 480 * 481 * Example:<br> 482 * <code>title</code> is left untouched<br> 483 * <code>title[1]</code> becomes <code>title</code><br> 484 * <code>title/subtitle</code> is left untouched<br> 485 * <code>title[1]/subtitle[1]</code> becomes <code>title/subtitle</code><p> 486 * 487 * @param path the path to remove the Xpath index from 488 * 489 * @return the path with all Xpath indices removed 490 */ 491 public static String removeAllXpathIndices(String path) { 492 493 return path.replaceAll("\\[[0-9]+\\]", ""); 494 } 495 496 /** 497 * Removes the first Xpath element from the path.<p> 498 * 499 * If the provided path does not contain a "/" character, 500 * it is returned unchanged.<p> 501 * 502 * <p>Examples:<br> 503 * <code>title</code> is left untouched<br> 504 * <code>title[1]</code> is left untouched<br> 505 * <code>title/subtitle</code> becomes <code>subtitle</code><br> 506 * <code>title[1]/subtitle[1]</code> becomes <code>subtitle[1]</code><p> 507 * 508 * @param path the Xpath to remove the first element from 509 * 510 * @return the path with the first element removed 511 */ 512 public static String removeFirstXpathElement(String path) { 513 514 int pos = path.indexOf('/'); 515 if (pos < 0) { 516 return path; 517 } 518 519 return path.substring(pos + 1); 520 } 521 522 /** 523 * Removes the last complex Xpath element from the path.<p> 524 * 525 * The same as {@link #removeLastXpathElement(String)} both it works with more complex xpaths. 526 * 527 * <p>Example:<br> 528 * <code>system/backup[@date='23/10/2003']/resource[path='/a/b/c']</code> becomes <code>system/backup[@date='23/10/2003']</code><p> 529 * 530 * @param path the Xpath to remove the last element from 531 * 532 * @return the path with the last element removed 533 */ 534 public static String removeLastComplexXpathElement(String path) { 535 536 int pos = path.lastIndexOf('/'); 537 if (pos < 0) { 538 return path; 539 } 540 // count ' chars 541 int p = pos; 542 int count = -1; 543 while (p > 0) { 544 count++; 545 p = path.indexOf("\'", p + 1); 546 } 547 String parentPath = path.substring(0, pos); 548 if ((count % 2) == 0) { 549 // if substring is complete 550 return parentPath; 551 } 552 // if not complete 553 p = parentPath.lastIndexOf("'"); 554 if (p >= 0) { 555 // complete it if possible 556 return removeLastComplexXpathElement(parentPath.substring(0, p)); 557 } 558 return parentPath; 559 } 560 561 /** 562 * Removes the last Xpath element from the path.<p> 563 * 564 * If the provided path does not contain a "/" character, 565 * it is returned unchanged.<p> 566 * 567 * <p>Examples:<br> 568 * <code>title</code> is left untouched<br> 569 * <code>title[1]</code> is left untouched<br> 570 * <code>title/subtitle</code> becomes <code>title</code><br> 571 * <code>title[1]/subtitle[1]</code> becomes <code>title[1]</code><p> 572 * 573 * @param path the Xpath to remove the last element from 574 * 575 * @return the path with the last element removed 576 */ 577 public static String removeLastXpathElement(String path) { 578 579 int pos = path.lastIndexOf('/'); 580 if (pos < 0) { 581 return path; 582 } 583 584 return path.substring(0, pos); 585 } 586 587 /** 588 * Removes all Xpath index information from the given input path.<p> 589 * 590 * Examples:<br> 591 * <code>title</code> is left untouched<br> 592 * <code>title[1]</code> becomes <code>title</code><br> 593 * <code>title/subtitle</code> is left untouched<br> 594 * <code>title[1]/subtitle[1]</code> becomes <code>title/subtitle</code><p> 595 * 596 * @param path the path to remove the Xpath index information from 597 * 598 * @return the simplified Xpath for the given name 599 */ 600 public static String removeXpath(String path) { 601 602 if (path.indexOf('/') > -1) { 603 // this is a complex path over more then 1 node 604 StringBuffer result = new StringBuffer(path.length() + 32); 605 606 // split the path into sub-elements 607 List<String> elements = CmsStringUtil.splitAsList(path, '/'); 608 int end = elements.size() - 1; 609 for (int i = 0; i <= end; i++) { 610 // remove [i] from path element if required 611 result.append(removeXpathIndex(elements.get(i))); 612 if (i < end) { 613 // append path delimiter if not final path element 614 result.append('/'); 615 } 616 } 617 return result.toString(); 618 } 619 620 // this path has only 1 node, remove last index if required 621 return removeXpathIndex(path); 622 } 623 624 /** 625 * Removes the last Xpath index from the given path.<p> 626 * 627 * Examples:<br> 628 * <code>title</code> is left untouched<br> 629 * <code>title[1]</code> becomes <code>title</code><br> 630 * <code>title/subtitle</code> is left untouched<br> 631 * <code>title[1]/subtitle[1]</code> becomes <code>title[1]/subtitle</code><p> 632 * 633 * @param path the path to remove the Xpath index from 634 * 635 * @return the path with the last Xpath index removed 636 */ 637 public static String removeXpathIndex(String path) { 638 639 int pos1 = path.lastIndexOf('/'); 640 int pos2 = path.lastIndexOf('['); 641 if ((pos2 < 0) || (pos1 > pos2)) { 642 return path; 643 } 644 645 return path.substring(0, pos2); 646 } 647 648 /** 649 * Simplifies an Xpath by removing a leading and a trailing slash from the given path.<p> 650 * 651 * Examples:<br> 652 * <code>title/</code> becomes <code>title</code><br> 653 * <code>/title[1]/</code> becomes <code>title[1]</code><br> 654 * <code>/title/subtitle/</code> becomes <code>title/subtitle</code><br> 655 * <code>/title/subtitle[1]/</code> becomes <code>title/subtitle[1]</code><p> 656 * 657 * @param path the path to process 658 * @return the input with a leading and a trailing slash removed 659 */ 660 public static String simplifyXpath(String path) { 661 662 StringBuffer result = new StringBuffer(path); 663 if (result.charAt(0) == '/') { 664 result.deleteCharAt(0); 665 } 666 int pos = result.length() - 1; 667 if (result.charAt(pos) == '/') { 668 result.deleteCharAt(pos); 669 } 670 return result.toString(); 671 } 672 673 /** 674 * Splits a content value path into its components, ignoring leading or trailing slashes.<p> 675 * 676 * Note: this does not work for XPaths in general, only for the paths used to identify values in OpenCms contents.<p> 677 * 678 * @param xpath the xpath 679 * 680 * @return the path components 681 */ 682 public static List<String> splitXpath(String xpath) { 683 684 return Arrays.stream(xpath.split("/")).filter(s -> !s.isEmpty()).collect(Collectors.toList()); 685 686 } 687 688 /** 689 * Helper to unmarshal (read) xml contents from a byte array into a document.<p> 690 * 691 * Using this method ensures that the OpenCms XML entity resolver is used.<p> 692 * 693 * @param xmlData the XML data in a byte array 694 * @param resolver the XML entity resolver to use 695 * 696 * @return the base object initialized with the unmarshalled XML document 697 * 698 * @throws CmsXmlException if something goes wrong 699 * 700 * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver) 701 */ 702 public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver) throws CmsXmlException { 703 704 return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver); 705 } 706 707 /** 708 * Helper to unmarshal (read) xml contents from a byte array into a document.<p> 709 * 710 * Using this method ensures that the OpenCms XML entity resolver is used.<p> 711 * 712 * @param xmlData the XML data in a byte array 713 * @param resolver the XML entity resolver to use 714 * @param validate if the reader should try to validate the xml code 715 * 716 * @return the base object initialized with the unmarshalled XML document 717 * 718 * @throws CmsXmlException if something goes wrong 719 * 720 * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver) 721 */ 722 public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver, boolean validate) 723 throws CmsXmlException { 724 725 return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver, validate); 726 } 727 728 /** 729 * Helper to unmarshal (read) xml contents from an input source into a document.<p> 730 * 731 * Using this method ensures that the OpenCms XML entity resolver is used.<p> 732 * 733 * Important: The encoding provided will NOT be used during unmarshalling, 734 * the XML parser will do this on the base of the information in the source String. 735 * The encoding is used for initializing the created instance of the document, 736 * which means it will be used when marshalling the document again later.<p> 737 * 738 * @param source the XML input source to use 739 * @param resolver the XML entity resolver to use 740 * 741 * @return the unmarshalled XML document 742 * 743 * @throws CmsXmlException if something goes wrong 744 */ 745 public static Document unmarshalHelper(InputSource source, EntityResolver resolver) throws CmsXmlException { 746 747 return unmarshalHelper(source, resolver, false); 748 } 749 750 /** 751 * Helper to unmarshal (read) xml contents from an input source into a document.<p> 752 * 753 * Using this method ensures that the OpenCms XML entity resolver is used.<p> 754 * 755 * Important: The encoding provided will NOT be used during unmarshalling, 756 * the XML parser will do this on the base of the information in the source String. 757 * The encoding is used for initializing the created instance of the document, 758 * which means it will be used when marshalling the document again later.<p> 759 * 760 * @param source the XML input source to use 761 * @param resolver the XML entity resolver to use 762 * @param validate if the reader should try to validate the xml code 763 * 764 * @return the unmarshalled XML document 765 * 766 * @throws CmsXmlException if something goes wrong 767 */ 768 public static Document unmarshalHelper(InputSource source, EntityResolver resolver, boolean validate) 769 throws CmsXmlException { 770 771 if (null == source) { 772 throw new CmsXmlException(Messages.get().container(Messages.ERR_UNMARSHALLING_XML_DOC_1, "source==null!")); 773 } 774 775 try { 776 SAXReader reader = new SAXReader(); 777 if (resolver != null) { 778 reader.setEntityResolver(resolver); 779 } 780 reader.setMergeAdjacentText(true); 781 reader.setStripWhitespaceText(true); 782 if (!validate) { 783 reader.setValidation(false); 784 reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 785 } else { 786 reader.setValidation(true); 787 } 788 return reader.read(source); 789 } catch (DocumentException e) { 790 String systemId = source != null ? source.getSystemId() : "???"; 791 throw new CmsXmlException( 792 Messages.get().container(Messages.ERR_UNMARSHALLING_XML_DOC_1, "(systemId = " + systemId + ")"), 793 e); 794 } catch (SAXException e) { 795 String systemId = source != null ? source.getSystemId() : "???"; 796 throw new CmsXmlException( 797 Messages.get().container(Messages.ERR_UNMARSHALLING_XML_DOC_1, "(systemId = " + systemId + ")"), 798 e); 799 } 800 } 801 802 /** 803 * Helper to unmarshal (read) xml contents from a String into a document.<p> 804 * 805 * Using this method ensures that the OpenCms XML entitiy resolver is used.<p> 806 * 807 * @param xmlData the xml data in a String 808 * @param resolver the XML entity resolver to use 809 * @return the base object initialized with the unmarshalled XML document 810 * @throws CmsXmlException if something goes wrong 811 * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver) 812 */ 813 public static Document unmarshalHelper(String xmlData, EntityResolver resolver) throws CmsXmlException { 814 815 return CmsXmlUtils.unmarshalHelper(new InputSource(new StringReader(xmlData)), resolver); 816 } 817 818 /** 819 * Validates the structure of a XML document contained in a byte array 820 * with the DTD or XML schema used by the document.<p> 821 * 822 * @param xmlData a byte array containing a XML document that should be validated 823 * @param resolver the XML entity resolver to use 824 * 825 * @throws CmsXmlException if the validation fails 826 */ 827 public static void validateXmlStructure(byte[] xmlData, EntityResolver resolver) throws CmsXmlException { 828 829 validateXmlStructure(new ByteArrayInputStream(xmlData), resolver); 830 } 831 832 /** 833 * Validates the structure of a XML document with the DTD or XML schema used 834 * by the document.<p> 835 * 836 * @param document a XML document that should be validated 837 * @param encoding the encoding to use when marshalling the XML document (required) 838 * @param resolver the XML entity resolver to use 839 * 840 * @throws CmsXmlException if the validation fails 841 */ 842 public static void validateXmlStructure(Document document, String encoding, EntityResolver resolver) 843 throws CmsXmlException { 844 845 // generate bytes from document 846 byte[] xmlData = ((ByteArrayOutputStream)marshal( 847 document, 848 new ByteArrayOutputStream(512), 849 encoding)).toByteArray(); 850 validateXmlStructure(xmlData, resolver); 851 } 852 853 /** 854 * Validates the structure of a XML document contained in a byte array 855 * with the DTD or XML schema used by the document.<p> 856 * 857 * @param xmlStream a source providing a XML document that should be validated 858 * @param resolver the XML entity resolver to use 859 * 860 * @throws CmsXmlException if the validation fails 861 */ 862 public static void validateXmlStructure(InputStream xmlStream, EntityResolver resolver) throws CmsXmlException { 863 864 XMLReader reader; 865 try { 866 reader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser"); 867 } catch (SAXException e) { 868 // xerces parser not available - no schema validation possible 869 if (LOG.isWarnEnabled()) { 870 LOG.warn(Messages.get().getBundle().key(Messages.LOG_VALIDATION_INIT_XERXES_SAX_READER_FAILED_0), e); 871 } 872 // no validation of the content is possible 873 return; 874 } 875 // turn on validation 876 try { 877 reader.setFeature("http://xml.org/sax/features/validation", true); 878 // turn on schema validation 879 reader.setFeature("http://apache.org/xml/features/validation/schema", true); 880 // configure namespace support 881 reader.setFeature("http://xml.org/sax/features/namespaces", true); 882 reader.setFeature("http://xml.org/sax/features/namespace-prefixes", false); 883 } catch (SAXNotRecognizedException e) { 884 // should not happen as Xerces 2 support this feature 885 if (LOG.isWarnEnabled()) { 886 LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_RECOGNIZED_0), e); 887 } 888 // no validation of the content is possible 889 return; 890 } catch (SAXNotSupportedException e) { 891 // should not happen as Xerces 2 support this feature 892 if (LOG.isWarnEnabled()) { 893 LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_SUPPORTED_0), e); 894 } 895 // no validation of the content is possible 896 return; 897 } 898 899 // add an error handler which turns any errors into XML 900 CmsXmlValidationErrorHandler errorHandler = new CmsXmlValidationErrorHandler(); 901 reader.setErrorHandler(errorHandler); 902 903 if (resolver != null) { 904 // set the resolver for the "opencms://" URIs 905 reader.setEntityResolver(resolver); 906 } 907 908 try { 909 reader.parse(new InputSource(xmlStream)); 910 } catch (IOException e) { 911 // should not happen since we read form a byte array 912 if (LOG.isErrorEnabled()) { 913 LOG.error(Messages.get().getBundle().key(Messages.LOG_READ_XML_FROM_BYTE_ARR_FAILED_0), e); 914 } 915 return; 916 } catch (SAXException e) { 917 // should not happen since all errors are handled in the XML error handler 918 if (LOG.isErrorEnabled()) { 919 LOG.error(Messages.get().getBundle().key(Messages.LOG_PARSE_SAX_EXC_0), e); 920 } 921 return; 922 } 923 924 if (errorHandler.getErrors().elements().size() > 0) { 925 // there was at last one validation error, so throw an exception 926 StringWriter out = new StringWriter(256); 927 OutputFormat format = OutputFormat.createPrettyPrint(); 928 XMLWriter writer = new XMLWriter(out, format); 929 try { 930 writer.write(errorHandler.getErrors()); 931 writer.write(errorHandler.getWarnings()); 932 writer.close(); 933 } catch (IOException e) { 934 // should not happen since we write to a StringWriter 935 if (LOG.isErrorEnabled()) { 936 LOG.error(Messages.get().getBundle().key(Messages.LOG_STRINGWRITER_IO_EXC_0), e); 937 } 938 } 939 // generate String from XML for display of document in error message 940 throw new CmsXmlException(Messages.get().container(Messages.ERR_XML_VALIDATION_1, out.toString())); 941 } 942 } 943}