001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software GmbH & Co. KG, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.xml; 029 030import org.opencms.file.CmsResource; 031import org.opencms.main.CmsLog; 032import org.opencms.util.CmsStringUtil; 033 034import java.io.ByteArrayInputStream; 035import java.io.ByteArrayOutputStream; 036import java.io.IOException; 037import java.io.InputStream; 038import java.io.OutputStream; 039import java.io.StringReader; 040import java.io.StringWriter; 041import java.io.UnsupportedEncodingException; 042import java.util.List; 043 044import javax.xml.parsers.SAXParserFactory; 045 046import org.apache.commons.logging.Log; 047import org.apache.xerces.parsers.SAXParser; 048 049import org.dom4j.Document; 050import org.dom4j.DocumentException; 051import org.dom4j.Node; 052import org.dom4j.io.OutputFormat; 053import org.dom4j.io.SAXReader; 054import org.dom4j.io.XMLWriter; 055import org.xml.sax.EntityResolver; 056import org.xml.sax.InputSource; 057import org.xml.sax.SAXException; 058import org.xml.sax.SAXNotRecognizedException; 059import org.xml.sax.SAXNotSupportedException; 060import org.xml.sax.XMLReader; 061import org.xml.sax.helpers.XMLReaderFactory; 062 063/** 064 * Provides some basic XML handling utilities.<p> 065 * 066 * @since 6.0.0 067 */ 068public final class CmsXmlUtils { 069 070 /** 071 * This class is only used to expose the XML parser configuration implementation name.<p> 072 */ 073 private static class ParserImpl extends SAXParser { 074 075 /** 076 * Constructor.<p> 077 */ 078 ParserImpl() { 079 super(); 080 } 081 082 /** 083 * Returns the implementation name of the used XML parser configuration.<p> 084 * 085 * @return the implementation name 086 */ 087 String getConfigImplName() { 088 089 if (fConfiguration != null) { 090 return fConfiguration.getClass().getName(); 091 } else { 092 return null; 093 } 094 } 095 } 096 097 /** The log object for this class. */ 098 private static final Log LOG = CmsLog.getLog(CmsXmlUtils.class); 099 100 /** Key of the SAX parser configuration system property. */ 101 private static final String SAX_PARSER_CONFIG_KEY = "org.apache.xerces.xni.parser.XMLParserConfiguration"; 102 103 /** Key of the SAX parser factory system property. */ 104 private static final String SAX_PARSER_FACTORY_KEY = "javax.xml.parsers.SAXParserFactory"; 105 106 /** Key of the XML reader system property. */ 107 private static final String XML_READER_KEY = "org.xml.sax.driver"; 108 109 /** 110 * Prevents instances of this class from being generated.<p> 111 */ 112 private CmsXmlUtils() { 113 114 // noop 115 } 116 117 /** 118 * Concatenates two Xpath expressions, ensuring that exactly one slash "/" is between them.<p> 119 * 120 * Use this method if it's uncertain if the given arguments are starting or ending with 121 * a slash "/".<p> 122 * 123 * Examples:<br> 124 * <code>"title", "subtitle"</code> becomes <code>title/subtitle</code><br> 125 * <code>"title[1]/", "subtitle"</code> becomes <code>title[1]/subtitle</code><br> 126 * <code>"title[1]/", "/subtitle[1]"</code> becomes <code>title[1]/subtitle[1]</code><p> 127 * 128 * @param prefix the prefix Xpath 129 * @param suffix the suffix Xpath 130 * 131 * @return the concatenated Xpath build from prefix and suffix 132 */ 133 public static String concatXpath(String prefix, String suffix) { 134 135 if (suffix == null) { 136 // ensure suffix is not null 137 suffix = ""; 138 } else { 139 if ((suffix.length() > 0) && (suffix.charAt(0) == '/')) { 140 // remove leading '/' form suffix 141 suffix = suffix.substring(1); 142 } 143 } 144 if (prefix != null) { 145 StringBuffer result = new StringBuffer(32); 146 result.append(prefix); 147 if (!CmsResource.isFolder(prefix)) { 148 result.append('/'); 149 } 150 result.append(suffix); 151 return result.toString(); 152 } 153 return suffix; 154 } 155 156 /** 157 * Translates a simple lookup path to the simplified Xpath format used for 158 * the internal bookmarks.<p> 159 * 160 * Examples:<br> 161 * <code>title</code> becomes <code>title[1]</code><br> 162 * <code>title[1]</code> is left untouched<br> 163 * <code>title/subtitle</code> becomes <code>title[1]/subtitle[1]</code><br> 164 * <code>title/subtitle[1]</code> becomes <code>title[1]/subtitle[1]</code><p> 165 * 166 * Note: If the name already has the format <code>title[1]</code> then provided index parameter 167 * is ignored.<p> 168 * 169 * @param path the path to get the simplified Xpath for 170 * @param index the index to append (if required) 171 * 172 * @return the simplified Xpath for the given name 173 */ 174 public static String createXpath(String path, int index) { 175 176 if (path.indexOf('/') > -1) { 177 // this is a complex path over more then 1 node 178 StringBuffer result = new StringBuffer(path.length() + 32); 179 180 // split the path into sub elements 181 List<String> elements = CmsStringUtil.splitAsList(path, '/'); 182 int end = elements.size() - 1; 183 for (int i = 0; i <= end; i++) { 184 // append [i] to path element if required 185 result.append(createXpathElementCheck(elements.get(i), (i == end) ? index : 1)); 186 if (i < end) { 187 // append path delimiter if not final path element 188 result.append('/'); 189 } 190 } 191 return result.toString(); 192 } 193 194 // this path has only 1 node, append [index] if required 195 return createXpathElementCheck(path, index); 196 } 197 198 /** 199 * Appends the provided index parameter in square brackets to the given name, 200 * like <code>path[index]</code>.<p> 201 * 202 * This method is used if it's clear that some path does not have 203 * a square bracket already appended.<p> 204 * 205 * @param path the path append the index to 206 * @param index the index to append 207 * 208 * @return the simplified Xpath for the given name 209 */ 210 public static String createXpathElement(String path, int index) { 211 212 StringBuffer result = new StringBuffer(path.length() + 5); 213 result.append(path); 214 result.append('['); 215 result.append(index); 216 result.append(']'); 217 return result.toString(); 218 } 219 220 /** 221 * Ensures that a provided simplified Xpath has the format <code>title[1]</code>.<p> 222 * 223 * This method is used if it's uncertain if some path does have 224 * a square bracket already appended or not.<p> 225 * 226 * Note: If the name already has the format <code>title[1]</code>, then provided index parameter 227 * is ignored.<p> 228 * 229 * @param path the path to get the simplified Xpath for 230 * @param index the index to append (if required) 231 * 232 * @return the simplified Xpath for the given name 233 */ 234 public static String createXpathElementCheck(String path, int index) { 235 236 if (path.charAt(path.length() - 1) == ']') { 237 // path is already in the form "title[1]" 238 // ignore provided index and return the path "as is" 239 return path; 240 } 241 242 // append index in square brackets 243 return createXpathElement(path, index); 244 } 245 246 /** 247 * Returns the first Xpath element from the provided path, 248 * without the index value.<p> 249 * 250 * Examples:<br> 251 * <code>title</code> is left untouched<br> 252 * <code>title[1]</code> becomes <code>title</code><br> 253 * <code>title/subtitle</code> becomes <code>title</code><br> 254 * <code>title[1]/subtitle[1]</code> becomes <code>title</code><p> 255 * 256 * @param path the path to get the first Xpath element from 257 * 258 * @return the first Xpath element from the provided path 259 */ 260 public static String getFirstXpathElement(String path) { 261 262 int pos = path.indexOf('/'); 263 if (pos >= 0) { 264 path = path.substring(0, pos); 265 } 266 267 return CmsXmlUtils.removeXpathIndex(path); 268 } 269 270 /** 271 * Returns the last Xpath element from the provided path, 272 * without the index value.<p> 273 * 274 * Examples:<br> 275 * <code>title</code> is left untouched<br> 276 * <code>title[1]</code> becomes <code>title</code><br> 277 * <code>title/subtitle</code> becomes <code>subtitle</code><br> 278 * <code>title[1]/subtitle[1]</code> becomes <code>subtitle</code><p> 279 * 280 * @param path the path to get the last Xpath element from 281 * 282 * @return the last Xpath element from the provided path 283 */ 284 public static String getLastXpathElement(String path) { 285 286 int pos = path.lastIndexOf('/'); 287 if (pos >= 0) { 288 path = path.substring(pos + 1); 289 } 290 291 return CmsXmlUtils.removeXpathIndex(path); 292 } 293 294 /** 295 * Returns the last Xpath index from the given path.<p> 296 * 297 * Examples:<br> 298 * <code>title</code> returns the empty String<p> 299 * <code>title[1]</code> returns <code>[1]</code><p> 300 * <code>title/subtitle</code> returns them empty String<p> 301 * <code>title[1]/subtitle[1]</code> returns <code>[1]</code><p> 302 * 303 * @param path the path to extract the Xpath index from 304 * 305 * @return the last Xpath index from the given path 306 */ 307 public static String getXpathIndex(String path) { 308 309 int pos1 = path.lastIndexOf('/'); 310 int pos2 = path.lastIndexOf('['); 311 if ((pos2 < 0) || (pos1 > pos2)) { 312 return ""; 313 } 314 315 return path.substring(pos2); 316 } 317 318 /** 319 * Returns the last Xpath index from the given path as integer.<p> 320 * 321 * Examples:<br> 322 * <code>title</code> returns 1<p> 323 * <code>title[1]</code> returns 1<p> 324 * <code>title/subtitle</code> returns 1<p> 325 * <code>title[1]/subtitle[2]</code> returns 2<p> 326 * 327 * @param path the path to extract the Xpath index from 328 * 329 * @return the last Xpath index from the given path as integer 330 */ 331 public static int getXpathIndexInt(String path) { 332 333 int pos1 = path.lastIndexOf('/'); 334 int pos2 = path.lastIndexOf('['); 335 if ((pos2 < 0) || (pos1 > pos2)) { 336 return 1; 337 } 338 339 String idxStr = path.substring(pos2 + 1, path.lastIndexOf(']')); 340 try { 341 return Integer.parseInt(idxStr); 342 } catch (NumberFormatException e) { 343 // NOOP 344 } 345 return 1; 346 } 347 348 /** 349 * Initializes XML processing system properties to avoid evaluating the XML parser and reader implementation each time an XML document is read.<p> 350 * This is done for performance improvements only.<p> 351 */ 352 public static void initSystemProperties() { 353 354 String implName; 355 // initialize system properties 356 if (System.getProperty(SAX_PARSER_FACTORY_KEY) == null) { 357 implName = SAXParserFactory.newInstance().getClass().getName(); 358 LOG.info("Setting sax parser factory impl property to " + implName); 359 System.setProperty(SAX_PARSER_FACTORY_KEY, implName); 360 } 361 if (System.getProperty(XML_READER_KEY) == null) { 362 SAXReader reader = new SAXReader(); 363 try { 364 implName = reader.getXMLReader().getClass().getName(); 365 LOG.info("Setting xml reader impl property to " + implName); 366 System.setProperty(XML_READER_KEY, implName); 367 } catch (SAXException e) { 368 LOG.error("Error evaluating XMLReader impl.", e); 369 } 370 } 371 if (System.getProperty(SAX_PARSER_CONFIG_KEY) == null) { 372 ParserImpl saxParser = new ParserImpl(); 373 implName = saxParser.getConfigImplName(); 374 if (implName != null) { 375 LOG.info("Setting xml parser configuration impl property to " + implName); 376 System.setProperty(SAX_PARSER_CONFIG_KEY, implName); 377 } 378 } 379 } 380 381 /** 382 * Returns <code>true</code> if the given path is a Xpath with 383 * at least 2 elements.<p> 384 * 385 * Examples:<br> 386 * <code>title</code> returns <code>false</code><br> 387 * <code>title[1]</code> returns <code>false</code><br> 388 * <code>title/subtitle</code> returns <code>true</code><br> 389 * <code>title[1]/subtitle[1]</code> returns <code>true</code><p> 390 * 391 * @param path the path to check 392 * @return true if the given path is a Xpath with at least 2 elements 393 */ 394 public static boolean isDeepXpath(String path) { 395 396 return path.indexOf('/') > 0; 397 } 398 399 /** 400 * Marshals (writes) an XML document into an output stream using XML pretty-print formatting.<p> 401 * 402 * @param document the XML document to marshal 403 * @param out the output stream to write to 404 * @param encoding the encoding to use 405 * @return the output stream with the xml content 406 * @throws CmsXmlException if something goes wrong 407 */ 408 public static OutputStream marshal(Document document, OutputStream out, String encoding) throws CmsXmlException { 409 410 try { 411 OutputFormat format = OutputFormat.createPrettyPrint(); 412 format.setEncoding(encoding); 413 414 XMLWriter writer = new XMLWriter(out, format); 415 writer.setEscapeText(false); 416 417 writer.write(document); 418 writer.close(); 419 420 } catch (Exception e) { 421 throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e); 422 } 423 424 return out; 425 } 426 427 /** 428 * Marshals (writes) an XML document to a String using XML pretty-print formatting.<p> 429 * 430 * @param document the XML document to marshal 431 * @param encoding the encoding to use 432 * @return the marshalled XML document 433 * @throws CmsXmlException if something goes wrong 434 */ 435 public static String marshal(Document document, String encoding) throws CmsXmlException { 436 437 ByteArrayOutputStream out = new ByteArrayOutputStream(); 438 marshal(document, out, encoding); 439 try { 440 return out.toString(encoding); 441 } catch (UnsupportedEncodingException e) { 442 throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_TO_STRING_0), e); 443 } 444 } 445 446 /** 447 * Marshals (writes) an XML node into an output stream using XML pretty-print formatting.<p> 448 * 449 * @param node the XML node to marshal 450 * @param encoding the encoding to use 451 * 452 * @return the string with the xml content 453 * 454 * @throws CmsXmlException if something goes wrong 455 */ 456 public static String marshal(Node node, String encoding) throws CmsXmlException { 457 458 ByteArrayOutputStream out = new ByteArrayOutputStream(); 459 try { 460 OutputFormat format = OutputFormat.createPrettyPrint(); 461 format.setEncoding(encoding); 462 format.setSuppressDeclaration(true); 463 464 XMLWriter writer = new XMLWriter(out, format); 465 writer.setEscapeText(false); 466 467 writer.write(node); 468 writer.close(); 469 } catch (Exception e) { 470 throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e); 471 } 472 return new String(out.toByteArray()); 473 } 474 475 /** 476 * Removes all Xpath indices from the given path.<p> 477 * 478 * Example:<br> 479 * <code>title</code> is left untouched<br> 480 * <code>title[1]</code> becomes <code>title</code><br> 481 * <code>title/subtitle</code> is left untouched<br> 482 * <code>title[1]/subtitle[1]</code> becomes <code>title/subtitle</code><p> 483 * 484 * @param path the path to remove the Xpath index from 485 * 486 * @return the path with all Xpath indices removed 487 */ 488 public static String removeAllXpathIndices(String path) { 489 490 return path.replaceAll("\\[[0-9]+\\]", ""); 491 } 492 493 /** 494 * Removes the first Xpath element from the path.<p> 495 * 496 * If the provided path does not contain a "/" character, 497 * it is returned unchanged.<p> 498 * 499 * <p>Examples:<br> 500 * <code>title</code> is left untouched<br> 501 * <code>title[1]</code> is left untouched<br> 502 * <code>title/subtitle</code> becomes <code>subtitle</code><br> 503 * <code>title[1]/subtitle[1]</code> becomes <code>subtitle[1]</code><p> 504 * 505 * @param path the Xpath to remove the first element from 506 * 507 * @return the path with the first element removed 508 */ 509 public static String removeFirstXpathElement(String path) { 510 511 int pos = path.indexOf('/'); 512 if (pos < 0) { 513 return path; 514 } 515 516 return path.substring(pos + 1); 517 } 518 519 /** 520 * Removes the last complex Xpath element from the path.<p> 521 * 522 * The same as {@link #removeLastXpathElement(String)} both it works with more complex xpaths. 523 * 524 * <p>Example:<br> 525 * <code>system/backup[@date='23/10/2003']/resource[path='/a/b/c']</code> becomes <code>system/backup[@date='23/10/2003']</code><p> 526 * 527 * @param path the Xpath to remove the last element from 528 * 529 * @return the path with the last element removed 530 */ 531 public static String removeLastComplexXpathElement(String path) { 532 533 int pos = path.lastIndexOf('/'); 534 if (pos < 0) { 535 return path; 536 } 537 // count ' chars 538 int p = pos; 539 int count = -1; 540 while (p > 0) { 541 count++; 542 p = path.indexOf("\'", p + 1); 543 } 544 String parentPath = path.substring(0, pos); 545 if ((count % 2) == 0) { 546 // if substring is complete 547 return parentPath; 548 } 549 // if not complete 550 p = parentPath.lastIndexOf("'"); 551 if (p >= 0) { 552 // complete it if possible 553 return removeLastComplexXpathElement(parentPath.substring(0, p)); 554 } 555 return parentPath; 556 } 557 558 /** 559 * Removes the last Xpath element from the path.<p> 560 * 561 * If the provided path does not contain a "/" character, 562 * it is returned unchanged.<p> 563 * 564 * <p>Examples:<br> 565 * <code>title</code> is left untouched<br> 566 * <code>title[1]</code> is left untouched<br> 567 * <code>title/subtitle</code> becomes <code>title</code><br> 568 * <code>title[1]/subtitle[1]</code> becomes <code>title[1]</code><p> 569 * 570 * @param path the Xpath to remove the last element from 571 * 572 * @return the path with the last element removed 573 */ 574 public static String removeLastXpathElement(String path) { 575 576 int pos = path.lastIndexOf('/'); 577 if (pos < 0) { 578 return path; 579 } 580 581 return path.substring(0, pos); 582 } 583 584 /** 585 * Removes all Xpath index information from the given input path.<p> 586 * 587 * Examples:<br> 588 * <code>title</code> is left untouched<br> 589 * <code>title[1]</code> becomes <code>title</code><br> 590 * <code>title/subtitle</code> is left untouched<br> 591 * <code>title[1]/subtitle[1]</code> becomes <code>title/subtitle</code><p> 592 * 593 * @param path the path to remove the Xpath index information from 594 * 595 * @return the simplified Xpath for the given name 596 */ 597 public static String removeXpath(String path) { 598 599 if (path.indexOf('/') > -1) { 600 // this is a complex path over more then 1 node 601 StringBuffer result = new StringBuffer(path.length() + 32); 602 603 // split the path into sub-elements 604 List<String> elements = CmsStringUtil.splitAsList(path, '/'); 605 int end = elements.size() - 1; 606 for (int i = 0; i <= end; i++) { 607 // remove [i] from path element if required 608 result.append(removeXpathIndex(elements.get(i))); 609 if (i < end) { 610 // append path delimiter if not final path element 611 result.append('/'); 612 } 613 } 614 return result.toString(); 615 } 616 617 // this path has only 1 node, remove last index if required 618 return removeXpathIndex(path); 619 } 620 621 /** 622 * Removes the last Xpath index from the given path.<p> 623 * 624 * Examples:<br> 625 * <code>title</code> is left untouched<br> 626 * <code>title[1]</code> becomes <code>title</code><br> 627 * <code>title/subtitle</code> is left untouched<br> 628 * <code>title[1]/subtitle[1]</code> becomes <code>title[1]/subtitle</code><p> 629 * 630 * @param path the path to remove the Xpath index from 631 * 632 * @return the path with the last Xpath index removed 633 */ 634 public static String removeXpathIndex(String path) { 635 636 int pos1 = path.lastIndexOf('/'); 637 int pos2 = path.lastIndexOf('['); 638 if ((pos2 < 0) || (pos1 > pos2)) { 639 return path; 640 } 641 642 return path.substring(0, pos2); 643 } 644 645 /** 646 * Simplifies an Xpath by removing a leading and a trailing slash from the given path.<p> 647 * 648 * Examples:<br> 649 * <code>title/</code> becomes <code>title</code><br> 650 * <code>/title[1]/</code> becomes <code>title[1]</code><br> 651 * <code>/title/subtitle/</code> becomes <code>title/subtitle</code><br> 652 * <code>/title/subtitle[1]/</code> becomes <code>title/subtitle[1]</code><p> 653 * 654 * @param path the path to process 655 * @return the input with a leading and a trailing slash removed 656 */ 657 public static String simplifyXpath(String path) { 658 659 StringBuffer result = new StringBuffer(path); 660 if (result.charAt(0) == '/') { 661 result.deleteCharAt(0); 662 } 663 int pos = result.length() - 1; 664 if (result.charAt(pos) == '/') { 665 result.deleteCharAt(pos); 666 } 667 return result.toString(); 668 } 669 670 /** 671 * Helper to unmarshal (read) xml contents from a byte array into a document.<p> 672 * 673 * Using this method ensures that the OpenCms XML entity resolver is used.<p> 674 * 675 * @param xmlData the XML data in a byte array 676 * @param resolver the XML entity resolver to use 677 * 678 * @return the base object initialized with the unmarshalled XML document 679 * 680 * @throws CmsXmlException if something goes wrong 681 * 682 * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver) 683 */ 684 public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver) throws CmsXmlException { 685 686 return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver); 687 } 688 689 /** 690 * Helper to unmarshal (read) xml contents from a byte array into a document.<p> 691 * 692 * Using this method ensures that the OpenCms XML entity resolver is used.<p> 693 * 694 * @param xmlData the XML data in a byte array 695 * @param resolver the XML entity resolver to use 696 * @param validate if the reader should try to validate the xml code 697 * 698 * @return the base object initialized with the unmarshalled XML document 699 * 700 * @throws CmsXmlException if something goes wrong 701 * 702 * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver) 703 */ 704 public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver, boolean validate) 705 throws CmsXmlException { 706 707 return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver, validate); 708 } 709 710 /** 711 * Helper to unmarshal (read) xml contents from an input source into a document.<p> 712 * 713 * Using this method ensures that the OpenCms XML entity resolver is used.<p> 714 * 715 * Important: The encoding provided will NOT be used during unmarshalling, 716 * the XML parser will do this on the base of the information in the source String. 717 * The encoding is used for initializing the created instance of the document, 718 * which means it will be used when marshalling the document again later.<p> 719 * 720 * @param source the XML input source to use 721 * @param resolver the XML entity resolver to use 722 * 723 * @return the unmarshalled XML document 724 * 725 * @throws CmsXmlException if something goes wrong 726 */ 727 public static Document unmarshalHelper(InputSource source, EntityResolver resolver) throws CmsXmlException { 728 729 return unmarshalHelper(source, resolver, false); 730 } 731 732 /** 733 * Helper to unmarshal (read) xml contents from an input source into a document.<p> 734 * 735 * Using this method ensures that the OpenCms XML entity resolver is used.<p> 736 * 737 * Important: The encoding provided will NOT be used during unmarshalling, 738 * the XML parser will do this on the base of the information in the source String. 739 * The encoding is used for initializing the created instance of the document, 740 * which means it will be used when marshalling the document again later.<p> 741 * 742 * @param source the XML input source to use 743 * @param resolver the XML entity resolver to use 744 * @param validate if the reader should try to validate the xml code 745 * 746 * @return the unmarshalled XML document 747 * 748 * @throws CmsXmlException if something goes wrong 749 */ 750 public static Document unmarshalHelper(InputSource source, EntityResolver resolver, boolean validate) 751 throws CmsXmlException { 752 753 try { 754 SAXReader reader = new SAXReader(); 755 if (resolver != null) { 756 reader.setEntityResolver(resolver); 757 } 758 reader.setMergeAdjacentText(true); 759 reader.setStripWhitespaceText(true); 760 if (!validate) { 761 reader.setValidation(false); 762 reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 763 } 764 return reader.read(source); 765 } catch (DocumentException e) { 766 throw new CmsXmlException( 767 Messages.get().container( 768 Messages.ERR_UNMARSHALLING_XML_DOC_1, 769 "(systemId = " + source.getSystemId() + ")"), 770 e); 771 } catch (SAXException e) { 772 throw new CmsXmlException( 773 Messages.get().container( 774 Messages.ERR_UNMARSHALLING_XML_DOC_1, 775 "(systemId = " + source.getSystemId() + ")"), 776 e); 777 } 778 } 779 780 /** 781 * Helper to unmarshal (read) xml contents from a String into a document.<p> 782 * 783 * Using this method ensures that the OpenCms XML entitiy resolver is used.<p> 784 * 785 * @param xmlData the xml data in a String 786 * @param resolver the XML entity resolver to use 787 * @return the base object initialized with the unmarshalled XML document 788 * @throws CmsXmlException if something goes wrong 789 * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver) 790 */ 791 public static Document unmarshalHelper(String xmlData, EntityResolver resolver) throws CmsXmlException { 792 793 return CmsXmlUtils.unmarshalHelper(new InputSource(new StringReader(xmlData)), resolver); 794 } 795 796 /** 797 * Validates the structure of a XML document contained in a byte array 798 * with the DTD or XML schema used by the document.<p> 799 * 800 * @param xmlData a byte array containing a XML document that should be validated 801 * @param resolver the XML entity resolver to use 802 * 803 * @throws CmsXmlException if the validation fails 804 */ 805 public static void validateXmlStructure(byte[] xmlData, EntityResolver resolver) throws CmsXmlException { 806 807 validateXmlStructure(new ByteArrayInputStream(xmlData), resolver); 808 } 809 810 /** 811 * Validates the structure of a XML document with the DTD or XML schema used 812 * by the document.<p> 813 * 814 * @param document a XML document that should be validated 815 * @param encoding the encoding to use when marshalling the XML document (required) 816 * @param resolver the XML entity resolver to use 817 * 818 * @throws CmsXmlException if the validation fails 819 */ 820 public static void validateXmlStructure(Document document, String encoding, EntityResolver resolver) 821 throws CmsXmlException { 822 823 // generate bytes from document 824 byte[] xmlData = ((ByteArrayOutputStream)marshal( 825 document, 826 new ByteArrayOutputStream(512), 827 encoding)).toByteArray(); 828 validateXmlStructure(xmlData, resolver); 829 } 830 831 /** 832 * Validates the structure of a XML document contained in a byte array 833 * with the DTD or XML schema used by the document.<p> 834 * 835 * @param xmlStream a source providing a XML document that should be validated 836 * @param resolver the XML entity resolver to use 837 * 838 * @throws CmsXmlException if the validation fails 839 */ 840 public static void validateXmlStructure(InputStream xmlStream, EntityResolver resolver) throws CmsXmlException { 841 842 XMLReader reader; 843 try { 844 reader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser"); 845 } catch (SAXException e) { 846 // xerces parser not available - no schema validation possible 847 if (LOG.isWarnEnabled()) { 848 LOG.warn(Messages.get().getBundle().key(Messages.LOG_VALIDATION_INIT_XERXES_SAX_READER_FAILED_0), e); 849 } 850 // no validation of the content is possible 851 return; 852 } 853 // turn on validation 854 try { 855 reader.setFeature("http://xml.org/sax/features/validation", true); 856 // turn on schema validation 857 reader.setFeature("http://apache.org/xml/features/validation/schema", true); 858 // configure namespace support 859 reader.setFeature("http://xml.org/sax/features/namespaces", true); 860 reader.setFeature("http://xml.org/sax/features/namespace-prefixes", false); 861 } catch (SAXNotRecognizedException e) { 862 // should not happen as Xerces 2 support this feature 863 if (LOG.isWarnEnabled()) { 864 LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_RECOGNIZED_0), e); 865 } 866 // no validation of the content is possible 867 return; 868 } catch (SAXNotSupportedException e) { 869 // should not happen as Xerces 2 support this feature 870 if (LOG.isWarnEnabled()) { 871 LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_SUPPORTED_0), e); 872 } 873 // no validation of the content is possible 874 return; 875 } 876 877 // add an error handler which turns any errors into XML 878 CmsXmlValidationErrorHandler errorHandler = new CmsXmlValidationErrorHandler(); 879 reader.setErrorHandler(errorHandler); 880 881 if (resolver != null) { 882 // set the resolver for the "opencms://" URIs 883 reader.setEntityResolver(resolver); 884 } 885 886 try { 887 reader.parse(new InputSource(xmlStream)); 888 } catch (IOException e) { 889 // should not happen since we read form a byte array 890 if (LOG.isErrorEnabled()) { 891 LOG.error(Messages.get().getBundle().key(Messages.LOG_READ_XML_FROM_BYTE_ARR_FAILED_0), e); 892 } 893 return; 894 } catch (SAXException e) { 895 // should not happen since all errors are handled in the XML error handler 896 if (LOG.isErrorEnabled()) { 897 LOG.error(Messages.get().getBundle().key(Messages.LOG_PARSE_SAX_EXC_0), e); 898 } 899 return; 900 } 901 902 if (errorHandler.getErrors().elements().size() > 0) { 903 // there was at last one validation error, so throw an exception 904 StringWriter out = new StringWriter(256); 905 OutputFormat format = OutputFormat.createPrettyPrint(); 906 XMLWriter writer = new XMLWriter(out, format); 907 try { 908 writer.write(errorHandler.getErrors()); 909 writer.write(errorHandler.getWarnings()); 910 writer.close(); 911 } catch (IOException e) { 912 // should not happen since we write to a StringWriter 913 if (LOG.isErrorEnabled()) { 914 LOG.error(Messages.get().getBundle().key(Messages.LOG_STRINGWRITER_IO_EXC_0), e); 915 } 916 } 917 // generate String from XML for display of document in error message 918 throw new CmsXmlException(Messages.get().container(Messages.ERR_XML_VALIDATION_1, out.toString())); 919 } 920 } 921}