/*
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (c) Alkacon Software GmbH (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software GmbH, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package org.opencms.xml;

import org.opencms.file.CmsResource;
import org.opencms.main.CmsLog;
import org.opencms.util.CmsStringUtil;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.util.List;

import org.apache.commons.logging.Log;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;

/**
 * Provides some basic XML handling utilities.<p>
 *
 * All methods are static; the Xpath helpers operate on the "simplified Xpath" notation
 * (<code>element[index]/element[index]</code>) and are pure String manipulations.<p>
 *
 * @since 6.0.0
 */
public final class CmsXmlUtils {

    /** The log object for this class. */
    private static final Log LOG = CmsLog.getLog(CmsXmlUtils.class);

    /**
     * Prevents instances of this class from being generated.<p>
     */
    private CmsXmlUtils() {

        // noop
    }

    /**
     * Concatenates two Xpath expressions, ensuring that exactly one slash "/" is between them.<p>
     *
     * Use this method if it's uncertain if the given arguments are starting or ending with
     * a slash "/".<p>
     *
     * Examples:<br>
     * <code>"title", "subtitle"</code> becomes <code>title/subtitle</code><br>
     * <code>"title[1]/", "subtitle"</code> becomes <code>title[1]/subtitle</code><br>
     * <code>"title[1]/", "/subtitle[1]"</code> becomes <code>title[1]/subtitle[1]</code><p>
     *
     * @param prefix the prefix Xpath, if <code>null</code> only the (normalized) suffix is returned
     * @param suffix the suffix Xpath, <code>null</code> is treated like the empty String
     *
     * @return the concatenated Xpath build from prefix and suffix
     */
    public static String concatXpath(String prefix, String suffix) {

        if (suffix == null) {
            // ensure suffix is not null
            suffix = "";
        } else {
            if ((suffix.length() > 0) && (suffix.charAt(0) == '/')) {
                // remove leading '/' from suffix
                suffix = suffix.substring(1);
            }
        }
        if (prefix != null) {
            StringBuilder result = new StringBuilder(32);
            result.append(prefix);
            if (!CmsResource.isFolder(prefix)) {
                // the folder check is used to decide if the separator has to be added
                result.append('/');
            }
            result.append(suffix);
            return result.toString();
        }
        return suffix;
    }

    /**
     * Translates a simple lookup path to the simplified Xpath format used for
     * the internal bookmarks.<p>
     *
     * Examples:<br>
     * <code>title</code> becomes <code>title[1]</code><br>
     * <code>title[1]</code> is left untouched<br>
     * <code>title/subtitle</code> becomes <code>title[1]/subtitle[1]</code><br>
     * <code>title/subtitle[1]</code> becomes <code>title[1]/subtitle[1]</code><p>
     *
     * Note: If the name already has the format <code>title[1]</code> then provided index parameter
     * is ignored. The index is only applied to the last path element, all other elements
     * without an index get <code>[1]</code> appended.<p>
     *
     * @param path the path to get the simplified Xpath for
     * @param index the index to append (if required)
     *
     * @return the simplified Xpath for the given name
     */
    public static String createXpath(String path, int index) {

        if (path.indexOf('/') > -1) {
            // this is a complex path over more than 1 node
            StringBuilder result = new StringBuilder(path.length() + 32);

            // split the path into sub elements
            List<String> elements = CmsStringUtil.splitAsList(path, '/');
            int end = elements.size() - 1;
            for (int i = 0; i <= end; i++) {
                // append [i] to path element if required
                result.append(createXpathElementCheck(elements.get(i), (i == end) ? index : 1));
                if (i < end) {
                    // append path delimiter if not final path element
                    result.append('/');
                }
            }
            return result.toString();
        }

        // this path has only 1 node, append [index] if required
        return createXpathElementCheck(path, index);
    }

    /**
     * Appends the provided index parameter in square brackets to the given name,
     * like <code>path[index]</code>.<p>
     *
     * This method is used if it's clear that some path does not have
     * a square bracket already appended.<p>
     *
     * @param path the path append the index to
     * @param index the index to append
     *
     * @return the simplified Xpath for the given name
     */
    public static String createXpathElement(String path, int index) {

        StringBuilder result = new StringBuilder(path.length() + 5);
        result.append(path);
        result.append('[');
        result.append(index);
        result.append(']');
        return result.toString();
    }

    /**
     * Ensures that a provided simplified Xpath has the format <code>title[1]</code>.<p>
     *
     * This method is used if it's uncertain if some path does have
     * a square bracket already appended or not.<p>
     *
     * Note: If the name already has the format <code>title[1]</code>, then provided index parameter
     * is ignored.<p>
     *
     * @param path the path to get the simplified Xpath for, must not be empty
     * @param index the index to append (if required)
     *
     * @return the simplified Xpath for the given name
     */
    public static String createXpathElementCheck(String path, int index) {

        if (path.charAt(path.length() - 1) == ']') {
            // path is already in the form "title[1]"
            // ignore provided index and return the path "as is"
            return path;
        }

        // append index in square brackets
        return createXpathElement(path, index);
    }

    /**
     * Returns the first Xpath element from the provided path,
     * without the index value.<p>
     *
     * Examples:<br>
     * <code>title</code> is left untouched<br>
     * <code>title[1]</code> becomes <code>title</code><br>
     * <code>title/subtitle</code> becomes <code>title</code><br>
     * <code>title[1]/subtitle[1]</code> becomes <code>title</code><p>
     *
     * @param path the path to get the first Xpath element from
     *
     * @return the first Xpath element from the provided path
     */
    public static String getFirstXpathElement(String path) {

        int pos = path.indexOf('/');
        if (pos >= 0) {
            path = path.substring(0, pos);
        }

        return CmsXmlUtils.removeXpathIndex(path);
    }

    /**
     * Returns the last Xpath element from the provided path,
     * without the index value.<p>
     *
     * Examples:<br>
     * <code>title</code> is left untouched<br>
     * <code>title[1]</code> becomes <code>title</code><br>
     * <code>title/subtitle</code> becomes <code>subtitle</code><br>
     * <code>title[1]/subtitle[1]</code> becomes <code>subtitle</code><p>
     *
     * @param path the path to get the last Xpath element from
     *
     * @return the last Xpath element from the provided path
     */
    public static String getLastXpathElement(String path) {

        int pos = path.lastIndexOf('/');
        if (pos >= 0) {
            path = path.substring(pos + 1);
        }

        return CmsXmlUtils.removeXpathIndex(path);
    }

    /**
     * Returns the last Xpath index from the given path.<p>
     *
     * Examples:<br>
     * <code>title</code> returns the empty String<p>
     * <code>title[1]</code> returns <code>[1]</code><p>
     * <code>title/subtitle</code> returns the empty String<p>
     * <code>title[1]/subtitle[1]</code> returns <code>[1]</code><p>
     *
     * @param path the path to extract the Xpath index from
     *
     * @return the last Xpath index from the given path
     */
    public static String getXpathIndex(String path) {

        int pos1 = path.lastIndexOf('/');
        int pos2 = path.lastIndexOf('[');
        if ((pos2 < 0) || (pos1 > pos2)) {
            // no bracket at all, or the last bracket belongs to an earlier path element
            return "";
        }

        return path.substring(pos2);
    }

    /**
     * Returns the last Xpath index from the given path as integer.<p>
     *
     * Examples:<br>
     * <code>title</code> returns 1<p>
     * <code>title[1]</code> returns 1<p>
     * <code>title/subtitle</code> returns 1<p>
     * <code>title[1]/subtitle[2]</code> returns 2<p>
     *
     * If the last path element has no index, or the index is malformed (not a number,
     * or the closing bracket is missing), <code>1</code> is returned.<p>
     *
     * @param path the path to extract the Xpath index from
     *
     * @return the last Xpath index from the given path as integer
     */
    public static int getXpathIndexInt(String path) {

        int pos1 = path.lastIndexOf('/');
        int pos2 = path.lastIndexOf('[');
        if ((pos2 < 0) || (pos1 > pos2)) {
            return 1;
        }

        int pos3 = path.lastIndexOf(']');
        if (pos3 <= pos2) {
            // opening bracket without a closing bracket behind it, treat like a missing index
            return 1;
        }

        String idxStr = path.substring(pos2 + 1, pos3);
        try {
            return Integer.parseInt(idxStr);
        } catch (NumberFormatException e) {
            // index is not a number, fall through to the default
        }
        return 1;
    }

    /**
     * Returns <code>true</code> if the given path is a Xpath with
     * at least 2 elements.<p>
     *
     * Examples:<br>
     * <code>title</code> returns <code>false</code><br>
     * <code>title[1]</code> returns <code>false</code><br>
     * <code>title/subtitle</code> returns <code>true</code><br>
     * <code>title[1]/subtitle[1]</code> returns <code>true</code><p>
     *
     * @param path the path to check
     * @return true if the given path is a Xpath with at least 2 elements
     */
    public static boolean isDeepXpath(String path) {

        // a slash at position 0 is a leading slash and does not separate two elements
        return path.indexOf('/') > 0;
    }

    /**
     * Marshals (writes) an XML document into an output stream using XML pretty-print formatting.<p>
     *
     * The writer (and with it the provided output stream) is closed after the document has been written.<p>
     *
     * @param document the XML document to marshal
     * @param out the output stream to write to
     * @param encoding the encoding to use
     * @return the output stream with the xml content
     * @throws CmsXmlException if something goes wrong
     */
    public static OutputStream marshal(Document document, OutputStream out, String encoding) throws CmsXmlException {

        try {
            OutputFormat format = OutputFormat.createPrettyPrint();
            format.setEncoding(encoding);

            XMLWriter writer = new XMLWriter(out, format);
            writer.setEscapeText(false);

            writer.write(document);
            writer.close();

        } catch (Exception e) {
            throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e);
        }

        return out;
    }

    /**
     * Marshals (writes) an XML document to a String using XML pretty-print formatting.<p>
     *
     * @param document the XML document to marshal
     * @param encoding the encoding to use
     * @return the marshalled XML document
     * @throws CmsXmlException if something goes wrong
     */
    public static String marshal(Document document, String encoding) throws CmsXmlException {

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        marshal(document, out, encoding);
        try {
            return out.toString(encoding);
        } catch (UnsupportedEncodingException e) {
            throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_TO_STRING_0), e);
        }
    }

    /**
     * Marshals (writes) an XML node to a String using XML pretty-print formatting,
     * without the XML declaration.<p>
     *
     * @param node the XML node to marshal
     * @param encoding the encoding to use
     *
     * @return the string with the xml content
     *
     * @throws CmsXmlException if something goes wrong
     */
    public static String marshal(Node node, String encoding) throws CmsXmlException {

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try {
            OutputFormat format = OutputFormat.createPrettyPrint();
            format.setEncoding(encoding);
            format.setSuppressDeclaration(true);

            XMLWriter writer = new XMLWriter(out, format);
            writer.setEscapeText(false);

            writer.write(node);
            writer.close();

            // decode the bytes with the same encoding the writer was configured with,
            // using the platform default charset here would garble non-ASCII content
            return out.toString(format.getEncoding());
        } catch (Exception e) {
            throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e);
        }
    }

    /**
     * Removes all Xpath indices from the given path.<p>
     *
     * Example:<br>
     * <code>title</code> is left untouched<br>
     * <code>title[1]</code> becomes <code>title</code><br>
     * <code>title/subtitle</code> is left untouched<br>
     * <code>title[1]/subtitle[1]</code> becomes <code>title/subtitle</code><p>
     *
     * Only numeric indices are removed.<p>
     *
     * @param path the path to remove the Xpath index from
     *
     * @return the path with all Xpath indices removed
     */
    public static String removeAllXpathIndices(String path) {

        return path.replaceAll("\\[[0-9]+\\]", "");
    }

    /**
     * Removes the first Xpath element from the path.<p>
     *
     * If the provided path does not contain a "/" character,
     * it is returned unchanged.<p>
     *
     * <p>Examples:<br>
     * <code>title</code> is left untouched<br>
     * <code>title[1]</code> is left untouched<br>
     * <code>title/subtitle</code> becomes <code>subtitle</code><br>
     * <code>title[1]/subtitle[1]</code> becomes <code>subtitle[1]</code><p>
     *
     * @param path the Xpath to remove the first element from
     *
     * @return the path with the first element removed
     */
    public static String removeFirstXpathElement(String path) {

        int pos = path.indexOf('/');
        if (pos < 0) {
            return path;
        }

        return path.substring(pos + 1);
    }

    /**
     * Removes the last complex Xpath element from the path.<p>
     *
     * The same as {@link #removeLastXpathElement(String)} but it works with more complex xpaths,
     * where a slash may occur inside a quoted (<code>'...'</code>) value.
     *
     * <p>Example:<br>
     * <code>system/backup[@date='23/10/2003']/resource[path='/a/b/c']</code> becomes <code>system/backup[@date='23/10/2003']</code><p>
     *
     * @param path the Xpath to remove the last element from
     *
     * @return the path with the last element removed
     */
    public static String removeLastComplexXpathElement(String path) {

        int pos = path.lastIndexOf('/');
        if (pos < 0) {
            return path;
        }
        // count ' chars behind the last slash
        // (count starts at -1 since the first loop iteration only accounts for the slash position itself)
        int p = pos;
        int count = -1;
        while (p > 0) {
            count++;
            p = path.indexOf("'", p + 1);
        }
        String parentPath = path.substring(0, pos);
        if ((count % 2) == 0) {
            // even number of quotes behind the slash: the slash is not inside a quoted value
            return parentPath;
        }
        // odd number of quotes: the slash is inside a quoted value
        p = parentPath.lastIndexOf("'");
        if (p >= 0) {
            // cut off at the opening quote and try again
            return removeLastComplexXpathElement(parentPath.substring(0, p));
        }
        return parentPath;
    }

    /**
     * Removes the last Xpath element from the path.<p>
     *
     * If the provided path does not contain a "/" character,
     * it is returned unchanged.<p>
     *
     * <p>Examples:<br>
     * <code>title</code> is left untouched<br>
     * <code>title[1]</code> is left untouched<br>
     * <code>title/subtitle</code> becomes <code>title</code><br>
     * <code>title[1]/subtitle[1]</code> becomes <code>title[1]</code><p>
     *
     * @param path the Xpath to remove the last element from
     *
     * @return the path with the last element removed
     */
    public static String removeLastXpathElement(String path) {

        int pos = path.lastIndexOf('/');
        if (pos < 0) {
            return path;
        }

        return path.substring(0, pos);
    }

    /**
     * Removes all Xpath index information from the given input path.<p>
     *
     * Examples:<br>
     * <code>title</code> is left untouched<br>
     * <code>title[1]</code> becomes <code>title</code><br>
     * <code>title/subtitle</code> is left untouched<br>
     * <code>title[1]/subtitle[1]</code> becomes <code>title/subtitle</code><p>
     *
     * @param path the path to remove the Xpath index information from
     *
     * @return the simplified Xpath for the given name
     */
    public static String removeXpath(String path) {

        if (path.indexOf('/') > -1) {
            // this is a complex path over more than 1 node
            StringBuilder result = new StringBuilder(path.length() + 32);

            // split the path into sub-elements
            List<String> elements = CmsStringUtil.splitAsList(path, '/');
            int end = elements.size() - 1;
            for (int i = 0; i <= end; i++) {
                // remove [i] from path element if required
                result.append(removeXpathIndex(elements.get(i)));
                if (i < end) {
                    // append path delimiter if not final path element
                    result.append('/');
                }
            }
            return result.toString();
        }

        // this path has only 1 node, remove last index if required
        return removeXpathIndex(path);
    }

    /**
     * Removes the last Xpath index from the given path.<p>
     *
     * Examples:<br>
     * <code>title</code> is left untouched<br>
     * <code>title[1]</code> becomes <code>title</code><br>
     * <code>title/subtitle</code> is left untouched<br>
     * <code>title[1]/subtitle[1]</code> becomes <code>title[1]/subtitle</code><p>
     *
     * @param path the path to remove the Xpath index from
     *
     * @return the path with the last Xpath index removed
     */
    public static String removeXpathIndex(String path) {

        int pos1 = path.lastIndexOf('/');
        int pos2 = path.lastIndexOf('[');
        if ((pos2 < 0) || (pos1 > pos2)) {
            // no bracket at all, or the last bracket belongs to an earlier path element
            return path;
        }

        return path.substring(0, pos2);
    }

    /**
     * Simplifies an Xpath by removing a leading and a trailing slash from the given path.<p>
     *
     * Examples:<br>
     * <code>title/</code> becomes <code>title</code><br>
     * <code>/title[1]/</code> becomes <code>title[1]</code><br>
     * <code>/title/subtitle/</code> becomes <code>title/subtitle</code><br>
     * <code>/title/subtitle[1]/</code> becomes <code>title/subtitle[1]</code><p>
     *
     * The empty String and a single slash both result in the empty String.<p>
     *
     * @param path the path to process
     * @return the input with a leading and a trailing slash removed
     */
    public static String simplifyXpath(String path) {

        int start = 0;
        int end = path.length();
        if ((end > start) && (path.charAt(start) == '/')) {
            // skip one leading slash
            start++;
        }
        if ((end > start) && (path.charAt(end - 1) == '/')) {
            // skip one trailing slash (only if something is left after the leading slash)
            end--;
        }
        return path.substring(start, end);
    }

    /**
     * Helper to unmarshal (read) xml contents from a byte array into a document.<p>
     *
     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
     *
     * @param xmlData the XML data in a byte array
     * @param resolver the XML entity resolver to use
     *
     * @return the base object initialized with the unmarshalled XML document
     *
     * @throws CmsXmlException if something goes wrong
     *
     * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver)
     */
    public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver) throws CmsXmlException {

        return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver);
    }

    /**
     * Helper to unmarshal (read) xml contents from a byte array into a document.<p>
     *
     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
     *
     * @param xmlData the XML data in a byte array
     * @param resolver the XML entity resolver to use
     * @param validate if the reader should try to validate the xml code
     *
     * @return the base object initialized with the unmarshalled XML document
     *
     * @throws CmsXmlException if something goes wrong
     *
     * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver)
     */
    public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver, boolean validate)
    throws CmsXmlException {

        return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver, validate);
    }

    /**
     * Helper to unmarshal (read) xml contents from an input source into a document.<p>
     *
     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
     *
     * Important: No encoding is passed to this method, the XML parser determines it
     * on the base of the information in the input source.<p>
     *
     * This is the same as calling {@link #unmarshalHelper(InputSource, EntityResolver, boolean)}
     * with <code>validate</code> set to <code>false</code>.<p>
     *
     * @param source the XML input source to use
     * @param resolver the XML entity resolver to use
     *
     * @return the unmarshalled XML document
     *
     * @throws CmsXmlException if something goes wrong
     */
    public static Document unmarshalHelper(InputSource source, EntityResolver resolver) throws CmsXmlException {

        return unmarshalHelper(source, resolver, false);
    }

    /**
     * Helper to unmarshal (read) xml contents from an input source into a document.<p>
     *
     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
     *
     * Important: No encoding is passed to this method, the XML parser determines it
     * on the base of the information in the input source.<p>
     *
     * @param source the XML input source to use
     * @param resolver the XML entity resolver to use, may be <code>null</code>
     * @param validate if the reader should try to validate the xml code
     *
     * @return the unmarshalled XML document
     *
     * @throws CmsXmlException if something goes wrong
     */
    public static Document unmarshalHelper(InputSource source, EntityResolver resolver, boolean validate)
    throws CmsXmlException {

        try {
            SAXReader reader = new SAXReader();
            if (resolver != null) {
                reader.setEntityResolver(resolver);
            }
            reader.setMergeAdjacentText(true);
            reader.setStripWhitespaceText(true);
            if (!validate) {
                // explicitly turn off validation and the loading of external DTDs
                reader.setValidation(false);
                reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
            }
            return reader.read(source);
        } catch (DocumentException e) {
            throw new CmsXmlException(
                Messages.get().container(
                    Messages.ERR_UNMARSHALLING_XML_DOC_1,
                    "(systemId = " + source.getSystemId() + ")"),
                e);
        } catch (SAXException e) {
            throw new CmsXmlException(
                Messages.get().container(
                    Messages.ERR_UNMARSHALLING_XML_DOC_1,
                    "(systemId = " + source.getSystemId() + ")"),
                e);
        }
    }

    /**
     * Helper to unmarshal (read) xml contents from a String into a document.<p>
     *
     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
     *
     * @param xmlData the xml data in a String
     * @param resolver the XML entity resolver to use
     * @return the base object initialized with the unmarshalled XML document
     * @throws CmsXmlException if something goes wrong
     * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver)
     */
    public static Document unmarshalHelper(String xmlData, EntityResolver resolver) throws CmsXmlException {

        return CmsXmlUtils.unmarshalHelper(new InputSource(new StringReader(xmlData)), resolver);
    }

    /**
     * Validates the structure of a XML document contained in a byte array
     * with the DTD or XML schema used by the document.<p>
     *
     * @param xmlData a byte array containing a XML document that should be validated
     * @param resolver the XML entity resolver to use
     *
     * @throws CmsXmlException if the validation fails
     */
    public static void validateXmlStructure(byte[] xmlData, EntityResolver resolver) throws CmsXmlException {

        validateXmlStructure(new ByteArrayInputStream(xmlData), resolver);
    }

    /**
     * Validates the structure of a XML document with the DTD or XML schema used
     * by the document.<p>
     *
     * The document is marshalled to bytes with the given encoding first, then these bytes are validated.<p>
     *
     * @param document a XML document that should be validated
     * @param encoding the encoding to use when marshalling the XML document (required)
     * @param resolver the XML entity resolver to use
     *
     * @throws CmsXmlException if the validation fails
     */
    public static void validateXmlStructure(Document document, String encoding, EntityResolver resolver)
    throws CmsXmlException {

        // generate bytes from document
        ByteArrayOutputStream out = new ByteArrayOutputStream(512);
        marshal(document, out, encoding);
        validateXmlStructure(out.toByteArray(), resolver);
    }

    /**
     * Validates the structure of a XML document read from an input stream
     * with the DTD or XML schema used by the document.<p>
     *
     * Note: If the Xerces SAX parser can not be created or configured, or if reading / parsing
     * the stream fails with an exception, the problem is only logged and this method returns
     * without throwing an exception.<p>
     *
     * @param xmlStream a source providing a XML document that should be validated
     * @param resolver the XML entity resolver to use
     *
     * @throws CmsXmlException if the validation fails
     */
    public static void validateXmlStructure(InputStream xmlStream, EntityResolver resolver) throws CmsXmlException {

        XMLReader reader;
        try {
            reader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
        } catch (SAXException e) {
            // xerces parser not available - no schema validation possible
            if (LOG.isWarnEnabled()) {
                LOG.warn(Messages.get().getBundle().key(Messages.LOG_VALIDATION_INIT_XERXES_SAX_READER_FAILED_0), e);
            }
            // no validation of the content is possible
            return;
        }
        // turn on validation
        try {
            reader.setFeature("http://xml.org/sax/features/validation", true);
            // turn on schema validation
            reader.setFeature("http://apache.org/xml/features/validation/schema", true);
            // configure namespace support
            reader.setFeature("http://xml.org/sax/features/namespaces", true);
            reader.setFeature("http://xml.org/sax/features/namespace-prefixes", false);
        } catch (SAXNotRecognizedException e) {
            // should not happen as Xerces 2 support this feature
            if (LOG.isWarnEnabled()) {
                LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_RECOGNIZED_0), e);
            }
            // no validation of the content is possible
            return;
        } catch (SAXNotSupportedException e) {
            // should not happen as Xerces 2 support this feature
            if (LOG.isWarnEnabled()) {
                LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_SUPPORTED_0), e);
            }
            // no validation of the content is possible
            return;
        }

        // add an error handler which turns any errors into XML
        CmsXmlValidationErrorHandler errorHandler = new CmsXmlValidationErrorHandler();
        reader.setErrorHandler(errorHandler);

        if (resolver != null) {
            // set the resolver for the "opencms://" URIs
            reader.setEntityResolver(resolver);
        }

        try {
            reader.parse(new InputSource(xmlStream));
        } catch (IOException e) {
            // should not happen since we read from a byte array
            if (LOG.isErrorEnabled()) {
                LOG.error(Messages.get().getBundle().key(Messages.LOG_READ_XML_FROM_BYTE_ARR_FAILED_0), e);
            }
            return;
        } catch (SAXException e) {
            // should not happen since all errors are handled in the XML error handler
            if (LOG.isErrorEnabled()) {
                LOG.error(Messages.get().getBundle().key(Messages.LOG_PARSE_SAX_EXC_0), e);
            }
            return;
        }

        if (!errorHandler.getErrors().elements().isEmpty()) {
            // there was at least one validation error, so throw an exception
            StringWriter out = new StringWriter(256);
            OutputFormat format = OutputFormat.createPrettyPrint();
            XMLWriter writer = new XMLWriter(out, format);
            try {
                writer.write(errorHandler.getErrors());
                writer.write(errorHandler.getWarnings());
                writer.close();
            } catch (IOException e) {
                // should not happen since we write to a StringWriter
                if (LOG.isErrorEnabled()) {
                    LOG.error(Messages.get().getBundle().key(Messages.LOG_STRINGWRITER_IO_EXC_0), e);
                }
            }
            // generate String from XML for display of document in error message
            throw new CmsXmlException(Messages.get().container(Messages.ERR_XML_VALIDATION_1, out.toString()));
        }
    }
}