001/* 002 * This library is part of OpenCms - 003 * the Open Source Content Management System 004 * 005 * Copyright (c) Alkacon Software GmbH (http://www.alkacon.com) 006 * 007 * This library is free software; you can redistribute it and/or 008 * modify it under the terms of the GNU Lesser General Public 009 * License as published by the Free Software Foundation; either 010 * version 2.1 of the License, or (at your option) any later version. 011 * 012 * This library is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015 * Lesser General Public License for more details. 016 * 017 * For further information about Alkacon Software GmbH, please see the 018 * company website: http://www.alkacon.com 019 * 020 * For further information about OpenCms, please see the 021 * project website: http://www.opencms.org 022 * 023 * You should have received a copy of the GNU Lesser General Public 024 * License along with this library; if not, write to the Free Software 025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 026 */ 027 028package org.opencms.xml; 029 030import org.opencms.file.CmsResource; 031import org.opencms.main.CmsLog; 032import org.opencms.util.CmsStringUtil; 033 034import java.io.ByteArrayInputStream; 035import java.io.ByteArrayOutputStream; 036import java.io.IOException; 037import java.io.InputStream; 038import java.io.OutputStream; 039import java.io.StringReader; 040import java.io.StringWriter; 041import java.io.UnsupportedEncodingException; 042import java.util.List; 043 044import org.apache.commons.logging.Log; 045 046import org.dom4j.Document; 047import org.dom4j.DocumentException; 048import org.dom4j.Node; 049import org.dom4j.io.OutputFormat; 050import org.dom4j.io.SAXReader; 051import org.dom4j.io.XMLWriter; 052import org.xml.sax.EntityResolver; 053import org.xml.sax.InputSource; 054import org.xml.sax.SAXException; 055import org.xml.sax.SAXNotRecognizedException; 056import org.xml.sax.SAXNotSupportedException; 057import org.xml.sax.XMLReader; 058import org.xml.sax.helpers.XMLReaderFactory; 059 060/** 061 * Provides some basic XML handling utilities.<p> 062 * 063 * @since 6.0.0 064 */ 065public final class CmsXmlUtils { 066 067 /** The log object for this class. */ 068 private static final Log LOG = CmsLog.getLog(CmsXmlUtils.class); 069 070 /** 071 * Prevents instances of this class from being generated.<p> 072 */ 073 private CmsXmlUtils() { 074 075 // noop 076 } 077 078 /** 079 * Concatenates two Xpath expressions, ensuring that exactly one slash "/" is between them.<p> 080 * 081 * Use this method if it's uncertain if the given arguments are starting or ending with 082 * a slash "/".<p> 083 * 084 * Examples:<br> 085 * <code>"title", "subtitle"</code> becomes <code>title/subtitle</code><br> 086 * <code>"title[1]/", "subtitle"</code> becomes <code>title[1]/subtitle</code><br> 087 * <code>"title[1]/", "/subtitle[1]"</code> becomes <code>title[1]/subtitle[1]</code><p> 088 * 089 * @param prefix the prefix Xpath 090 * @param suffix the suffix Xpath 091 * 092 * @return the concatenated Xpath build from prefix and suffix 093 */ 094 public static String concatXpath(String prefix, String suffix) { 095 096 if (suffix == null) { 097 // ensure suffix is not null 098 suffix = ""; 099 } else { 100 if ((suffix.length() > 0) && (suffix.charAt(0) == '/')) { 101 // remove leading '/' form suffix 102 suffix = suffix.substring(1); 103 } 104 } 105 if (prefix != null) { 106 StringBuffer result = new StringBuffer(32); 107 result.append(prefix); 108 if (!CmsResource.isFolder(prefix)) { 109 result.append('/'); 110 } 111 result.append(suffix); 112 return result.toString(); 113 } 114 return suffix; 115 } 116 117 /** 118 * Translates a simple lookup path to the simplified Xpath format used for 119 * the internal bookmarks.<p> 120 * 121 * Examples:<br> 122 * <code>title</code> becomes <code>title[1]</code><br> 123 * <code>title[1]</code> is left untouched<br> 124 * <code>title/subtitle</code> becomes <code>title[1]/subtitle[1]</code><br> 125 * <code>title/subtitle[1]</code> becomes <code>title[1]/subtitle[1]</code><p> 126 * 127 * Note: If the name already has the format <code>title[1]</code> then provided index parameter 128 * is ignored.<p> 129 * 130 * @param path the path to get the simplified Xpath for 131 * @param index the index to append (if required) 132 * 133 * @return the simplified Xpath for the given name 134 */ 135 public static String createXpath(String path, int index) { 136 137 if (path.indexOf('/') > -1) { 138 // this is a complex path over more then 1 node 139 StringBuffer result = new StringBuffer(path.length() + 32); 140 141 // split the path into sub elements 142 List<String> elements = CmsStringUtil.splitAsList(path, '/'); 143 int end = elements.size() - 1; 144 for (int i = 0; i <= end; i++) { 145 // append [i] to path element if required 146 result.append(createXpathElementCheck(elements.get(i), (i == end) ? index : 1)); 147 if (i < end) { 148 // append path delimiter if not final path element 149 result.append('/'); 150 } 151 } 152 return result.toString(); 153 } 154 155 // this path has only 1 node, append [index] if required 156 return createXpathElementCheck(path, index); 157 } 158 159 /** 160 * Appends the provided index parameter in square brackets to the given name, 161 * like <code>path[index]</code>.<p> 162 * 163 * This method is used if it's clear that some path does not have 164 * a square bracket already appended.<p> 165 * 166 * @param path the path append the index to 167 * @param index the index to append 168 * 169 * @return the simplified Xpath for the given name 170 */ 171 public static String createXpathElement(String path, int index) { 172 173 StringBuffer result = new StringBuffer(path.length() + 5); 174 result.append(path); 175 result.append('['); 176 result.append(index); 177 result.append(']'); 178 return result.toString(); 179 } 180 181 /** 182 * Ensures that a provided simplified Xpath has the format <code>title[1]</code>.<p> 183 * 184 * This method is used if it's uncertain if some path does have 185 * a square bracket already appended or not.<p> 186 * 187 * Note: If the name already has the format <code>title[1]</code>, then provided index parameter 188 * is ignored.<p> 189 * 190 * @param path the path to get the simplified Xpath for 191 * @param index the index to append (if required) 192 * 193 * @return the simplified Xpath for the given name 194 */ 195 public static String createXpathElementCheck(String path, int index) { 196 197 if (path.charAt(path.length() - 1) == ']') { 198 // path is already in the form "title[1]" 199 // ignore provided index and return the path "as is" 200 return path; 201 } 202 203 // append index in square brackets 204 return createXpathElement(path, index); 205 } 206 207 /** 208 * Returns the first Xpath element from the provided path, 209 * without the index value.<p> 210 * 211 * Examples:<br> 212 * <code>title</code> is left untouched<br> 213 * <code>title[1]</code> becomes <code>title</code><br> 214 * <code>title/subtitle</code> becomes <code>title</code><br> 215 * <code>title[1]/subtitle[1]</code> becomes <code>title</code><p> 216 * 217 * @param path the path to get the first Xpath element from 218 * 219 * @return the first Xpath element from the provided path 220 */ 221 public static String getFirstXpathElement(String path) { 222 223 int pos = path.indexOf('/'); 224 if (pos >= 0) { 225 path = path.substring(0, pos); 226 } 227 228 return CmsXmlUtils.removeXpathIndex(path); 229 } 230 231 /** 232 * Returns the last Xpath element from the provided path, 233 * without the index value.<p> 234 * 235 * Examples:<br> 236 * <code>title</code> is left untouched<br> 237 * <code>title[1]</code> becomes <code>title</code><br> 238 * <code>title/subtitle</code> becomes <code>subtitle</code><br> 239 * <code>title[1]/subtitle[1]</code> becomes <code>subtitle</code><p> 240 * 241 * @param path the path to get the last Xpath element from 242 * 243 * @return the last Xpath element from the provided path 244 */ 245 public static String getLastXpathElement(String path) { 246 247 int pos = path.lastIndexOf('/'); 248 if (pos >= 0) { 249 path = path.substring(pos + 1); 250 } 251 252 return CmsXmlUtils.removeXpathIndex(path); 253 } 254 255 /** 256 * Returns the last Xpath index from the given path.<p> 257 * 258 * Examples:<br> 259 * <code>title</code> returns the empty String<p> 260 * <code>title[1]</code> returns <code>[1]</code><p> 261 * <code>title/subtitle</code> returns them empty String<p> 262 * <code>title[1]/subtitle[1]</code> returns <code>[1]</code><p> 263 * 264 * @param path the path to extract the Xpath index from 265 * 266 * @return the last Xpath index from the given path 267 */ 268 public static String getXpathIndex(String path) { 269 270 int pos1 = path.lastIndexOf('/'); 271 int pos2 = path.lastIndexOf('['); 272 if ((pos2 < 0) || (pos1 > pos2)) { 273 return ""; 274 } 275 276 return path.substring(pos2); 277 } 278 279 /** 280 * Returns the last Xpath index from the given path as integer.<p> 281 * 282 * Examples:<br> 283 * <code>title</code> returns 1<p> 284 * <code>title[1]</code> returns 1<p> 285 * <code>title/subtitle</code> returns 1<p> 286 * <code>title[1]/subtitle[2]</code> returns 2<p> 287 * 288 * @param path the path to extract the Xpath index from 289 * 290 * @return the last Xpath index from the given path as integer 291 */ 292 public static int getXpathIndexInt(String path) { 293 294 int pos1 = path.lastIndexOf('/'); 295 int pos2 = path.lastIndexOf('['); 296 if ((pos2 < 0) || (pos1 > pos2)) { 297 return 1; 298 } 299 300 String idxStr = path.substring(pos2 + 1, path.lastIndexOf(']')); 301 try { 302 return Integer.parseInt(idxStr); 303 } catch (NumberFormatException e) { 304 // NOOP 305 } 306 return 1; 307 } 308 309 /** 310 * Returns <code>true</code> if the given path is a Xpath with 311 * at least 2 elements.<p> 312 * 313 * Examples:<br> 314 * <code>title</code> returns <code>false</code><br> 315 * <code>title[1]</code> returns <code>false</code><br> 316 * <code>title/subtitle</code> returns <code>true</code><br> 317 * <code>title[1]/subtitle[1]</code> returns <code>true</code><p> 318 * 319 * @param path the path to check 320 * @return true if the given path is a Xpath with at least 2 elements 321 */ 322 public static boolean isDeepXpath(String path) { 323 324 return path.indexOf('/') > 0; 325 } 326 327 /** 328 * Marshals (writes) an XML document into an output stream using XML pretty-print formatting.<p> 329 * 330 * @param document the XML document to marshal 331 * @param out the output stream to write to 332 * @param encoding the encoding to use 333 * @return the output stream with the xml content 334 * @throws CmsXmlException if something goes wrong 335 */ 336 public static OutputStream marshal(Document document, OutputStream out, String encoding) throws CmsXmlException { 337 338 try { 339 OutputFormat format = OutputFormat.createPrettyPrint(); 340 format.setEncoding(encoding); 341 342 XMLWriter writer = new XMLWriter(out, format); 343 writer.setEscapeText(false); 344 345 writer.write(document); 346 writer.close(); 347 348 } catch (Exception e) { 349 throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e); 350 } 351 352 return out; 353 } 354 355 /** 356 * Marshals (writes) an XML document to a String using XML pretty-print formatting.<p> 357 * 358 * @param document the XML document to marshal 359 * @param encoding the encoding to use 360 * @return the marshalled XML document 361 * @throws CmsXmlException if something goes wrong 362 */ 363 public static String marshal(Document document, String encoding) throws CmsXmlException { 364 365 ByteArrayOutputStream out = new ByteArrayOutputStream(); 366 marshal(document, out, encoding); 367 try { 368 return out.toString(encoding); 369 } catch (UnsupportedEncodingException e) { 370 throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_TO_STRING_0), e); 371 } 372 } 373 374 /** 375 * Marshals (writes) an XML node into an output stream using XML pretty-print formatting.<p> 376 * 377 * @param node the XML node to marshal 378 * @param encoding the encoding to use 379 * 380 * @return the string with the xml content 381 * 382 * @throws CmsXmlException if something goes wrong 383 */ 384 public static String marshal(Node node, String encoding) throws CmsXmlException { 385 386 ByteArrayOutputStream out = new ByteArrayOutputStream(); 387 try { 388 OutputFormat format = OutputFormat.createPrettyPrint(); 389 format.setEncoding(encoding); 390 format.setSuppressDeclaration(true); 391 392 XMLWriter writer = new XMLWriter(out, format); 393 writer.setEscapeText(false); 394 395 writer.write(node); 396 writer.close(); 397 } catch (Exception e) { 398 throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e); 399 } 400 return new String(out.toByteArray()); 401 } 402 403 /** 404 * Removes all Xpath indices from the given path.<p> 405 * 406 * Example:<br> 407 * <code>title</code> is left untouched<br> 408 * <code>title[1]</code> becomes <code>title</code><br> 409 * <code>title/subtitle</code> is left untouched<br> 410 * <code>title[1]/subtitle[1]</code> becomes <code>title/subtitle</code><p> 411 * 412 * @param path the path to remove the Xpath index from 413 * 414 * @return the path with all Xpath indices removed 415 */ 416 public static String removeAllXpathIndices(String path) { 417 418 return path.replaceAll("\\[[0-9]+\\]", ""); 419 } 420 421 /** 422 * Removes the first Xpath element from the path.<p> 423 * 424 * If the provided path does not contain a "/" character, 425 * it is returned unchanged.<p> 426 * 427 * <p>Examples:<br> 428 * <code>title</code> is left untouched<br> 429 * <code>title[1]</code> is left untouched<br> 430 * <code>title/subtitle</code> becomes <code>subtitle</code><br> 431 * <code>title[1]/subtitle[1]</code> becomes <code>subtitle[1]</code><p> 432 * 433 * @param path the Xpath to remove the first element from 434 * 435 * @return the path with the first element removed 436 */ 437 public static String removeFirstXpathElement(String path) { 438 439 int pos = path.indexOf('/'); 440 if (pos < 0) { 441 return path; 442 } 443 444 return path.substring(pos + 1); 445 } 446 447 /** 448 * Removes the last complex Xpath element from the path.<p> 449 * 450 * The same as {@link #removeLastXpathElement(String)} both it works with more complex xpaths. 451 * 452 * <p>Example:<br> 453 * <code>system/backup[@date='23/10/2003']/resource[path='/a/b/c']</code> becomes <code>system/backup[@date='23/10/2003']</code><p> 454 * 455 * @param path the Xpath to remove the last element from 456 * 457 * @return the path with the last element removed 458 */ 459 public static String removeLastComplexXpathElement(String path) { 460 461 int pos = path.lastIndexOf('/'); 462 if (pos < 0) { 463 return path; 464 } 465 // count ' chars 466 int p = pos; 467 int count = -1; 468 while (p > 0) { 469 count++; 470 p = path.indexOf("\'", p + 1); 471 } 472 String parentPath = path.substring(0, pos); 473 if ((count % 2) == 0) { 474 // if substring is complete 475 return parentPath; 476 } 477 // if not complete 478 p = parentPath.lastIndexOf("'"); 479 if (p >= 0) { 480 // complete it if possible 481 return removeLastComplexXpathElement(parentPath.substring(0, p)); 482 } 483 return parentPath; 484 } 485 486 /** 487 * Removes the last Xpath element from the path.<p> 488 * 489 * If the provided path does not contain a "/" character, 490 * it is returned unchanged.<p> 491 * 492 * <p>Examples:<br> 493 * <code>title</code> is left untouched<br> 494 * <code>title[1]</code> is left untouched<br> 495 * <code>title/subtitle</code> becomes <code>title</code><br> 496 * <code>title[1]/subtitle[1]</code> becomes <code>title[1]</code><p> 497 * 498 * @param path the Xpath to remove the last element from 499 * 500 * @return the path with the last element removed 501 */ 502 public static String removeLastXpathElement(String path) { 503 504 int pos = path.lastIndexOf('/'); 505 if (pos < 0) { 506 return path; 507 } 508 509 return path.substring(0, pos); 510 } 511 512 /** 513 * Removes all Xpath index information from the given input path.<p> 514 * 515 * Examples:<br> 516 * <code>title</code> is left untouched<br> 517 * <code>title[1]</code> becomes <code>title</code><br> 518 * <code>title/subtitle</code> is left untouched<br> 519 * <code>title[1]/subtitle[1]</code> becomes <code>title/subtitle</code><p> 520 * 521 * @param path the path to remove the Xpath index information from 522 * 523 * @return the simplified Xpath for the given name 524 */ 525 public static String removeXpath(String path) { 526 527 if (path.indexOf('/') > -1) { 528 // this is a complex path over more then 1 node 529 StringBuffer result = new StringBuffer(path.length() + 32); 530 531 // split the path into sub-elements 532 List<String> elements = CmsStringUtil.splitAsList(path, '/'); 533 int end = elements.size() - 1; 534 for (int i = 0; i <= end; i++) { 535 // remove [i] from path element if required 536 result.append(removeXpathIndex(elements.get(i))); 537 if (i < end) { 538 // append path delimiter if not final path element 539 result.append('/'); 540 } 541 } 542 return result.toString(); 543 } 544 545 // this path has only 1 node, remove last index if required 546 return removeXpathIndex(path); 547 } 548 549 /** 550 * Removes the last Xpath index from the given path.<p> 551 * 552 * Examples:<br> 553 * <code>title</code> is left untouched<br> 554 * <code>title[1]</code> becomes <code>title</code><br> 555 * <code>title/subtitle</code> is left untouched<br> 556 * <code>title[1]/subtitle[1]</code> becomes <code>title[1]/subtitle</code><p> 557 * 558 * @param path the path to remove the Xpath index from 559 * 560 * @return the path with the last Xpath index removed 561 */ 562 public static String removeXpathIndex(String path) { 563 564 int pos1 = path.lastIndexOf('/'); 565 int pos2 = path.lastIndexOf('['); 566 if ((pos2 < 0) || (pos1 > pos2)) { 567 return path; 568 } 569 570 return path.substring(0, pos2); 571 } 572 573 /** 574 * Simplifies an Xpath by removing a leading and a trailing slash from the given path.<p> 575 * 576 * Examples:<br> 577 * <code>title/</code> becomes <code>title</code><br> 578 * <code>/title[1]/</code> becomes <code>title[1]</code><br> 579 * <code>/title/subtitle/</code> becomes <code>title/subtitle</code><br> 580 * <code>/title/subtitle[1]/</code> becomes <code>title/subtitle[1]</code><p> 581 * 582 * @param path the path to process 583 * @return the input with a leading and a trailing slash removed 584 */ 585 public static String simplifyXpath(String path) { 586 587 StringBuffer result = new StringBuffer(path); 588 if (result.charAt(0) == '/') { 589 result.deleteCharAt(0); 590 } 591 int pos = result.length() - 1; 592 if (result.charAt(pos) == '/') { 593 result.deleteCharAt(pos); 594 } 595 return result.toString(); 596 } 597 598 /** 599 * Helper to unmarshal (read) xml contents from a byte array into a document.<p> 600 * 601 * Using this method ensures that the OpenCms XML entity resolver is used.<p> 602 * 603 * @param xmlData the XML data in a byte array 604 * @param resolver the XML entity resolver to use 605 * 606 * @return the base object initialized with the unmarshalled XML document 607 * 608 * @throws CmsXmlException if something goes wrong 609 * 610 * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver) 611 */ 612 public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver) throws CmsXmlException { 613 614 return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver); 615 } 616 617 /** 618 * Helper to unmarshal (read) xml contents from a byte array into a document.<p> 619 * 620 * Using this method ensures that the OpenCms XML entity resolver is used.<p> 621 * 622 * @param xmlData the XML data in a byte array 623 * @param resolver the XML entity resolver to use 624 * @param validate if the reader should try to validate the xml code 625 * 626 * @return the base object initialized with the unmarshalled XML document 627 * 628 * @throws CmsXmlException if something goes wrong 629 * 630 * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver) 631 */ 632 public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver, boolean validate) 633 throws CmsXmlException { 634 635 return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver, validate); 636 } 637 638 /** 639 * Helper to unmarshal (read) xml contents from an input source into a document.<p> 640 * 641 * Using this method ensures that the OpenCms XML entity resolver is used.<p> 642 * 643 * Important: The encoding provided will NOT be used during unmarshalling, 644 * the XML parser will do this on the base of the information in the source String. 645 * The encoding is used for initializing the created instance of the document, 646 * which means it will be used when marshalling the document again later.<p> 647 * 648 * @param source the XML input source to use 649 * @param resolver the XML entity resolver to use 650 * 651 * @return the unmarshalled XML document 652 * 653 * @throws CmsXmlException if something goes wrong 654 */ 655 public static Document unmarshalHelper(InputSource source, EntityResolver resolver) throws CmsXmlException { 656 657 return unmarshalHelper(source, resolver, false); 658 } 659 660 /** 661 * Helper to unmarshal (read) xml contents from an input source into a document.<p> 662 * 663 * Using this method ensures that the OpenCms XML entity resolver is used.<p> 664 * 665 * Important: The encoding provided will NOT be used during unmarshalling, 666 * the XML parser will do this on the base of the information in the source String. 667 * The encoding is used for initializing the created instance of the document, 668 * which means it will be used when marshalling the document again later.<p> 669 * 670 * @param source the XML input source to use 671 * @param resolver the XML entity resolver to use 672 * @param validate if the reader should try to validate the xml code 673 * 674 * @return the unmarshalled XML document 675 * 676 * @throws CmsXmlException if something goes wrong 677 */ 678 public static Document unmarshalHelper(InputSource source, EntityResolver resolver, boolean validate) 679 throws CmsXmlException { 680 681 try { 682 SAXReader reader = new SAXReader(); 683 if (resolver != null) { 684 reader.setEntityResolver(resolver); 685 } 686 reader.setMergeAdjacentText(true); 687 reader.setStripWhitespaceText(true); 688 if (!validate) { 689 reader.setValidation(false); 690 reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 691 } 692 return reader.read(source); 693 } catch (DocumentException e) { 694 throw new CmsXmlException(Messages.get().container( 695 Messages.ERR_UNMARSHALLING_XML_DOC_1, 696 "(systemId = " + source.getSystemId() + ")"), e); 697 } catch (SAXException e) { 698 throw new CmsXmlException(Messages.get().container( 699 Messages.ERR_UNMARSHALLING_XML_DOC_1, 700 "(systemId = " + source.getSystemId() + ")"), e); 701 } 702 } 703 704 /** 705 * Helper to unmarshal (read) xml contents from a String into a document.<p> 706 * 707 * Using this method ensures that the OpenCms XML entitiy resolver is used.<p> 708 * 709 * @param xmlData the xml data in a String 710 * @param resolver the XML entity resolver to use 711 * @return the base object initialized with the unmarshalled XML document 712 * @throws CmsXmlException if something goes wrong 713 * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver) 714 */ 715 public static Document unmarshalHelper(String xmlData, EntityResolver resolver) throws CmsXmlException { 716 717 return CmsXmlUtils.unmarshalHelper(new InputSource(new StringReader(xmlData)), resolver); 718 } 719 720 /** 721 * Validates the structure of a XML document contained in a byte array 722 * with the DTD or XML schema used by the document.<p> 723 * 724 * @param xmlData a byte array containing a XML document that should be validated 725 * @param resolver the XML entity resolver to use 726 * 727 * @throws CmsXmlException if the validation fails 728 */ 729 public static void validateXmlStructure(byte[] xmlData, EntityResolver resolver) throws CmsXmlException { 730 731 validateXmlStructure(new ByteArrayInputStream(xmlData), resolver); 732 } 733 734 /** 735 * Validates the structure of a XML document with the DTD or XML schema used 736 * by the document.<p> 737 * 738 * @param document a XML document that should be validated 739 * @param encoding the encoding to use when marshalling the XML document (required) 740 * @param resolver the XML entity resolver to use 741 * 742 * @throws CmsXmlException if the validation fails 743 */ 744 public static void validateXmlStructure(Document document, String encoding, EntityResolver resolver) 745 throws CmsXmlException { 746 747 // generate bytes from document 748 byte[] xmlData = ((ByteArrayOutputStream)marshal(document, new ByteArrayOutputStream(512), encoding)).toByteArray(); 749 validateXmlStructure(xmlData, resolver); 750 } 751 752 /** 753 * Validates the structure of a XML document contained in a byte array 754 * with the DTD or XML schema used by the document.<p> 755 * 756 * @param xmlStream a source providing a XML document that should be validated 757 * @param resolver the XML entity resolver to use 758 * 759 * @throws CmsXmlException if the validation fails 760 */ 761 public static void validateXmlStructure(InputStream xmlStream, EntityResolver resolver) throws CmsXmlException { 762 763 XMLReader reader; 764 try { 765 reader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser"); 766 } catch (SAXException e) { 767 // xerces parser not available - no schema validation possible 768 if (LOG.isWarnEnabled()) { 769 LOG.warn(Messages.get().getBundle().key(Messages.LOG_VALIDATION_INIT_XERXES_SAX_READER_FAILED_0), e); 770 } 771 // no validation of the content is possible 772 return; 773 } 774 // turn on validation 775 try { 776 reader.setFeature("http://xml.org/sax/features/validation", true); 777 // turn on schema validation 778 reader.setFeature("http://apache.org/xml/features/validation/schema", true); 779 // configure namespace support 780 reader.setFeature("http://xml.org/sax/features/namespaces", true); 781 reader.setFeature("http://xml.org/sax/features/namespace-prefixes", false); 782 } catch (SAXNotRecognizedException e) { 783 // should not happen as Xerces 2 support this feature 784 if (LOG.isWarnEnabled()) { 785 LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_RECOGNIZED_0), e); 786 } 787 // no validation of the content is possible 788 return; 789 } catch (SAXNotSupportedException e) { 790 // should not happen as Xerces 2 support this feature 791 if (LOG.isWarnEnabled()) { 792 LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_SUPPORTED_0), e); 793 } 794 // no validation of the content is possible 795 return; 796 } 797 798 // add an error handler which turns any errors into XML 799 CmsXmlValidationErrorHandler errorHandler = new CmsXmlValidationErrorHandler(); 800 reader.setErrorHandler(errorHandler); 801 802 if (resolver != null) { 803 // set the resolver for the "opencms://" URIs 804 reader.setEntityResolver(resolver); 805 } 806 807 try { 808 reader.parse(new InputSource(xmlStream)); 809 } catch (IOException e) { 810 // should not happen since we read form a byte array 811 if (LOG.isErrorEnabled()) { 812 LOG.error(Messages.get().getBundle().key(Messages.LOG_READ_XML_FROM_BYTE_ARR_FAILED_0), e); 813 } 814 return; 815 } catch (SAXException e) { 816 // should not happen since all errors are handled in the XML error handler 817 if (LOG.isErrorEnabled()) { 818 LOG.error(Messages.get().getBundle().key(Messages.LOG_PARSE_SAX_EXC_0), e); 819 } 820 return; 821 } 822 823 if (errorHandler.getErrors().elements().size() > 0) { 824 // there was at last one validation error, so throw an exception 825 StringWriter out = new StringWriter(256); 826 OutputFormat format = OutputFormat.createPrettyPrint(); 827 XMLWriter writer = new XMLWriter(out, format); 828 try { 829 writer.write(errorHandler.getErrors()); 830 writer.write(errorHandler.getWarnings()); 831 writer.close(); 832 } catch (IOException e) { 833 // should not happen since we write to a StringWriter 834 if (LOG.isErrorEnabled()) { 835 LOG.error(Messages.get().getBundle().key(Messages.LOG_STRINGWRITER_IO_EXC_0), e); 836 } 837 } 838 // generate String from XML for display of document in error message 839 throw new CmsXmlException(Messages.get().container(Messages.ERR_XML_VALIDATION_1, out.toString())); 840 } 841 } 842}