001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 * 
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.xml;
029
030import org.opencms.file.CmsResource;
031import org.opencms.main.CmsLog;
032import org.opencms.util.CmsStringUtil;
033
034import java.io.ByteArrayInputStream;
035import java.io.ByteArrayOutputStream;
036import java.io.IOException;
037import java.io.InputStream;
038import java.io.OutputStream;
039import java.io.StringReader;
040import java.io.StringWriter;
041import java.io.UnsupportedEncodingException;
042import java.util.List;
043
044import org.apache.commons.logging.Log;
045
046import org.dom4j.Document;
047import org.dom4j.DocumentException;
048import org.dom4j.Node;
049import org.dom4j.io.OutputFormat;
050import org.dom4j.io.SAXReader;
051import org.dom4j.io.XMLWriter;
052import org.xml.sax.EntityResolver;
053import org.xml.sax.InputSource;
054import org.xml.sax.SAXException;
055import org.xml.sax.SAXNotRecognizedException;
056import org.xml.sax.SAXNotSupportedException;
057import org.xml.sax.XMLReader;
058import org.xml.sax.helpers.XMLReaderFactory;
059
060/**
061 * Provides some basic XML handling utilities.<p>
062 * 
063 * @since 6.0.0 
064 */
065public final class CmsXmlUtils {
066
    /** The log object for this class. */
    private static final Log LOG = CmsLog.getLog(CmsXmlUtils.class);

    /**
     * Prevents instances of this class from being generated.<p> 
     * 
     * The utility methods of this class are static, so the constructor is hidden.<p>
     */
    private CmsXmlUtils() {

        // noop - intentionally empty, the constructor only exists to be private
    }
077
078    /**
079     * Concatenates two Xpath expressions, ensuring that exactly one slash "/" is between them.<p>  
080     * 
081     * Use this method if it's uncertain if the given arguments are starting or ending with 
082     * a slash "/".<p>
083     * 
084     * Examples:<br> 
085     * <code>"title", "subtitle"</code> becomes <code>title/subtitle</code><br>
086     * <code>"title[1]/", "subtitle"</code> becomes <code>title[1]/subtitle</code><br>
087     * <code>"title[1]/", "/subtitle[1]"</code> becomes <code>title[1]/subtitle[1]</code><p>
088     * 
089     * @param prefix the prefix Xpath
090     * @param suffix the suffix Xpath
091     * 
092     * @return the concatenated Xpath build from prefix and suffix
093     */
094    public static String concatXpath(String prefix, String suffix) {
095
096        if (suffix == null) {
097            // ensure suffix is not null
098            suffix = "";
099        } else {
100            if ((suffix.length() > 0) && (suffix.charAt(0) == '/')) {
101                // remove leading '/' form suffix
102                suffix = suffix.substring(1);
103            }
104        }
105        if (prefix != null) {
106            StringBuffer result = new StringBuffer(32);
107            result.append(prefix);
108            if (!CmsResource.isFolder(prefix)) {
109                result.append('/');
110            }
111            result.append(suffix);
112            return result.toString();
113        }
114        return suffix;
115    }
116
117    /**
118     * Translates a simple lookup path to the simplified Xpath format used for 
119     * the internal bookmarks.<p>
120     * 
121     * Examples:<br> 
122     * <code>title</code> becomes <code>title[1]</code><br>
123     * <code>title[1]</code> is left untouched<br>
124     * <code>title/subtitle</code> becomes <code>title[1]/subtitle[1]</code><br>
125     * <code>title/subtitle[1]</code> becomes <code>title[1]/subtitle[1]</code><p>
126     * 
127     * Note: If the name already has the format <code>title[1]</code> then provided index parameter 
128     * is ignored.<p> 
129     * 
130     * @param path the path to get the simplified Xpath for
131     * @param index the index to append (if required)
132     * 
133     * @return the simplified Xpath for the given name
134     */
135    public static String createXpath(String path, int index) {
136
137        if (path.indexOf('/') > -1) {
138            // this is a complex path over more then 1 node
139            StringBuffer result = new StringBuffer(path.length() + 32);
140
141            // split the path into sub elements
142            List<String> elements = CmsStringUtil.splitAsList(path, '/');
143            int end = elements.size() - 1;
144            for (int i = 0; i <= end; i++) {
145                // append [i] to path element if required 
146                result.append(createXpathElementCheck(elements.get(i), (i == end) ? index : 1));
147                if (i < end) {
148                    // append path delimiter if not final path element
149                    result.append('/');
150                }
151            }
152            return result.toString();
153        }
154
155        // this path has only 1 node, append [index] if required
156        return createXpathElementCheck(path, index);
157    }
158
159    /**
160     * Appends the provided index parameter in square brackets to the given name,
161     * like <code>path[index]</code>.<p>
162     * 
163     * This method is used if it's clear that some path does not have 
164     * a square bracket already appended.<p>
165     * 
166     * @param path the path append the index to
167     * @param index the index to append
168     * 
169     * @return the simplified Xpath for the given name
170     */
171    public static String createXpathElement(String path, int index) {
172
173        StringBuffer result = new StringBuffer(path.length() + 5);
174        result.append(path);
175        result.append('[');
176        result.append(index);
177        result.append(']');
178        return result.toString();
179    }
180
181    /**
182     * Ensures that a provided simplified Xpath has the format <code>title[1]</code>.<p>
183     * 
184     * This method is used if it's uncertain if some path does have 
185     * a square bracket already appended or not.<p>
186     * 
187     * Note: If the name already has the format <code>title[1]</code>, then provided index parameter 
188     * is ignored.<p> 
189     * 
190     * @param path the path to get the simplified Xpath for
191     * @param index the index to append (if required)
192     * 
193     * @return the simplified Xpath for the given name
194     */
195    public static String createXpathElementCheck(String path, int index) {
196
197        if (path.charAt(path.length() - 1) == ']') {
198            // path is already in the form "title[1]"
199            // ignore provided index and return the path "as is"
200            return path;
201        }
202
203        // append index in square brackets
204        return createXpathElement(path, index);
205    }
206
207    /**
208     * Returns the first Xpath element from the provided path, 
209     * without the index value.<p>
210     * 
211     * Examples:<br> 
212     * <code>title</code> is left untouched<br>
213     * <code>title[1]</code> becomes <code>title</code><br>
214     * <code>title/subtitle</code> becomes <code>title</code><br>
215     * <code>title[1]/subtitle[1]</code> becomes <code>title</code><p>
216     * 
217     * @param path the path to get the first Xpath element from
218     * 
219     * @return the first Xpath element from the provided path
220     */
221    public static String getFirstXpathElement(String path) {
222
223        int pos = path.indexOf('/');
224        if (pos >= 0) {
225            path = path.substring(0, pos);
226        }
227
228        return CmsXmlUtils.removeXpathIndex(path);
229    }
230
231    /**
232     * Returns the last Xpath element from the provided path, 
233     * without the index value.<p>
234     * 
235     * Examples:<br> 
236     * <code>title</code> is left untouched<br>
237     * <code>title[1]</code> becomes <code>title</code><br>
238     * <code>title/subtitle</code> becomes <code>subtitle</code><br>
239     * <code>title[1]/subtitle[1]</code> becomes <code>subtitle</code><p>
240     * 
241     * @param path the path to get the last Xpath element from
242     * 
243     * @return the last Xpath element from the provided path
244     */
245    public static String getLastXpathElement(String path) {
246
247        int pos = path.lastIndexOf('/');
248        if (pos >= 0) {
249            path = path.substring(pos + 1);
250        }
251
252        return CmsXmlUtils.removeXpathIndex(path);
253    }
254
255    /**
256     * Returns the last Xpath index from the given path.<p>
257     * 
258     * Examples:<br> 
259     * <code>title</code> returns the empty String<p>
260     * <code>title[1]</code> returns <code>[1]</code><p>
261     * <code>title/subtitle</code> returns them empty String<p>
262     * <code>title[1]/subtitle[1]</code> returns <code>[1]</code><p>
263     * 
264     * @param path the path to extract the Xpath index from
265     * 
266     * @return  the last Xpath index from the given path
267     */
268    public static String getXpathIndex(String path) {
269
270        int pos1 = path.lastIndexOf('/');
271        int pos2 = path.lastIndexOf('[');
272        if ((pos2 < 0) || (pos1 > pos2)) {
273            return "";
274        }
275
276        return path.substring(pos2);
277    }
278
279    /**
280     * Returns the last Xpath index from the given path as integer.<p>
281     * 
282     * Examples:<br> 
283     * <code>title</code> returns 1<p>
284     * <code>title[1]</code> returns 1<p>
285     * <code>title/subtitle</code> returns 1<p>
286     * <code>title[1]/subtitle[2]</code> returns 2<p>
287     * 
288     * @param path the path to extract the Xpath index from
289     * 
290     * @return the last Xpath index from the given path as integer
291     */
292    public static int getXpathIndexInt(String path) {
293
294        int pos1 = path.lastIndexOf('/');
295        int pos2 = path.lastIndexOf('[');
296        if ((pos2 < 0) || (pos1 > pos2)) {
297            return 1;
298        }
299
300        String idxStr = path.substring(pos2 + 1, path.lastIndexOf(']'));
301        try {
302            return Integer.parseInt(idxStr);
303        } catch (NumberFormatException e) {
304            // NOOP
305        }
306        return 1;
307    }
308
309    /**
310     * Returns <code>true</code> if the given path is a Xpath with 
311     * at least 2 elements.<p>
312     * 
313     * Examples:<br> 
314     * <code>title</code> returns <code>false</code><br>
315     * <code>title[1]</code> returns <code>false</code><br>
316     * <code>title/subtitle</code> returns <code>true</code><br>
317     * <code>title[1]/subtitle[1]</code> returns <code>true</code><p>
318     * 
319     * @param path the path to check
320     * @return true if the given path is a Xpath with at least 2 elements
321     */
322    public static boolean isDeepXpath(String path) {
323
324        return path.indexOf('/') > 0;
325    }
326
327    /**
328     * Marshals (writes) an XML document into an output stream using XML pretty-print formatting.<p>
329     * 
330     * @param document the XML document to marshal
331     * @param out the output stream to write to
332     * @param encoding the encoding to use
333     * @return the output stream with the xml content
334     * @throws CmsXmlException if something goes wrong
335     */
336    public static OutputStream marshal(Document document, OutputStream out, String encoding) throws CmsXmlException {
337
338        try {
339            OutputFormat format = OutputFormat.createPrettyPrint();
340            format.setEncoding(encoding);
341
342            XMLWriter writer = new XMLWriter(out, format);
343            writer.setEscapeText(false);
344
345            writer.write(document);
346            writer.close();
347
348        } catch (Exception e) {
349            throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e);
350        }
351
352        return out;
353    }
354
355    /**
356     * Marshals (writes) an XML document to a String using XML pretty-print formatting.<p>
357     * 
358     * @param document the XML document to marshal
359     * @param encoding the encoding to use
360     * @return the marshalled XML document
361     * @throws CmsXmlException if something goes wrong
362     */
363    public static String marshal(Document document, String encoding) throws CmsXmlException {
364
365        ByteArrayOutputStream out = new ByteArrayOutputStream();
366        marshal(document, out, encoding);
367        try {
368            return out.toString(encoding);
369        } catch (UnsupportedEncodingException e) {
370            throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_TO_STRING_0), e);
371        }
372    }
373
374    /**
375     * Marshals (writes) an XML node into an output stream using XML pretty-print formatting.<p>
376     * 
377     * @param node the XML node to marshal
378     * @param encoding the encoding to use
379     * 
380     * @return the string with the xml content
381     * 
382     * @throws CmsXmlException if something goes wrong
383     */
384    public static String marshal(Node node, String encoding) throws CmsXmlException {
385
386        ByteArrayOutputStream out = new ByteArrayOutputStream();
387        try {
388            OutputFormat format = OutputFormat.createPrettyPrint();
389            format.setEncoding(encoding);
390            format.setSuppressDeclaration(true);
391
392            XMLWriter writer = new XMLWriter(out, format);
393            writer.setEscapeText(false);
394
395            writer.write(node);
396            writer.close();
397        } catch (Exception e) {
398            throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e);
399        }
400        return new String(out.toByteArray());
401    }
402
403    /**
404     * Removes all Xpath indices from the given path.<p>
405     * 
406     * Example:<br>
407     * <code>title</code> is left untouched<br>
408     * <code>title[1]</code> becomes <code>title</code><br>
409     * <code>title/subtitle</code> is left untouched<br>
410     * <code>title[1]/subtitle[1]</code> becomes <code>title/subtitle</code><p>
411     * 
412     * @param path the path to remove the Xpath index from
413     * 
414     * @return the path with all Xpath indices removed
415     */
416    public static String removeAllXpathIndices(String path) {
417
418        return path.replaceAll("\\[[0-9]+\\]", "");
419    }
420
421    /**
422     * Removes the first Xpath element from the path.<p>
423     * 
424     * If the provided path does not contain a "/" character, 
425     * it is returned unchanged.<p>
426     * 
427     * <p>Examples:<br> 
428     * <code>title</code> is left untouched<br>
429     * <code>title[1]</code> is left untouched<br>
430     * <code>title/subtitle</code> becomes <code>subtitle</code><br>
431     * <code>title[1]/subtitle[1]</code> becomes <code>subtitle[1]</code><p>
432     * 
433     * @param path the Xpath to remove the first element from
434     * 
435     * @return the path with the first element removed
436     */
437    public static String removeFirstXpathElement(String path) {
438
439        int pos = path.indexOf('/');
440        if (pos < 0) {
441            return path;
442        }
443
444        return path.substring(pos + 1);
445    }
446
447    /**
448     * Removes the last complex Xpath element from the path.<p>
449     * 
450     * The same as {@link #removeLastXpathElement(String)} both it works with more complex xpaths.
451     * 
452     * <p>Example:<br> 
453     * <code>system/backup[@date='23/10/2003']/resource[path='/a/b/c']</code> becomes <code>system/backup[@date='23/10/2003']</code><p>
454     * 
455     * @param path the Xpath to remove the last element from
456     * 
457     * @return the path with the last element removed
458     */
459    public static String removeLastComplexXpathElement(String path) {
460
461        int pos = path.lastIndexOf('/');
462        if (pos < 0) {
463            return path;
464        }
465        // count ' chars
466        int p = pos;
467        int count = -1;
468        while (p > 0) {
469            count++;
470            p = path.indexOf("\'", p + 1);
471        }
472        String parentPath = path.substring(0, pos);
473        if ((count % 2) == 0) {
474            // if substring is complete 
475            return parentPath;
476        }
477        // if not complete
478        p = parentPath.lastIndexOf("'");
479        if (p >= 0) {
480            // complete it if possible
481            return removeLastComplexXpathElement(parentPath.substring(0, p));
482        }
483        return parentPath;
484    }
485
486    /**
487     * Removes the last Xpath element from the path.<p>
488     * 
489     * If the provided path does not contain a "/" character, 
490     * it is returned unchanged.<p>
491     * 
492     * <p>Examples:<br> 
493     * <code>title</code> is left untouched<br>
494     * <code>title[1]</code> is left untouched<br>
495     * <code>title/subtitle</code> becomes <code>title</code><br>
496     * <code>title[1]/subtitle[1]</code> becomes <code>title[1]</code><p>
497     * 
498     * @param path the Xpath to remove the last element from
499     * 
500     * @return the path with the last element removed
501     */
502    public static String removeLastXpathElement(String path) {
503
504        int pos = path.lastIndexOf('/');
505        if (pos < 0) {
506            return path;
507        }
508
509        return path.substring(0, pos);
510    }
511
512    /**
513     * Removes all Xpath index information from the given input path.<p>
514     * 
515     * Examples:<br> 
516     * <code>title</code> is left untouched<br>
517     * <code>title[1]</code> becomes <code>title</code><br>
518     * <code>title/subtitle</code> is left untouched<br>
519     * <code>title[1]/subtitle[1]</code> becomes <code>title/subtitle</code><p>
520     * 
521     * @param path the path to remove the Xpath index information from
522     * 
523     * @return the simplified Xpath for the given name
524     */
525    public static String removeXpath(String path) {
526
527        if (path.indexOf('/') > -1) {
528            // this is a complex path over more then 1 node
529            StringBuffer result = new StringBuffer(path.length() + 32);
530
531            // split the path into sub-elements
532            List<String> elements = CmsStringUtil.splitAsList(path, '/');
533            int end = elements.size() - 1;
534            for (int i = 0; i <= end; i++) {
535                // remove [i] from path element if required 
536                result.append(removeXpathIndex(elements.get(i)));
537                if (i < end) {
538                    // append path delimiter if not final path element
539                    result.append('/');
540                }
541            }
542            return result.toString();
543        }
544
545        // this path has only 1 node, remove last index if required
546        return removeXpathIndex(path);
547    }
548
549    /**
550     * Removes the last Xpath index from the given path.<p>
551     * 
552     * Examples:<br> 
553     * <code>title</code> is left untouched<br>
554     * <code>title[1]</code> becomes <code>title</code><br>
555     * <code>title/subtitle</code> is left untouched<br>
556     * <code>title[1]/subtitle[1]</code> becomes <code>title[1]/subtitle</code><p>
557     * 
558     * @param path the path to remove the Xpath index from
559     * 
560     * @return the path with the last Xpath index removed
561     */
562    public static String removeXpathIndex(String path) {
563
564        int pos1 = path.lastIndexOf('/');
565        int pos2 = path.lastIndexOf('[');
566        if ((pos2 < 0) || (pos1 > pos2)) {
567            return path;
568        }
569
570        return path.substring(0, pos2);
571    }
572
573    /**
574     * Simplifies an Xpath by removing a leading and a trailing slash from the given path.<p> 
575     * 
576     * Examples:<br> 
577     * <code>title/</code> becomes <code>title</code><br>
578     * <code>/title[1]/</code> becomes <code>title[1]</code><br>
579     * <code>/title/subtitle/</code> becomes <code>title/subtitle</code><br>
580     * <code>/title/subtitle[1]/</code> becomes <code>title/subtitle[1]</code><p>
581     * 
582     * @param path the path to process
583     * @return the input with a leading and a trailing slash removed
584     */
585    public static String simplifyXpath(String path) {
586
587        StringBuffer result = new StringBuffer(path);
588        if (result.charAt(0) == '/') {
589            result.deleteCharAt(0);
590        }
591        int pos = result.length() - 1;
592        if (result.charAt(pos) == '/') {
593            result.deleteCharAt(pos);
594        }
595        return result.toString();
596    }
597
598    /**
599     * Helper to unmarshal (read) xml contents from a byte array into a document.<p>
600     * 
601     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
602     * 
603     * @param xmlData the XML data in a byte array
604     * @param resolver the XML entity resolver to use
605     * 
606     * @return the base object initialized with the unmarshalled XML document
607     * 
608     * @throws CmsXmlException if something goes wrong
609     * 
610     * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver)
611     */
612    public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver) throws CmsXmlException {
613
614        return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver);
615    }
616
617    /**
618     * Helper to unmarshal (read) xml contents from a byte array into a document.<p>
619     * 
620     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
621     * 
622     * @param xmlData the XML data in a byte array
623     * @param resolver the XML entity resolver to use
624     * @param validate if the reader should try to validate the xml code
625     * 
626     * @return the base object initialized with the unmarshalled XML document
627     * 
628     * @throws CmsXmlException if something goes wrong
629     * 
630     * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver)
631     */
632    public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver, boolean validate)
633    throws CmsXmlException {
634
635        return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver, validate);
636    }
637
638    /**
639     * Helper to unmarshal (read) xml contents from an input source into a document.<p>
640     * 
641     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
642     * 
643     * Important: The encoding provided will NOT be used during unmarshalling,
644     * the XML parser will do this on the base of the information in the source String.
645     * The encoding is used for initializing the created instance of the document,
646     * which means it will be used when marshalling the document again later.<p>
647     *  
648     * @param source the XML input source to use
649     * @param resolver the XML entity resolver to use
650     * 
651     * @return the unmarshalled XML document
652     * 
653     * @throws CmsXmlException if something goes wrong
654     */
655    public static Document unmarshalHelper(InputSource source, EntityResolver resolver) throws CmsXmlException {
656
657        return unmarshalHelper(source, resolver, false);
658    }
659
660    /**
661     * Helper to unmarshal (read) xml contents from an input source into a document.<p>
662     * 
663     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
664     * 
665     * Important: The encoding provided will NOT be used during unmarshalling,
666     * the XML parser will do this on the base of the information in the source String.
667     * The encoding is used for initializing the created instance of the document,
668     * which means it will be used when marshalling the document again later.<p>
669     *  
670     * @param source the XML input source to use
671     * @param resolver the XML entity resolver to use
672     * @param validate if the reader should try to validate the xml code
673     * 
674     * @return the unmarshalled XML document
675     * 
676     * @throws CmsXmlException if something goes wrong
677     */
678    public static Document unmarshalHelper(InputSource source, EntityResolver resolver, boolean validate)
679    throws CmsXmlException {
680
681        try {
682            SAXReader reader = new SAXReader();
683            if (resolver != null) {
684                reader.setEntityResolver(resolver);
685            }
686            reader.setMergeAdjacentText(true);
687            reader.setStripWhitespaceText(true);
688            if (!validate) {
689                reader.setValidation(false);
690                reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
691            }
692            return reader.read(source);
693        } catch (DocumentException e) {
694            throw new CmsXmlException(Messages.get().container(
695                Messages.ERR_UNMARSHALLING_XML_DOC_1,
696                "(systemId = " + source.getSystemId() + ")"), e);
697        } catch (SAXException e) {
698            throw new CmsXmlException(Messages.get().container(
699                Messages.ERR_UNMARSHALLING_XML_DOC_1,
700                "(systemId = " + source.getSystemId() + ")"), e);
701        }
702    }
703
704    /**
705     * Helper to unmarshal (read) xml contents from a String into a document.<p>
706     * 
707     * Using this method ensures that the OpenCms XML entitiy resolver is used.<p>
708     * 
709     * @param xmlData the xml data in a String 
710     * @param resolver the XML entity resolver to use
711     * @return the base object initialized with the unmarshalled XML document
712     * @throws CmsXmlException if something goes wrong
713     * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver)
714     */
715    public static Document unmarshalHelper(String xmlData, EntityResolver resolver) throws CmsXmlException {
716
717        return CmsXmlUtils.unmarshalHelper(new InputSource(new StringReader(xmlData)), resolver);
718    }
719
720    /**
721     * Validates the structure of a XML document contained in a byte array 
722     * with the DTD or XML schema used by the document.<p>
723     * 
724     * @param xmlData a byte array containing a XML document that should be validated
725     * @param resolver the XML entity resolver to use
726     * 
727     * @throws CmsXmlException if the validation fails
728     */
729    public static void validateXmlStructure(byte[] xmlData, EntityResolver resolver) throws CmsXmlException {
730
731        validateXmlStructure(new ByteArrayInputStream(xmlData), resolver);
732    }
733
734    /**
735     * Validates the structure of a XML document with the DTD or XML schema used 
736     * by the document.<p>
737     * 
738     * @param document a XML document that should be validated
739     * @param encoding the encoding to use when marshalling the XML document (required)
740     * @param resolver the XML entity resolver to use
741     * 
742     * @throws CmsXmlException if the validation fails
743     */
744    public static void validateXmlStructure(Document document, String encoding, EntityResolver resolver)
745    throws CmsXmlException {
746
747        // generate bytes from document
748        byte[] xmlData = ((ByteArrayOutputStream)marshal(document, new ByteArrayOutputStream(512), encoding)).toByteArray();
749        validateXmlStructure(xmlData, resolver);
750    }
751
752    /**
753     * Validates the structure of a XML document contained in a byte array 
754     * with the DTD or XML schema used by the document.<p>
755     * 
756     * @param xmlStream a source providing a XML document that should be validated
757     * @param resolver the XML entity resolver to use
758     * 
759     * @throws CmsXmlException if the validation fails
760     */
761    public static void validateXmlStructure(InputStream xmlStream, EntityResolver resolver) throws CmsXmlException {
762
763        XMLReader reader;
764        try {
765            reader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
766        } catch (SAXException e) {
767            // xerces parser not available - no schema validation possible
768            if (LOG.isWarnEnabled()) {
769                LOG.warn(Messages.get().getBundle().key(Messages.LOG_VALIDATION_INIT_XERXES_SAX_READER_FAILED_0), e);
770            }
771            // no validation of the content is possible
772            return;
773        }
774        // turn on validation
775        try {
776            reader.setFeature("http://xml.org/sax/features/validation", true);
777            // turn on schema validation
778            reader.setFeature("http://apache.org/xml/features/validation/schema", true);
779            // configure namespace support
780            reader.setFeature("http://xml.org/sax/features/namespaces", true);
781            reader.setFeature("http://xml.org/sax/features/namespace-prefixes", false);
782        } catch (SAXNotRecognizedException e) {
783            // should not happen as Xerces 2 support this feature
784            if (LOG.isWarnEnabled()) {
785                LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_RECOGNIZED_0), e);
786            }
787            // no validation of the content is possible
788            return;
789        } catch (SAXNotSupportedException e) {
790            // should not happen as Xerces 2 support this feature
791            if (LOG.isWarnEnabled()) {
792                LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_SUPPORTED_0), e);
793            }
794            // no validation of the content is possible
795            return;
796        }
797
798        // add an error handler which turns any errors into XML
799        CmsXmlValidationErrorHandler errorHandler = new CmsXmlValidationErrorHandler();
800        reader.setErrorHandler(errorHandler);
801
802        if (resolver != null) {
803            // set the resolver for the "opencms://" URIs
804            reader.setEntityResolver(resolver);
805        }
806
807        try {
808            reader.parse(new InputSource(xmlStream));
809        } catch (IOException e) {
810            // should not happen since we read form a byte array
811            if (LOG.isErrorEnabled()) {
812                LOG.error(Messages.get().getBundle().key(Messages.LOG_READ_XML_FROM_BYTE_ARR_FAILED_0), e);
813            }
814            return;
815        } catch (SAXException e) {
816            // should not happen since all errors are handled in the XML error handler
817            if (LOG.isErrorEnabled()) {
818                LOG.error(Messages.get().getBundle().key(Messages.LOG_PARSE_SAX_EXC_0), e);
819            }
820            return;
821        }
822
823        if (errorHandler.getErrors().elements().size() > 0) {
824            // there was at last one validation error, so throw an exception
825            StringWriter out = new StringWriter(256);
826            OutputFormat format = OutputFormat.createPrettyPrint();
827            XMLWriter writer = new XMLWriter(out, format);
828            try {
829                writer.write(errorHandler.getErrors());
830                writer.write(errorHandler.getWarnings());
831                writer.close();
832            } catch (IOException e) {
833                // should not happen since we write to a StringWriter
834                if (LOG.isErrorEnabled()) {
835                    LOG.error(Messages.get().getBundle().key(Messages.LOG_STRINGWRITER_IO_EXC_0), e);
836                }
837            }
838            // generate String from XML for display of document in error message
839            throw new CmsXmlException(Messages.get().container(Messages.ERR_XML_VALIDATION_1, out.toString()));
840        }
841    }
842}