001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.xml;
029
030import org.opencms.file.CmsResource;
031import org.opencms.main.CmsLog;
032import org.opencms.util.CmsStringUtil;
033
034import java.io.ByteArrayInputStream;
035import java.io.ByteArrayOutputStream;
036import java.io.IOException;
037import java.io.InputStream;
038import java.io.OutputStream;
039import java.io.StringReader;
040import java.io.StringWriter;
041import java.io.UnsupportedEncodingException;
042import java.util.List;
043
044import org.apache.commons.logging.Log;
045
046import org.dom4j.Document;
047import org.dom4j.DocumentException;
048import org.dom4j.Node;
049import org.dom4j.io.OutputFormat;
050import org.dom4j.io.SAXReader;
051import org.dom4j.io.XMLWriter;
052import org.xml.sax.EntityResolver;
053import org.xml.sax.InputSource;
054import org.xml.sax.SAXException;
055import org.xml.sax.SAXNotRecognizedException;
056import org.xml.sax.SAXNotSupportedException;
057import org.xml.sax.XMLReader;
058import org.xml.sax.helpers.XMLReaderFactory;
059
060/**
061 * Provides some basic XML handling utilities.<p>
062 *
063 * @since 6.0.0
064 */
065public final class CmsXmlUtils {
066
    /** The log object for this class, obtained from {@link CmsLog#getLog(Object)}. */
    private static final Log LOG = CmsLog.getLog(CmsXmlUtils.class);
069
    /**
     * Hidden default constructor that prevents instances of this class from being generated.<p>
     *
     * All methods of this class are static, so no instance is required to use them.<p>
     */
    private CmsXmlUtils() {

        // noop
    }
077
078    /**
079     * Concatenates two Xpath expressions, ensuring that exactly one slash "/" is between them.<p>
080     *
081     * Use this method if it's uncertain if the given arguments are starting or ending with
082     * a slash "/".<p>
083     *
084     * Examples:<br>
085     * <code>"title", "subtitle"</code> becomes <code>title/subtitle</code><br>
086     * <code>"title[1]/", "subtitle"</code> becomes <code>title[1]/subtitle</code><br>
087     * <code>"title[1]/", "/subtitle[1]"</code> becomes <code>title[1]/subtitle[1]</code><p>
088     *
089     * @param prefix the prefix Xpath
090     * @param suffix the suffix Xpath
091     *
092     * @return the concatenated Xpath build from prefix and suffix
093     */
094    public static String concatXpath(String prefix, String suffix) {
095
096        if (suffix == null) {
097            // ensure suffix is not null
098            suffix = "";
099        } else {
100            if ((suffix.length() > 0) && (suffix.charAt(0) == '/')) {
101                // remove leading '/' form suffix
102                suffix = suffix.substring(1);
103            }
104        }
105        if (prefix != null) {
106            StringBuffer result = new StringBuffer(32);
107            result.append(prefix);
108            if (!CmsResource.isFolder(prefix)) {
109                result.append('/');
110            }
111            result.append(suffix);
112            return result.toString();
113        }
114        return suffix;
115    }
116
117    /**
118     * Translates a simple lookup path to the simplified Xpath format used for
119     * the internal bookmarks.<p>
120     *
121     * Examples:<br>
122     * <code>title</code> becomes <code>title[1]</code><br>
123     * <code>title[1]</code> is left untouched<br>
124     * <code>title/subtitle</code> becomes <code>title[1]/subtitle[1]</code><br>
125     * <code>title/subtitle[1]</code> becomes <code>title[1]/subtitle[1]</code><p>
126     *
127     * Note: If the name already has the format <code>title[1]</code> then provided index parameter
128     * is ignored.<p>
129     *
130     * @param path the path to get the simplified Xpath for
131     * @param index the index to append (if required)
132     *
133     * @return the simplified Xpath for the given name
134     */
135    public static String createXpath(String path, int index) {
136
137        if (path.indexOf('/') > -1) {
138            // this is a complex path over more then 1 node
139            StringBuffer result = new StringBuffer(path.length() + 32);
140
141            // split the path into sub elements
142            List<String> elements = CmsStringUtil.splitAsList(path, '/');
143            int end = elements.size() - 1;
144            for (int i = 0; i <= end; i++) {
145                // append [i] to path element if required
146                result.append(createXpathElementCheck(elements.get(i), (i == end) ? index : 1));
147                if (i < end) {
148                    // append path delimiter if not final path element
149                    result.append('/');
150                }
151            }
152            return result.toString();
153        }
154
155        // this path has only 1 node, append [index] if required
156        return createXpathElementCheck(path, index);
157    }
158
159    /**
160     * Appends the provided index parameter in square brackets to the given name,
161     * like <code>path[index]</code>.<p>
162     *
163     * This method is used if it's clear that some path does not have
164     * a square bracket already appended.<p>
165     *
166     * @param path the path append the index to
167     * @param index the index to append
168     *
169     * @return the simplified Xpath for the given name
170     */
171    public static String createXpathElement(String path, int index) {
172
173        StringBuffer result = new StringBuffer(path.length() + 5);
174        result.append(path);
175        result.append('[');
176        result.append(index);
177        result.append(']');
178        return result.toString();
179    }
180
181    /**
182     * Ensures that a provided simplified Xpath has the format <code>title[1]</code>.<p>
183     *
184     * This method is used if it's uncertain if some path does have
185     * a square bracket already appended or not.<p>
186     *
187     * Note: If the name already has the format <code>title[1]</code>, then provided index parameter
188     * is ignored.<p>
189     *
190     * @param path the path to get the simplified Xpath for
191     * @param index the index to append (if required)
192     *
193     * @return the simplified Xpath for the given name
194     */
195    public static String createXpathElementCheck(String path, int index) {
196
197        if (path.charAt(path.length() - 1) == ']') {
198            // path is already in the form "title[1]"
199            // ignore provided index and return the path "as is"
200            return path;
201        }
202
203        // append index in square brackets
204        return createXpathElement(path, index);
205    }
206
207    /**
208     * Returns the first Xpath element from the provided path,
209     * without the index value.<p>
210     *
211     * Examples:<br>
212     * <code>title</code> is left untouched<br>
213     * <code>title[1]</code> becomes <code>title</code><br>
214     * <code>title/subtitle</code> becomes <code>title</code><br>
215     * <code>title[1]/subtitle[1]</code> becomes <code>title</code><p>
216     *
217     * @param path the path to get the first Xpath element from
218     *
219     * @return the first Xpath element from the provided path
220     */
221    public static String getFirstXpathElement(String path) {
222
223        int pos = path.indexOf('/');
224        if (pos >= 0) {
225            path = path.substring(0, pos);
226        }
227
228        return CmsXmlUtils.removeXpathIndex(path);
229    }
230
231    /**
232     * Returns the last Xpath element from the provided path,
233     * without the index value.<p>
234     *
235     * Examples:<br>
236     * <code>title</code> is left untouched<br>
237     * <code>title[1]</code> becomes <code>title</code><br>
238     * <code>title/subtitle</code> becomes <code>subtitle</code><br>
239     * <code>title[1]/subtitle[1]</code> becomes <code>subtitle</code><p>
240     *
241     * @param path the path to get the last Xpath element from
242     *
243     * @return the last Xpath element from the provided path
244     */
245    public static String getLastXpathElement(String path) {
246
247        int pos = path.lastIndexOf('/');
248        if (pos >= 0) {
249            path = path.substring(pos + 1);
250        }
251
252        return CmsXmlUtils.removeXpathIndex(path);
253    }
254
255    /**
256     * Returns the last Xpath index from the given path.<p>
257     *
258     * Examples:<br>
259     * <code>title</code> returns the empty String<p>
260     * <code>title[1]</code> returns <code>[1]</code><p>
261     * <code>title/subtitle</code> returns them empty String<p>
262     * <code>title[1]/subtitle[1]</code> returns <code>[1]</code><p>
263     *
264     * @param path the path to extract the Xpath index from
265     *
266     * @return  the last Xpath index from the given path
267     */
268    public static String getXpathIndex(String path) {
269
270        int pos1 = path.lastIndexOf('/');
271        int pos2 = path.lastIndexOf('[');
272        if ((pos2 < 0) || (pos1 > pos2)) {
273            return "";
274        }
275
276        return path.substring(pos2);
277    }
278
279    /**
280     * Returns the last Xpath index from the given path as integer.<p>
281     *
282     * Examples:<br>
283     * <code>title</code> returns 1<p>
284     * <code>title[1]</code> returns 1<p>
285     * <code>title/subtitle</code> returns 1<p>
286     * <code>title[1]/subtitle[2]</code> returns 2<p>
287     *
288     * @param path the path to extract the Xpath index from
289     *
290     * @return the last Xpath index from the given path as integer
291     */
292    public static int getXpathIndexInt(String path) {
293
294        int pos1 = path.lastIndexOf('/');
295        int pos2 = path.lastIndexOf('[');
296        if ((pos2 < 0) || (pos1 > pos2)) {
297            return 1;
298        }
299
300        String idxStr = path.substring(pos2 + 1, path.lastIndexOf(']'));
301        try {
302            return Integer.parseInt(idxStr);
303        } catch (NumberFormatException e) {
304            // NOOP
305        }
306        return 1;
307    }
308
309    /**
310     * Returns <code>true</code> if the given path is a Xpath with
311     * at least 2 elements.<p>
312     *
313     * Examples:<br>
314     * <code>title</code> returns <code>false</code><br>
315     * <code>title[1]</code> returns <code>false</code><br>
316     * <code>title/subtitle</code> returns <code>true</code><br>
317     * <code>title[1]/subtitle[1]</code> returns <code>true</code><p>
318     *
319     * @param path the path to check
320     * @return true if the given path is a Xpath with at least 2 elements
321     */
322    public static boolean isDeepXpath(String path) {
323
324        return path.indexOf('/') > 0;
325    }
326
327    /**
328     * Marshals (writes) an XML document into an output stream using XML pretty-print formatting.<p>
329     *
330     * @param document the XML document to marshal
331     * @param out the output stream to write to
332     * @param encoding the encoding to use
333     * @return the output stream with the xml content
334     * @throws CmsXmlException if something goes wrong
335     */
336    public static OutputStream marshal(Document document, OutputStream out, String encoding) throws CmsXmlException {
337
338        try {
339            OutputFormat format = OutputFormat.createPrettyPrint();
340            format.setEncoding(encoding);
341
342            XMLWriter writer = new XMLWriter(out, format);
343            writer.setEscapeText(false);
344
345            writer.write(document);
346            writer.close();
347
348        } catch (Exception e) {
349            throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e);
350        }
351
352        return out;
353    }
354
355    /**
356     * Marshals (writes) an XML document to a String using XML pretty-print formatting.<p>
357     *
358     * @param document the XML document to marshal
359     * @param encoding the encoding to use
360     * @return the marshalled XML document
361     * @throws CmsXmlException if something goes wrong
362     */
363    public static String marshal(Document document, String encoding) throws CmsXmlException {
364
365        ByteArrayOutputStream out = new ByteArrayOutputStream();
366        marshal(document, out, encoding);
367        try {
368            return out.toString(encoding);
369        } catch (UnsupportedEncodingException e) {
370            throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_TO_STRING_0), e);
371        }
372    }
373
374    /**
375     * Marshals (writes) an XML node into an output stream using XML pretty-print formatting.<p>
376     *
377     * @param node the XML node to marshal
378     * @param encoding the encoding to use
379     *
380     * @return the string with the xml content
381     *
382     * @throws CmsXmlException if something goes wrong
383     */
384    public static String marshal(Node node, String encoding) throws CmsXmlException {
385
386        ByteArrayOutputStream out = new ByteArrayOutputStream();
387        try {
388            OutputFormat format = OutputFormat.createPrettyPrint();
389            format.setEncoding(encoding);
390            format.setSuppressDeclaration(true);
391
392            XMLWriter writer = new XMLWriter(out, format);
393            writer.setEscapeText(false);
394
395            writer.write(node);
396            writer.close();
397        } catch (Exception e) {
398            throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e);
399        }
400        return new String(out.toByteArray());
401    }
402
403    /**
404     * Removes all Xpath indices from the given path.<p>
405     *
406     * Example:<br>
407     * <code>title</code> is left untouched<br>
408     * <code>title[1]</code> becomes <code>title</code><br>
409     * <code>title/subtitle</code> is left untouched<br>
410     * <code>title[1]/subtitle[1]</code> becomes <code>title/subtitle</code><p>
411     *
412     * @param path the path to remove the Xpath index from
413     *
414     * @return the path with all Xpath indices removed
415     */
416    public static String removeAllXpathIndices(String path) {
417
418        return path.replaceAll("\\[[0-9]+\\]", "");
419    }
420
421    /**
422     * Removes the first Xpath element from the path.<p>
423     *
424     * If the provided path does not contain a "/" character,
425     * it is returned unchanged.<p>
426     *
427     * <p>Examples:<br>
428     * <code>title</code> is left untouched<br>
429     * <code>title[1]</code> is left untouched<br>
430     * <code>title/subtitle</code> becomes <code>subtitle</code><br>
431     * <code>title[1]/subtitle[1]</code> becomes <code>subtitle[1]</code><p>
432     *
433     * @param path the Xpath to remove the first element from
434     *
435     * @return the path with the first element removed
436     */
437    public static String removeFirstXpathElement(String path) {
438
439        int pos = path.indexOf('/');
440        if (pos < 0) {
441            return path;
442        }
443
444        return path.substring(pos + 1);
445    }
446
447    /**
448     * Removes the last complex Xpath element from the path.<p>
449     *
450     * The same as {@link #removeLastXpathElement(String)} both it works with more complex xpaths.
451     *
452     * <p>Example:<br>
453     * <code>system/backup[@date='23/10/2003']/resource[path='/a/b/c']</code> becomes <code>system/backup[@date='23/10/2003']</code><p>
454     *
455     * @param path the Xpath to remove the last element from
456     *
457     * @return the path with the last element removed
458     */
459    public static String removeLastComplexXpathElement(String path) {
460
461        int pos = path.lastIndexOf('/');
462        if (pos < 0) {
463            return path;
464        }
465        // count ' chars
466        int p = pos;
467        int count = -1;
468        while (p > 0) {
469            count++;
470            p = path.indexOf("\'", p + 1);
471        }
472        String parentPath = path.substring(0, pos);
473        if ((count % 2) == 0) {
474            // if substring is complete
475            return parentPath;
476        }
477        // if not complete
478        p = parentPath.lastIndexOf("'");
479        if (p >= 0) {
480            // complete it if possible
481            return removeLastComplexXpathElement(parentPath.substring(0, p));
482        }
483        return parentPath;
484    }
485
486    /**
487     * Removes the last Xpath element from the path.<p>
488     *
489     * If the provided path does not contain a "/" character,
490     * it is returned unchanged.<p>
491     *
492     * <p>Examples:<br>
493     * <code>title</code> is left untouched<br>
494     * <code>title[1]</code> is left untouched<br>
495     * <code>title/subtitle</code> becomes <code>title</code><br>
496     * <code>title[1]/subtitle[1]</code> becomes <code>title[1]</code><p>
497     *
498     * @param path the Xpath to remove the last element from
499     *
500     * @return the path with the last element removed
501     */
502    public static String removeLastXpathElement(String path) {
503
504        int pos = path.lastIndexOf('/');
505        if (pos < 0) {
506            return path;
507        }
508
509        return path.substring(0, pos);
510    }
511
512    /**
513     * Removes all Xpath index information from the given input path.<p>
514     *
515     * Examples:<br>
516     * <code>title</code> is left untouched<br>
517     * <code>title[1]</code> becomes <code>title</code><br>
518     * <code>title/subtitle</code> is left untouched<br>
519     * <code>title[1]/subtitle[1]</code> becomes <code>title/subtitle</code><p>
520     *
521     * @param path the path to remove the Xpath index information from
522     *
523     * @return the simplified Xpath for the given name
524     */
525    public static String removeXpath(String path) {
526
527        if (path.indexOf('/') > -1) {
528            // this is a complex path over more then 1 node
529            StringBuffer result = new StringBuffer(path.length() + 32);
530
531            // split the path into sub-elements
532            List<String> elements = CmsStringUtil.splitAsList(path, '/');
533            int end = elements.size() - 1;
534            for (int i = 0; i <= end; i++) {
535                // remove [i] from path element if required
536                result.append(removeXpathIndex(elements.get(i)));
537                if (i < end) {
538                    // append path delimiter if not final path element
539                    result.append('/');
540                }
541            }
542            return result.toString();
543        }
544
545        // this path has only 1 node, remove last index if required
546        return removeXpathIndex(path);
547    }
548
549    /**
550     * Removes the last Xpath index from the given path.<p>
551     *
552     * Examples:<br>
553     * <code>title</code> is left untouched<br>
554     * <code>title[1]</code> becomes <code>title</code><br>
555     * <code>title/subtitle</code> is left untouched<br>
556     * <code>title[1]/subtitle[1]</code> becomes <code>title[1]/subtitle</code><p>
557     *
558     * @param path the path to remove the Xpath index from
559     *
560     * @return the path with the last Xpath index removed
561     */
562    public static String removeXpathIndex(String path) {
563
564        int pos1 = path.lastIndexOf('/');
565        int pos2 = path.lastIndexOf('[');
566        if ((pos2 < 0) || (pos1 > pos2)) {
567            return path;
568        }
569
570        return path.substring(0, pos2);
571    }
572
573    /**
574     * Simplifies an Xpath by removing a leading and a trailing slash from the given path.<p>
575     *
576     * Examples:<br>
577     * <code>title/</code> becomes <code>title</code><br>
578     * <code>/title[1]/</code> becomes <code>title[1]</code><br>
579     * <code>/title/subtitle/</code> becomes <code>title/subtitle</code><br>
580     * <code>/title/subtitle[1]/</code> becomes <code>title/subtitle[1]</code><p>
581     *
582     * @param path the path to process
583     * @return the input with a leading and a trailing slash removed
584     */
585    public static String simplifyXpath(String path) {
586
587        StringBuffer result = new StringBuffer(path);
588        if (result.charAt(0) == '/') {
589            result.deleteCharAt(0);
590        }
591        int pos = result.length() - 1;
592        if (result.charAt(pos) == '/') {
593            result.deleteCharAt(pos);
594        }
595        return result.toString();
596    }
597
598    /**
599     * Helper to unmarshal (read) xml contents from a byte array into a document.<p>
600     *
601     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
602     *
603     * @param xmlData the XML data in a byte array
604     * @param resolver the XML entity resolver to use
605     *
606     * @return the base object initialized with the unmarshalled XML document
607     *
608     * @throws CmsXmlException if something goes wrong
609     *
610     * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver)
611     */
612    public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver) throws CmsXmlException {
613
614        return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver);
615    }
616
617    /**
618     * Helper to unmarshal (read) xml contents from a byte array into a document.<p>
619     *
620     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
621     *
622     * @param xmlData the XML data in a byte array
623     * @param resolver the XML entity resolver to use
624     * @param validate if the reader should try to validate the xml code
625     *
626     * @return the base object initialized with the unmarshalled XML document
627     *
628     * @throws CmsXmlException if something goes wrong
629     *
630     * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver)
631     */
632    public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver, boolean validate)
633    throws CmsXmlException {
634
635        return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver, validate);
636    }
637
638    /**
639     * Helper to unmarshal (read) xml contents from an input source into a document.<p>
640     *
641     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
642     *
643     * Important: The encoding provided will NOT be used during unmarshalling,
644     * the XML parser will do this on the base of the information in the source String.
645     * The encoding is used for initializing the created instance of the document,
646     * which means it will be used when marshalling the document again later.<p>
647     *
648     * @param source the XML input source to use
649     * @param resolver the XML entity resolver to use
650     *
651     * @return the unmarshalled XML document
652     *
653     * @throws CmsXmlException if something goes wrong
654     */
655    public static Document unmarshalHelper(InputSource source, EntityResolver resolver) throws CmsXmlException {
656
657        return unmarshalHelper(source, resolver, false);
658    }
659
660    /**
661     * Helper to unmarshal (read) xml contents from an input source into a document.<p>
662     *
663     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
664     *
665     * Important: The encoding provided will NOT be used during unmarshalling,
666     * the XML parser will do this on the base of the information in the source String.
667     * The encoding is used for initializing the created instance of the document,
668     * which means it will be used when marshalling the document again later.<p>
669     *
670     * @param source the XML input source to use
671     * @param resolver the XML entity resolver to use
672     * @param validate if the reader should try to validate the xml code
673     *
674     * @return the unmarshalled XML document
675     *
676     * @throws CmsXmlException if something goes wrong
677     */
678    public static Document unmarshalHelper(InputSource source, EntityResolver resolver, boolean validate)
679    throws CmsXmlException {
680
681        try {
682            SAXReader reader = new SAXReader();
683            if (resolver != null) {
684                reader.setEntityResolver(resolver);
685            }
686            reader.setMergeAdjacentText(true);
687            reader.setStripWhitespaceText(true);
688            if (!validate) {
689                reader.setValidation(false);
690                reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
691            }
692            return reader.read(source);
693        } catch (DocumentException e) {
694            throw new CmsXmlException(
695                Messages.get().container(
696                    Messages.ERR_UNMARSHALLING_XML_DOC_1,
697                    "(systemId = " + source.getSystemId() + ")"),
698                e);
699        } catch (SAXException e) {
700            throw new CmsXmlException(
701                Messages.get().container(
702                    Messages.ERR_UNMARSHALLING_XML_DOC_1,
703                    "(systemId = " + source.getSystemId() + ")"),
704                e);
705        }
706    }
707
708    /**
709     * Helper to unmarshal (read) xml contents from a String into a document.<p>
710     *
711     * Using this method ensures that the OpenCms XML entitiy resolver is used.<p>
712     *
713     * @param xmlData the xml data in a String
714     * @param resolver the XML entity resolver to use
715     * @return the base object initialized with the unmarshalled XML document
716     * @throws CmsXmlException if something goes wrong
717     * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver)
718     */
719    public static Document unmarshalHelper(String xmlData, EntityResolver resolver) throws CmsXmlException {
720
721        return CmsXmlUtils.unmarshalHelper(new InputSource(new StringReader(xmlData)), resolver);
722    }
723
724    /**
725     * Validates the structure of a XML document contained in a byte array
726     * with the DTD or XML schema used by the document.<p>
727     *
728     * @param xmlData a byte array containing a XML document that should be validated
729     * @param resolver the XML entity resolver to use
730     *
731     * @throws CmsXmlException if the validation fails
732     */
733    public static void validateXmlStructure(byte[] xmlData, EntityResolver resolver) throws CmsXmlException {
734
735        validateXmlStructure(new ByteArrayInputStream(xmlData), resolver);
736    }
737
738    /**
739     * Validates the structure of a XML document with the DTD or XML schema used
740     * by the document.<p>
741     *
742     * @param document a XML document that should be validated
743     * @param encoding the encoding to use when marshalling the XML document (required)
744     * @param resolver the XML entity resolver to use
745     *
746     * @throws CmsXmlException if the validation fails
747     */
748    public static void validateXmlStructure(Document document, String encoding, EntityResolver resolver)
749    throws CmsXmlException {
750
751        // generate bytes from document
752        byte[] xmlData = ((ByteArrayOutputStream)marshal(
753            document,
754            new ByteArrayOutputStream(512),
755            encoding)).toByteArray();
756        validateXmlStructure(xmlData, resolver);
757    }
758
    /**
     * Validates the structure of a XML document provided by an input stream
     * with the DTD or XML schema used by the document.<p>
     *
     * The validation requires the Xerces SAX parser. If this parser can not be created,
     * if one of the required parser features can not be set, or if parsing the stream
     * fails with an I/O or SAX exception, the problem is only written to the log
     * and this method returns without throwing an exception.<p>
     *
     * If the error handler has collected at least one error, an exception is thrown
     * with a message that contains the collected errors and warnings as pretty-printed XML.<p>
     *
     * @param xmlStream a source providing a XML document that should be validated
     * @param resolver the XML entity resolver to use (may be <code>null</code>)
     *
     * @throws CmsXmlException if the validation fails
     */
    public static void validateXmlStructure(InputStream xmlStream, EntityResolver resolver) throws CmsXmlException {

        XMLReader reader;
        try {
            reader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
        } catch (SAXException e) {
            // xerces parser not available - no schema validation possible
            if (LOG.isWarnEnabled()) {
                LOG.warn(Messages.get().getBundle().key(Messages.LOG_VALIDATION_INIT_XERXES_SAX_READER_FAILED_0), e);
            }
            // no validation of the content is possible
            return;
        }
        // turn on validation
        try {
            reader.setFeature("http://xml.org/sax/features/validation", true);
            // turn on schema validation
            reader.setFeature("http://apache.org/xml/features/validation/schema", true);
            // configure namespace support
            reader.setFeature("http://xml.org/sax/features/namespaces", true);
            reader.setFeature("http://xml.org/sax/features/namespace-prefixes", false);
        } catch (SAXNotRecognizedException e) {
            // should not happen as Xerces 2 supports this feature
            if (LOG.isWarnEnabled()) {
                LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_RECOGNIZED_0), e);
            }
            // no validation of the content is possible
            return;
        } catch (SAXNotSupportedException e) {
            // should not happen as Xerces 2 supports this feature
            if (LOG.isWarnEnabled()) {
                LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_SUPPORTED_0), e);
            }
            // no validation of the content is possible
            return;
        }

        // add an error handler which turns any errors into XML
        CmsXmlValidationErrorHandler errorHandler = new CmsXmlValidationErrorHandler();
        reader.setErrorHandler(errorHandler);

        if (resolver != null) {
            // set the resolver for the "opencms://" URIs
            reader.setEntityResolver(resolver);
        }

        try {
            reader.parse(new InputSource(xmlStream));
        } catch (IOException e) {
            // reading from the stream failed, this is only logged and the validation is skipped
            if (LOG.isErrorEnabled()) {
                LOG.error(Messages.get().getBundle().key(Messages.LOG_READ_XML_FROM_BYTE_ARR_FAILED_0), e);
            }
            return;
        } catch (SAXException e) {
            // should not happen since all errors are handled in the XML error handler
            if (LOG.isErrorEnabled()) {
                LOG.error(Messages.get().getBundle().key(Messages.LOG_PARSE_SAX_EXC_0), e);
            }
            return;
        }

        if (errorHandler.getErrors().elements().size() > 0) {
            // there was at least one validation error, so throw an exception
            StringWriter out = new StringWriter(256);
            OutputFormat format = OutputFormat.createPrettyPrint();
            XMLWriter writer = new XMLWriter(out, format);
            try {
                // the message contains the errors as well as the warnings
                writer.write(errorHandler.getErrors());
                writer.write(errorHandler.getWarnings());
                writer.close();
            } catch (IOException e) {
                // should not happen since we write to a StringWriter
                if (LOG.isErrorEnabled()) {
                    LOG.error(Messages.get().getBundle().key(Messages.LOG_STRINGWRITER_IO_EXC_0), e);
                }
            }
            // generate String from XML for display of document in error message
            throw new CmsXmlException(Messages.get().container(Messages.ERR_XML_VALIDATION_1, out.toString()));
        }
    }
849}