001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.xml;
029
030import org.opencms.file.CmsResource;
031import org.opencms.main.CmsLog;
032import org.opencms.util.CmsStringUtil;
033
034import java.io.ByteArrayInputStream;
035import java.io.ByteArrayOutputStream;
036import java.io.IOException;
037import java.io.InputStream;
038import java.io.OutputStream;
039import java.io.StringReader;
040import java.io.StringWriter;
041import java.io.UnsupportedEncodingException;
042import java.util.List;
043
044import javax.xml.parsers.SAXParserFactory;
045
046import org.apache.commons.logging.Log;
047import org.apache.xerces.parsers.SAXParser;
048
049import org.dom4j.Document;
050import org.dom4j.DocumentException;
051import org.dom4j.Node;
052import org.dom4j.io.OutputFormat;
053import org.dom4j.io.SAXReader;
054import org.dom4j.io.XMLWriter;
055import org.xml.sax.EntityResolver;
056import org.xml.sax.InputSource;
057import org.xml.sax.SAXException;
058import org.xml.sax.SAXNotRecognizedException;
059import org.xml.sax.SAXNotSupportedException;
060import org.xml.sax.XMLReader;
061import org.xml.sax.helpers.XMLReaderFactory;
062
063/**
064 * Provides some basic XML handling utilities.<p>
065 *
066 * @since 6.0.0
067 */
068public final class CmsXmlUtils {
069
070    /**
071     * This class is only used to expose the XML parser configuration implementation name.<p>
072     */
073    private static class ParserImpl extends SAXParser {
074
075        /**
076         * Constructor.<p>
077         */
078        ParserImpl() {
079            super();
080        }
081
082        /**
083         * Returns the implementation name of the used XML parser configuration.<p>
084         *
085         * @return the implementation name
086         */
087        String getConfigImplName() {
088
089            if (fConfiguration != null) {
090                return fConfiguration.getClass().getName();
091            } else {
092                return null;
093            }
094        }
095    }
096
    /** The log object for this class. */
    private static final Log LOG = CmsLog.getLog(CmsXmlUtils.class);

    /** Key of the SAX parser configuration system property, set by {@link #initSystemProperties()} if not yet defined. */
    private static final String SAX_PARSER_CONFIG_KEY = "org.apache.xerces.xni.parser.XMLParserConfiguration";

    /** Key of the SAX parser factory system property, set by {@link #initSystemProperties()} if not yet defined. */
    private static final String SAX_PARSER_FACTORY_KEY = "javax.xml.parsers.SAXParserFactory";

    /** Key of the XML reader system property, set by {@link #initSystemProperties()} if not yet defined. */
    private static final String XML_READER_KEY = "org.xml.sax.driver";
108
109    /**
110     * Prevents instances of this class from being generated.<p>
111     */
112    private CmsXmlUtils() {
113
114        // noop
115    }
116
117    /**
118     * Concatenates two Xpath expressions, ensuring that exactly one slash "/" is between them.<p>
119     *
120     * Use this method if it's uncertain if the given arguments are starting or ending with
121     * a slash "/".<p>
122     *
123     * Examples:<br>
124     * <code>"title", "subtitle"</code> becomes <code>title/subtitle</code><br>
125     * <code>"title[1]/", "subtitle"</code> becomes <code>title[1]/subtitle</code><br>
126     * <code>"title[1]/", "/subtitle[1]"</code> becomes <code>title[1]/subtitle[1]</code><p>
127     *
128     * @param prefix the prefix Xpath
129     * @param suffix the suffix Xpath
130     *
131     * @return the concatenated Xpath build from prefix and suffix
132     */
133    public static String concatXpath(String prefix, String suffix) {
134
135        if (suffix == null) {
136            // ensure suffix is not null
137            suffix = "";
138        } else {
139            if ((suffix.length() > 0) && (suffix.charAt(0) == '/')) {
140                // remove leading '/' form suffix
141                suffix = suffix.substring(1);
142            }
143        }
144        if (prefix != null) {
145            StringBuffer result = new StringBuffer(32);
146            result.append(prefix);
147            if (!CmsResource.isFolder(prefix)) {
148                result.append('/');
149            }
150            result.append(suffix);
151            return result.toString();
152        }
153        return suffix;
154    }
155
156    /**
157     * Translates a simple lookup path to the simplified Xpath format used for
158     * the internal bookmarks.<p>
159     *
160     * Examples:<br>
161     * <code>title</code> becomes <code>title[1]</code><br>
162     * <code>title[1]</code> is left untouched<br>
163     * <code>title/subtitle</code> becomes <code>title[1]/subtitle[1]</code><br>
164     * <code>title/subtitle[1]</code> becomes <code>title[1]/subtitle[1]</code><p>
165     *
166     * Note: If the name already has the format <code>title[1]</code> then provided index parameter
167     * is ignored.<p>
168     *
169     * @param path the path to get the simplified Xpath for
170     * @param index the index to append (if required)
171     *
172     * @return the simplified Xpath for the given name
173     */
174    public static String createXpath(String path, int index) {
175
176        if (path.indexOf('/') > -1) {
177            // this is a complex path over more then 1 node
178            StringBuffer result = new StringBuffer(path.length() + 32);
179
180            // split the path into sub elements
181            List<String> elements = CmsStringUtil.splitAsList(path, '/');
182            int end = elements.size() - 1;
183            for (int i = 0; i <= end; i++) {
184                // append [i] to path element if required
185                result.append(createXpathElementCheck(elements.get(i), (i == end) ? index : 1));
186                if (i < end) {
187                    // append path delimiter if not final path element
188                    result.append('/');
189                }
190            }
191            return result.toString();
192        }
193
194        // this path has only 1 node, append [index] if required
195        return createXpathElementCheck(path, index);
196    }
197
198    /**
199     * Appends the provided index parameter in square brackets to the given name,
200     * like <code>path[index]</code>.<p>
201     *
202     * This method is used if it's clear that some path does not have
203     * a square bracket already appended.<p>
204     *
205     * @param path the path append the index to
206     * @param index the index to append
207     *
208     * @return the simplified Xpath for the given name
209     */
210    public static String createXpathElement(String path, int index) {
211
212        StringBuffer result = new StringBuffer(path.length() + 5);
213        result.append(path);
214        result.append('[');
215        result.append(index);
216        result.append(']');
217        return result.toString();
218    }
219
220    /**
221     * Ensures that a provided simplified Xpath has the format <code>title[1]</code>.<p>
222     *
223     * This method is used if it's uncertain if some path does have
224     * a square bracket already appended or not.<p>
225     *
226     * Note: If the name already has the format <code>title[1]</code>, then provided index parameter
227     * is ignored.<p>
228     *
229     * @param path the path to get the simplified Xpath for
230     * @param index the index to append (if required)
231     *
232     * @return the simplified Xpath for the given name
233     */
234    public static String createXpathElementCheck(String path, int index) {
235
236        if (path.charAt(path.length() - 1) == ']') {
237            // path is already in the form "title[1]"
238            // ignore provided index and return the path "as is"
239            return path;
240        }
241
242        // append index in square brackets
243        return createXpathElement(path, index);
244    }
245
246    /**
247     * Returns the first Xpath element from the provided path,
248     * without the index value.<p>
249     *
250     * Examples:<br>
251     * <code>title</code> is left untouched<br>
252     * <code>title[1]</code> becomes <code>title</code><br>
253     * <code>title/subtitle</code> becomes <code>title</code><br>
254     * <code>title[1]/subtitle[1]</code> becomes <code>title</code><p>
255     *
256     * @param path the path to get the first Xpath element from
257     *
258     * @return the first Xpath element from the provided path
259     */
260    public static String getFirstXpathElement(String path) {
261
262        int pos = path.indexOf('/');
263        if (pos >= 0) {
264            path = path.substring(0, pos);
265        }
266
267        return CmsXmlUtils.removeXpathIndex(path);
268    }
269
270    /**
271     * Returns the last Xpath element from the provided path,
272     * without the index value.<p>
273     *
274     * Examples:<br>
275     * <code>title</code> is left untouched<br>
276     * <code>title[1]</code> becomes <code>title</code><br>
277     * <code>title/subtitle</code> becomes <code>subtitle</code><br>
278     * <code>title[1]/subtitle[1]</code> becomes <code>subtitle</code><p>
279     *
280     * @param path the path to get the last Xpath element from
281     *
282     * @return the last Xpath element from the provided path
283     */
284    public static String getLastXpathElement(String path) {
285
286        int pos = path.lastIndexOf('/');
287        if (pos >= 0) {
288            path = path.substring(pos + 1);
289        }
290
291        return CmsXmlUtils.removeXpathIndex(path);
292    }
293
294    /**
295     * Returns the last Xpath index from the given path.<p>
296     *
297     * Examples:<br>
298     * <code>title</code> returns the empty String<p>
299     * <code>title[1]</code> returns <code>[1]</code><p>
300     * <code>title/subtitle</code> returns them empty String<p>
301     * <code>title[1]/subtitle[1]</code> returns <code>[1]</code><p>
302     *
303     * @param path the path to extract the Xpath index from
304     *
305     * @return  the last Xpath index from the given path
306     */
307    public static String getXpathIndex(String path) {
308
309        int pos1 = path.lastIndexOf('/');
310        int pos2 = path.lastIndexOf('[');
311        if ((pos2 < 0) || (pos1 > pos2)) {
312            return "";
313        }
314
315        return path.substring(pos2);
316    }
317
318    /**
319     * Returns the last Xpath index from the given path as integer.<p>
320     *
321     * Examples:<br>
322     * <code>title</code> returns 1<p>
323     * <code>title[1]</code> returns 1<p>
324     * <code>title/subtitle</code> returns 1<p>
325     * <code>title[1]/subtitle[2]</code> returns 2<p>
326     *
327     * @param path the path to extract the Xpath index from
328     *
329     * @return the last Xpath index from the given path as integer
330     */
331    public static int getXpathIndexInt(String path) {
332
333        int pos1 = path.lastIndexOf('/');
334        int pos2 = path.lastIndexOf('[');
335        if ((pos2 < 0) || (pos1 > pos2)) {
336            return 1;
337        }
338
339        String idxStr = path.substring(pos2 + 1, path.lastIndexOf(']'));
340        try {
341            return Integer.parseInt(idxStr);
342        } catch (NumberFormatException e) {
343            // NOOP
344        }
345        return 1;
346    }
347
348    /**
349     * Initializes XML processing system properties to avoid evaluating the XML parser and reader implementation each time an XML document is read.<p>
350     * This is done for performance improvements only.<p>
351     */
352    public static void initSystemProperties() {
353
354        String implName;
355        // initialize system properties
356        if (System.getProperty(SAX_PARSER_FACTORY_KEY) == null) {
357            implName = SAXParserFactory.newInstance().getClass().getName();
358            LOG.info("Setting sax parser factory impl property to " + implName);
359            System.setProperty(SAX_PARSER_FACTORY_KEY, implName);
360        }
361        if (System.getProperty(XML_READER_KEY) == null) {
362            SAXReader reader = new SAXReader();
363            try {
364                implName = reader.getXMLReader().getClass().getName();
365                LOG.info("Setting xml reader impl property to " + implName);
366                System.setProperty(XML_READER_KEY, implName);
367            } catch (SAXException e) {
368                LOG.error("Error evaluating XMLReader impl.", e);
369            }
370        }
371        if (System.getProperty(SAX_PARSER_CONFIG_KEY) == null) {
372            ParserImpl saxParser = new ParserImpl();
373            implName = saxParser.getConfigImplName();
374            if (implName != null) {
375                LOG.info("Setting xml parser configuration impl property to " + implName);
376                System.setProperty(SAX_PARSER_CONFIG_KEY, implName);
377            }
378        }
379    }
380
381    /**
382     * Returns <code>true</code> if the given path is a Xpath with
383     * at least 2 elements.<p>
384     *
385     * Examples:<br>
386     * <code>title</code> returns <code>false</code><br>
387     * <code>title[1]</code> returns <code>false</code><br>
388     * <code>title/subtitle</code> returns <code>true</code><br>
389     * <code>title[1]/subtitle[1]</code> returns <code>true</code><p>
390     *
391     * @param path the path to check
392     * @return true if the given path is a Xpath with at least 2 elements
393     */
394    public static boolean isDeepXpath(String path) {
395
396        return path.indexOf('/') > 0;
397    }
398
399    /**
400     * Marshals (writes) an XML document into an output stream using XML pretty-print formatting.<p>
401     *
402     * @param document the XML document to marshal
403     * @param out the output stream to write to
404     * @param encoding the encoding to use
405     * @return the output stream with the xml content
406     * @throws CmsXmlException if something goes wrong
407     */
408    public static OutputStream marshal(Document document, OutputStream out, String encoding) throws CmsXmlException {
409
410        try {
411            OutputFormat format = OutputFormat.createPrettyPrint();
412            format.setEncoding(encoding);
413
414            XMLWriter writer = new XMLWriter(out, format);
415            writer.setEscapeText(false);
416
417            writer.write(document);
418            writer.close();
419
420        } catch (Exception e) {
421            throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e);
422        }
423
424        return out;
425    }
426
427    /**
428     * Marshals (writes) an XML document to a String using XML pretty-print formatting.<p>
429     *
430     * @param document the XML document to marshal
431     * @param encoding the encoding to use
432     * @return the marshalled XML document
433     * @throws CmsXmlException if something goes wrong
434     */
435    public static String marshal(Document document, String encoding) throws CmsXmlException {
436
437        ByteArrayOutputStream out = new ByteArrayOutputStream();
438        marshal(document, out, encoding);
439        try {
440            return out.toString(encoding);
441        } catch (UnsupportedEncodingException e) {
442            throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_TO_STRING_0), e);
443        }
444    }
445
446    /**
447     * Marshals (writes) an XML node into an output stream using XML pretty-print formatting.<p>
448     *
449     * @param node the XML node to marshal
450     * @param encoding the encoding to use
451     *
452     * @return the string with the xml content
453     *
454     * @throws CmsXmlException if something goes wrong
455     */
456    public static String marshal(Node node, String encoding) throws CmsXmlException {
457
458        ByteArrayOutputStream out = new ByteArrayOutputStream();
459        try {
460            OutputFormat format = OutputFormat.createPrettyPrint();
461            format.setEncoding(encoding);
462            format.setSuppressDeclaration(true);
463
464            XMLWriter writer = new XMLWriter(out, format);
465            writer.setEscapeText(false);
466
467            writer.write(node);
468            writer.close();
469        } catch (Exception e) {
470            throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e);
471        }
472        return new String(out.toByteArray());
473    }
474
475    /**
476     * Removes all Xpath indices from the given path.<p>
477     *
478     * Example:<br>
479     * <code>title</code> is left untouched<br>
480     * <code>title[1]</code> becomes <code>title</code><br>
481     * <code>title/subtitle</code> is left untouched<br>
482     * <code>title[1]/subtitle[1]</code> becomes <code>title/subtitle</code><p>
483     *
484     * @param path the path to remove the Xpath index from
485     *
486     * @return the path with all Xpath indices removed
487     */
488    public static String removeAllXpathIndices(String path) {
489
490        return path.replaceAll("\\[[0-9]+\\]", "");
491    }
492
493    /**
494     * Removes the first Xpath element from the path.<p>
495     *
496     * If the provided path does not contain a "/" character,
497     * it is returned unchanged.<p>
498     *
499     * <p>Examples:<br>
500     * <code>title</code> is left untouched<br>
501     * <code>title[1]</code> is left untouched<br>
502     * <code>title/subtitle</code> becomes <code>subtitle</code><br>
503     * <code>title[1]/subtitle[1]</code> becomes <code>subtitle[1]</code><p>
504     *
505     * @param path the Xpath to remove the first element from
506     *
507     * @return the path with the first element removed
508     */
509    public static String removeFirstXpathElement(String path) {
510
511        int pos = path.indexOf('/');
512        if (pos < 0) {
513            return path;
514        }
515
516        return path.substring(pos + 1);
517    }
518
519    /**
520     * Removes the last complex Xpath element from the path.<p>
521     *
522     * The same as {@link #removeLastXpathElement(String)} both it works with more complex xpaths.
523     *
524     * <p>Example:<br>
525     * <code>system/backup[@date='23/10/2003']/resource[path='/a/b/c']</code> becomes <code>system/backup[@date='23/10/2003']</code><p>
526     *
527     * @param path the Xpath to remove the last element from
528     *
529     * @return the path with the last element removed
530     */
531    public static String removeLastComplexXpathElement(String path) {
532
533        int pos = path.lastIndexOf('/');
534        if (pos < 0) {
535            return path;
536        }
537        // count ' chars
538        int p = pos;
539        int count = -1;
540        while (p > 0) {
541            count++;
542            p = path.indexOf("\'", p + 1);
543        }
544        String parentPath = path.substring(0, pos);
545        if ((count % 2) == 0) {
546            // if substring is complete
547            return parentPath;
548        }
549        // if not complete
550        p = parentPath.lastIndexOf("'");
551        if (p >= 0) {
552            // complete it if possible
553            return removeLastComplexXpathElement(parentPath.substring(0, p));
554        }
555        return parentPath;
556    }
557
558    /**
559     * Removes the last Xpath element from the path.<p>
560     *
561     * If the provided path does not contain a "/" character,
562     * it is returned unchanged.<p>
563     *
564     * <p>Examples:<br>
565     * <code>title</code> is left untouched<br>
566     * <code>title[1]</code> is left untouched<br>
567     * <code>title/subtitle</code> becomes <code>title</code><br>
568     * <code>title[1]/subtitle[1]</code> becomes <code>title[1]</code><p>
569     *
570     * @param path the Xpath to remove the last element from
571     *
572     * @return the path with the last element removed
573     */
574    public static String removeLastXpathElement(String path) {
575
576        int pos = path.lastIndexOf('/');
577        if (pos < 0) {
578            return path;
579        }
580
581        return path.substring(0, pos);
582    }
583
584    /**
585     * Removes all Xpath index information from the given input path.<p>
586     *
587     * Examples:<br>
588     * <code>title</code> is left untouched<br>
589     * <code>title[1]</code> becomes <code>title</code><br>
590     * <code>title/subtitle</code> is left untouched<br>
591     * <code>title[1]/subtitle[1]</code> becomes <code>title/subtitle</code><p>
592     *
593     * @param path the path to remove the Xpath index information from
594     *
595     * @return the simplified Xpath for the given name
596     */
597    public static String removeXpath(String path) {
598
599        if (path.indexOf('/') > -1) {
600            // this is a complex path over more then 1 node
601            StringBuffer result = new StringBuffer(path.length() + 32);
602
603            // split the path into sub-elements
604            List<String> elements = CmsStringUtil.splitAsList(path, '/');
605            int end = elements.size() - 1;
606            for (int i = 0; i <= end; i++) {
607                // remove [i] from path element if required
608                result.append(removeXpathIndex(elements.get(i)));
609                if (i < end) {
610                    // append path delimiter if not final path element
611                    result.append('/');
612                }
613            }
614            return result.toString();
615        }
616
617        // this path has only 1 node, remove last index if required
618        return removeXpathIndex(path);
619    }
620
621    /**
622     * Removes the last Xpath index from the given path.<p>
623     *
624     * Examples:<br>
625     * <code>title</code> is left untouched<br>
626     * <code>title[1]</code> becomes <code>title</code><br>
627     * <code>title/subtitle</code> is left untouched<br>
628     * <code>title[1]/subtitle[1]</code> becomes <code>title[1]/subtitle</code><p>
629     *
630     * @param path the path to remove the Xpath index from
631     *
632     * @return the path with the last Xpath index removed
633     */
634    public static String removeXpathIndex(String path) {
635
636        int pos1 = path.lastIndexOf('/');
637        int pos2 = path.lastIndexOf('[');
638        if ((pos2 < 0) || (pos1 > pos2)) {
639            return path;
640        }
641
642        return path.substring(0, pos2);
643    }
644
645    /**
646     * Simplifies an Xpath by removing a leading and a trailing slash from the given path.<p>
647     *
648     * Examples:<br>
649     * <code>title/</code> becomes <code>title</code><br>
650     * <code>/title[1]/</code> becomes <code>title[1]</code><br>
651     * <code>/title/subtitle/</code> becomes <code>title/subtitle</code><br>
652     * <code>/title/subtitle[1]/</code> becomes <code>title/subtitle[1]</code><p>
653     *
654     * @param path the path to process
655     * @return the input with a leading and a trailing slash removed
656     */
657    public static String simplifyXpath(String path) {
658
659        StringBuffer result = new StringBuffer(path);
660        if (result.charAt(0) == '/') {
661            result.deleteCharAt(0);
662        }
663        int pos = result.length() - 1;
664        if (result.charAt(pos) == '/') {
665            result.deleteCharAt(pos);
666        }
667        return result.toString();
668    }
669
670    /**
671     * Helper to unmarshal (read) xml contents from a byte array into a document.<p>
672     *
673     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
674     *
675     * @param xmlData the XML data in a byte array
676     * @param resolver the XML entity resolver to use
677     *
678     * @return the base object initialized with the unmarshalled XML document
679     *
680     * @throws CmsXmlException if something goes wrong
681     *
682     * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver)
683     */
684    public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver) throws CmsXmlException {
685
686        return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver);
687    }
688
689    /**
690     * Helper to unmarshal (read) xml contents from a byte array into a document.<p>
691     *
692     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
693     *
694     * @param xmlData the XML data in a byte array
695     * @param resolver the XML entity resolver to use
696     * @param validate if the reader should try to validate the xml code
697     *
698     * @return the base object initialized with the unmarshalled XML document
699     *
700     * @throws CmsXmlException if something goes wrong
701     *
702     * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver)
703     */
704    public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver, boolean validate)
705    throws CmsXmlException {
706
707        return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver, validate);
708    }
709
710    /**
711     * Helper to unmarshal (read) xml contents from an input source into a document.<p>
712     *
713     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
714     *
715     * Important: The encoding provided will NOT be used during unmarshalling,
716     * the XML parser will do this on the base of the information in the source String.
717     * The encoding is used for initializing the created instance of the document,
718     * which means it will be used when marshalling the document again later.<p>
719     *
720     * @param source the XML input source to use
721     * @param resolver the XML entity resolver to use
722     *
723     * @return the unmarshalled XML document
724     *
725     * @throws CmsXmlException if something goes wrong
726     */
727    public static Document unmarshalHelper(InputSource source, EntityResolver resolver) throws CmsXmlException {
728
729        return unmarshalHelper(source, resolver, false);
730    }
731
732    /**
733     * Helper to unmarshal (read) xml contents from an input source into a document.<p>
734     *
735     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
736     *
737     * Important: The encoding provided will NOT be used during unmarshalling,
738     * the XML parser will do this on the base of the information in the source String.
739     * The encoding is used for initializing the created instance of the document,
740     * which means it will be used when marshalling the document again later.<p>
741     *
742     * @param source the XML input source to use
743     * @param resolver the XML entity resolver to use
744     * @param validate if the reader should try to validate the xml code
745     *
746     * @return the unmarshalled XML document
747     *
748     * @throws CmsXmlException if something goes wrong
749     */
750    public static Document unmarshalHelper(InputSource source, EntityResolver resolver, boolean validate)
751    throws CmsXmlException {
752
753        try {
754            SAXReader reader = new SAXReader();
755            if (resolver != null) {
756                reader.setEntityResolver(resolver);
757            }
758            reader.setMergeAdjacentText(true);
759            reader.setStripWhitespaceText(true);
760            if (!validate) {
761                reader.setValidation(false);
762                reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
763            }
764            return reader.read(source);
765        } catch (DocumentException e) {
766            throw new CmsXmlException(
767                Messages.get().container(
768                    Messages.ERR_UNMARSHALLING_XML_DOC_1,
769                    "(systemId = " + source.getSystemId() + ")"),
770                e);
771        } catch (SAXException e) {
772            throw new CmsXmlException(
773                Messages.get().container(
774                    Messages.ERR_UNMARSHALLING_XML_DOC_1,
775                    "(systemId = " + source.getSystemId() + ")"),
776                e);
777        }
778    }
779
780    /**
781     * Helper to unmarshal (read) xml contents from a String into a document.<p>
782     *
783     * Using this method ensures that the OpenCms XML entitiy resolver is used.<p>
784     *
785     * @param xmlData the xml data in a String
786     * @param resolver the XML entity resolver to use
787     * @return the base object initialized with the unmarshalled XML document
788     * @throws CmsXmlException if something goes wrong
789     * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver)
790     */
791    public static Document unmarshalHelper(String xmlData, EntityResolver resolver) throws CmsXmlException {
792
793        return CmsXmlUtils.unmarshalHelper(new InputSource(new StringReader(xmlData)), resolver);
794    }
795
796    /**
797     * Validates the structure of a XML document contained in a byte array
798     * with the DTD or XML schema used by the document.<p>
799     *
800     * @param xmlData a byte array containing a XML document that should be validated
801     * @param resolver the XML entity resolver to use
802     *
803     * @throws CmsXmlException if the validation fails
804     */
805    public static void validateXmlStructure(byte[] xmlData, EntityResolver resolver) throws CmsXmlException {
806
807        validateXmlStructure(new ByteArrayInputStream(xmlData), resolver);
808    }
809
810    /**
811     * Validates the structure of a XML document with the DTD or XML schema used
812     * by the document.<p>
813     *
814     * @param document a XML document that should be validated
815     * @param encoding the encoding to use when marshalling the XML document (required)
816     * @param resolver the XML entity resolver to use
817     *
818     * @throws CmsXmlException if the validation fails
819     */
820    public static void validateXmlStructure(Document document, String encoding, EntityResolver resolver)
821    throws CmsXmlException {
822
823        // generate bytes from document
824        byte[] xmlData = ((ByteArrayOutputStream)marshal(
825            document,
826            new ByteArrayOutputStream(512),
827            encoding)).toByteArray();
828        validateXmlStructure(xmlData, resolver);
829    }
830
831    /**
832     * Validates the structure of a XML document contained in a byte array
833     * with the DTD or XML schema used by the document.<p>
834     *
835     * @param xmlStream a source providing a XML document that should be validated
836     * @param resolver the XML entity resolver to use
837     *
838     * @throws CmsXmlException if the validation fails
839     */
840    public static void validateXmlStructure(InputStream xmlStream, EntityResolver resolver) throws CmsXmlException {
841
842        XMLReader reader;
843        try {
844            reader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
845        } catch (SAXException e) {
846            // xerces parser not available - no schema validation possible
847            if (LOG.isWarnEnabled()) {
848                LOG.warn(Messages.get().getBundle().key(Messages.LOG_VALIDATION_INIT_XERXES_SAX_READER_FAILED_0), e);
849            }
850            // no validation of the content is possible
851            return;
852        }
853        // turn on validation
854        try {
855            reader.setFeature("http://xml.org/sax/features/validation", true);
856            // turn on schema validation
857            reader.setFeature("http://apache.org/xml/features/validation/schema", true);
858            // configure namespace support
859            reader.setFeature("http://xml.org/sax/features/namespaces", true);
860            reader.setFeature("http://xml.org/sax/features/namespace-prefixes", false);
861        } catch (SAXNotRecognizedException e) {
862            // should not happen as Xerces 2 support this feature
863            if (LOG.isWarnEnabled()) {
864                LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_RECOGNIZED_0), e);
865            }
866            // no validation of the content is possible
867            return;
868        } catch (SAXNotSupportedException e) {
869            // should not happen as Xerces 2 support this feature
870            if (LOG.isWarnEnabled()) {
871                LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_SUPPORTED_0), e);
872            }
873            // no validation of the content is possible
874            return;
875        }
876
877        // add an error handler which turns any errors into XML
878        CmsXmlValidationErrorHandler errorHandler = new CmsXmlValidationErrorHandler();
879        reader.setErrorHandler(errorHandler);
880
881        if (resolver != null) {
882            // set the resolver for the "opencms://" URIs
883            reader.setEntityResolver(resolver);
884        }
885
886        try {
887            reader.parse(new InputSource(xmlStream));
888        } catch (IOException e) {
889            // should not happen since we read form a byte array
890            if (LOG.isErrorEnabled()) {
891                LOG.error(Messages.get().getBundle().key(Messages.LOG_READ_XML_FROM_BYTE_ARR_FAILED_0), e);
892            }
893            return;
894        } catch (SAXException e) {
895            // should not happen since all errors are handled in the XML error handler
896            if (LOG.isErrorEnabled()) {
897                LOG.error(Messages.get().getBundle().key(Messages.LOG_PARSE_SAX_EXC_0), e);
898            }
899            return;
900        }
901
902        if (errorHandler.getErrors().elements().size() > 0) {
903            // there was at last one validation error, so throw an exception
904            StringWriter out = new StringWriter(256);
905            OutputFormat format = OutputFormat.createPrettyPrint();
906            XMLWriter writer = new XMLWriter(out, format);
907            try {
908                writer.write(errorHandler.getErrors());
909                writer.write(errorHandler.getWarnings());
910                writer.close();
911            } catch (IOException e) {
912                // should not happen since we write to a StringWriter
913                if (LOG.isErrorEnabled()) {
914                    LOG.error(Messages.get().getBundle().key(Messages.LOG_STRINGWRITER_IO_EXC_0), e);
915                }
916            }
917            // generate String from XML for display of document in error message
918            throw new CmsXmlException(Messages.get().container(Messages.ERR_XML_VALIDATION_1, out.toString()));
919        }
920    }
921}