001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.xml;
029
030import org.opencms.file.CmsResource;
031import org.opencms.main.CmsLog;
032import org.opencms.util.CmsStringUtil;
033
034import java.io.ByteArrayInputStream;
035import java.io.ByteArrayOutputStream;
036import java.io.IOException;
037import java.io.InputStream;
038import java.io.OutputStream;
039import java.io.StringReader;
040import java.io.StringWriter;
041import java.io.UnsupportedEncodingException;
042import java.util.Arrays;
043import java.util.List;
044import java.util.stream.Collectors;
045
046import javax.xml.parsers.SAXParserFactory;
047
048import org.apache.commons.logging.Log;
049import org.apache.xerces.parsers.SAXParser;
050
051import org.dom4j.Document;
052import org.dom4j.DocumentException;
053import org.dom4j.Node;
054import org.dom4j.io.OutputFormat;
055import org.dom4j.io.SAXReader;
056import org.dom4j.io.XMLWriter;
057import org.xml.sax.EntityResolver;
058import org.xml.sax.InputSource;
059import org.xml.sax.SAXException;
060import org.xml.sax.SAXNotRecognizedException;
061import org.xml.sax.SAXNotSupportedException;
062import org.xml.sax.XMLReader;
063import org.xml.sax.helpers.XMLReaderFactory;
064
065/**
066 * Provides some basic XML handling utilities.<p>
067 *
068 * @since 6.0.0
069 */
070public final class CmsXmlUtils {
071
072    /**
073     * This class is only used to expose the XML parser configuration implementation name.<p>
074     */
075    private static class ParserImpl extends SAXParser {
076
077        /**
078         * Constructor.<p>
079         */
080        ParserImpl() {
081
082            super();
083        }
084
085        /**
086         * Returns the implementation name of the used XML parser configuration.<p>
087         *
088         * @return the implementation name
089         */
090        String getConfigImplName() {
091
092            if (fConfiguration != null) {
093                return fConfiguration.getClass().getName();
094            } else {
095                return null;
096            }
097        }
098    }
099
    /** The log object for this class. */
    private static final Log LOG = CmsLog.getLog(CmsXmlUtils.class);

    /** Name of the system property that is set to the XML parser configuration class name by {@link #initSystemProperties()}. */
    private static final String SAX_PARSER_CONFIG_KEY = "org.apache.xerces.xni.parser.XMLParserConfiguration";

    /** Name of the system property that is set to the SAX parser factory class name by {@link #initSystemProperties()}. */
    private static final String SAX_PARSER_FACTORY_KEY = "javax.xml.parsers.SAXParserFactory";

    /** Name of the system property that is set to the XML reader class name by {@link #initSystemProperties()}. */
    private static final String XML_READER_KEY = "org.xml.sax.driver";
111
112    /**
113     * Prevents instances of this class from being generated.<p>
114     */
115    private CmsXmlUtils() {
116
117        // noop
118    }
119
120    /**
121     * Concatenates two Xpath expressions, ensuring that exactly one slash "/" is between them.<p>
122     *
123     * Use this method if it's uncertain if the given arguments are starting or ending with
124     * a slash "/".<p>
125     *
126     * Examples:<br>
127     * <code>"title", "subtitle"</code> becomes <code>title/subtitle</code><br>
128     * <code>"title[1]/", "subtitle"</code> becomes <code>title[1]/subtitle</code><br>
129     * <code>"title[1]/", "/subtitle[1]"</code> becomes <code>title[1]/subtitle[1]</code><p>
130     *
131     * @param prefix the prefix Xpath
132     * @param suffix the suffix Xpath
133     *
134     * @return the concatenated Xpath build from prefix and suffix
135     */
136    public static String concatXpath(String prefix, String suffix) {
137
138        if (suffix == null) {
139            // ensure suffix is not null
140            suffix = "";
141        } else {
142            if ((suffix.length() > 0) && (suffix.charAt(0) == '/')) {
143                // remove leading '/' form suffix
144                suffix = suffix.substring(1);
145            }
146        }
147        if (prefix != null) {
148            StringBuffer result = new StringBuffer(32);
149            result.append(prefix);
150            if (!CmsResource.isFolder(prefix) && (suffix.length() > 0)) {
151                result.append('/');
152            }
153            result.append(suffix);
154            return result.toString();
155        }
156        return suffix;
157    }
158
159    /**
160     * Translates a simple lookup path to the simplified Xpath format used for
161     * the internal bookmarks.<p>
162     *
163     * Examples:<br>
164     * <code>title</code> becomes <code>title[1]</code><br>
165     * <code>title[1]</code> is left untouched<br>
166     * <code>title/subtitle</code> becomes <code>title[1]/subtitle[1]</code><br>
167     * <code>title/subtitle[1]</code> becomes <code>title[1]/subtitle[1]</code><p>
168     *
169     * Note: If the name already has the format <code>title[1]</code> then provided index parameter
170     * is ignored.<p>
171     *
172     * @param path the path to get the simplified Xpath for
173     * @param index the index to append (if required)
174     *
175     * @return the simplified Xpath for the given name
176     */
177    public static String createXpath(String path, int index) {
178
179        if (path.indexOf('/') > -1) {
180            // this is a complex path over more then 1 node
181            StringBuffer result = new StringBuffer(path.length() + 32);
182
183            // split the path into sub elements
184            List<String> elements = CmsStringUtil.splitAsList(path, '/');
185            int end = elements.size() - 1;
186            for (int i = 0; i <= end; i++) {
187                // append [i] to path element if required
188                result.append(createXpathElementCheck(elements.get(i), (i == end) ? index : 1));
189                if (i < end) {
190                    // append path delimiter if not final path element
191                    result.append('/');
192                }
193            }
194            return result.toString();
195        }
196
197        // this path has only 1 node, append [index] if required
198        return createXpathElementCheck(path, index);
199    }
200
201    /**
202     * Appends the provided index parameter in square brackets to the given name,
203     * like <code>path[index]</code>.<p>
204     *
205     * This method is used if it's clear that some path does not have
206     * a square bracket already appended.<p>
207     *
208     * @param path the path append the index to
209     * @param index the index to append
210     *
211     * @return the simplified Xpath for the given name
212     */
213    public static String createXpathElement(String path, int index) {
214
215        StringBuffer result = new StringBuffer(path.length() + 5);
216        result.append(path);
217        result.append('[');
218        result.append(index);
219        result.append(']');
220        return result.toString();
221    }
222
223    /**
224     * Ensures that a provided simplified Xpath has the format <code>title[1]</code>.<p>
225     *
226     * This method is used if it's uncertain if some path does have
227     * a square bracket already appended or not.<p>
228     *
229     * Note: If the name already has the format <code>title[1]</code>, then provided index parameter
230     * is ignored.<p>
231     *
232     * @param path the path to get the simplified Xpath for
233     * @param index the index to append (if required)
234     *
235     * @return the simplified Xpath for the given name
236     */
237    public static String createXpathElementCheck(String path, int index) {
238
239        if (path.charAt(path.length() - 1) == ']') {
240            // path is already in the form "title[1]"
241            // ignore provided index and return the path "as is"
242            return path;
243        }
244
245        // append index in square brackets
246        return createXpathElement(path, index);
247    }
248
249    /**
250     * Returns the first Xpath element from the provided path,
251     * without the index value.<p>
252     *
253     * Examples:<br>
254     * <code>title</code> is left untouched<br>
255     * <code>title[1]</code> becomes <code>title</code><br>
256     * <code>title/subtitle</code> becomes <code>title</code><br>
257     * <code>title[1]/subtitle[1]</code> becomes <code>title</code><p>
258     *
259     * @param path the path to get the first Xpath element from
260     *
261     * @return the first Xpath element from the provided path
262     */
263    public static String getFirstXpathElement(String path) {
264
265        int pos = path.indexOf('/');
266        if (pos >= 0) {
267            path = path.substring(0, pos);
268        }
269
270        return CmsXmlUtils.removeXpathIndex(path);
271    }
272
273    /**
274     * Returns the last Xpath element from the provided path,
275     * without the index value.<p>
276     *
277     * Examples:<br>
278     * <code>title</code> is left untouched<br>
279     * <code>title[1]</code> becomes <code>title</code><br>
280     * <code>title/subtitle</code> becomes <code>subtitle</code><br>
281     * <code>title[1]/subtitle[1]</code> becomes <code>subtitle</code><p>
282     *
283     * @param path the path to get the last Xpath element from
284     *
285     * @return the last Xpath element from the provided path
286     */
287    public static String getLastXpathElement(String path) {
288
289        int pos = path.lastIndexOf('/');
290        if (pos >= 0) {
291            path = path.substring(pos + 1);
292        }
293
294        return CmsXmlUtils.removeXpathIndex(path);
295    }
296
297    /**
298     * Returns the last Xpath index from the given path.<p>
299     *
300     * Examples:<br>
301     * <code>title</code> returns the empty String<p>
302     * <code>title[1]</code> returns <code>[1]</code><p>
303     * <code>title/subtitle</code> returns them empty String<p>
304     * <code>title[1]/subtitle[1]</code> returns <code>[1]</code><p>
305     *
306     * @param path the path to extract the Xpath index from
307     *
308     * @return  the last Xpath index from the given path
309     */
310    public static String getXpathIndex(String path) {
311
312        int pos1 = path.lastIndexOf('/');
313        int pos2 = path.lastIndexOf('[');
314        if ((pos2 < 0) || (pos1 > pos2)) {
315            return "";
316        }
317
318        return path.substring(pos2);
319    }
320
321    /**
322     * Returns the last Xpath index from the given path as integer.<p>
323     *
324     * Examples:<br>
325     * <code>title</code> returns 1<p>
326     * <code>title[1]</code> returns 1<p>
327     * <code>title/subtitle</code> returns 1<p>
328     * <code>title[1]/subtitle[2]</code> returns 2<p>
329     *
330     * @param path the path to extract the Xpath index from
331     *
332     * @return the last Xpath index from the given path as integer
333     */
334    public static int getXpathIndexInt(String path) {
335
336        int pos1 = path.lastIndexOf('/');
337        int pos2 = path.lastIndexOf('[');
338        if ((pos2 < 0) || (pos1 > pos2)) {
339            return 1;
340        }
341
342        String idxStr = path.substring(pos2 + 1, path.lastIndexOf(']'));
343        try {
344            return Integer.parseInt(idxStr);
345        } catch (NumberFormatException e) {
346            // NOOP
347        }
348        return 1;
349    }
350
351    /**
352     * Initializes XML processing system properties to avoid evaluating the XML parser and reader implementation each time an XML document is read.<p>
353     * This is done for performance improvements only.<p>
354     */
355    public static void initSystemProperties() {
356
357        String implName;
358        // initialize system properties
359        if (System.getProperty(SAX_PARSER_FACTORY_KEY) == null) {
360            implName = SAXParserFactory.newInstance().getClass().getName();
361            LOG.info("Setting sax parser factory impl property to " + implName);
362            System.setProperty(SAX_PARSER_FACTORY_KEY, implName);
363        }
364        if (System.getProperty(XML_READER_KEY) == null) {
365            SAXReader reader = new SAXReader();
366            try {
367                implName = reader.getXMLReader().getClass().getName();
368                LOG.info("Setting xml reader impl property to " + implName);
369                System.setProperty(XML_READER_KEY, implName);
370            } catch (SAXException e) {
371                LOG.error("Error evaluating XMLReader impl.", e);
372            }
373        }
374        if (System.getProperty(SAX_PARSER_CONFIG_KEY) == null) {
375            ParserImpl saxParser = new ParserImpl();
376            implName = saxParser.getConfigImplName();
377            if (implName != null) {
378                LOG.info("Setting xml parser configuration impl property to " + implName);
379                System.setProperty(SAX_PARSER_CONFIG_KEY, implName);
380            }
381        }
382    }
383
384    /**
385     * Returns <code>true</code> if the given path is a Xpath with
386     * at least 2 elements.<p>
387     *
388     * Examples:<br>
389     * <code>title</code> returns <code>false</code><br>
390     * <code>title[1]</code> returns <code>false</code><br>
391     * <code>title/subtitle</code> returns <code>true</code><br>
392     * <code>title[1]/subtitle[1]</code> returns <code>true</code><p>
393     *
394     * @param path the path to check
395     * @return true if the given path is a Xpath with at least 2 elements
396     */
397    public static boolean isDeepXpath(String path) {
398
399        return path.indexOf('/') > 0;
400    }
401
402    /**
403     * Marshals (writes) an XML document into an output stream using XML pretty-print formatting.<p>
404     *
405     * @param document the XML document to marshal
406     * @param out the output stream to write to
407     * @param encoding the encoding to use
408     * @return the output stream with the xml content
409     * @throws CmsXmlException if something goes wrong
410     */
411    public static OutputStream marshal(Document document, OutputStream out, String encoding) throws CmsXmlException {
412
413        try {
414            OutputFormat format = OutputFormat.createPrettyPrint();
415            format.setEncoding(encoding);
416
417            XMLWriter writer = new XMLWriter(out, format);
418            writer.setEscapeText(false);
419
420            writer.write(document);
421            writer.close();
422
423        } catch (Exception e) {
424            throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e);
425        }
426
427        return out;
428    }
429
430    /**
431     * Marshals (writes) an XML document to a String using XML pretty-print formatting.<p>
432     *
433     * @param document the XML document to marshal
434     * @param encoding the encoding to use
435     * @return the marshalled XML document
436     * @throws CmsXmlException if something goes wrong
437     */
438    public static String marshal(Document document, String encoding) throws CmsXmlException {
439
440        ByteArrayOutputStream out = new ByteArrayOutputStream();
441        marshal(document, out, encoding);
442        try {
443            return out.toString(encoding);
444        } catch (UnsupportedEncodingException e) {
445            throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_TO_STRING_0), e);
446        }
447    }
448
449    /**
450     * Marshals (writes) an XML node into an output stream using XML pretty-print formatting.<p>
451     *
452     * @param node the XML node to marshal
453     * @param encoding the encoding to use
454     *
455     * @return the string with the xml content
456     *
457     * @throws CmsXmlException if something goes wrong
458     */
459    public static String marshal(Node node, String encoding) throws CmsXmlException {
460
461        ByteArrayOutputStream out = new ByteArrayOutputStream();
462        try {
463            OutputFormat format = OutputFormat.createPrettyPrint();
464            format.setEncoding(encoding);
465            format.setSuppressDeclaration(true);
466
467            XMLWriter writer = new XMLWriter(out, format);
468            writer.setEscapeText(false);
469
470            writer.write(node);
471            writer.close();
472        } catch (Exception e) {
473            throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e);
474        }
475        return new String(out.toByteArray());
476    }
477
478    /**
479     * Removes all Xpath indices from the given path.<p>
480     *
481     * Example:<br>
482     * <code>title</code> is left untouched<br>
483     * <code>title[1]</code> becomes <code>title</code><br>
484     * <code>title/subtitle</code> is left untouched<br>
485     * <code>title[1]/subtitle[1]</code> becomes <code>title/subtitle</code><p>
486     *
487     * @param path the path to remove the Xpath index from
488     *
489     * @return the path with all Xpath indices removed
490     */
491    public static String removeAllXpathIndices(String path) {
492
493        return path.replaceAll("\\[[0-9]+\\]", "");
494    }
495
496    /**
497     * Removes the first Xpath element from the path.<p>
498     *
499     * If the provided path does not contain a "/" character,
500     * it is returned unchanged.<p>
501     *
502     * <p>Examples:<br>
503     * <code>title</code> is left untouched<br>
504     * <code>title[1]</code> is left untouched<br>
505     * <code>title/subtitle</code> becomes <code>subtitle</code><br>
506     * <code>title[1]/subtitle[1]</code> becomes <code>subtitle[1]</code><p>
507     *
508     * @param path the Xpath to remove the first element from
509     *
510     * @return the path with the first element removed
511     */
512    public static String removeFirstXpathElement(String path) {
513
514        int pos = path.indexOf('/');
515        if (pos < 0) {
516            return path;
517        }
518
519        return path.substring(pos + 1);
520    }
521
522    /**
523     * Removes the last complex Xpath element from the path.<p>
524     *
525     * The same as {@link #removeLastXpathElement(String)} both it works with more complex xpaths.
526     *
527     * <p>Example:<br>
528     * <code>system/backup[@date='23/10/2003']/resource[path='/a/b/c']</code> becomes <code>system/backup[@date='23/10/2003']</code><p>
529     *
530     * @param path the Xpath to remove the last element from
531     *
532     * @return the path with the last element removed
533     */
534    public static String removeLastComplexXpathElement(String path) {
535
536        int pos = path.lastIndexOf('/');
537        if (pos < 0) {
538            return path;
539        }
540        // count ' chars
541        int p = pos;
542        int count = -1;
543        while (p > 0) {
544            count++;
545            p = path.indexOf("\'", p + 1);
546        }
547        String parentPath = path.substring(0, pos);
548        if ((count % 2) == 0) {
549            // if substring is complete
550            return parentPath;
551        }
552        // if not complete
553        p = parentPath.lastIndexOf("'");
554        if (p >= 0) {
555            // complete it if possible
556            return removeLastComplexXpathElement(parentPath.substring(0, p));
557        }
558        return parentPath;
559    }
560
561    /**
562     * Removes the last Xpath element from the path.<p>
563     *
564     * If the provided path does not contain a "/" character,
565     * it is returned unchanged.<p>
566     *
567     * <p>Examples:<br>
568     * <code>title</code> is left untouched<br>
569     * <code>title[1]</code> is left untouched<br>
570     * <code>title/subtitle</code> becomes <code>title</code><br>
571     * <code>title[1]/subtitle[1]</code> becomes <code>title[1]</code><p>
572     *
573     * @param path the Xpath to remove the last element from
574     *
575     * @return the path with the last element removed
576     */
577    public static String removeLastXpathElement(String path) {
578
579        int pos = path.lastIndexOf('/');
580        if (pos < 0) {
581            return path;
582        }
583
584        return path.substring(0, pos);
585    }
586
587    /**
588     * Removes all Xpath index information from the given input path.<p>
589     *
590     * Examples:<br>
591     * <code>title</code> is left untouched<br>
592     * <code>title[1]</code> becomes <code>title</code><br>
593     * <code>title/subtitle</code> is left untouched<br>
594     * <code>title[1]/subtitle[1]</code> becomes <code>title/subtitle</code><p>
595     *
596     * @param path the path to remove the Xpath index information from
597     *
598     * @return the simplified Xpath for the given name
599     */
600    public static String removeXpath(String path) {
601
602        if (path.indexOf('/') > -1) {
603            // this is a complex path over more then 1 node
604            StringBuffer result = new StringBuffer(path.length() + 32);
605
606            // split the path into sub-elements
607            List<String> elements = CmsStringUtil.splitAsList(path, '/');
608            int end = elements.size() - 1;
609            for (int i = 0; i <= end; i++) {
610                // remove [i] from path element if required
611                result.append(removeXpathIndex(elements.get(i)));
612                if (i < end) {
613                    // append path delimiter if not final path element
614                    result.append('/');
615                }
616            }
617            return result.toString();
618        }
619
620        // this path has only 1 node, remove last index if required
621        return removeXpathIndex(path);
622    }
623
624    /**
625     * Removes the last Xpath index from the given path.<p>
626     *
627     * Examples:<br>
628     * <code>title</code> is left untouched<br>
629     * <code>title[1]</code> becomes <code>title</code><br>
630     * <code>title/subtitle</code> is left untouched<br>
631     * <code>title[1]/subtitle[1]</code> becomes <code>title[1]/subtitle</code><p>
632     *
633     * @param path the path to remove the Xpath index from
634     *
635     * @return the path with the last Xpath index removed
636     */
637    public static String removeXpathIndex(String path) {
638
639        int pos1 = path.lastIndexOf('/');
640        int pos2 = path.lastIndexOf('[');
641        if ((pos2 < 0) || (pos1 > pos2)) {
642            return path;
643        }
644
645        return path.substring(0, pos2);
646    }
647
648    /**
649     * Simplifies an Xpath by removing a leading and a trailing slash from the given path.<p>
650     *
651     * Examples:<br>
652     * <code>title/</code> becomes <code>title</code><br>
653     * <code>/title[1]/</code> becomes <code>title[1]</code><br>
654     * <code>/title/subtitle/</code> becomes <code>title/subtitle</code><br>
655     * <code>/title/subtitle[1]/</code> becomes <code>title/subtitle[1]</code><p>
656     *
657     * @param path the path to process
658     * @return the input with a leading and a trailing slash removed
659     */
660    public static String simplifyXpath(String path) {
661
662        StringBuffer result = new StringBuffer(path);
663        if (result.charAt(0) == '/') {
664            result.deleteCharAt(0);
665        }
666        int pos = result.length() - 1;
667        if (result.charAt(pos) == '/') {
668            result.deleteCharAt(pos);
669        }
670        return result.toString();
671    }
672
673    /**
674     * Splits a content value path into its components, ignoring leading or trailing slashes.<p>
675     *
676     * Note: this does not work for XPaths in general, only for the paths used to identify values in OpenCms contents.<p>
677     *
678     * @param xpath the xpath
679     *
680     * @return the path components
681     */
682    public static List<String> splitXpath(String xpath) {
683
684        return Arrays.stream(xpath.split("/")).filter(s -> !s.isEmpty()).collect(Collectors.toList());
685
686    }
687
688    /**
689     * Helper to unmarshal (read) xml contents from a byte array into a document.<p>
690     *
691     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
692     *
693     * @param xmlData the XML data in a byte array
694     * @param resolver the XML entity resolver to use
695     *
696     * @return the base object initialized with the unmarshalled XML document
697     *
698     * @throws CmsXmlException if something goes wrong
699     *
700     * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver)
701     */
702    public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver) throws CmsXmlException {
703
704        return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver);
705    }
706
707    /**
708     * Helper to unmarshal (read) xml contents from a byte array into a document.<p>
709     *
710     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
711     *
712     * @param xmlData the XML data in a byte array
713     * @param resolver the XML entity resolver to use
714     * @param validate if the reader should try to validate the xml code
715     *
716     * @return the base object initialized with the unmarshalled XML document
717     *
718     * @throws CmsXmlException if something goes wrong
719     *
720     * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver)
721     */
722    public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver, boolean validate)
723    throws CmsXmlException {
724
725        return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver, validate);
726    }
727
728    /**
729     * Helper to unmarshal (read) xml contents from an input source into a document.<p>
730     *
731     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
732     *
733     * Important: The encoding provided will NOT be used during unmarshalling,
734     * the XML parser will do this on the base of the information in the source String.
735     * The encoding is used for initializing the created instance of the document,
736     * which means it will be used when marshalling the document again later.<p>
737     *
738     * @param source the XML input source to use
739     * @param resolver the XML entity resolver to use
740     *
741     * @return the unmarshalled XML document
742     *
743     * @throws CmsXmlException if something goes wrong
744     */
745    public static Document unmarshalHelper(InputSource source, EntityResolver resolver) throws CmsXmlException {
746
747        return unmarshalHelper(source, resolver, false);
748    }
749
750    /**
751     * Helper to unmarshal (read) xml contents from an input source into a document.<p>
752     *
753     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
754     *
755     * Important: The encoding provided will NOT be used during unmarshalling,
756     * the XML parser will do this on the base of the information in the source String.
757     * The encoding is used for initializing the created instance of the document,
758     * which means it will be used when marshalling the document again later.<p>
759     *
760     * @param source the XML input source to use
761     * @param resolver the XML entity resolver to use
762     * @param validate if the reader should try to validate the xml code
763     *
764     * @return the unmarshalled XML document
765     *
766     * @throws CmsXmlException if something goes wrong
767     */
768    public static Document unmarshalHelper(InputSource source, EntityResolver resolver, boolean validate)
769    throws CmsXmlException {
770
771        if (null == source) {
772            throw new CmsXmlException(Messages.get().container(Messages.ERR_UNMARSHALLING_XML_DOC_1, "source==null!"));
773        }
774
775        try {
776            SAXReader reader = new SAXReader();
777            if (resolver != null) {
778                reader.setEntityResolver(resolver);
779            }
780            reader.setMergeAdjacentText(true);
781            reader.setStripWhitespaceText(true);
782            if (!validate) {
783                reader.setValidation(false);
784                reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
785            } else {
786                reader.setValidation(true);
787            }
788            return reader.read(source);
789        } catch (DocumentException e) {
790            String systemId = source != null ? source.getSystemId() : "???";
791            throw new CmsXmlException(
792                Messages.get().container(Messages.ERR_UNMARSHALLING_XML_DOC_1, "(systemId = " + systemId + ")"),
793                e);
794        } catch (SAXException e) {
795            String systemId = source != null ? source.getSystemId() : "???";
796            throw new CmsXmlException(
797                Messages.get().container(Messages.ERR_UNMARSHALLING_XML_DOC_1, "(systemId = " + systemId + ")"),
798                e);
799        }
800    }
801
802    /**
803     * Helper to unmarshal (read) xml contents from a String into a document.<p>
804     *
805     * Using this method ensures that the OpenCms XML entitiy resolver is used.<p>
806     *
807     * @param xmlData the xml data in a String
808     * @param resolver the XML entity resolver to use
809     * @return the base object initialized with the unmarshalled XML document
810     * @throws CmsXmlException if something goes wrong
811     * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver)
812     */
813    public static Document unmarshalHelper(String xmlData, EntityResolver resolver) throws CmsXmlException {
814
815        return CmsXmlUtils.unmarshalHelper(new InputSource(new StringReader(xmlData)), resolver);
816    }
817
818    /**
819     * Validates the structure of a XML document contained in a byte array
820     * with the DTD or XML schema used by the document.<p>
821     *
822     * @param xmlData a byte array containing a XML document that should be validated
823     * @param resolver the XML entity resolver to use
824     *
825     * @throws CmsXmlException if the validation fails
826     */
827    public static void validateXmlStructure(byte[] xmlData, EntityResolver resolver) throws CmsXmlException {
828
829        validateXmlStructure(new ByteArrayInputStream(xmlData), resolver);
830    }
831
832    /**
833     * Validates the structure of a XML document with the DTD or XML schema used
834     * by the document.<p>
835     *
836     * @param document a XML document that should be validated
837     * @param encoding the encoding to use when marshalling the XML document (required)
838     * @param resolver the XML entity resolver to use
839     *
840     * @throws CmsXmlException if the validation fails
841     */
842    public static void validateXmlStructure(Document document, String encoding, EntityResolver resolver)
843    throws CmsXmlException {
844
845        // generate bytes from document
846        byte[] xmlData = ((ByteArrayOutputStream)marshal(
847            document,
848            new ByteArrayOutputStream(512),
849            encoding)).toByteArray();
850        validateXmlStructure(xmlData, resolver);
851    }
852
853    /**
854     * Validates the structure of a XML document contained in a byte array
855     * with the DTD or XML schema used by the document.<p>
856     *
857     * @param xmlStream a source providing a XML document that should be validated
858     * @param resolver the XML entity resolver to use
859     *
860     * @throws CmsXmlException if the validation fails
861     */
862    public static void validateXmlStructure(InputStream xmlStream, EntityResolver resolver) throws CmsXmlException {
863
864        XMLReader reader;
865        try {
866            reader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
867        } catch (SAXException e) {
868            // xerces parser not available - no schema validation possible
869            if (LOG.isWarnEnabled()) {
870                LOG.warn(Messages.get().getBundle().key(Messages.LOG_VALIDATION_INIT_XERXES_SAX_READER_FAILED_0), e);
871            }
872            // no validation of the content is possible
873            return;
874        }
875        // turn on validation
876        try {
877            reader.setFeature("http://xml.org/sax/features/validation", true);
878            // turn on schema validation
879            reader.setFeature("http://apache.org/xml/features/validation/schema", true);
880            // configure namespace support
881            reader.setFeature("http://xml.org/sax/features/namespaces", true);
882            reader.setFeature("http://xml.org/sax/features/namespace-prefixes", false);
883        } catch (SAXNotRecognizedException e) {
884            // should not happen as Xerces 2 support this feature
885            if (LOG.isWarnEnabled()) {
886                LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_RECOGNIZED_0), e);
887            }
888            // no validation of the content is possible
889            return;
890        } catch (SAXNotSupportedException e) {
891            // should not happen as Xerces 2 support this feature
892            if (LOG.isWarnEnabled()) {
893                LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_SUPPORTED_0), e);
894            }
895            // no validation of the content is possible
896            return;
897        }
898
899        // add an error handler which turns any errors into XML
900        CmsXmlValidationErrorHandler errorHandler = new CmsXmlValidationErrorHandler();
901        reader.setErrorHandler(errorHandler);
902
903        if (resolver != null) {
904            // set the resolver for the "opencms://" URIs
905            reader.setEntityResolver(resolver);
906        }
907
908        try {
909            reader.parse(new InputSource(xmlStream));
910        } catch (IOException e) {
911            // should not happen since we read form a byte array
912            if (LOG.isErrorEnabled()) {
913                LOG.error(Messages.get().getBundle().key(Messages.LOG_READ_XML_FROM_BYTE_ARR_FAILED_0), e);
914            }
915            return;
916        } catch (SAXException e) {
917            // should not happen since all errors are handled in the XML error handler
918            if (LOG.isErrorEnabled()) {
919                LOG.error(Messages.get().getBundle().key(Messages.LOG_PARSE_SAX_EXC_0), e);
920            }
921            return;
922        }
923
924        if (errorHandler.getErrors().elements().size() > 0) {
925            // there was at last one validation error, so throw an exception
926            StringWriter out = new StringWriter(256);
927            OutputFormat format = OutputFormat.createPrettyPrint();
928            XMLWriter writer = new XMLWriter(out, format);
929            try {
930                writer.write(errorHandler.getErrors());
931                writer.write(errorHandler.getWarnings());
932                writer.close();
933            } catch (IOException e) {
934                // should not happen since we write to a StringWriter
935                if (LOG.isErrorEnabled()) {
936                    LOG.error(Messages.get().getBundle().key(Messages.LOG_STRINGWRITER_IO_EXC_0), e);
937                }
938            }
939            // generate String from XML for display of document in error message
940            throw new CmsXmlException(Messages.get().container(Messages.ERR_XML_VALIDATION_1, out.toString()));
941        }
942    }
943}