001/*
002 * This library is part of OpenCms -
003 * the Open Source Content Management System
004 *
005 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
006 *
007 * This library is free software; you can redistribute it and/or
008 * modify it under the terms of the GNU Lesser General Public
009 * License as published by the Free Software Foundation; either
010 * version 2.1 of the License, or (at your option) any later version.
011 *
012 * This library is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015 * Lesser General Public License for more details.
016 *
017 * For further information about Alkacon Software GmbH & Co. KG, please see the
018 * company website: http://www.alkacon.com
019 *
020 * For further information about OpenCms, please see the
021 * project website: http://www.opencms.org
022 *
023 * You should have received a copy of the GNU Lesser General Public
024 * License along with this library; if not, write to the Free Software
025 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
026 */
027
028package org.opencms.xml;
029
030import org.opencms.file.CmsResource;
031import org.opencms.main.CmsLog;
032import org.opencms.util.CmsStringUtil;
033
034import java.io.ByteArrayInputStream;
035import java.io.ByteArrayOutputStream;
036import java.io.IOException;
037import java.io.InputStream;
038import java.io.OutputStream;
039import java.io.StringReader;
040import java.io.StringWriter;
041import java.io.UnsupportedEncodingException;
042import java.util.Arrays;
043import java.util.List;
044import java.util.stream.Collectors;
045
046import javax.xml.parsers.SAXParserFactory;
047
048import org.apache.commons.logging.Log;
049import org.apache.xerces.parsers.SAXParser;
050
051import org.dom4j.Document;
052import org.dom4j.DocumentException;
053import org.dom4j.Node;
054import org.dom4j.io.OutputFormat;
055import org.dom4j.io.SAXReader;
056import org.dom4j.io.XMLWriter;
057import org.xml.sax.EntityResolver;
058import org.xml.sax.InputSource;
059import org.xml.sax.SAXException;
060import org.xml.sax.SAXNotRecognizedException;
061import org.xml.sax.SAXNotSupportedException;
062import org.xml.sax.XMLReader;
063import org.xml.sax.helpers.XMLReaderFactory;
064
065/**
066 * Provides some basic XML handling utilities.<p>
067 *
068 * @since 6.0.0
069 */
070public final class CmsXmlUtils {
071
072    /**
073     * This class is only used to expose the XML parser configuration implementation name.<p>
074     */
075    private static class ParserImpl extends SAXParser {
076
077        /**
078         * Constructor.<p>
079         */
080        ParserImpl() {
081
082            super();
083        }
084
085        /**
086         * Returns the implementation name of the used XML parser configuration.<p>
087         *
088         * @return the implementation name
089         */
090        String getConfigImplName() {
091
092            if (fConfiguration != null) {
093                return fConfiguration.getClass().getName();
094            } else {
095                return null;
096            }
097        }
098    }
099
    /** The log object for this class. */
    private static final Log LOG = CmsLog.getLog(CmsXmlUtils.class);

    /** Name of the system property for the XML parser configuration implementation, set by {@link #initSystemProperties()} if not yet defined. */
    private static final String SAX_PARSER_CONFIG_KEY = "org.apache.xerces.xni.parser.XMLParserConfiguration";

    /** Name of the system property for the SAX parser factory implementation, set by {@link #initSystemProperties()} if not yet defined. */
    private static final String SAX_PARSER_FACTORY_KEY = "javax.xml.parsers.SAXParserFactory";

    /** Name of the system property for the XML reader implementation, set by {@link #initSystemProperties()} if not yet defined. */
    private static final String XML_READER_KEY = "org.xml.sax.driver";
111
    /**
     * Hides the default constructor to prevent instances of this utility class from being generated.<p>
     */
    private CmsXmlUtils() {

        // intentionally empty, there is nothing to initialize
    }
119
120    /**
121     * Concatenates two Xpath expressions, ensuring that exactly one slash "/" is between them.<p>
122     *
123     * Use this method if it's uncertain if the given arguments are starting or ending with
124     * a slash "/".<p>
125     *
126     * Examples:<br>
127     * <code>"title", "subtitle"</code> becomes <code>title/subtitle</code><br>
128     * <code>"title[1]/", "subtitle"</code> becomes <code>title[1]/subtitle</code><br>
129     * <code>"title[1]/", "/subtitle[1]"</code> becomes <code>title[1]/subtitle[1]</code><p>
130     *
131     * @param prefix the prefix Xpath
132     * @param suffix the suffix Xpath
133     *
134     * @return the concatenated Xpath build from prefix and suffix
135     */
136    public static String concatXpath(String prefix, String suffix) {
137
138        if (suffix == null) {
139            // ensure suffix is not null
140            suffix = "";
141        } else {
142            if ((suffix.length() > 0) && (suffix.charAt(0) == '/')) {
143                // remove leading '/' form suffix
144                suffix = suffix.substring(1);
145            }
146        }
147        if (prefix != null) {
148            StringBuffer result = new StringBuffer(32);
149            result.append(prefix);
150            if (!CmsResource.isFolder(prefix) && (suffix.length() > 0)) {
151                result.append('/');
152            }
153            result.append(suffix);
154            return result.toString();
155        }
156        return suffix;
157    }
158
159    /**
160     * Translates a simple lookup path to the simplified Xpath format used for
161     * the internal bookmarks.<p>
162     *
163     * Examples:<br>
164     * <code>title</code> becomes <code>title[1]</code><br>
165     * <code>title[1]</code> is left untouched<br>
166     * <code>title/subtitle</code> becomes <code>title[1]/subtitle[1]</code><br>
167     * <code>title/subtitle[1]</code> becomes <code>title[1]/subtitle[1]</code><p>
168     *
169     * Note: If the name already has the format <code>title[1]</code> then provided index parameter
170     * is ignored.<p>
171     *
172     * @param path the path to get the simplified Xpath for
173     * @param index the index to append (if required)
174     *
175     * @return the simplified Xpath for the given name
176     */
177    public static String createXpath(String path, int index) {
178
179        if (path.indexOf('/') > -1) {
180            // this is a complex path over more then 1 node
181            StringBuffer result = new StringBuffer(path.length() + 32);
182
183            // split the path into sub elements
184            List<String> elements = CmsStringUtil.splitAsList(path, '/');
185            int end = elements.size() - 1;
186            for (int i = 0; i <= end; i++) {
187                // append [i] to path element if required
188                result.append(createXpathElementCheck(elements.get(i), (i == end) ? index : 1));
189                if (i < end) {
190                    // append path delimiter if not final path element
191                    result.append('/');
192                }
193            }
194            return result.toString();
195        }
196
197        // this path has only 1 node, append [index] if required
198        return createXpathElementCheck(path, index);
199    }
200
201    /**
202     * Appends the provided index parameter in square brackets to the given name,
203     * like <code>path[index]</code>.<p>
204     *
205     * This method is used if it's clear that some path does not have
206     * a square bracket already appended.<p>
207     *
208     * @param path the path append the index to
209     * @param index the index to append
210     *
211     * @return the simplified Xpath for the given name
212     */
213    public static String createXpathElement(String path, int index) {
214
215        StringBuffer result = new StringBuffer(path.length() + 5);
216        result.append(path);
217        result.append('[');
218        result.append(index);
219        result.append(']');
220        return result.toString();
221    }
222
223    /**
224     * Ensures that a provided simplified Xpath has the format <code>title[1]</code>.<p>
225     *
226     * This method is used if it's uncertain if some path does have
227     * a square bracket already appended or not.<p>
228     *
229     * Note: If the name already has the format <code>title[1]</code>, then provided index parameter
230     * is ignored.<p>
231     *
232     * @param path the path to get the simplified Xpath for
233     * @param index the index to append (if required)
234     *
235     * @return the simplified Xpath for the given name
236     */
237    public static String createXpathElementCheck(String path, int index) {
238
239        if (path.charAt(path.length() - 1) == ']') {
240            // path is already in the form "title[1]"
241            // ignore provided index and return the path "as is"
242            return path;
243        }
244
245        // append index in square brackets
246        return createXpathElement(path, index);
247    }
248
249    /**
250     * Returns the first Xpath element from the provided path,
251     * without the index value.<p>
252     *
253     * Examples:<br>
254     * <code>title</code> is left untouched<br>
255     * <code>title[1]</code> becomes <code>title</code><br>
256     * <code>title/subtitle</code> becomes <code>title</code><br>
257     * <code>title[1]/subtitle[1]</code> becomes <code>title</code><p>
258     *
259     * @param path the path to get the first Xpath element from
260     *
261     * @return the first Xpath element from the provided path
262     */
263    public static String getFirstXpathElement(String path) {
264
265        int pos = path.indexOf('/');
266        if (pos >= 0) {
267            path = path.substring(0, pos);
268        }
269
270        return CmsXmlUtils.removeXpathIndex(path);
271    }
272
273    /**
274     * Returns the last Xpath element from the provided path,
275     * without the index value.<p>
276     *
277     * Examples:<br>
278     * <code>title</code> is left untouched<br>
279     * <code>title[1]</code> becomes <code>title</code><br>
280     * <code>title/subtitle</code> becomes <code>subtitle</code><br>
281     * <code>title[1]/subtitle[1]</code> becomes <code>subtitle</code><p>
282     *
283     * @param path the path to get the last Xpath element from
284     *
285     * @return the last Xpath element from the provided path
286     */
287    public static String getLastXpathElement(String path) {
288
289        int pos = path.lastIndexOf('/');
290        if (pos >= 0) {
291            path = path.substring(pos + 1);
292        }
293
294        return CmsXmlUtils.removeXpathIndex(path);
295    }
296
297    /**
298     * Returns the last Xpath element from the provided path.
299     *
300     *
301     * Examples:<br>
302     * <code>title</code> is left untouched<br>
303     * <code>title[1]/subtitle[1]</code> becomes <code>subtitle[1]</code><p>
304     *
305     * @param path the path to get the last Xpath element from
306     *
307     * @return the last Xpath element from the provided path
308     */
309    public static String getLastXpathElementWithIndex(String path) {
310
311        int pos = path.lastIndexOf('/');
312        if (pos >= 0) {
313            path = path.substring(pos + 1);
314        }
315        return path;
316    }
317
318    /**
319     * Returns the last Xpath index from the given path.<p>
320     *
321     * Examples:<br>
322     * <code>title</code> returns the empty String<p>
323     * <code>title[1]</code> returns <code>[1]</code><p>
324     * <code>title/subtitle</code> returns them empty String<p>
325     * <code>title[1]/subtitle[1]</code> returns <code>[1]</code><p>
326     *
327     * @param path the path to extract the Xpath index from
328     *
329     * @return  the last Xpath index from the given path
330     */
331    public static String getXpathIndex(String path) {
332
333        int pos1 = path.lastIndexOf('/');
334        int pos2 = path.lastIndexOf('[');
335        if ((pos2 < 0) || (pos1 > pos2)) {
336            return "";
337        }
338
339        return path.substring(pos2);
340    }
341
342    /**
343     * Returns the last Xpath index from the given path as integer.<p>
344     *
345     * Examples:<br>
346     * <code>title</code> returns 1<p>
347     * <code>title[1]</code> returns 1<p>
348     * <code>title/subtitle</code> returns 1<p>
349     * <code>title[1]/subtitle[2]</code> returns 2<p>
350     *
351     * @param path the path to extract the Xpath index from
352     *
353     * @return the last Xpath index from the given path as integer
354     */
355    public static int getXpathIndexInt(String path) {
356
357        int pos1 = path.lastIndexOf('/');
358        int pos2 = path.lastIndexOf('[');
359        if ((pos2 < 0) || (pos1 > pos2)) {
360            return 1;
361        }
362
363        String idxStr = path.substring(pos2 + 1, path.lastIndexOf(']'));
364        try {
365            return Integer.parseInt(idxStr);
366        } catch (NumberFormatException e) {
367            // NOOP
368        }
369        return 1;
370    }
371
372    /**
373     * Initializes XML processing system properties to avoid evaluating the XML parser and reader implementation each time an XML document is read.<p>
374     * This is done for performance improvements only.<p>
375     */
376    public static void initSystemProperties() {
377
378        String implName;
379        // initialize system properties
380        if (System.getProperty(SAX_PARSER_FACTORY_KEY) == null) {
381            implName = SAXParserFactory.newInstance().getClass().getName();
382            LOG.info("Setting sax parser factory impl property to " + implName);
383            System.setProperty(SAX_PARSER_FACTORY_KEY, implName);
384        }
385        if (System.getProperty(XML_READER_KEY) == null) {
386            SAXReader reader = new SAXReader();
387            try {
388                implName = reader.getXMLReader().getClass().getName();
389                LOG.info("Setting xml reader impl property to " + implName);
390                System.setProperty(XML_READER_KEY, implName);
391            } catch (SAXException e) {
392                LOG.error("Error evaluating XMLReader impl.", e);
393            }
394        }
395        if (System.getProperty(SAX_PARSER_CONFIG_KEY) == null) {
396            ParserImpl saxParser = new ParserImpl();
397            implName = saxParser.getConfigImplName();
398            if (implName != null) {
399                LOG.info("Setting xml parser configuration impl property to " + implName);
400                System.setProperty(SAX_PARSER_CONFIG_KEY, implName);
401            }
402        }
403    }
404
405    /**
406     * Returns <code>true</code> if the given path is a Xpath with
407     * at least 2 elements.<p>
408     *
409     * Examples:<br>
410     * <code>title</code> returns <code>false</code><br>
411     * <code>title[1]</code> returns <code>false</code><br>
412     * <code>title/subtitle</code> returns <code>true</code><br>
413     * <code>title[1]/subtitle[1]</code> returns <code>true</code><p>
414     *
415     * @param path the path to check
416     * @return true if the given path is a Xpath with at least 2 elements
417     */
418    public static boolean isDeepXpath(String path) {
419
420        return path.indexOf('/') > 0;
421    }
422
423    /**
424     * Marshals (writes) an XML document into an output stream using XML pretty-print formatting.<p>
425     *
426     * @param document the XML document to marshal
427     * @param out the output stream to write to
428     * @param encoding the encoding to use
429     * @return the output stream with the xml content
430     * @throws CmsXmlException if something goes wrong
431     */
432    public static OutputStream marshal(Document document, OutputStream out, String encoding) throws CmsXmlException {
433
434        try {
435            OutputFormat format = OutputFormat.createPrettyPrint();
436            format.setEncoding(encoding);
437
438            XMLWriter writer = new XMLWriter(out, format);
439            writer.setEscapeText(false);
440
441            writer.write(document);
442            writer.close();
443
444        } catch (Exception e) {
445            throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e);
446        }
447
448        return out;
449    }
450
451    /**
452     * Marshals (writes) an XML document to a String using XML pretty-print formatting.<p>
453     *
454     * @param document the XML document to marshal
455     * @param encoding the encoding to use
456     * @return the marshalled XML document
457     * @throws CmsXmlException if something goes wrong
458     */
459    public static String marshal(Document document, String encoding) throws CmsXmlException {
460
461        ByteArrayOutputStream out = new ByteArrayOutputStream();
462        marshal(document, out, encoding);
463        try {
464            return out.toString(encoding);
465        } catch (UnsupportedEncodingException e) {
466            throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_TO_STRING_0), e);
467        }
468    }
469
470    /**
471     * Marshals (writes) an XML node into an output stream using XML pretty-print formatting.<p>
472     *
473     * @param node the XML node to marshal
474     * @param encoding the encoding to use
475     *
476     * @return the string with the xml content
477     *
478     * @throws CmsXmlException if something goes wrong
479     */
480    public static String marshal(Node node, String encoding) throws CmsXmlException {
481
482        ByteArrayOutputStream out = new ByteArrayOutputStream();
483        try {
484            OutputFormat format = OutputFormat.createPrettyPrint();
485            format.setEncoding(encoding);
486            format.setSuppressDeclaration(true);
487
488            XMLWriter writer = new XMLWriter(out, format);
489            writer.setEscapeText(false);
490
491            writer.write(node);
492            writer.close();
493        } catch (Exception e) {
494            throw new CmsXmlException(Messages.get().container(Messages.ERR_MARSHALLING_XML_DOC_0), e);
495        }
496        return new String(out.toByteArray());
497    }
498
499    /**
500     * Removes all Xpath indices from the given path.<p>
501     *
502     * Example:<br>
503     * <code>title</code> is left untouched<br>
504     * <code>title[1]</code> becomes <code>title</code><br>
505     * <code>title/subtitle</code> is left untouched<br>
506     * <code>title[1]/subtitle[1]</code> becomes <code>title/subtitle</code><p>
507     *
508     * @param path the path to remove the Xpath index from
509     *
510     * @return the path with all Xpath indices removed
511     */
512    public static String removeAllXpathIndices(String path) {
513
514        return path.replaceAll("\\[[0-9]+\\]", "");
515    }
516
517    /**
518     * Removes the first Xpath element from the path.<p>
519     *
520     * If the provided path does not contain a "/" character,
521     * it is returned unchanged.<p>
522     *
523     * <p>Examples:<br>
524     * <code>title</code> is left untouched<br>
525     * <code>title[1]</code> is left untouched<br>
526     * <code>title/subtitle</code> becomes <code>subtitle</code><br>
527     * <code>title[1]/subtitle[1]</code> becomes <code>subtitle[1]</code><p>
528     *
529     * @param path the Xpath to remove the first element from
530     *
531     * @return the path with the first element removed
532     */
533    public static String removeFirstXpathElement(String path) {
534
535        int pos = path.indexOf('/');
536        if (pos < 0) {
537            return path;
538        }
539
540        return path.substring(pos + 1);
541    }
542
543    /**
544     * Removes the last complex Xpath element from the path.<p>
545     *
546     * The same as {@link #removeLastXpathElement(String)} both it works with more complex xpaths.
547     *
548     * <p>Example:<br>
549     * <code>system/backup[@date='23/10/2003']/resource[path='/a/b/c']</code> becomes <code>system/backup[@date='23/10/2003']</code><p>
550     *
551     * @param path the Xpath to remove the last element from
552     *
553     * @return the path with the last element removed
554     */
555    public static String removeLastComplexXpathElement(String path) {
556
557        int pos = path.lastIndexOf('/');
558        if (pos < 0) {
559            return path;
560        }
561        // count ' chars
562        int p = pos;
563        int count = -1;
564        while (p > 0) {
565            count++;
566            p = path.indexOf("\'", p + 1);
567        }
568        String parentPath = path.substring(0, pos);
569        if ((count % 2) == 0) {
570            // if substring is complete
571            return parentPath;
572        }
573        // if not complete
574        p = parentPath.lastIndexOf("'");
575        if (p >= 0) {
576            // complete it if possible
577            return removeLastComplexXpathElement(parentPath.substring(0, p));
578        }
579        return parentPath;
580    }
581
582    /**
583     * Removes the last Xpath element from the path.<p>
584     *
585     * If the provided path does not contain a "/" character,
586     * it is returned unchanged.<p>
587     *
588     * <p>Examples:<br>
589     * <code>title</code> is left untouched<br>
590     * <code>title[1]</code> is left untouched<br>
591     * <code>title/subtitle</code> becomes <code>title</code><br>
592     * <code>title[1]/subtitle[1]</code> becomes <code>title[1]</code><p>
593     *
594     * @param path the Xpath to remove the last element from
595     *
596     * @return the path with the last element removed
597     */
598    public static String removeLastXpathElement(String path) {
599
600        int pos = path.lastIndexOf('/');
601        if (pos < 0) {
602            return path;
603        }
604
605        return path.substring(0, pos);
606    }
607
608    /**
609     * Removes all Xpath index information from the given input path.<p>
610     *
611     * Examples:<br>
612     * <code>title</code> is left untouched<br>
613     * <code>title[1]</code> becomes <code>title</code><br>
614     * <code>title/subtitle</code> is left untouched<br>
615     * <code>title[1]/subtitle[1]</code> becomes <code>title/subtitle</code><p>
616     *
617     * @param path the path to remove the Xpath index information from
618     *
619     * @return the simplified Xpath for the given name
620     */
621    public static String removeXpath(String path) {
622
623        if (path.indexOf('/') > -1) {
624            // this is a complex path over more then 1 node
625            StringBuffer result = new StringBuffer(path.length() + 32);
626
627            // split the path into sub-elements
628            List<String> elements = CmsStringUtil.splitAsList(path, '/');
629            int end = elements.size() - 1;
630            for (int i = 0; i <= end; i++) {
631                // remove [i] from path element if required
632                result.append(removeXpathIndex(elements.get(i)));
633                if (i < end) {
634                    // append path delimiter if not final path element
635                    result.append('/');
636                }
637            }
638            return result.toString();
639        }
640
641        // this path has only 1 node, remove last index if required
642        return removeXpathIndex(path);
643    }
644
645    /**
646     * Removes the last Xpath index from the given path.<p>
647     *
648     * Examples:<br>
649     * <code>title</code> is left untouched<br>
650     * <code>title[1]</code> becomes <code>title</code><br>
651     * <code>title/subtitle</code> is left untouched<br>
652     * <code>title[1]/subtitle[1]</code> becomes <code>title[1]/subtitle</code><p>
653     *
654     * @param path the path to remove the Xpath index from
655     *
656     * @return the path with the last Xpath index removed
657     */
658    public static String removeXpathIndex(String path) {
659
660        int pos1 = path.lastIndexOf('/');
661        int pos2 = path.lastIndexOf('[');
662        if ((pos2 < 0) || (pos1 > pos2)) {
663            return path;
664        }
665
666        return path.substring(0, pos2);
667    }
668
669    /**
670     * Simplifies an Xpath by removing a leading and a trailing slash from the given path.<p>
671     *
672     * Examples:<br>
673     * <code>title/</code> becomes <code>title</code><br>
674     * <code>/title[1]/</code> becomes <code>title[1]</code><br>
675     * <code>/title/subtitle/</code> becomes <code>title/subtitle</code><br>
676     * <code>/title/subtitle[1]/</code> becomes <code>title/subtitle[1]</code><p>
677     *
678     * @param path the path to process
679     * @return the input with a leading and a trailing slash removed
680     */
681    public static String simplifyXpath(String path) {
682
683        StringBuffer result = new StringBuffer(path);
684        if (result.charAt(0) == '/') {
685            result.deleteCharAt(0);
686        }
687        int pos = result.length() - 1;
688        if (result.charAt(pos) == '/') {
689            result.deleteCharAt(pos);
690        }
691        return result.toString();
692    }
693
694    /**
695     * Splits a content value path into its components, ignoring leading or trailing slashes.<p>
696     *
697     * Note: this does not work for XPaths in general, only for the paths used to identify values in OpenCms contents.<p>
698     *
699     * @param xpath the xpath
700     *
701     * @return the path components
702     */
703    public static List<String> splitXpath(String xpath) {
704
705        return Arrays.stream(xpath.split("/")).filter(s -> !s.isEmpty()).collect(Collectors.toList());
706
707    }
708
709    /**
710     * Helper to unmarshal (read) xml contents from a byte array into a document.<p>
711     *
712     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
713     *
714     * @param xmlData the XML data in a byte array
715     * @param resolver the XML entity resolver to use
716     *
717     * @return the base object initialized with the unmarshalled XML document
718     *
719     * @throws CmsXmlException if something goes wrong
720     *
721     * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver)
722     */
723    public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver) throws CmsXmlException {
724
725        return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver);
726    }
727
728    /**
729     * Helper to unmarshal (read) xml contents from a byte array into a document.<p>
730     *
731     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
732     *
733     * @param xmlData the XML data in a byte array
734     * @param resolver the XML entity resolver to use
735     * @param validate if the reader should try to validate the xml code
736     *
737     * @return the base object initialized with the unmarshalled XML document
738     *
739     * @throws CmsXmlException if something goes wrong
740     *
741     * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver)
742     */
743    public static Document unmarshalHelper(byte[] xmlData, EntityResolver resolver, boolean validate)
744    throws CmsXmlException {
745
746        return CmsXmlUtils.unmarshalHelper(new InputSource(new ByteArrayInputStream(xmlData)), resolver, validate);
747    }
748
749    /**
750     * Helper to unmarshal (read) xml contents from an input source into a document.<p>
751     *
752     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
753     *
754     * Important: The encoding provided will NOT be used during unmarshalling,
755     * the XML parser will do this on the base of the information in the source String.
756     * The encoding is used for initializing the created instance of the document,
757     * which means it will be used when marshalling the document again later.<p>
758     *
759     * @param source the XML input source to use
760     * @param resolver the XML entity resolver to use
761     *
762     * @return the unmarshalled XML document
763     *
764     * @throws CmsXmlException if something goes wrong
765     */
766    public static Document unmarshalHelper(InputSource source, EntityResolver resolver) throws CmsXmlException {
767
768        return unmarshalHelper(source, resolver, false);
769    }
770
771    /**
772     * Helper to unmarshal (read) xml contents from an input source into a document.<p>
773     *
774     * Using this method ensures that the OpenCms XML entity resolver is used.<p>
775     *
776     * Important: The encoding provided will NOT be used during unmarshalling,
777     * the XML parser will do this on the base of the information in the source String.
778     * The encoding is used for initializing the created instance of the document,
779     * which means it will be used when marshalling the document again later.<p>
780     *
781     * @param source the XML input source to use
782     * @param resolver the XML entity resolver to use
783     * @param validate if the reader should try to validate the xml code
784     *
785     * @return the unmarshalled XML document
786     *
787     * @throws CmsXmlException if something goes wrong
788     */
789    public static Document unmarshalHelper(InputSource source, EntityResolver resolver, boolean validate)
790    throws CmsXmlException {
791
792        if (null == source) {
793            throw new CmsXmlException(Messages.get().container(Messages.ERR_UNMARSHALLING_XML_DOC_1, "source==null!"));
794        }
795
796        try {
797            SAXReader reader = new SAXReader();
798            if (resolver != null) {
799                reader.setEntityResolver(resolver);
800            }
801            reader.setMergeAdjacentText(true);
802            reader.setStripWhitespaceText(true);
803            if (!validate) {
804                reader.setValidation(false);
805                reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
806            } else {
807                reader.setValidation(true);
808            }
809            return reader.read(source);
810        } catch (DocumentException e) {
811            String systemId = source != null ? source.getSystemId() : "???";
812            throw new CmsXmlException(
813                Messages.get().container(Messages.ERR_UNMARSHALLING_XML_DOC_1, "(systemId = " + systemId + ")"),
814                e);
815        } catch (SAXException e) {
816            String systemId = source != null ? source.getSystemId() : "???";
817            throw new CmsXmlException(
818                Messages.get().container(Messages.ERR_UNMARSHALLING_XML_DOC_1, "(systemId = " + systemId + ")"),
819                e);
820        }
821    }
822
823    /**
824     * Helper to unmarshal (read) xml contents from a String into a document.<p>
825     *
826     * Using this method ensures that the OpenCms XML entitiy resolver is used.<p>
827     *
828     * @param xmlData the xml data in a String
829     * @param resolver the XML entity resolver to use
830     * @return the base object initialized with the unmarshalled XML document
831     * @throws CmsXmlException if something goes wrong
832     * @see CmsXmlUtils#unmarshalHelper(InputSource, EntityResolver)
833     */
834    public static Document unmarshalHelper(String xmlData, EntityResolver resolver) throws CmsXmlException {
835
836        return CmsXmlUtils.unmarshalHelper(new InputSource(new StringReader(xmlData)), resolver);
837    }
838
839    /**
840     * Validates the structure of a XML document contained in a byte array
841     * with the DTD or XML schema used by the document.<p>
842     *
843     * @param xmlData a byte array containing a XML document that should be validated
844     * @param resolver the XML entity resolver to use
845     *
846     * @throws CmsXmlException if the validation fails
847     */
848    public static void validateXmlStructure(byte[] xmlData, EntityResolver resolver) throws CmsXmlException {
849
850        validateXmlStructure(new ByteArrayInputStream(xmlData), resolver);
851    }
852
853    /**
854     * Validates the structure of a XML document with the DTD or XML schema used
855     * by the document.<p>
856     *
857     * @param document a XML document that should be validated
858     * @param encoding the encoding to use when marshalling the XML document (required)
859     * @param resolver the XML entity resolver to use
860     *
861     * @throws CmsXmlException if the validation fails
862     */
863    public static void validateXmlStructure(Document document, String encoding, EntityResolver resolver)
864    throws CmsXmlException {
865
866        // generate bytes from document
867        byte[] xmlData = ((ByteArrayOutputStream)marshal(
868            document,
869            new ByteArrayOutputStream(512),
870            encoding)).toByteArray();
871        validateXmlStructure(xmlData, resolver);
872    }
873
874    /**
     * Validates the structure of an XML document read from an input stream
     * against the DTD or XML schema used by the document.<p>
877     *
878     * @param xmlStream a source providing a XML document that should be validated
879     * @param resolver the XML entity resolver to use
880     *
881     * @throws CmsXmlException if the validation fails
882     */
883    public static void validateXmlStructure(InputStream xmlStream, EntityResolver resolver) throws CmsXmlException {
884
885        XMLReader reader;
886        try {
887            reader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
888        } catch (SAXException e) {
889            // xerces parser not available - no schema validation possible
890            if (LOG.isWarnEnabled()) {
891                LOG.warn(Messages.get().getBundle().key(Messages.LOG_VALIDATION_INIT_XERXES_SAX_READER_FAILED_0), e);
892            }
893            // no validation of the content is possible
894            return;
895        }
896        // turn on validation
897        try {
898            reader.setFeature("http://xml.org/sax/features/validation", true);
899            // turn on schema validation
900            reader.setFeature("http://apache.org/xml/features/validation/schema", true);
901            // configure namespace support
902            reader.setFeature("http://xml.org/sax/features/namespaces", true);
903            reader.setFeature("http://xml.org/sax/features/namespace-prefixes", false);
904        } catch (SAXNotRecognizedException e) {
905            // should not happen as Xerces 2 support this feature
906            if (LOG.isWarnEnabled()) {
907                LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_RECOGNIZED_0), e);
908            }
909            // no validation of the content is possible
910            return;
911        } catch (SAXNotSupportedException e) {
912            // should not happen as Xerces 2 support this feature
913            if (LOG.isWarnEnabled()) {
914                LOG.warn(Messages.get().getBundle().key(Messages.LOG_SAX_READER_FEATURE_NOT_SUPPORTED_0), e);
915            }
916            // no validation of the content is possible
917            return;
918        }
919
920        // add an error handler which turns any errors into XML
921        CmsXmlValidationErrorHandler errorHandler = new CmsXmlValidationErrorHandler();
922        reader.setErrorHandler(errorHandler);
923
924        if (resolver != null) {
925            // set the resolver for the "opencms://" URIs
926            reader.setEntityResolver(resolver);
927        }
928
929        try {
930            reader.parse(new InputSource(xmlStream));
931        } catch (IOException e) {
932            // should not happen since we read form a byte array
933            if (LOG.isErrorEnabled()) {
934                LOG.error(Messages.get().getBundle().key(Messages.LOG_READ_XML_FROM_BYTE_ARR_FAILED_0), e);
935            }
936            return;
937        } catch (SAXException e) {
938            // should not happen since all errors are handled in the XML error handler
939            if (LOG.isErrorEnabled()) {
940                LOG.error(Messages.get().getBundle().key(Messages.LOG_PARSE_SAX_EXC_0), e);
941            }
942            return;
943        }
944
945        if (errorHandler.getErrors().elements().size() > 0) {
946            // there was at last one validation error, so throw an exception
947            StringWriter out = new StringWriter(256);
948            OutputFormat format = OutputFormat.createPrettyPrint();
949            XMLWriter writer = new XMLWriter(out, format);
950            try {
951                writer.write(errorHandler.getErrors());
952                writer.write(errorHandler.getWarnings());
953                writer.close();
954            } catch (IOException e) {
955                // should not happen since we write to a StringWriter
956                if (LOG.isErrorEnabled()) {
957                    LOG.error(Messages.get().getBundle().key(Messages.LOG_STRINGWRITER_IO_EXC_0), e);
958                }
959            }
960            // generate String from XML for display of document in error message
961            throw new CmsXmlException(Messages.get().container(Messages.ERR_XML_VALIDATION_1, out.toString()));
962        }
963    }
964}