001package com.nimbusds.langtag;
002
003
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
006
007
008/**
009 * Language tag according to <a href="http://tools.ietf.org/html/rfc5646">RFC 5646</a>.
010 *
011 * <p>Supports normal language tags. Special private language tags beginning 
012 * with "x" and grandfathered tags beginning with "i" are not supported.
013 *
014 * <p>To construct a new language tag from scratch:
015 *
016 * <pre>
017 * // English as used in the United States
018 * LangTag tag = new LangTag("en");
019 * tag.setRegion("US");
020 *
021 * // Returns "en-US"
022 * tag.toString();
023 * </pre>
024 *
025 * <p>To parse a language tag:
026 *
027 * <pre>
028 * // Chinese, Mandarin, Simplified script, as used in China
029 * LangTag tag = LangTag.parse("zh-cmn-Hans-CN");
030 *
031 * // Returns "zh"
032 * tag.getPrimaryLanguage();
033 *
034 * // Returns "cmn"
035 * tag.getExtendedLanguageSubtags()[0];
036 *
037 * // Returns "zh-cmn"
038 * tag.getLanguage();
039 *
040 * // Returns "Hans"
041 * tag.getScript();
042 *
043 * // Returns "CN"
044 * tag.getRegion();
045 * </pre>
046 *
047 * <p>See <a href="http://tools.ietf.org/html/rfc5646">RFC 5646</a>.
048 */
049public class LangTag implements ReadOnlyLangTag {
050
051
052        /**
053         * The primary language, as the shortest ISO 639 code (2*3ALPHA). Must
054         * always be defined, unless sufficient language subtags exist.
055         */
056        private String primaryLanguage;
057        
058        
059        /**
060         * Optional extended language subtags, as three-letter ISO-639-3 codes.
061         */
062        private String[] languageSubtags;
063        
064        
065        /**
066         * Optional script, (4ALPHA) ISO 15924 code.
067         */
068        private String script = null;
069
070
071        /**
072         * Optional region, (2ALPHA) ISO 3166-1 code or (3DIGIT) UN M.49 code.
073         */
074        private String region = null;
075        
076        
077        /**
078         * Optional variants, (5*8alphanum) or (DIGIT 3alphanum).
079         */
080        private String[] variants = null;
081        
082        
083        /**
084         * Optional extensions.
085         */
086        private String[] extensions = null;
087        
088        
089        /**
090         * Optional private use subtag.
091         */
092        private String privateUse = null;
093        
094        
095        /**
096         * Ensures the specified subtag has a valid maximum length of eight
097         * characters.
098         *
099         * @param subtag The sub tag to check. Must not be {@code null}.
100         *
101         * @throws LangTagException If the subtag has length greater than eight
102         *                          characters.
103         */
104        private static void ensureMaxLength(final String subtag)
105                throws LangTagException {
106                
107                if (subtag.length() > 8)
108                
109                        // extension or private use subtag?
110                        if (subtag.charAt(1) != '-' && subtag.length() > 10)
111                        
112                                throw new LangTagException("Invalid subtag syntax: Max character length exceeded");
113        }
114        
115        
116        /**
117         * Creates a new simple language tag.
118         *
119         * <p>Use for simple language tags such as "en" (English), "fr" 
120         * (French) or "pt" (Portuguese).
121         *
122         * @param primaryLanguage The primary language, as the shortest two or 
123         *                        three-letter ISO 639 code. Must not be 
124         *                        {@code null}.
125         *
126         * @throws LangTagException If the primary language syntax is invalid.
127         */
128        public LangTag(final String primaryLanguage)
129                throws LangTagException {
130        
131                this(primaryLanguage, new String[]{});
132        }
133        
134        
135        /**
136         * Creates a new extended language tag.
137         *
138         * <p>Use for extended language tags such as "zh-cmn" (Mandarin 
139         * Chinese) or "zh-yue" (Cantonese Chinese).
140         *
141         * @param primaryLanguage The primary language, as the shortest two or 
142         *                        three-letter ISO 639 code. May be {@code null}
143         *                        if the subtags are sufficient to identify the
144         *                        language.
145         * @param languageSubtags One or more extended language subtags, as
146         *                        three-letter ISO 639-3 codes. {@code null} if
147         *                        none.
148         *
149         * @throws LangTagException If the primary or extended language syntax 
150         *                          is invalid.
151         */
152        public LangTag(final String primaryLanguage, final String... languageSubtags)
153                throws LangTagException {
154                
155                if (primaryLanguage == null && 
156                    (languageSubtags == null || languageSubtags.length == 0))
157                        throw new LangTagException("Either the primary language or the extended language subtags, or both must be defined");
158                
159                setPrimaryLanguage(primaryLanguage);
160                setExtendedLanguageSubtags(languageSubtags);
161        }
162        
163        
164        @Override
165        public String getLanguage() {
166        
167                StringBuilder sb = new StringBuilder();
168                
169                if (primaryLanguage != null)
170                        sb.append(primaryLanguage);
171                        
172                if (languageSubtags != null && languageSubtags.length > 0) {
173                
174                        for (String tag: languageSubtags) {
175                        
176                                if (sb.length() > 0)
177                                        sb.append('-');
178                                
179                                sb.append(tag);
180                        }
181                }
182                        
183                return sb.toString();
184        }
185        
186        
187        @Override
188        public String getPrimaryLanguage() {
189        
190                return primaryLanguage;
191        }
192        
193        
194        /**
195         * Checks if the specified string has a valid primary language subtag 
196         * syntax.
197         *
198         * @param s The string to check. Must not be {@code null}.
199         *
200         * @return {@code true} if the syntax is correct, else {@code false}.
201         */
202        private static boolean isPrimaryLanguage(final String s) {
203        
204                return s.matches("[a-zA-Z]{2,3}");
205        }
206        
207        
208        /**
209         * Sets the primary language subtag.
210         *
211         * <p>See RFC 5646 section 2.2.1.
212         *
213         * @param primaryLanguage The primary language, as the shortest two or
214         *                        three-letter ISO 639 code. May be 
215         *                        {@code null}.
216         *
217         * @throws LangTagException If the primary language syntax is invalid.
218         */
219        private void setPrimaryLanguage(final String primaryLanguage)
220                throws LangTagException {
221                
222                if (primaryLanguage == null) {
223                        this.primaryLanguage = null;
224                        return;
225                }
226                
227                ensureMaxLength(primaryLanguage);
228                
229                if (! isPrimaryLanguage(primaryLanguage))
230                        throw new LangTagException("Invalid primary language subtag: Must be a two or three-letter ISO 639 code");
231                
232                this.primaryLanguage = primaryLanguage.toLowerCase();
233        }
234        
235        
236        @Override
237        public String[] getExtendedLanguageSubtags() {
238        
239                return languageSubtags;
240        }
241        
242        
243        /**
244         * Checks if the specified string has a valid extended language subtag 
245         * syntax.
246         *
247         * @param s The string to check. Must not be {@code null}.
248         *
249         * @return {@code true} if the syntax is correct, else {@code false}.
250         */
251        private static boolean isExtendedLanguageSubtag(final String s) {
252        
253                return s.matches("[a-zA-Z]{3}");
254        }
255        
256        
257        /**
258         * Sets the extended language subtags.
259         *
260         * <p>See RFC 5646 section 2.2.2.
261         *
262         * @param languageSubtags The extended language subtags, as three-letter
263         *                        ISO 639-3 codes. {@code null} if none.
264         */
265        private void setExtendedLanguageSubtags(final String... languageSubtags)
266                throws LangTagException {
267                
268                if (languageSubtags == null || languageSubtags.length == 0) {
269                        this.languageSubtags = null;
270                        return;
271                }
272
273                this.languageSubtags = new String[languageSubtags.length];
274                
275                for (int i=0; i < languageSubtags.length; i++) {
276                
277                        ensureMaxLength(languageSubtags[i]);
278
279                        if (! isExtendedLanguageSubtag(languageSubtags[i]))
280                                throw new LangTagException("Invalid extended language subtag: Must be a three-letter ISO 639-3 code");
281
282                        this.languageSubtags[i] = languageSubtags[i].toLowerCase();
283                }
284        }
285        
286        
287        @Override
288        public String getScript() {
289        
290                return script;
291        }
292        
293        
294        /**
295         * Checks if the specified string has a valid script subtag syntax.
296         *
297         * @param s The string to check. Must not be {@code null}.
298         *
299         * @return {@code true} if the syntax is correct, else {@code false}.
300         */
301        private static boolean isScript(final String s) {
302                
303                return s.matches("[a-zA-Z]{4}");
304        }
305        
306        
307        /**
308         * Sets the script.
309         *
310         * <p>See RFC 5646 section 2.2.3.
311         *
312         * @param script The script, as a four-letter ISO 15924 code. 
313         *               {@code null} if not defined.
314         *
315         * @throws LangTagException If the script syntax is invalid.
316         */
317        public void setScript(final String script)
318                throws LangTagException {
319        
320                if (script == null) {
321                        this.script = null;
322                        return;
323                }
324                
325                ensureMaxLength(script);
326                
327                if (! isScript(script))
328                        throw new LangTagException("Invalid script subtag: Must be a four-letter ISO 15924 code");
329                
330                this.script = script.substring(0, 1).toUpperCase() + 
331                              script.substring(1).toLowerCase();
332        }
333        
334        
335        @Override
336        public String getRegion() {
337        
338                return region;
339        }
340        
341        
342        /**
343         * Checks if the specified string has a valid region subtag syntax.
344         *
345         * @param s The string to check. Must not be {@code null}.
346         *
347         * @return {@code true} if the syntax is correct, else {@code false}.
348         */
349        private static boolean isRegion(final String s) {
350        
351                return s.matches("[a-zA-Z]{2}|\\d{3}");
352        }
353        
354        
355        /**
356         * Sets the region.
357         *
358         * <p>See RFC 5646 section 2.2.4.
359         *
360         * @param region The region, as a two-letter ISO 3166-1 code or a three-
361         *               digit UN M.49 code. {@code null} if not defined.
362         *
363         * @throws LangTagException If the region syntax is invalid.
364         */
365        public void setRegion(final String region)
366                throws LangTagException {
367                
368                if (region == null) {
369                        this.region = null;
370                        return;
371                }
372                
373                ensureMaxLength(region);
374                
375                if (! isRegion(region))
376                        throw new LangTagException("Invalid region subtag: Must be a two-letter ISO 3166-1 code or a three-digit UN M.49 code");
377        
378                this.region = region.toUpperCase();
379        }
380        
381        
382        @Override
383        public String[] getVariants() {
384        
385                return variants;
386        }
387        
388        
389        /**
390         * Checks if the specified string has a valid variant subtag syntax.
391         *
392         * @param s The string to check. Must not be {@code null}.
393         *
394         * @return {@code true} if the syntax is correct, else {@code false}.
395         */
396        private static boolean isVariant(final String s) {
397        
398                return s.matches("[a-zA-Z][a-zA-Z0-9]{4,}|[0-9][a-zA-Z0-9]{3,}");
399        }
400        
401        
402        /**
403         * Sets the variants.
404         *
405         * <p>See RFC 5646 section 2.2.5.
406         *
407         * @param variants The variants. {@code null} if not defined.
408         *
409         * @throws LangTagException If the variant syntax is invalid.
410         */
411        public void setVariants(final String... variants)
412                throws LangTagException {
413                
414                if (variants == null || variants.length == 0) {
415                        this.variants = null;
416                        return;
417                }
418        
419                this.variants = new String[variants.length];
420                
421                for (int i=0; i < variants.length; i++) {
422                
423                        ensureMaxLength(variants[i]);
424
425                        if (! isVariant(variants[i]))
426                                throw new LangTagException("Invalid variant subtag");
427
428                        this.variants[i] = variants[i].toLowerCase();
429                }
430        }
431        
432        
433        @Override
434        public String[] getExtensions() {
435        
436                return extensions;
437        }
438        
439        
440        /**
441         * Checks if the specified string has a valid extension singleton 
442         * syntax.
443         *
444         * @param s The string to check. Must not be {@code null}.
445         *
446         * @return {@code true} if the syntax is correct, else {@code false}.
447         */
448        private static boolean isExtensionSingleton(final String s) {
449        
450                return s.matches("[0-9a-wA-Wy-zY-Z]");
451        }
452        
453        
454        /**
455         * Checks if the specified string has a valid extension subtag syntax.
456         *
457         * @param s The string to check. Must not be {@code null}.
458         *
459         * @return {@code true} if the syntax is correct, else {@code false}.
460         */
461        private static boolean isExtension(final String s) {
462        
463                return s.matches("[0-9a-wA-Wy-zY-Z]-[0-9a-zA-Z]+");
464        }
465        
466        
467        /**
468         * Sets the extensions.
469         *
470         * <p>See RFC 5646 section 2.2.6.
471         *
472         * @param extensions The extensions. {@code null} if not defined.
473         *
474         * @throws LangTagException If the extension syntax is invalid.
475         */
476        public void setExtensions(final String... extensions)
477                throws LangTagException {
478                
479                if (extensions == null || extensions.length == 0) {
480                        this.extensions = null;
481                        return;
482                }
483        
484                this.extensions = new String[extensions.length];
485                
486                for (int i=0; i < extensions.length; i++) {
487                
488                        ensureMaxLength(extensions[i]);
489
490                        if (! isExtension(extensions[i]))
491                                throw new LangTagException("Invalid extension subtag");
492
493                        this.extensions[i] = extensions[i].toLowerCase();
494                }
495        }
496        
497        
498        @Override
499        public String getPrivateUse() {
500        
501                return privateUse;
502        }
503        
504        
505        /**
506         * Checks if the specified string has a valid private use subtag syntax.
507         *
508         * @param s The string to check. Must not be {@code null}.
509         *
510         * @return {@code true} if the syntax is correct, else {@code false}.
511         */
512        private static boolean isPrivateUse(final String s) {
513        
514                return s.matches("x-[0-9a-zA-Z]+");
515        }
516        
517        
518        /**
519         * Sets the private use.
520         *
521         * <p>See RFC 5646 section 2.2.7.
522         *
523         * @param privateUse The private use. {@code null} if not defined.
524         *
525         * @throws LangTagException If the extension syntax is invalid.
526         */
527        public void setPrivateUse(final String privateUse)
528                throws LangTagException {
529        
530                if (privateUse == null) {
531                        this.privateUse = null;
532                        return;
533                }
534                
535                ensureMaxLength(privateUse);
536                
537                if (! isPrivateUse(privateUse))
538                        throw new LangTagException("Invalid private use subtag");
539        
540                this.privateUse = privateUse.toLowerCase();
541        }
542        
543        
544        @Override
545        public String toString() {
546        
547                StringBuilder sb = new StringBuilder(getLanguage());
548                
549                if (script != null) {
550                        sb.append('-');
551                        sb.append(script);
552                }
553                
554                if (region != null) {
555                        sb.append('-');
556                        sb.append(region);
557                }
558                
559                if (variants != null) {
560                
561                        for (String v: variants) {
562                                sb.append('-');
563                                sb.append(v);
564                        }
565                }
566                
567                if (extensions != null) {
568                
569                        for (String e: extensions) {
570                                sb.append('-');
571                                sb.append(e);
572                        }
573                }
574                
575                if (privateUse != null) {
576                
577                        sb.append('-');
578                        sb.append(privateUse);
579                }
580                
581                return sb.toString();
582        }
583        
584        
585        /**
586         * Overrides {@code Object.hashCode()}.
587         *
588         * @return The object hash code.
589         */
590        @Override
591        public int hashCode() {
592        
593                return toString().hashCode();
594        }
595        
596        
597        /**
598         * Overrides {@code Object.equals()}.
599         *
600         * @param object The object to compare to.
601         *
602         * @return {@code true} if the objects have the same value, otherwise
603         *         {@code false}.
604         */
605        @Override
606        public boolean equals(Object object) {
607        
608                return object != null &&
609                       object instanceof LangTag && 
610                       this.toString().equals(object.toString());
611        }
612        
613        
614        /**
615         * Parses the specified string representation of a language tag.
616         *
617         * @param s The string to parse. May be {@code null}.
618         *
619         * @return The language tag. {@code null} if the string was empty or
620         *         {@code null}.
621         *
622         * @throws LangTagException If the string has invalid language tag 
623         *                          syntax.
624         */
625        public static LangTag parse(final String s)
626                throws LangTagException {
627                
628                if (s == null || s.trim().isEmpty())
629                        return null;
630                        
631                final String[] subtags = s.split("-");
632                
633                int pos = 0;
634                
635                // Parse primary lang + ext lang subtags
636                String primaryLang = null;
637                List<String> extLangSubtags = new LinkedList<String>();
638                
639                if (isPrimaryLanguage(subtags[0]))
640                        primaryLang = subtags[pos++];
641                
642                // Multiple ext lang subtags possible
643                while (pos < subtags.length && isExtendedLanguageSubtag(subtags[pos]))
644                        extLangSubtags.add(subtags[pos++]);
645                
646                LangTag langTag = new LangTag(primaryLang, extLangSubtags.toArray(new String[]{}));
647                
648                
649                // Parse script
650                if (pos < subtags.length && isScript(subtags[pos]))
651                        langTag.setScript(subtags[pos++]);
652                                
653                // Parse region
654                if (pos < subtags.length && isRegion(subtags[pos]))
655                        langTag.setRegion(subtags[pos++]);
656                                
657                // Parse variants
658                List<String> variantSubtags = new LinkedList<String>();
659                        
660                while (pos < subtags.length && isVariant(subtags[pos]))
661                        variantSubtags.add(subtags[pos++]);
662                        
663                if (! variantSubtags.isEmpty())
664                        langTag.setVariants(variantSubtags.toArray(new String[]{}));
665                        
666                // Parse extensions, e.g. u-usercal
667                List<String> extSubtags = new LinkedList<String>();
668                
669                while (pos < subtags.length && isExtensionSingleton(subtags[pos])) {
670                        
671                        String singleton = subtags[pos++];
672                        
673                        if (pos == subtags.length)
674                                throw new LangTagException("Invalid extension subtag");
675                        
676                        extSubtags.add(singleton + "-" + subtags[pos++]);
677                }
678                        
679                if (! extSubtags.isEmpty())
680                        langTag.setExtensions(extSubtags.toArray(new String[]{}));
681                        
682                        
683                // Parse private use, e.g. x-abc
684                if (pos < subtags.length && subtags[pos].equals("x")) {
685                
686                        if (++pos == subtags.length)
687                                throw new LangTagException("Invalid private use subtag");
688                        
689                        langTag.setPrivateUse("x-" + subtags[pos++]);
690                }
691                
692                // End of tag?
693                if (pos < subtags.length)
694                        throw new LangTagException("Invalid language tag: Unexpected subtag");
695                
696                return langTag;
697        }
698}