001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 package org.apache.hadoop.util;
020
021 import java.io.PrintWriter;
022 import java.io.StringWriter;
023 import java.net.URI;
024 import java.net.URISyntaxException;
025 import java.text.DateFormat;
026 import java.text.DecimalFormat;
027 import java.text.NumberFormat;
028 import java.util.ArrayList;
029 import java.util.Arrays;
030 import java.util.Collection;
031 import java.util.Date;
032 import java.util.Iterator;
033 import java.util.List;
034 import java.util.Locale;
035 import java.util.StringTokenizer;
036
037 import org.apache.hadoop.classification.InterfaceAudience;
038 import org.apache.hadoop.classification.InterfaceStability;
039 import org.apache.hadoop.fs.Path;
040 import org.apache.hadoop.net.NetUtils;
041
042 /**
043 * General string utils
044 */
045 @InterfaceAudience.Private
046 @InterfaceStability.Unstable
047 public class StringUtils {
048
/**
 * Shared formatter that renders numbers with at most two fraction digits.
 * It is derived from the {@link Locale#ENGLISH} number instance and then
 * switched to the pattern "#.##".
 */
private static final DecimalFormat decimalFormat =
    (DecimalFormat) NumberFormat.getNumberInstance(Locale.ENGLISH);
static {
  decimalFormat.applyPattern("#.##");
}
055
056 /**
057 * Make a string representation of the exception.
058 * @param e The exception to stringify
059 * @return A string with exception name and call stack.
060 */
061 public static String stringifyException(Throwable e) {
062 StringWriter stm = new StringWriter();
063 PrintWriter wrt = new PrintWriter(stm);
064 e.printStackTrace(wrt);
065 wrt.close();
066 return stm.toString();
067 }
068
069 /**
070 * Given a full hostname, return the word upto the first dot.
071 * @param fullHostname the full hostname
072 * @return the hostname to the first dot
073 */
074 public static String simpleHostname(String fullHostname) {
075 int offset = fullHostname.indexOf('.');
076 if (offset != -1) {
077 return fullHostname.substring(0, offset);
078 }
079 return fullHostname;
080 }
081
082 private static DecimalFormat oneDecimal = new DecimalFormat("0.0");
083
084 /**
085 * Given an integer, return a string that is in an approximate, but human
086 * readable format.
087 * It uses the bases 'k', 'm', and 'g' for 1024, 1024**2, and 1024**3.
088 * @param number the number to format
089 * @return a human readable form of the integer
090 */
091 public static String humanReadableInt(long number) {
092 long absNumber = Math.abs(number);
093 double result = number;
094 String suffix = "";
095 if (absNumber < 1024) {
096 // since no division has occurred, don't format with a decimal point
097 return String.valueOf(number);
098 } else if (absNumber < 1024 * 1024) {
099 result = number / 1024.0;
100 suffix = "k";
101 } else if (absNumber < 1024 * 1024 * 1024) {
102 result = number / (1024.0 * 1024);
103 suffix = "m";
104 } else {
105 result = number / (1024.0 * 1024 * 1024);
106 suffix = "g";
107 }
108 return oneDecimal.format(result) + suffix;
109 }
110
111 /**
112 * Format a percentage for presentation to the user.
113 * @param done the percentage to format (0.0 to 1.0)
114 * @param digits the number of digits past the decimal point
115 * @return a string representation of the percentage
116 */
117 public static String formatPercent(double done, int digits) {
118 DecimalFormat percentFormat = new DecimalFormat("0.00%");
119 double scale = Math.pow(10.0, digits+2);
120 double rounded = Math.floor(done * scale);
121 percentFormat.setDecimalSeparatorAlwaysShown(false);
122 percentFormat.setMinimumFractionDigits(digits);
123 percentFormat.setMaximumFractionDigits(digits);
124 return percentFormat.format(rounded / scale);
125 }
126
127 /**
128 * Given an array of strings, return a comma-separated list of its elements.
129 * @param strs Array of strings
130 * @return Empty string if strs.length is 0, comma separated list of strings
131 * otherwise
132 */
133
134 public static String arrayToString(String[] strs) {
135 if (strs.length == 0) { return ""; }
136 StringBuilder sbuf = new StringBuilder();
137 sbuf.append(strs[0]);
138 for (int idx = 1; idx < strs.length; idx++) {
139 sbuf.append(",");
140 sbuf.append(strs[idx]);
141 }
142 return sbuf.toString();
143 }
144
145 /**
146 * Given an array of bytes it will convert the bytes to a hex string
147 * representation of the bytes
148 * @param bytes
149 * @param start start index, inclusively
150 * @param end end index, exclusively
151 * @return hex string representation of the byte array
152 */
153 public static String byteToHexString(byte[] bytes, int start, int end) {
154 if (bytes == null) {
155 throw new IllegalArgumentException("bytes == null");
156 }
157 StringBuilder s = new StringBuilder();
158 for(int i = start; i < end; i++) {
159 s.append(String.format("%02x", bytes[i]));
160 }
161 return s.toString();
162 }
163
164 /** Same as byteToHexString(bytes, 0, bytes.length). */
165 public static String byteToHexString(byte bytes[]) {
166 return byteToHexString(bytes, 0, bytes.length);
167 }
168
169 /**
170 * Given a hexstring this will return the byte array corresponding to the
171 * string
172 * @param hex the hex String array
173 * @return a byte array that is a hex string representation of the given
174 * string. The size of the byte array is therefore hex.length/2
175 */
176 public static byte[] hexStringToByte(String hex) {
177 byte[] bts = new byte[hex.length() / 2];
178 for (int i = 0; i < bts.length; i++) {
179 bts[i] = (byte) Integer.parseInt(hex.substring(2 * i, 2 * i + 2), 16);
180 }
181 return bts;
182 }
183 /**
184 *
185 * @param uris
186 */
187 public static String uriToString(URI[] uris){
188 if (uris == null) {
189 return null;
190 }
191 StringBuilder ret = new StringBuilder(uris[0].toString());
192 for(int i = 1; i < uris.length;i++){
193 ret.append(",");
194 ret.append(uris[i].toString());
195 }
196 return ret.toString();
197 }
198
199 /**
200 *
201 * @param str
202 */
203 public static URI[] stringToURI(String[] str){
204 if (str == null)
205 return null;
206 URI[] uris = new URI[str.length];
207 for (int i = 0; i < str.length;i++){
208 try{
209 uris[i] = new URI(str[i]);
210 }catch(URISyntaxException ur){
211 System.out.println("Exception in specified URI's " + StringUtils.stringifyException(ur));
212 //making sure its asssigned to null in case of an error
213 uris[i] = null;
214 }
215 }
216 return uris;
217 }
218
219 /**
220 *
221 * @param str
222 */
223 public static Path[] stringToPath(String[] str){
224 if (str == null) {
225 return null;
226 }
227 Path[] p = new Path[str.length];
228 for (int i = 0; i < str.length;i++){
229 p[i] = new Path(str[i]);
230 }
231 return p;
232 }
233 /**
234 *
235 * Given a finish and start time in long milliseconds, returns a
236 * String in the format Xhrs, Ymins, Z sec, for the time difference between two times.
237 * If finish time comes before start time then negative valeus of X, Y and Z wil return.
238 *
239 * @param finishTime finish time
240 * @param startTime start time
241 */
242 public static String formatTimeDiff(long finishTime, long startTime){
243 long timeDiff = finishTime - startTime;
244 return formatTime(timeDiff);
245 }
246
247 /**
248 *
249 * Given the time in long milliseconds, returns a
250 * String in the format Xhrs, Ymins, Z sec.
251 *
252 * @param timeDiff The time difference to format
253 */
254 public static String formatTime(long timeDiff){
255 StringBuilder buf = new StringBuilder();
256 long hours = timeDiff / (60*60*1000);
257 long rem = (timeDiff % (60*60*1000));
258 long minutes = rem / (60*1000);
259 rem = rem % (60*1000);
260 long seconds = rem / 1000;
261
262 if (hours != 0){
263 buf.append(hours);
264 buf.append("hrs, ");
265 }
266 if (minutes != 0){
267 buf.append(minutes);
268 buf.append("mins, ");
269 }
270 // return "0sec if no difference
271 buf.append(seconds);
272 buf.append("sec");
273 return buf.toString();
274 }
275 /**
276 * Formats time in ms and appends difference (finishTime - startTime)
277 * as returned by formatTimeDiff().
278 * If finish time is 0, empty string is returned, if start time is 0
279 * then difference is not appended to return value.
280 * @param dateFormat date format to use
281 * @param finishTime fnish time
282 * @param startTime start time
283 * @return formatted value.
284 */
285 public static String getFormattedTimeWithDiff(DateFormat dateFormat,
286 long finishTime, long startTime){
287 StringBuilder buf = new StringBuilder();
288 if (0 != finishTime) {
289 buf.append(dateFormat.format(new Date(finishTime)));
290 if (0 != startTime){
291 buf.append(" (" + formatTimeDiff(finishTime , startTime) + ")");
292 }
293 }
294 return buf.toString();
295 }
296
297 /**
298 * Returns an arraylist of strings.
299 * @param str the comma seperated string values
300 * @return the arraylist of the comma seperated string values
301 */
302 public static String[] getStrings(String str){
303 Collection<String> values = getStringCollection(str);
304 if(values.size() == 0) {
305 return null;
306 }
307 return values.toArray(new String[values.size()]);
308 }
309
310 /**
311 * Returns a collection of strings.
312 * @param str comma seperated string values
313 * @return an <code>ArrayList</code> of string values
314 */
315 public static Collection<String> getStringCollection(String str){
316 List<String> values = new ArrayList<String>();
317 if (str == null)
318 return values;
319 StringTokenizer tokenizer = new StringTokenizer (str,",");
320 values = new ArrayList<String>();
321 while (tokenizer.hasMoreTokens()) {
322 values.add(tokenizer.nextToken());
323 }
324 return values;
325 }
326
327 /**
328 * Splits a comma separated value <code>String</code>, trimming leading and trailing whitespace on each value.
329 * @param str a comma separated <String> with values
330 * @return a <code>Collection</code> of <code>String</code> values
331 */
332 public static Collection<String> getTrimmedStringCollection(String str){
333 return new ArrayList<String>(
334 Arrays.asList(getTrimmedStrings(str)));
335 }
336
337 /**
338 * Splits a comma separated value <code>String</code>, trimming leading and trailing whitespace on each value.
339 * @param str a comma separated <String> with values
340 * @return an array of <code>String</code> values
341 */
342 public static String[] getTrimmedStrings(String str){
343 if (null == str || "".equals(str.trim())) {
344 return emptyStringArray;
345 }
346
347 return str.trim().split("\\s*,\\s*");
348 }
349
350 final public static String[] emptyStringArray = {};
351 final public static char COMMA = ',';
352 final public static String COMMA_STR = ",";
353 final public static char ESCAPE_CHAR = '\\';
354
355 /**
356 * Split a string using the default separator
357 * @param str a string that may have escaped separator
358 * @return an array of strings
359 */
360 public static String[] split(String str) {
361 return split(str, ESCAPE_CHAR, COMMA);
362 }
363
364 /**
365 * Split a string using the given separator
366 * @param str a string that may have escaped separator
367 * @param escapeChar a char that be used to escape the separator
368 * @param separator a separator char
369 * @return an array of strings
370 */
371 public static String[] split(
372 String str, char escapeChar, char separator) {
373 if (str==null) {
374 return null;
375 }
376 ArrayList<String> strList = new ArrayList<String>();
377 StringBuilder split = new StringBuilder();
378 int index = 0;
379 while ((index = findNext(str, separator, escapeChar, index, split)) >= 0) {
380 ++index; // move over the separator for next search
381 strList.add(split.toString());
382 split.setLength(0); // reset the buffer
383 }
384 strList.add(split.toString());
385 // remove trailing empty split(s)
386 int last = strList.size(); // last split
387 while (--last>=0 && "".equals(strList.get(last))) {
388 strList.remove(last);
389 }
390 return strList.toArray(new String[strList.size()]);
391 }
392
393 /**
394 * Split a string using the given separator, with no escaping performed.
395 * @param str a string to be split. Note that this may not be null.
396 * @param separator a separator char
397 * @return an array of strings
398 */
399 public static String[] split(
400 String str, char separator) {
401 // String.split returns a single empty result for splitting the empty
402 // string.
403 if ("".equals(str)) {
404 return new String[]{""};
405 }
406 ArrayList<String> strList = new ArrayList<String>();
407 int startIndex = 0;
408 int nextIndex = 0;
409 while ((nextIndex = str.indexOf((int)separator, startIndex)) != -1) {
410 strList.add(str.substring(startIndex, nextIndex));
411 startIndex = nextIndex + 1;
412 }
413 strList.add(str.substring(startIndex));
414 // remove trailing empty split(s)
415 int last = strList.size(); // last split
416 while (--last>=0 && "".equals(strList.get(last))) {
417 strList.remove(last);
418 }
419 return strList.toArray(new String[strList.size()]);
420 }
421
422 /**
423 * Finds the first occurrence of the separator character ignoring the escaped
424 * separators starting from the index. Note the substring between the index
425 * and the position of the separator is passed.
426 * @param str the source string
427 * @param separator the character to find
428 * @param escapeChar character used to escape
429 * @param start from where to search
430 * @param split used to pass back the extracted string
431 */
432 public static int findNext(String str, char separator, char escapeChar,
433 int start, StringBuilder split) {
434 int numPreEscapes = 0;
435 for (int i = start; i < str.length(); i++) {
436 char curChar = str.charAt(i);
437 if (numPreEscapes == 0 && curChar == separator) { // separator
438 return i;
439 } else {
440 split.append(curChar);
441 numPreEscapes = (curChar == escapeChar)
442 ? (++numPreEscapes) % 2
443 : 0;
444 }
445 }
446 return -1;
447 }
448
449 /**
450 * Escape commas in the string using the default escape char
451 * @param str a string
452 * @return an escaped string
453 */
454 public static String escapeString(String str) {
455 return escapeString(str, ESCAPE_CHAR, COMMA);
456 }
457
458 /**
459 * Escape <code>charToEscape</code> in the string
460 * with the escape char <code>escapeChar</code>
461 *
462 * @param str string
463 * @param escapeChar escape char
464 * @param charToEscape the char to be escaped
465 * @return an escaped string
466 */
467 public static String escapeString(
468 String str, char escapeChar, char charToEscape) {
469 return escapeString(str, escapeChar, new char[] {charToEscape});
470 }
471
472 // check if the character array has the character
473 private static boolean hasChar(char[] chars, char character) {
474 for (char target : chars) {
475 if (character == target) {
476 return true;
477 }
478 }
479 return false;
480 }
481
482 /**
483 * @param charsToEscape array of characters to be escaped
484 */
485 public static String escapeString(String str, char escapeChar,
486 char[] charsToEscape) {
487 if (str == null) {
488 return null;
489 }
490 StringBuilder result = new StringBuilder();
491 for (int i=0; i<str.length(); i++) {
492 char curChar = str.charAt(i);
493 if (curChar == escapeChar || hasChar(charsToEscape, curChar)) {
494 // special char
495 result.append(escapeChar);
496 }
497 result.append(curChar);
498 }
499 return result.toString();
500 }
501
502 /**
503 * Unescape commas in the string using the default escape char
504 * @param str a string
505 * @return an unescaped string
506 */
507 public static String unEscapeString(String str) {
508 return unEscapeString(str, ESCAPE_CHAR, COMMA);
509 }
510
511 /**
512 * Unescape <code>charToEscape</code> in the string
513 * with the escape char <code>escapeChar</code>
514 *
515 * @param str string
516 * @param escapeChar escape char
517 * @param charToEscape the escaped char
518 * @return an unescaped string
519 */
520 public static String unEscapeString(
521 String str, char escapeChar, char charToEscape) {
522 return unEscapeString(str, escapeChar, new char[] {charToEscape});
523 }
524
525 /**
526 * @param charsToEscape array of characters to unescape
527 */
528 public static String unEscapeString(String str, char escapeChar,
529 char[] charsToEscape) {
530 if (str == null) {
531 return null;
532 }
533 StringBuilder result = new StringBuilder(str.length());
534 boolean hasPreEscape = false;
535 for (int i=0; i<str.length(); i++) {
536 char curChar = str.charAt(i);
537 if (hasPreEscape) {
538 if (curChar != escapeChar && !hasChar(charsToEscape, curChar)) {
539 // no special char
540 throw new IllegalArgumentException("Illegal escaped string " + str +
541 " unescaped " + escapeChar + " at " + (i-1));
542 }
543 // otherwise discard the escape char
544 result.append(curChar);
545 hasPreEscape = false;
546 } else {
547 if (hasChar(charsToEscape, curChar)) {
548 throw new IllegalArgumentException("Illegal escaped string " + str +
549 " unescaped " + curChar + " at " + i);
550 } else if (curChar == escapeChar) {
551 hasPreEscape = true;
552 } else {
553 result.append(curChar);
554 }
555 }
556 }
557 if (hasPreEscape ) {
558 throw new IllegalArgumentException("Illegal escaped string " + str +
559 ", not expecting " + escapeChar + " in the end." );
560 }
561 return result.toString();
562 }
563
564 /**
565 * Return a message for logging.
566 * @param prefix prefix keyword for the message
567 * @param msg content of the message
568 * @return a message for logging
569 */
570 private static String toStartupShutdownString(String prefix, String [] msg) {
571 StringBuilder b = new StringBuilder(prefix);
572 b.append("\n/************************************************************");
573 for(String s : msg)
574 b.append("\n" + prefix + s);
575 b.append("\n************************************************************/");
576 return b.toString();
577 }
578
579 /**
580 * Print a log message for starting up and shutting down
581 * @param clazz the class of the server
582 * @param args arguments
583 * @param LOG the target log object
584 */
585 public static void startupShutdownMessage(Class<?> clazz, String[] args,
586 final org.apache.commons.logging.Log LOG) {
587 final String hostname = NetUtils.getHostname();
588 final String classname = clazz.getSimpleName();
589 LOG.info(
590 toStartupShutdownString("STARTUP_MSG: ", new String[] {
591 "Starting " + classname,
592 " host = " + hostname,
593 " args = " + Arrays.asList(args),
594 " version = " + VersionInfo.getVersion(),
595 " classpath = " + System.getProperty("java.class.path"),
596 " build = " + VersionInfo.getUrl() + " -r "
597 + VersionInfo.getRevision()
598 + "; compiled by '" + VersionInfo.getUser()
599 + "' on " + VersionInfo.getDate()}
600 )
601 );
602
603 Runtime.getRuntime().addShutdownHook(new Thread() {
604 public void run() {
605 LOG.info(toStartupShutdownString("SHUTDOWN_MSG: ", new String[]{
606 "Shutting down " + classname + " at " + hostname}));
607 }
608 });
609 }
610
611 /**
612 * The traditional binary prefixes, kilo, mega, ..., exa,
613 * which can be represented by a 64-bit integer.
614 * TraditionalBinaryPrefix symbol are case insensitive.
615 */
616 public static enum TraditionalBinaryPrefix {
617 KILO(1024),
618 MEGA(KILO.value << 10),
619 GIGA(MEGA.value << 10),
620 TERA(GIGA.value << 10),
621 PETA(TERA.value << 10),
622 EXA(PETA.value << 10);
623
624 public final long value;
625 public final char symbol;
626
627 TraditionalBinaryPrefix(long value) {
628 this.value = value;
629 this.symbol = toString().charAt(0);
630 }
631
632 /**
633 * @return The TraditionalBinaryPrefix object corresponding to the symbol.
634 */
635 public static TraditionalBinaryPrefix valueOf(char symbol) {
636 symbol = Character.toUpperCase(symbol);
637 for(TraditionalBinaryPrefix prefix : TraditionalBinaryPrefix.values()) {
638 if (symbol == prefix.symbol) {
639 return prefix;
640 }
641 }
642 throw new IllegalArgumentException("Unknown symbol '" + symbol + "'");
643 }
644
645 /**
646 * Convert a string to long.
647 * The input string is first be trimmed
648 * and then it is parsed with traditional binary prefix.
649 *
650 * For example,
651 * "-1230k" will be converted to -1230 * 1024 = -1259520;
652 * "891g" will be converted to 891 * 1024^3 = 956703965184;
653 *
654 * @param s input string
655 * @return a long value represented by the input string.
656 */
657 public static long string2long(String s) {
658 s = s.trim();
659 final int lastpos = s.length() - 1;
660 final char lastchar = s.charAt(lastpos);
661 if (Character.isDigit(lastchar))
662 return Long.parseLong(s);
663 else {
664 long prefix;
665 try {
666 prefix = TraditionalBinaryPrefix.valueOf(lastchar).value;
667 } catch (IllegalArgumentException e) {
668 throw new IllegalArgumentException("Invalid size prefix '" + lastchar
669 + "' in '" + s
670 + "'. Allowed prefixes are k, m, g, t, p, e(case insensitive)");
671 }
672 long num = Long.parseLong(s.substring(0, lastpos));
673 if (num > (Long.MAX_VALUE/prefix) || num < (Long.MIN_VALUE/prefix)) {
674 throw new IllegalArgumentException(s + " does not fit in a Long");
675 }
676 return num * prefix;
677 }
678 }
679 }
680
681 /**
682 * Escapes HTML Special characters present in the string.
683 * @param string
684 * @return HTML Escaped String representation
685 */
686 public static String escapeHTML(String string) {
687 if(string == null) {
688 return null;
689 }
690 StringBuilder sb = new StringBuilder();
691 boolean lastCharacterWasSpace = false;
692 char[] chars = string.toCharArray();
693 for(char c : chars) {
694 if(c == ' ') {
695 if(lastCharacterWasSpace){
696 lastCharacterWasSpace = false;
697 sb.append(" ");
698 }else {
699 lastCharacterWasSpace=true;
700 sb.append(" ");
701 }
702 }else {
703 lastCharacterWasSpace = false;
704 switch(c) {
705 case '<': sb.append("<"); break;
706 case '>': sb.append(">"); break;
707 case '&': sb.append("&"); break;
708 case '"': sb.append("""); break;
709 default : sb.append(c);break;
710 }
711 }
712 }
713
714 return sb.toString();
715 }
716
717 /**
718 * Return an abbreviated English-language desc of the byte length
719 */
720 public static String byteDesc(long len) {
721 double val = 0.0;
722 String ending = "";
723 if (len < 1024 * 1024) {
724 val = (1.0 * len) / 1024;
725 ending = " KB";
726 } else if (len < 1024 * 1024 * 1024) {
727 val = (1.0 * len) / (1024 * 1024);
728 ending = " MB";
729 } else if (len < 1024L * 1024 * 1024 * 1024) {
730 val = (1.0 * len) / (1024 * 1024 * 1024);
731 ending = " GB";
732 } else if (len < 1024L * 1024 * 1024 * 1024 * 1024) {
733 val = (1.0 * len) / (1024L * 1024 * 1024 * 1024);
734 ending = " TB";
735 } else {
736 val = (1.0 * len) / (1024L * 1024 * 1024 * 1024 * 1024);
737 ending = " PB";
738 }
739 return limitDecimalTo2(val) + ending;
740 }
741
742 public static synchronized String limitDecimalTo2(double d) {
743 return decimalFormat.format(d);
744 }
745
746 /**
747 * Concatenates strings, using a separator.
748 *
749 * @param separator Separator to join with.
750 * @param strings Strings to join.
751 */
752 public static String join(CharSequence separator, Iterable<?> strings) {
753 Iterator<?> i = strings.iterator();
754 if (!i.hasNext()) {
755 return "";
756 }
757 StringBuilder sb = new StringBuilder(i.next().toString());
758 while (i.hasNext()) {
759 sb.append(separator);
760 sb.append(i.next().toString());
761 }
762 return sb.toString();
763 }
764
765 /**
766 * Convert SOME_STUFF to SomeStuff
767 *
768 * @param s input string
769 * @return camelized string
770 */
771 public static String camelize(String s) {
772 StringBuilder sb = new StringBuilder();
773 String[] words = split(s.toLowerCase(Locale.US), ESCAPE_CHAR, '_');
774
775 for (String word : words)
776 sb.append(org.apache.commons.lang.StringUtils.capitalize(word));
777
778 return sb.toString();
779 }
780 }