001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.io; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkPositionIndexes; 020import static com.google.common.base.Preconditions.checkState; 021import static com.google.common.io.GwtWorkarounds.asCharInput; 022import static com.google.common.io.GwtWorkarounds.asCharOutput; 023import static com.google.common.io.GwtWorkarounds.asInputStream; 024import static com.google.common.io.GwtWorkarounds.asOutputStream; 025import static com.google.common.io.GwtWorkarounds.stringBuilderOutput; 026import static com.google.common.math.IntMath.divide; 027import static com.google.common.math.IntMath.log2; 028import static java.math.RoundingMode.CEILING; 029import static java.math.RoundingMode.FLOOR; 030import static java.math.RoundingMode.UNNECESSARY; 031 032import com.google.common.annotations.Beta; 033import com.google.common.annotations.GwtCompatible; 034import com.google.common.annotations.GwtIncompatible; 035import com.google.common.base.Ascii; 036import com.google.common.base.CharMatcher; 037import com.google.common.io.GwtWorkarounds.ByteInput; 038import com.google.common.io.GwtWorkarounds.ByteOutput; 039import com.google.common.io.GwtWorkarounds.CharInput; 040import com.google.common.io.GwtWorkarounds.CharOutput; 041 042import java.io.IOException; 043import java.io.InputStream; 044import java.io.OutputStream; 045import java.io.Reader; 046import java.io.Writer; 047import java.util.Arrays; 048 049import javax.annotation.CheckReturnValue; 050import javax.annotation.Nullable; 051 052/** 053 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII 054 * strings. This class includes several constants for encoding schemes specified by <a 055 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression: 056 * <pre> {@code 057 * 058 * BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII)) 059 * }</pre> 060 * returns the string {@code "MZXW6==="}, and <pre> {@code 061 * 062 * byte[] decoded = BaseEncoding.base32().decode("MZXW6==="); 063 * }</pre> 064 * 065 * ...returns the ASCII bytes of the string {@code "foo"}. 066 * 067 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with 068 * RFC 4648. Decoding rejects characters in the wrong case, though padding is optional. 069 * To modify encoding and decoding behavior, use configuration methods to obtain a new encoding 070 * with modified behavior: <pre> {@code 071 * 072 * BaseEncoding.base16().lowerCase().decode("deadbeef"); 073 * }</pre> 074 * 075 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect 076 * on the receiving instance; you must store and use the new encoding instance it returns, instead. 077 * <pre> {@code 078 * 079 * // Do NOT do this 080 * BaseEncoding hex = BaseEncoding.base16(); 081 * hex.lowerCase(); // does nothing! 082 * return hex.decode("deadbeef"); // throws an IllegalArgumentException 083 * }</pre> 084 * 085 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to 086 * {@code x}, but the reverse does not necessarily hold. 087 * 088 * <p> 089 * <table> 090 * <tr> 091 * <th>Encoding 092 * <th>Alphabet 093 * <th>{@code char:byte} ratio 094 * <th>Default padding 095 * <th>Comments 096 * <tr> 097 * <td>{@link #base16()} 098 * <td>0-9 A-F 099 * <td>2.00 100 * <td>N/A 101 * <td>Traditional hexadecimal. Defaults to upper case. 102 * <tr> 103 * <td>{@link #base32()} 104 * <td>A-Z 2-7 105 * <td>1.60 106 * <td>= 107 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case. 108 * <tr> 109 * <td>{@link #base32Hex()} 110 * <td>0-9 A-V 111 * <td>1.60 112 * <td>= 113 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case. 114 * <tr> 115 * <td>{@link #base64()} 116 * <td>A-Z a-z 0-9 + / 117 * <td>1.33 118 * <td>= 119 * <td> 120 * <tr> 121 * <td>{@link #base64Url()} 122 * <td>A-Z a-z 0-9 - _ 123 * <td>1.33 124 * <td>= 125 * <td>Safe to use as filenames, or to pass in URLs without escaping 126 * </table> 127 * 128 * <p> 129 * All instances of this class are immutable, so they may be stored safely as static constants. 130 * 131 * @author Louis Wasserman 132 * @since 14.0 133 */ 134@Beta 135@GwtCompatible(emulated = true) 136public abstract class BaseEncoding { 137 // TODO(user): consider adding encodeTo(Appendable, byte[], [int, int]) 138 139 BaseEncoding() {} 140 141 /** 142 * Encodes the specified byte array, and returns the encoded {@code String}. 143 */ 144 public String encode(byte[] bytes) { 145 return encode(checkNotNull(bytes), 0, bytes.length); 146 } 147 148 /** 149 * Encodes the specified range of the specified byte array, and returns the encoded 150 * {@code String}. 151 */ 152 public final String encode(byte[] bytes, int off, int len) { 153 checkNotNull(bytes); 154 checkPositionIndexes(off, off + len, bytes.length); 155 CharOutput result = stringBuilderOutput(maxEncodedSize(len)); 156 ByteOutput byteOutput = encodingStream(result); 157 try { 158 for (int i = 0; i < len; i++) { 159 byteOutput.write(bytes[off + i]); 160 } 161 byteOutput.close(); 162 } catch (IOException impossible) { 163 throw new AssertionError("impossible"); 164 } 165 return result.toString(); 166 } 167 168 /** 169 * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified 170 * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing 171 * {@code Writer}. 172 */ 173 @GwtIncompatible("Writer,OutputStream") 174 public final OutputStream encodingStream(Writer writer) { 175 return asOutputStream(encodingStream(asCharOutput(writer))); 176 } 177 178 /** 179 * Returns an {@code OutputSupplier} that supplies streams that encode bytes using this encoding 180 * into writers from the specified {@code OutputSupplier}. 181 */ 182 @GwtIncompatible("Writer,OutputStream") 183 public final OutputSupplier<OutputStream> encodingStream( 184 final OutputSupplier<Writer> writerSupplier) { 185 checkNotNull(writerSupplier); 186 return new OutputSupplier<OutputStream>() { 187 @Override 188 public OutputStream getOutput() throws IOException { 189 return encodingStream(writerSupplier.getOutput()); 190 } 191 }; 192 } 193 194 // TODO(user): document the extent of leniency, probably after adding ignore(CharMatcher) 195 196 private static byte[] extract(byte[] result, int length) { 197 if (length == result.length) { 198 return result; 199 } else { 200 byte[] trunc = new byte[length]; 201 System.arraycopy(result, 0, trunc, 0, length); 202 return trunc; 203 } 204 } 205 206 /** 207 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. 208 * This is the inverse operation to {@link #encode(byte[])}. 209 * 210 * @throws IllegalArgumentException if the input is not a valid encoded string according to this 211 * encoding. 212 */ 213 public final byte[] decode(CharSequence chars) { 214 ByteInput decodedInput = decodingStream(asCharInput(chars)); 215 byte[] tmp = new byte[maxDecodedSize(chars.length())]; 216 int index = 0; 217 try { 218 for (int i = decodedInput.read(); i != -1; i = decodedInput.read()) { 219 tmp[index++] = (byte) i; 220 } 221 } catch (IOException badInput) { 222 throw new IllegalArgumentException(badInput); 223 } 224 return extract(tmp, index); 225 } 226 227 /** 228 * Returns an {@code InputStream} that decodes base-encoded input from the specified 229 * {@code Reader}. 230 */ 231 @GwtIncompatible("Reader,InputStream") 232 public final InputStream decodingStream(Reader reader) { 233 return asInputStream(decodingStream(asCharInput(reader))); 234 } 235 236 /** 237 * Returns an {@code InputSupplier} that supplies input streams that decode base-encoded input 238 * from readers from the specified supplier. 239 */ 240 @GwtIncompatible("Reader,InputStream") 241 public InputSupplier<InputStream> decodingStream(final InputSupplier<Reader> readerSupplier) { 242 checkNotNull(readerSupplier); 243 return new InputSupplier<InputStream>() { 244 @Override 245 public InputStream getInput() throws IOException { 246 return decodingStream(readerSupplier.getInput()); 247 } 248 }; 249 } 250 251 // Implementations for encoding/decoding 252 253 abstract int maxEncodedSize(int bytes); 254 255 abstract ByteOutput encodingStream(CharOutput charOutput); 256 257 abstract int maxDecodedSize(int chars); 258 259 abstract ByteInput decodingStream(CharInput charInput); 260 261 // Modified encoding generators 262 263 /** 264 * Returns an encoding that behaves equivalently to this encoding, but omits any padding 265 * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648 266 * section 3.2</a>, Padding of Encoded Data. 267 */ 268 @CheckReturnValue 269 public abstract BaseEncoding omitPadding(); 270 271 /** 272 * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character 273 * for padding. 274 * 275 * @throws IllegalArgumentException if this padding character is already used in the alphabet or a 276 * separator 277 */ 278 @CheckReturnValue 279 public abstract BaseEncoding withPadChar(char padChar); 280 281 /** 282 * Returns an encoding that behaves equivalently to this encoding, but adds a separator string 283 * after every {@code n} characters. Any occurrences of any characters that occur in the separator 284 * are skipped over in decoding. 285 * 286 * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator 287 * string, or if {@code n <= 0} 288 * @throws UnsupportedOperationException if this encoding already uses a separator 289 */ 290 @CheckReturnValue 291 public abstract BaseEncoding withSeparator(String separator, int n); 292 293 /** 294 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with 295 * uppercase letters. Padding and separator characters remain in their original case. 296 * 297 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and 298 * lower-case characters 299 */ 300 @CheckReturnValue 301 public abstract BaseEncoding upperCase(); 302 303 /** 304 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with 305 * lowercase letters. Padding and separator characters remain in their original case. 306 * 307 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and 308 * lower-case characters 309 */ 310 @CheckReturnValue 311 public abstract BaseEncoding lowerCase(); 312 313 private static final BaseEncoding BASE64 = new StandardBaseEncoding( 314 "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '='); 315 316 /** 317 * The "base64" base encoding specified by <a 318 * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding. 319 * (This is the same as the base 64 encoding from <a 320 * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.) 321 * 322 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 323 * omitted} or {@linkplain #withPadChar(char) replaced}. 324 * 325 * <p>No line feeds are added by default, as per <a 326 * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in 327 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 328 */ 329 public static BaseEncoding base64() { 330 return BASE64; 331 } 332 333 private static final BaseEncoding BASE64_URL = new StandardBaseEncoding( 334 "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '='); 335 336 /** 337 * The "base64url" encoding specified by <a 338 * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding 339 * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." 340 * (This is the same as the base 64 encoding with URL and filename safe alphabet from <a 341 * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.) 342 * 343 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 344 * omitted} or {@linkplain #withPadChar(char) replaced}. 345 * 346 * <p>No line feeds are added by default, as per <a 347 * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in 348 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 349 */ 350 public static BaseEncoding base64Url() { 351 return BASE64_URL; 352 } 353 354 private static final BaseEncoding BASE32 = 355 new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '='); 356 357 /** 358 * The "base32" encoding specified by <a 359 * href="http://tools.ietf.org/html/rfc4648#section-6">RFC 4648 section 6</a>, Base 32 Encoding. 360 * (This is the same as the base 32 encoding from <a 361 * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.) 362 * 363 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 364 * omitted} or {@linkplain #withPadChar(char) replaced}. 365 * 366 * <p>No line feeds are added by default, as per <a 367 * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in 368 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 369 */ 370 public static BaseEncoding base32() { 371 return BASE32; 372 } 373 374 private static final BaseEncoding BASE32_HEX = 375 new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '='); 376 377 /** 378 * The "base32hex" encoding specified by <a 379 * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding 380 * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548. 381 * 382 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 383 * omitted} or {@linkplain #withPadChar(char) replaced}. 384 * 385 * <p>No line feeds are added by default, as per <a 386 * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in 387 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 388 */ 389 public static BaseEncoding base32Hex() { 390 return BASE32_HEX; 391 } 392 393 private static final BaseEncoding BASE16 = 394 new StandardBaseEncoding("base16()", "0123456789ABCDEF", null); 395 396 /** 397 * The "base16" encoding specified by <a 398 * href="http://tools.ietf.org/html/rfc4648#section-8">RFC 4648 section 8</a>, Base 16 Encoding. 399 * (This is the same as the base 16 encoding from <a 400 * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as 401 * "hexadecimal" format. 402 * 403 * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and 404 * {@link #omitPadding()} have no effect. 405 * 406 * <p>No line feeds are added by default, as per <a 407 * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in 408 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 409 */ 410 public static BaseEncoding base16() { 411 return BASE16; 412 } 413 414 private static final class Alphabet extends CharMatcher { 415 private final String name; 416 // this is meant to be immutable -- don't modify it! 417 private final char[] chars; 418 final int mask; 419 final int bitsPerChar; 420 final int charsPerChunk; 421 final int bytesPerChunk; 422 private final byte[] decodabet; 423 private final boolean[] validPadding; 424 425 Alphabet(String name, char[] chars) { 426 this.name = checkNotNull(name); 427 this.chars = checkNotNull(chars); 428 try { 429 this.bitsPerChar = log2(chars.length, UNNECESSARY); 430 } catch (ArithmeticException e) { 431 throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e); 432 } 433 434 /* 435 * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes 436 * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8. 437 */ 438 int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar)); 439 this.charsPerChunk = 8 / gcd; 440 this.bytesPerChunk = bitsPerChar / gcd; 441 442 this.mask = chars.length - 1; 443 444 byte[] decodabet = new byte[Ascii.MAX + 1]; 445 Arrays.fill(decodabet, (byte) -1); 446 for (int i = 0; i < chars.length; i++) { 447 char c = chars[i]; 448 checkArgument(CharMatcher.ASCII.matches(c), "Non-ASCII character: %s", c); 449 checkArgument(decodabet[c] == -1, "Duplicate character: %s", c); 450 decodabet[c] = (byte) i; 451 } 452 this.decodabet = decodabet; 453 454 boolean[] validPadding = new boolean[charsPerChunk]; 455 for (int i = 0; i < bytesPerChunk; i++) { 456 validPadding[divide(i * 8, bitsPerChar, CEILING)] = true; 457 } 458 this.validPadding = validPadding; 459 } 460 461 char encode(int bits) { 462 return chars[bits]; 463 } 464 465 boolean isValidPaddingStartPosition(int index) { 466 return validPadding[index % charsPerChunk]; 467 } 468 469 int decode(char ch) throws IOException { 470 if (ch > Ascii.MAX || decodabet[ch] == -1) { 471 throw new IOException("Unrecognized character: " + ch); 472 } 473 return decodabet[ch]; 474 } 475 476 private boolean hasLowerCase() { 477 for (char c : chars) { 478 if (Ascii.isLowerCase(c)) { 479 return true; 480 } 481 } 482 return false; 483 } 484 485 private boolean hasUpperCase() { 486 for (char c : chars) { 487 if (Ascii.isUpperCase(c)) { 488 return true; 489 } 490 } 491 return false; 492 } 493 494 Alphabet upperCase() { 495 if (!hasLowerCase()) { 496 return this; 497 } else { 498 checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet"); 499 char[] upperCased = new char[chars.length]; 500 for (int i = 0; i < chars.length; i++) { 501 upperCased[i] = Ascii.toUpperCase(chars[i]); 502 } 503 return new Alphabet(name + ".upperCase()", upperCased); 504 } 505 } 506 507 Alphabet lowerCase() { 508 if (!hasUpperCase()) { 509 return this; 510 } else { 511 checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet"); 512 char[] lowerCased = new char[chars.length]; 513 for (int i = 0; i < chars.length; i++) { 514 lowerCased[i] = Ascii.toLowerCase(chars[i]); 515 } 516 return new Alphabet(name + ".lowerCase()", lowerCased); 517 } 518 } 519 520 @Override 521 public boolean matches(char c) { 522 return CharMatcher.ASCII.matches(c) && decodabet[c] != -1; 523 } 524 525 @Override 526 public String toString() { 527 return name; 528 } 529 } 530 531 static final class StandardBaseEncoding extends BaseEncoding { 532 // TODO(user): provide a useful toString 533 private final Alphabet alphabet; 534 535 @Nullable 536 private final Character paddingChar; 537 538 StandardBaseEncoding(String name, String alphabetChars, @Nullable Character paddingChar) { 539 this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar); 540 } 541 542 StandardBaseEncoding(Alphabet alphabet, Character paddingChar) { 543 this.alphabet = checkNotNull(alphabet); 544 checkArgument(paddingChar == null || !alphabet.matches(paddingChar), 545 "Padding character %s was already in alphabet", paddingChar); 546 this.paddingChar = paddingChar; 547 } 548 549 private CharMatcher paddingMatcher() { 550 return (paddingChar == null) ? CharMatcher.NONE : CharMatcher.is(paddingChar.charValue()); 551 } 552 553 @Override 554 int maxEncodedSize(int bytes) { 555 return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING); 556 } 557 558 @Override 559 ByteOutput encodingStream(final CharOutput out) { 560 checkNotNull(out); 561 return new ByteOutput() { 562 int bitBuffer = 0; 563 int bitBufferLength = 0; 564 int writtenChars = 0; 565 566 @Override 567 public void write(byte b) throws IOException { 568 bitBuffer <<= 8; 569 bitBuffer |= b & 0xFF; 570 bitBufferLength += 8; 571 while (bitBufferLength >= alphabet.bitsPerChar) { 572 int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) 573 & alphabet.mask; 574 out.write(alphabet.encode(charIndex)); 575 writtenChars++; 576 bitBufferLength -= alphabet.bitsPerChar; 577 } 578 } 579 580 @Override 581 public void flush() throws IOException { 582 out.flush(); 583 } 584 585 @Override 586 public void close() throws IOException { 587 if (bitBufferLength > 0) { 588 int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) 589 & alphabet.mask; 590 out.write(alphabet.encode(charIndex)); 591 writtenChars++; 592 if (paddingChar != null) { 593 while (writtenChars % alphabet.charsPerChunk != 0) { 594 out.write(paddingChar.charValue()); 595 writtenChars++; 596 } 597 } 598 } 599 out.close(); 600 } 601 }; 602 } 603 604 @Override 605 int maxDecodedSize(int chars) { 606 return alphabet.bytesPerChunk * divide(chars, alphabet.charsPerChunk, CEILING); 607 } 608 609 @Override 610 ByteInput decodingStream(final CharInput reader) { 611 checkNotNull(reader); 612 return new ByteInput() { 613 int bitBuffer = 0; 614 int bitBufferLength = 0; 615 int readChars = 0; 616 boolean hitPadding = false; 617 final CharMatcher paddingMatcher = paddingMatcher(); 618 619 @Override 620 public int read() throws IOException { 621 while (true) { 622 int readChar = reader.read(); 623 if (readChar == -1) { 624 if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) { 625 throw new IOException("Invalid input length " + readChars); 626 } 627 return -1; 628 } 629 readChars++; 630 char ch = (char) readChar; 631 if (paddingMatcher.matches(ch)) { 632 if (!hitPadding 633 && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) { 634 throw new IOException("Padding cannot start at index " + readChars); 635 } 636 hitPadding = true; 637 } else if (hitPadding) { 638 throw new IOException( 639 "Expected padding character but found '" + ch + "' at index " + readChars); 640 } else { 641 bitBuffer <<= alphabet.bitsPerChar; 642 bitBuffer |= alphabet.decode(ch); 643 bitBufferLength += alphabet.bitsPerChar; 644 645 if (bitBufferLength >= 8) { 646 bitBufferLength -= 8; 647 return (bitBuffer >> bitBufferLength) & 0xFF; 648 } 649 } 650 } 651 } 652 653 @Override 654 public void close() throws IOException { 655 reader.close(); 656 } 657 }; 658 } 659 660 @Override 661 public BaseEncoding omitPadding() { 662 return (paddingChar == null) ? this : new StandardBaseEncoding(alphabet, null); 663 } 664 665 @Override 666 public BaseEncoding withPadChar(char padChar) { 667 if (8 % alphabet.bitsPerChar == 0 || 668 (paddingChar != null && paddingChar.charValue() == padChar)) { 669 return this; 670 } else { 671 return new StandardBaseEncoding(alphabet, padChar); 672 } 673 } 674 675 @Override 676 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 677 checkNotNull(separator); 678 checkArgument(paddingMatcher().or(alphabet).matchesNoneOf(separator), 679 "Separator cannot contain alphabet or padding characters"); 680 return new SeparatedBaseEncoding(this, separator, afterEveryChars); 681 } 682 683 private transient BaseEncoding upperCase; 684 private transient BaseEncoding lowerCase; 685 686 @Override 687 public BaseEncoding upperCase() { 688 BaseEncoding result = upperCase; 689 if (result == null) { 690 Alphabet upper = alphabet.upperCase(); 691 result = upperCase = 692 (upper == alphabet) ? this : new StandardBaseEncoding(upper, paddingChar); 693 } 694 return result; 695 } 696 697 @Override 698 public BaseEncoding lowerCase() { 699 BaseEncoding result = lowerCase; 700 if (result == null) { 701 Alphabet lower = alphabet.lowerCase(); 702 result = lowerCase = 703 (lower == alphabet) ? this : new StandardBaseEncoding(lower, paddingChar); 704 } 705 return result; 706 } 707 708 @Override 709 public String toString() { 710 StringBuilder builder = new StringBuilder("BaseEncoding."); 711 builder.append(alphabet.toString()); 712 if (8 % alphabet.bitsPerChar != 0) { 713 if (paddingChar == null) { 714 builder.append(".omitPadding()"); 715 } else { 716 builder.append(".withPadChar(").append(paddingChar).append(')'); 717 } 718 } 719 return builder.toString(); 720 } 721 } 722 723 static CharInput ignoringInput(final CharInput delegate, final CharMatcher toIgnore) { 724 checkNotNull(delegate); 725 checkNotNull(toIgnore); 726 return new CharInput() { 727 @Override 728 public int read() throws IOException { 729 int readChar; 730 do { 731 readChar = delegate.read(); 732 } while (readChar != -1 && toIgnore.matches((char) readChar)); 733 return readChar; 734 } 735 736 @Override 737 public void close() throws IOException { 738 delegate.close(); 739 } 740 }; 741 } 742 743 static CharOutput separatingOutput( 744 final CharOutput delegate, final String separator, final int afterEveryChars) { 745 checkNotNull(delegate); 746 checkNotNull(separator); 747 checkArgument(afterEveryChars > 0); 748 return new CharOutput() { 749 int charsUntilSeparator = afterEveryChars; 750 751 @Override 752 public void write(char c) throws IOException { 753 if (charsUntilSeparator == 0) { 754 for (int i = 0; i < separator.length(); i++) { 755 delegate.write(separator.charAt(i)); 756 } 757 charsUntilSeparator = afterEveryChars; 758 } 759 delegate.write(c); 760 charsUntilSeparator--; 761 } 762 763 @Override 764 public void flush() throws IOException { 765 delegate.flush(); 766 } 767 768 @Override 769 public void close() throws IOException { 770 delegate.close(); 771 } 772 }; 773 } 774 775 static final class SeparatedBaseEncoding extends BaseEncoding { 776 private final BaseEncoding delegate; 777 private final String separator; 778 private final int afterEveryChars; 779 private final CharMatcher separatorChars; 780 781 SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) { 782 this.delegate = checkNotNull(delegate); 783 this.separator = checkNotNull(separator); 784 this.afterEveryChars = afterEveryChars; 785 checkArgument( 786 afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars); 787 this.separatorChars = CharMatcher.anyOf(separator).precomputed(); 788 } 789 790 @Override 791 int maxEncodedSize(int bytes) { 792 int unseparatedSize = delegate.maxEncodedSize(bytes); 793 return unseparatedSize + separator.length() 794 * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR); 795 } 796 797 @Override 798 ByteOutput encodingStream(final CharOutput output) { 799 return delegate.encodingStream(separatingOutput(output, separator, afterEveryChars)); 800 } 801 802 @Override 803 int maxDecodedSize(int chars) { 804 return delegate.maxDecodedSize(chars); 805 } 806 807 @Override 808 ByteInput decodingStream(final CharInput input) { 809 return delegate.decodingStream(ignoringInput(input, separatorChars)); 810 } 811 812 @Override 813 public BaseEncoding omitPadding() { 814 return delegate.omitPadding().withSeparator(separator, afterEveryChars); 815 } 816 817 @Override 818 public BaseEncoding withPadChar(char padChar) { 819 return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars); 820 } 821 822 @Override 823 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 824 throw new UnsupportedOperationException("Already have a separator"); 825 } 826 827 @Override 828 public BaseEncoding upperCase() { 829 return delegate.upperCase().withSeparator(separator, afterEveryChars); 830 } 831 832 @Override 833 public BaseEncoding lowerCase() { 834 return delegate.lowerCase().withSeparator(separator, afterEveryChars); 835 } 836 837 @Override 838 public String toString() { 839 return delegate.toString() + 840 ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")"; 841 } 842 } 843}