001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * https://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019 020package org.apache.commons.csv; 021 022import static org.apache.commons.io.IOUtils.EOF; 023 024import java.io.File; 025import java.io.IOException; 026import java.io.InputStream; 027import java.io.OutputStream; 028import java.io.Reader; 029import java.io.Serializable; 030import java.io.StringWriter; 031import java.io.Writer; 032import java.nio.charset.Charset; 033import java.nio.file.Files; 034import java.nio.file.Path; 035import java.sql.ResultSet; 036import java.sql.ResultSetMetaData; 037import java.sql.SQLException; 038import java.util.Arrays; 039import java.util.HashSet; 040import java.util.Objects; 041import java.util.Set; 042import java.util.function.Supplier; 043 044import org.apache.commons.codec.binary.Base64OutputStream; 045import org.apache.commons.io.IOUtils; 046import org.apache.commons.io.function.Uncheck; 047import org.apache.commons.io.output.AppendableOutputStream; 048 049/** 050 * Specifies the format of a CSV file for parsing and writing. 
051 * 052 * <h2>Using predefined formats</h2> 053 * 054 * <p> 055 * You can use one of the predefined formats: 056 * </p> 057 * 058 * <ul> 059 * <li>{@link #DEFAULT}</li> 060 * <li>{@link #EXCEL}</li> 061 * <li>{@link #INFORMIX_UNLOAD}</li> 062 * <li>{@link #INFORMIX_UNLOAD_CSV}</li> 063 * <li>{@link #MONGODB_CSV}</li> 064 * <li>{@link #MONGODB_TSV}</li> 065 * <li>{@link #MYSQL}</li> 066 * <li>{@link #ORACLE}</li> 067 * <li>{@link #POSTGRESQL_CSV}</li> 068 * <li>{@link #POSTGRESQL_TEXT}</li> 069 * <li>{@link #RFC4180}</li> 070 * <li>{@link #TDF}</li> 071 * </ul> 072 * 073 * <p> 074 * For example: 075 * </p> 076 * 077 * <pre> 078 * CSVParser parser = CSVFormat.EXCEL.parse(reader); 079 * </pre> 080 * 081 * <p> 082 * The {@link CSVParser} provides static methods to parse other input types, for example: 083 * </p> 084 * 085 * <pre> 086 * CSVParser parser = CSVParser.parse(file, StandardCharsets.US_ASCII, CSVFormat.EXCEL); 087 * </pre> 088 * 089 * <h2>Defining formats</h2> 090 * 091 * <p> 092 * You can extend a format by calling the {@code set} methods. For example: 093 * </p> 094 * 095 * <pre>{@code 096 * CSVFormat.EXCEL.builder().setNullString("N/A").setIgnoreSurroundingSpaces(true).get(); 097 * }</pre> 098 * 099 * <h2>Defining column names</h2> 100 * 101 * <p> 102 * To define the column names you want to use to access records, write: 103 * </p> 104 * 105 * <pre>{@code 106 * CSVFormat.EXCEL.builder().setHeader("Col1", "Col2", "Col3").get(); 107 * }</pre> 108 * 109 * <p> 110 * Calling {@link Builder#setHeader(String...)} lets you use the given names to address values in a {@link CSVRecord}, and assumes that your CSV source does not 111 * contain a first record that also defines column names. 112 * 113 * If it does, then you are overriding this metadata with your names and you should skip the first record by calling 114 * {@link Builder#setSkipHeaderRecord(boolean)} with {@code true}. 
115 * </p> 116 * 117 * <h2>Parsing</h2> 118 * 119 * <p> 120 * You can use a format directly to parse a reader. For example, to parse an Excel file with columns header, write: 121 * </p> 122 * 123 * <pre>{@code 124 * Reader in = ...; 125 * CSVFormat.EXCEL.builder().setHeader("Col1", "Col2", "Col3").get().parse(in); 126 * }</pre> 127 * 128 * <p> 129 * For other input types, like resources, files, and URLs, use the static methods on {@link CSVParser}. 130 * </p> 131 * 132 * <h2>Referencing columns safely</h2> 133 * 134 * <p> 135 * If your source contains a header record, you can simplify your code and safely reference columns, by using {@link Builder#setHeader(String...)} with no 136 * arguments: 137 * </p> 138 * 139 * <pre> 140 * CSVFormat.EXCEL.builder().setHeader().get(); 141 * </pre> 142 * 143 * <p> 144 * This causes the parser to read the first record and use its values as column names. 145 * 146 * Then, call one of the {@link CSVRecord} get method that takes a String column name argument: 147 * </p> 148 * 149 * <pre>{@code 150 * String value = record.get("Col1"); 151 * }</pre> 152 * 153 * <p> 154 * This makes your code impervious to changes in column order in the CSV file. 155 * </p> 156 * 157 * <h2>Serialization</h2> 158 * <p> 159 * This class implements the {@link Serializable} interface with the following caveats: 160 * </p> 161 * <ul> 162 * <li>This class will no longer implement Serializable in 2.0.</li> 163 * <li>Serialization is not supported from one version to the next.</li> 164 * </ul> 165 * <p> 166 * The {@code serialVersionUID} values are: 167 * </p> 168 * <ul> 169 * <li>Version 1.10.0: {@code 2L}</li> 170 * <li>Version 1.9.0 through 1.0: {@code 1L}</li> 171 * </ul> 172 * 173 * <h2>Notes</h2> 174 * <p> 175 * This class is immutable. 176 * </p> 177 * <p> 178 * Not all settings are used for both parsing and writing. 179 * </p> 180 */ 181public final class CSVFormat implements Serializable { 182 183 /** 184 * Builds CSVFormat instances. 
185 * 186 * @since 1.9.0 187 */ 188 public static class Builder implements Supplier<CSVFormat> { 189 190 /** 191 * Creates a new default builder. 192 * 193 * @return a copy of the builder 194 */ 195 public static Builder create() { 196 // @formatter:off 197 return new Builder() 198 .setDelimiter(Constants.COMMA) 199 .setRecordSeparator(Constants.CRLF) 200 .setQuote(Constants.DOUBLE_QUOTE_CHAR) 201 .setIgnoreEmptyLines(true) 202 .setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL); 203 // @formatter:on 204 } 205 206 /** 207 * Creates a new builder for the given format. 208 * 209 * @param csvFormat the source format. 210 * @return a copy of the builder 211 */ 212 public static Builder create(final CSVFormat csvFormat) { 213 return new Builder(csvFormat); 214 } 215 216 private boolean allowMissingColumnNames; 217 218 private boolean autoFlush; 219 220 private Character commentMarker; 221 222 private String delimiter; 223 224 private DuplicateHeaderMode duplicateHeaderMode; 225 226 private Character escapeCharacter; 227 228 private String[] headerComments; 229 230 private String[] headers; 231 232 private boolean ignoreEmptyLines; 233 234 private boolean ignoreHeaderCase; 235 236 private boolean ignoreSurroundingSpaces; 237 238 private String nullString; 239 240 private Character quoteCharacter; 241 242 private String quotedNullString; 243 244 private QuoteMode quoteMode; 245 246 private String recordSeparator; 247 248 private boolean skipHeaderRecord; 249 250 private boolean lenientEof; 251 252 private boolean trailingData; 253 254 private boolean trailingDelimiter; 255 256 private boolean trim; 257 258 private Builder() { 259 // empty 260 } 261 262 private Builder(final CSVFormat csvFormat) { 263 this.delimiter = csvFormat.delimiter; 264 this.quoteCharacter = csvFormat.quoteCharacter; 265 this.quoteMode = csvFormat.quoteMode; 266 this.commentMarker = csvFormat.commentMarker; 267 this.escapeCharacter = csvFormat.escapeCharacter; 268 this.ignoreSurroundingSpaces = 
csvFormat.ignoreSurroundingSpaces; 269 this.allowMissingColumnNames = csvFormat.allowMissingColumnNames; 270 this.ignoreEmptyLines = csvFormat.ignoreEmptyLines; 271 this.recordSeparator = csvFormat.recordSeparator; 272 this.nullString = csvFormat.nullString; 273 this.headerComments = csvFormat.headerComments; 274 this.headers = csvFormat.headers; 275 this.skipHeaderRecord = csvFormat.skipHeaderRecord; 276 this.ignoreHeaderCase = csvFormat.ignoreHeaderCase; 277 this.lenientEof = csvFormat.lenientEof; 278 this.trailingData = csvFormat.trailingData; 279 this.trailingDelimiter = csvFormat.trailingDelimiter; 280 this.trim = csvFormat.trim; 281 this.autoFlush = csvFormat.autoFlush; 282 this.quotedNullString = csvFormat.quotedNullString; 283 this.duplicateHeaderMode = csvFormat.duplicateHeaderMode; 284 } 285 286 /** 287 * Builds a new CSVFormat instance. 288 * 289 * @return a new CSVFormat instance. 290 * @deprecated Use {@link #get()}. 291 */ 292 @Deprecated 293 public CSVFormat build() { 294 return get(); 295 } 296 297 /** 298 * Builds a new CSVFormat instance. 299 * 300 * @return a new CSVFormat instance. 301 * @since 1.13.0 302 */ 303 @Override 304 public CSVFormat get() { 305 return new CSVFormat(this); 306 } 307 308 /** 309 * Sets the duplicate header names behavior, true to allow, false to disallow. 310 * 311 * @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow. 312 * @return This instance. 313 * @deprecated Use {@link #setDuplicateHeaderMode(DuplicateHeaderMode)}. 314 */ 315 @Deprecated 316 public Builder setAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) { 317 setDuplicateHeaderMode(allowDuplicateHeaderNames ? 
DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY); 318 return this; 319 } 320 321 /** 322 * Sets the parser missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause an 323 * {@link IllegalArgumentException} to be thrown. 324 * 325 * @param allowMissingColumnNames the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to 326 * cause an {@link IllegalArgumentException} to be thrown. 327 * @return This instance. 328 */ 329 public Builder setAllowMissingColumnNames(final boolean allowMissingColumnNames) { 330 this.allowMissingColumnNames = allowMissingColumnNames; 331 return this; 332 } 333 334 /** 335 * Sets whether to flush on close. 336 * 337 * @param autoFlush whether to flush on close. 338 * @return This instance. 339 */ 340 public Builder setAutoFlush(final boolean autoFlush) { 341 this.autoFlush = autoFlush; 342 return this; 343 } 344 345 /** 346 * Sets the comment marker character, use {@code null} to disable comments. 347 * <p> 348 * The comment start character is only recognized at the start of a line. 349 * </p> 350 * <p> 351 * Comments are printed first, before headers. 352 * </p> 353 * <p> 354 * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of each comment line. 355 * </p> 356 * <p> 357 * If the comment marker is not set, then the header comments are ignored. 358 * </p> 359 * <p> 360 * For example: 361 * </p> 362 * 363 * <pre> 364 * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0)); 365 * </pre> 366 * <p> 367 * writes: 368 * </p> 369 * 370 * <pre> 371 * # Generated by Apache Commons CSV. 372 * # 1970-01-01T00:00:00Z 373 * </pre> 374 * 375 * @param commentMarker the comment start marker, use {@code null} to disable. 376 * @return This instance. 
377 * @throws IllegalArgumentException thrown if the specified character is a line break 378 */ 379 public Builder setCommentMarker(final char commentMarker) { 380 setCommentMarker(Character.valueOf(commentMarker)); 381 return this; 382 } 383 384 /** 385 * Sets the comment marker character, use {@code null} to disable comments. 386 * <p> 387 * The comment start character is only recognized at the start of a line. 388 * </p> 389 * <p> 390 * Comments are printed first, before headers. 391 * </p> 392 * <p> 393 * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of each comment line. 394 * </p> 395 * <p> 396 * If the comment marker is not set, then the header comments are ignored. 397 * </p> 398 * <p> 399 * For example: 400 * </p> 401 * 402 * <pre> 403 * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0)); 404 * </pre> 405 * <p> 406 * writes: 407 * </p> 408 * 409 * <pre> 410 * # Generated by Apache Commons CSV. 411 * # 1970-01-01T00:00:00Z 412 * </pre> 413 * 414 * @param commentMarker the comment start marker, use {@code null} to disable. 415 * @return This instance. 416 * @throws IllegalArgumentException thrown if the specified character is a line break 417 */ 418 public Builder setCommentMarker(final Character commentMarker) { 419 if (isLineBreak(commentMarker)) { 420 throw new IllegalArgumentException("The comment start marker character cannot be a line break"); 421 } 422 this.commentMarker = commentMarker; 423 return this; 424 } 425 426 /** 427 * Sets the delimiter character. 428 * 429 * @param delimiter the delimiter character. 430 * @return This instance. 431 */ 432 public Builder setDelimiter(final char delimiter) { 433 return setDelimiter(String.valueOf(delimiter)); 434 } 435 436 /** 437 * Sets the delimiter character. 438 * 439 * @param delimiter the delimiter character. 440 * @return This instance. 
441 */ 442 public Builder setDelimiter(final String delimiter) { 443 if (containsLineBreak(delimiter)) { 444 throw new IllegalArgumentException("The delimiter cannot be a line break"); 445 } 446 if (delimiter.isEmpty()) { 447 throw new IllegalArgumentException("The delimiter cannot be empty"); 448 } 449 this.delimiter = delimiter; 450 return this; 451 } 452 453 /** 454 * Sets the duplicate header names behavior. 455 * 456 * @param duplicateHeaderMode the duplicate header names behavior 457 * @return This instance. 458 * @since 1.10.0 459 */ 460 public Builder setDuplicateHeaderMode(final DuplicateHeaderMode duplicateHeaderMode) { 461 this.duplicateHeaderMode = Objects.requireNonNull(duplicateHeaderMode, "duplicateHeaderMode"); 462 return this; 463 } 464 465 /** 466 * Sets the escape character. 467 * 468 * @param escapeCharacter the escape character. 469 * @return This instance. 470 * @throws IllegalArgumentException thrown if the specified character is a line break 471 */ 472 public Builder setEscape(final char escapeCharacter) { 473 setEscape(Character.valueOf(escapeCharacter)); 474 return this; 475 } 476 477 /** 478 * Sets the escape character. 479 * 480 * @param escapeCharacter the escape character. 481 * @return This instance. 482 * @throws IllegalArgumentException thrown if the specified character is a line break 483 */ 484 public Builder setEscape(final Character escapeCharacter) { 485 if (isLineBreak(escapeCharacter)) { 486 throw new IllegalArgumentException("The escape character cannot be a line break"); 487 } 488 this.escapeCharacter = escapeCharacter; 489 return this; 490 } 491 492 /** 493 * Sets the header defined by the given {@link Enum} class. 494 * 495 * <p> 496 * Example: 497 * </p> 498 * 499 * <pre> 500 * public enum HeaderEnum { 501 * Name, Email, Phone 502 * } 503 * 504 * Builder builder = builder.setHeader(HeaderEnum.class); 505 * </pre> 506 * <p> 507 * The header is also used by the {@link CSVPrinter}. 
508 * </p> 509 * 510 * @param headerEnum the enum defining the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. 511 * @return This instance. 512 */ 513 public Builder setHeader(final Class<? extends Enum<?>> headerEnum) { 514 String[] header = null; 515 if (headerEnum != null) { 516 final Enum<?>[] enumValues = headerEnum.getEnumConstants(); 517 header = new String[enumValues.length]; 518 Arrays.setAll(header, i -> enumValues[i].name()); 519 } 520 return setHeader(header); 521 } 522 523 /** 524 * Sets the header from the result set metadata. The header can be parsed automatically from the input file with: 525 * 526 * <pre> 527 * builder.setHeader(); 528 * </pre> 529 * 530 * or specified manually with: 531 * 532 * <pre> 533 * builder.setHeader(resultSet); 534 * </pre> 535 * <p> 536 * The header is also used by the {@link CSVPrinter}. 537 * </p> 538 * 539 * @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. 540 * @return This instance. 541 * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set. 542 */ 543 public Builder setHeader(final ResultSet resultSet) throws SQLException { 544 return setHeader(resultSet != null ? resultSet.getMetaData() : null); 545 } 546 547 /** 548 * Sets the header from the result set metadata. The header can be parsed automatically from the input file with: 549 * 550 * <pre> 551 * builder.setHeader(); 552 * </pre> 553 * 554 * or specified manually with: 555 * 556 * <pre> 557 * builder.setHeader(resultSetMetaData); 558 * </pre> 559 * <p> 560 * The header is also used by the {@link CSVPrinter}. 561 * </p> 562 * 563 * @param resultSetMetaData the metaData for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. 564 * @return This instance. 
565 * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set. 566 */ 567 public Builder setHeader(final ResultSetMetaData resultSetMetaData) throws SQLException { 568 String[] labels = null; 569 if (resultSetMetaData != null) { 570 final int columnCount = resultSetMetaData.getColumnCount(); 571 labels = new String[columnCount]; 572 for (int i = 0; i < columnCount; i++) { 573 labels[i] = resultSetMetaData.getColumnLabel(i + 1); 574 } 575 } 576 return setHeader(labels); 577 } 578 579 /** 580 * Sets the header to the given values. The header can be parsed automatically from the input file with: 581 * 582 * <pre> 583 * builder.setHeader(); 584 * </pre> 585 * 586 * or specified manually with: 587 * 588 * <pre>{@code 589 * builder.setHeader("name", "email", "phone"); 590 * }</pre> 591 * <p> 592 * The header is also used by the {@link CSVPrinter}. 593 * </p> 594 * 595 * @param header the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. 596 * @return This instance. 597 */ 598 public Builder setHeader(final String... header) { 599 this.headers = CSVFormat.clone(header); 600 return this; 601 } 602 603 /** 604 * Sets the header comments to write before the CSV data. 605 * <p> 606 * This setting is ignored by the parser. 607 * </p> 608 * <p> 609 * Comments are printed first, before headers. 610 * </p> 611 * <p> 612 * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of each comment line. 613 * </p> 614 * <p> 615 * If the comment marker is not set, then the header comments are ignored. 616 * </p> 617 * <p> 618 * For example: 619 * </p> 620 * 621 * <pre> 622 * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0)); 623 * </pre> 624 * <p> 625 * writes: 626 * </p> 627 * 628 * <pre> 629 * # Generated by Apache Commons CSV. 
630 * # 1970-01-01T00:00:00Z 631 * </pre> 632 * 633 * @param headerComments the headerComments which will be printed by the Printer before the CSV data. 634 * @return This instance. 635 */ 636 public Builder setHeaderComments(final Object... headerComments) { 637 this.headerComments = CSVFormat.clone(toStringArray(headerComments)); 638 return this; 639 } 640 641 /** 642 * Sets the header comments to write before the CSV data. 643 * <p> 644 * This setting is ignored by the parser. 645 * </p> 646 * <p> 647 * Comments are printed first, before headers. 648 * </p> 649 * <p> 650 * Use {@link #setCommentMarker(char)} or {@link #setCommentMarker(Character)} to set the comment marker written at the start of each comment line. 651 * </p> 652 * <p> 653 * If the comment marker is not set, then the header comments are ignored. 654 * </p> 655 * <p> 656 * For example: 657 * </p> 658 * 659 * <pre> 660 * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0).toString()); 661 * </pre> 662 * <p> 663 * writes: 664 * </p> 665 * 666 * <pre> 667 * # Generated by Apache Commons CSV. 668 * # 1970-01-01T00:00:00Z 669 * </pre> 670 * 671 * @param headerComments the headerComments which will be printed by the Printer before the CSV data. 672 * @return This instance. 673 */ 674 public Builder setHeaderComments(final String... headerComments) { 675 this.headerComments = CSVFormat.clone(headerComments); 676 return this; 677 } 678 679 /** 680 * Sets the empty line skipping behavior, {@code true} to ignore the empty lines between the records, {@code false} to translate empty lines to empty 681 * records. 682 * 683 * @param ignoreEmptyLines the empty line skipping behavior, {@code true} to ignore the empty lines between the records, {@code false} to translate 684 * empty lines to empty records. 685 * @return This instance. 
686 */ 687 public Builder setIgnoreEmptyLines(final boolean ignoreEmptyLines) { 688 this.ignoreEmptyLines = ignoreEmptyLines; 689 return this; 690 } 691 692 /** 693 * Sets the parser case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is. 694 * 695 * @param ignoreHeaderCase the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is. 696 * @return This instance. 697 */ 698 public Builder setIgnoreHeaderCase(final boolean ignoreHeaderCase) { 699 this.ignoreHeaderCase = ignoreHeaderCase; 700 return this; 701 } 702 703 /** 704 * Sets the parser trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the spaces as is. 705 * 706 * @param ignoreSurroundingSpaces the parser trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the spaces as is. 707 * @return This instance. 708 */ 709 public Builder setIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) { 710 this.ignoreSurroundingSpaces = ignoreSurroundingSpaces; 711 return this; 712 } 713 714 /** 715 * Sets whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility. 716 * 717 * @param lenientEof whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility. 718 * @return This instance. 719 * @since 1.11.0 720 */ 721 public Builder setLenientEof(final boolean lenientEof) { 722 this.lenientEof = lenientEof; 723 return this; 724 } 725 726 /** 727 * Sets the String to convert to and from {@code null}. No substitution occurs if {@code null}. 728 * 729 * <ul> 730 * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading records.</li> 731 * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li> 732 * </ul> 733 * 734 * @param nullString the String to convert to and from {@code null}. 
No substitution occurs if {@code null}. 735 * @return This instance. 736 */ 737 public Builder setNullString(final String nullString) { 738 this.nullString = nullString; 739 this.quotedNullString = quoteCharacter + nullString + quoteCharacter; 740 return this; 741 } 742 743 /** 744 * Sets the quote character. 745 * 746 * @param quoteCharacter the quote character. 747 * @return This instance. 748 */ 749 public Builder setQuote(final char quoteCharacter) { 750 setQuote(Character.valueOf(quoteCharacter)); 751 return this; 752 } 753 754 /** 755 * Sets the quote character, use {@code null} to disable. 756 * 757 * @param quoteCharacter the quote character, use {@code null} to disable. 758 * @return This instance. 759 */ 760 public Builder setQuote(final Character quoteCharacter) { 761 if (isLineBreak(quoteCharacter)) { 762 throw new IllegalArgumentException("The quoteChar cannot be a line break"); 763 } 764 this.quoteCharacter = quoteCharacter; 765 return this; 766 } 767 768 /** 769 * Sets the quote policy to use for output. 770 * 771 * @param quoteMode the quote policy to use for output. 772 * @return This instance. 773 */ 774 public Builder setQuoteMode(final QuoteMode quoteMode) { 775 this.quoteMode = quoteMode; 776 return this; 777 } 778 779 /** 780 * Sets the record separator to use for output. 781 * 782 * <p> 783 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' 784 * and "\r\n" 785 * </p> 786 * 787 * @param recordSeparator the record separator to use for output. 788 * @return This instance. 789 */ 790 public Builder setRecordSeparator(final char recordSeparator) { 791 this.recordSeparator = String.valueOf(recordSeparator); 792 return this; 793 } 794 795 /** 796 * Sets the record separator to use for output. 797 * 798 * <p> 799 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. 
Parsing currently only works for inputs with '\n', '\r' 800 * and "\r\n" 801 * </p> 802 * 803 * @param recordSeparator the record separator to use for output. 804 * @return This instance. 805 */ 806 public Builder setRecordSeparator(final String recordSeparator) { 807 this.recordSeparator = recordSeparator; 808 return this; 809 } 810 811 /** 812 * Sets whether to skip the header record. 813 * 814 * @param skipHeaderRecord whether to skip the header record. 815 * @return This instance. 816 */ 817 public Builder setSkipHeaderRecord(final boolean skipHeaderRecord) { 818 this.skipHeaderRecord = skipHeaderRecord; 819 return this; 820 } 821 822 /** 823 * Sets whether reading trailing data is allowed in records, helps Excel compatibility. 824 * 825 * @param trailingData whether reading trailing data is allowed in records, helps Excel compatibility. 826 * @return This instance. 827 * @since 1.11.0 828 */ 829 public Builder setTrailingData(final boolean trailingData) { 830 this.trailingData = trailingData; 831 return this; 832 } 833 834 /** 835 * Sets whether to add a trailing delimiter. 836 * 837 * @param trailingDelimiter whether to add a trailing delimiter. 838 * @return This instance. 839 */ 840 public Builder setTrailingDelimiter(final boolean trailingDelimiter) { 841 this.trailingDelimiter = trailingDelimiter; 842 return this; 843 } 844 845 /** 846 * Sets whether to trim leading and trailing blanks. 847 * 848 * @param trim whether to trim leading and trailing blanks. 849 * @return This instance. 850 */ 851 public Builder setTrim(final boolean trim) { 852 this.trim = trim; 853 return this; 854 } 855 } 856 857 /** 858 * Predefines formats. 859 * 860 * @since 1.2 861 */ 862 public enum Predefined { 863 864 /** 865 * The DEFAULT predefined format. 866 * 867 * @see CSVFormat#DEFAULT 868 */ 869 Default(DEFAULT), 870 871 /** 872 * The EXCEL predefined format. 873 * 874 * @see CSVFormat#EXCEL 875 */ 876 Excel(EXCEL), 877 878 /** 879 * The INFORMIX_UNLOAD predefined format. 
880 * 881 * @see CSVFormat#INFORMIX_UNLOAD 882 * @since 1.3 883 */ 884 InformixUnload(INFORMIX_UNLOAD), 885 886 /** 887 * The INFORMIX_UNLOAD_CSV predefined format. 888 * 889 * @see CSVFormat#INFORMIX_UNLOAD_CSV 890 * @since 1.3 891 */ 892 InformixUnloadCsv(INFORMIX_UNLOAD_CSV), 893 894 /** 895 * The MONGODB_CSV predefined format. 896 * 897 * @see CSVFormat#MONGODB_CSV 898 * @since 1.7 899 */ 900 MongoDBCsv(MONGODB_CSV), 901 902 /** 903 * The MONGODB_TSV predefined format. 904 * 905 * @see CSVFormat#MONGODB_TSV 906 * @since 1.7 907 */ 908 MongoDBTsv(MONGODB_TSV), 909 910 /** 911 * The MYSQL predefined format. 912 * 913 * @see CSVFormat#MYSQL 914 */ 915 MySQL(MYSQL), 916 917 /** 918 * The ORACLE predefined format. 919 * 920 * @see CSVFormat#ORACLE 921 */ 922 Oracle(ORACLE), 923 924 /** 925 * The POSTGRESQL_CSV predefined format. 926 * 927 * @see CSVFormat#POSTGRESQL_CSV 928 * @since 1.5 929 */ 930 PostgreSQLCsv(POSTGRESQL_CSV), 931 932 /** 933 * The POSTGRESQL_TEXT predefined format. 934 * 935 * @see CSVFormat#POSTGRESQL_TEXT 936 */ 937 PostgreSQLText(POSTGRESQL_TEXT), 938 939 /** 940 * The RFC4180 predefined format. 941 * 942 * @see CSVFormat#RFC4180 943 */ 944 RFC4180(CSVFormat.RFC4180), 945 946 /** 947 * The TDF predefined format. 948 * 949 * @see CSVFormat#TDF 950 */ 951 TDF(CSVFormat.TDF); 952 953 private final CSVFormat format; 954 955 Predefined(final CSVFormat format) { 956 this.format = format; 957 } 958 959 /** 960 * Gets the format. 961 * 962 * @return the format. 963 */ 964 public CSVFormat getFormat() { 965 return format; 966 } 967 } 968 969 /** 970 * Standard Comma Separated Value format, as for {@link #RFC4180} but allowing empty lines. 
971 * 972 * <p> 973 * The {@link Builder} settings are: 974 * </p> 975 * <ul> 976 * <li>{@code setDelimiter(',')}</li> 977 * <li>{@code setQuote('"')}</li> 978 * <li>{@code setRecordSeparator("\r\n")}</li> 979 * <li>{@code setIgnoreEmptyLines(true)}</li> 980 * <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li> 981 * </ul> 982 * 983 * @see Predefined#Default 984 */ 985 public static final CSVFormat DEFAULT = new CSVFormat(Builder.create()); 986 987 /** 988 * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale-dependent, it might be necessary 989 * to customize this format to accommodate your regional settings. 990 * 991 * <p> 992 * For example for parsing or generating a CSV file on a French system the following format will be used: 993 * </p> 994 * 995 * <pre> 996 * CSVFormat fmt = CSVFormat.EXCEL.builder().setDelimiter(';').get(); 997 * </pre> 998 * 999 * <p> 1000 * The {@link Builder} settings are: 1001 * </p> 1002 * <ul> 1003 * <li>{@code setDelimiter(',')}</li> 1004 * <li>{@code setQuote('"')}</li> 1005 * <li>{@code setRecordSeparator("\r\n")}</li> 1006 * <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li> 1007 * <li>{@code setIgnoreEmptyLines(false)}</li> 1008 * <li>{@code setAllowMissingColumnNames(true)}</li> 1009 * <li>{@code setTrailingData(true)}</li> 1010 * <li>{@code setLenientEof(true)}</li> 1011 * </ul> 1012 * <p> 1013 * Note: This is currently like {@link #RFC4180} plus {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)} and 1014 * {@link Builder#setIgnoreEmptyLines(boolean) Builder#setIgnoreEmptyLines(false)}. 
1015 * </p> 1016 * 1017 * @see Predefined#Excel 1018 */ 1019 // @formatter:off 1020 public static final CSVFormat EXCEL = DEFAULT.builder() 1021 .setIgnoreEmptyLines(false) 1022 .setAllowMissingColumnNames(true) 1023 .setTrailingData(true) 1024 .setLenientEof(true) 1025 .get(); 1026 // @formatter:on 1027 1028 /** 1029 * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation. 1030 * 1031 * <p> 1032 * This is a comma-delimited format with an LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}. 1033 * The default NULL string is {@code "\\N"}. 1034 * </p> 1035 * 1036 * <p> 1037 * The {@link Builder} settings are: 1038 * </p> 1039 * <ul> 1040 * <li>{@code setDelimiter(',')}</li> 1041 * <li>{@code setEscape('\\')}</li> 1042 * <li>{@code setQuote("\"")}</li> 1043 * <li>{@code setRecordSeparator('\n')}</li> 1044 * </ul> 1045 * 1046 * @see Predefined#MySQL 1047 * @see <a href= "http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm"> 1048 * http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm</a> 1049 * @since 1.3 1050 */ 1051 // @formatter:off 1052 public static final CSVFormat INFORMIX_UNLOAD = DEFAULT.builder() 1053 .setDelimiter(Constants.PIPE) 1054 .setEscape(Constants.BACKSLASH) 1055 .setQuote(Constants.DOUBLE_QUOTE_CHAR) 1056 .setRecordSeparator(Constants.LF) 1057 .get(); 1058 // @formatter:on 1059 1060 /** 1061 * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation (escaping is disabled.) 1062 * 1063 * <p> 1064 * This is a comma-delimited format with an LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}. 1065 * The default NULL string is {@code "\\N"}. 
* </p>
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter(',')}</li>
     * <li>{@code setQuote('"')}</li>
     * <li>{@code setRecordSeparator('\n')}</li>
     * </ul>
     *
     * @see Predefined#MySQL
     * @see <a href= "http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm">
     *        http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm</a>
     * @since 1.3
     */
    // NOTE(review): the Javadoc above links Predefined#MySQL, which looks copied from MYSQL; confirm the matching Predefined constant.
    // @formatter:off
    public static final CSVFormat INFORMIX_UNLOAD_CSV = DEFAULT.builder()
            .setDelimiter(Constants.COMMA)
            .setQuote(Constants.DOUBLE_QUOTE_CHAR)
            .setRecordSeparator(Constants.LF)
            .get();
    // @formatter:on

    /**
     * Default MongoDB CSV format used by the {@code mongoexport} operation.
     * <p>
     * <strong>Parsing is not supported yet.</strong>
     * </p>
     *
     * <p>
     * This is a comma-delimited format. Values are double quoted only if needed and special characters are escaped with {@code '"'}. A header line with field
     * names is expected.
     * </p>
     * <p>
     * As of 2024-04-05, the MongoDB documentation for {@code mongoimport} states:
     * </p>
     * <blockquote>The csv parser accepts that data that complies with RFC <a href="https://tools.ietf.org/html/rfc4180">RFC-4180</a>. As a result, backslashes are
     * not a valid escape character. If you use double-quotes to enclose fields in the CSV data, you must escape internal double-quote marks by prepending
     * another double-quote.
</blockquote>
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter(',')}</li>
     * <li>{@code setEscape('"')}</li>
     * <li>{@code setQuote('"')}</li>
     * <li>{@code setQuoteMode(QuoteMode.MINIMAL)}</li>
     * <li>{@code setSkipHeaderRecord(false)}</li>
     * </ul>
     *
     * @see Predefined#MongoDBCsv
     * @see <a href="https://docs.mongodb.com/manual/reference/program/mongoexport/">MongoDB mongoexport command documentation</a>
     * @since 1.7
     */
    // @formatter:off
    public static final CSVFormat MONGODB_CSV = DEFAULT.builder()
            .setDelimiter(Constants.COMMA)
            .setEscape(Constants.DOUBLE_QUOTE_CHAR)
            .setQuote(Constants.DOUBLE_QUOTE_CHAR)
            .setQuoteMode(QuoteMode.MINIMAL)
            .setSkipHeaderRecord(false)
            .get();
    // @formatter:on

    /**
     * Default MongoDB TSV format used by the {@code mongoexport} operation.
     * <p>
     * <strong>Parsing is not supported yet.</strong>
     * </p>
     *
     * <p>
     * This is a tab-delimited format. Values are double quoted only if needed and special
     * characters are escaped with {@code '"'}. A header line with field names is expected.
* </p>
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter('\t')}</li>
     * <li>{@code setEscape('"')}</li>
     * <li>{@code setQuote('"')}</li>
     * <li>{@code setQuoteMode(QuoteMode.MINIMAL)}</li>
     * <li>{@code setSkipHeaderRecord(false)}</li>
     * </ul>
     *
     * @see Predefined#MongoDBCsv
     * @see <a href="https://docs.mongodb.com/manual/reference/program/mongoexport/">MongoDB mongoexport command
     *          documentation</a>
     * @since 1.7
     */
    // NOTE(review): the Javadoc above links Predefined#MongoDBCsv for the TSV format; confirm whether a TSV-specific Predefined constant should be linked.
    // @formatter:off
    public static final CSVFormat MONGODB_TSV = DEFAULT.builder()
            .setDelimiter(Constants.TAB)
            .setEscape(Constants.DOUBLE_QUOTE_CHAR)
            .setQuote(Constants.DOUBLE_QUOTE_CHAR)
            .setQuoteMode(QuoteMode.MINIMAL)
            .setSkipHeaderRecord(false)
            .get();
    // @formatter:on

    /**
     * Default MySQL format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations.
     *
     * <p>
     * This is a tab-delimited format with an LF character as the line separator. Values are not quoted and special
     * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}.
* </p>
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter('\t')}</li>
     * <li>{@code setEscape('\\')}</li>
     * <li>{@code setIgnoreEmptyLines(false)}</li>
     * <li>{@code setQuote(null)}</li>
     * <li>{@code setRecordSeparator('\n')}</li>
     * <li>{@code setNullString("\\N")}</li>
     * <li>{@code setQuoteMode(QuoteMode.ALL_NON_NULL)}</li>
     * </ul>
     *
     * @see Predefined#MySQL
     * @see <a href="https://dev.mysql.com/doc/refman/5.1/en/load-data.html">https://dev.mysql.com/doc/refman/5.1/en/load-data.html</a>
     */
    // @formatter:off
    public static final CSVFormat MYSQL = DEFAULT.builder()
            .setDelimiter(Constants.TAB)
            .setEscape(Constants.BACKSLASH)
            .setIgnoreEmptyLines(false)
            .setQuote(null)
            .setRecordSeparator(Constants.LF)
            .setNullString(Constants.SQL_NULL_STRING)
            .setQuoteMode(QuoteMode.ALL_NON_NULL)
            .get();
    // @formatter:on

    /**
     * Default Oracle format used by the SQL*Loader utility.
     *
     * <p>
     * This is a comma-delimited format with the system line separator character as the record separator. Values are
     * double quoted when needed and special characters are escaped with {@code '\'}. The default NULL string is
     * {@code "\\N"}. Values are trimmed.
* </p>
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter(',') // default is {@code FIELDS TERMINATED BY ','}}</li>
     * <li>{@code setEscape('\\')}</li>
     * <li>{@code setIgnoreEmptyLines(false)}</li>
     * <li>{@code setQuote('"') // default is {@code OPTIONALLY ENCLOSED BY '"'}}</li>
     * <li>{@code setNullString("\\N")}</li>
     * <li>{@code setTrim(true)}</li>
     * <li>{@code setRecordSeparator(System.lineSeparator())}</li>
     * <li>{@code setQuoteMode(QuoteMode.MINIMAL)}</li>
     * </ul>
     *
     * @see Predefined#Oracle
     * @see <a href="https://s.apache.org/CGXG">Oracle CSV Format Specification</a>
     * @since 1.6
     */
    // @formatter:off
    public static final CSVFormat ORACLE = DEFAULT.builder()
            .setDelimiter(Constants.COMMA)
            .setEscape(Constants.BACKSLASH)
            .setIgnoreEmptyLines(false)
            .setQuote(Constants.DOUBLE_QUOTE_CHAR)
            .setNullString(Constants.SQL_NULL_STRING)
            .setTrim(true)
            .setRecordSeparator(System.lineSeparator())
            .setQuoteMode(QuoteMode.MINIMAL)
            .get();
    // @formatter:on

    /**
     * Default PostgreSQL CSV format used by the {@code COPY} operation.
     *
     * <p>
     * This is a comma-delimited format with an LF character as the line separator. Values are double quoted and special
     * characters are not escaped. The default NULL string is {@code ""}.
* </p>
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter(',')}</li>
     * <li>{@code setEscape(null)}</li>
     * <li>{@code setIgnoreEmptyLines(false)}</li>
     * <li>{@code setQuote('"')}</li>
     * <li>{@code setRecordSeparator('\n')}</li>
     * <li>{@code setNullString("")}</li>
     * <li>{@code setQuoteMode(QuoteMode.ALL_NON_NULL)}</li>
     * </ul>
     *
     * @see Predefined#MySQL
     * @see <a href="https://www.postgresql.org/docs/current/static/sql-copy.html">PostgreSQL COPY command
     *          documentation</a>
     * @since 1.5
     */
    // NOTE(review): the Javadoc above links Predefined#MySQL, which looks copied from MYSQL; confirm the matching Predefined constant.
    // @formatter:off
    public static final CSVFormat POSTGRESQL_CSV = DEFAULT.builder()
            .setDelimiter(Constants.COMMA)
            .setEscape(null)
            .setIgnoreEmptyLines(false)
            .setQuote(Constants.DOUBLE_QUOTE_CHAR)
            .setRecordSeparator(Constants.LF)
            .setNullString(Constants.EMPTY)
            .setQuoteMode(QuoteMode.ALL_NON_NULL)
            .get();
    // @formatter:on

    /**
     * Default PostgreSQL text format used by the {@code COPY} operation.
     *
     * <p>
     * This is a tab-delimited format with an LF character as the line separator. Values are not quoted and special
     * characters are escaped with {@code '\\'}. The default NULL string is {@code "\\N"}.
* </p>
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter('\t')}</li>
     * <li>{@code setEscape('\\')}</li>
     * <li>{@code setIgnoreEmptyLines(false)}</li>
     * <li>{@code setQuote(null)}</li>
     * <li>{@code setRecordSeparator('\n')}</li>
     * <li>{@code setNullString("\\N")}</li>
     * <li>{@code setQuoteMode(QuoteMode.ALL_NON_NULL)}</li>
     * </ul>
     *
     * @see Predefined#MySQL
     * @see <a href="https://www.postgresql.org/docs/current/static/sql-copy.html">PostgreSQL COPY command
     *          documentation</a>
     * @since 1.5
     */
    // NOTE(review): the Javadoc above links Predefined#MySQL, which looks copied from MYSQL; confirm the matching Predefined constant.
    // @formatter:off
    public static final CSVFormat POSTGRESQL_TEXT = DEFAULT.builder()
            .setDelimiter(Constants.TAB)
            .setEscape(Constants.BACKSLASH)
            .setIgnoreEmptyLines(false)
            .setQuote(null)
            .setRecordSeparator(Constants.LF)
            .setNullString(Constants.SQL_NULL_STRING)
            .setQuoteMode(QuoteMode.ALL_NON_NULL)
            .get();
    // @formatter:on

    /**
     * Comma separated format as defined by <a href="https://tools.ietf.org/html/rfc4180">RFC 4180</a>.
     *
     * <p>
     * The {@link Builder} settings are:
     * </p>
     * <ul>
     * <li>{@code setDelimiter(',')}</li>
     * <li>{@code setQuote('"')}</li>
     * <li>{@code setRecordSeparator("\r\n")}</li>
     * <li>{@code setIgnoreEmptyLines(false)}</li>
     * </ul>
     *
     * @see Predefined#RFC4180
     */
    public static final CSVFormat RFC4180 = DEFAULT.builder().setIgnoreEmptyLines(false).get();

    /** Serialization version identifier. */
    private static final long serialVersionUID = 2L;

    /**
     * Tab-delimited format.
1342 * 1343 * <p> 1344 * The {@link Builder} settings are: 1345 * </p> 1346 * <ul> 1347 * <li>{@code setDelimiter('\t')}</li> 1348 * <li>{@code setQuote('"')}</li> 1349 * <li>{@code setRecordSeparator("\r\n")}</li> 1350 * <li>{@code setIgnoreSurroundingSpaces(true)}</li> 1351 * </ul> 1352 * 1353 * @see Predefined#TDF 1354 */ 1355 // @formatter:off 1356 public static final CSVFormat TDF = DEFAULT.builder() 1357 .setDelimiter(Constants.TAB) 1358 .setIgnoreSurroundingSpaces(true) 1359 .get(); 1360 // @formatter:on 1361 1362 /** 1363 * Null-safe clone of an array. 1364 * 1365 * @param <T> The array element type. 1366 * @param values the source array 1367 * @return the cloned array. 1368 */ 1369 @SafeVarargs 1370 static <T> T[] clone(final T... values) { 1371 return values == null ? null : values.clone(); 1372 } 1373 1374 /** 1375 * Returns true if the given string contains the search char. 1376 * 1377 * @param source the string to check. 1378 * @param searchCh the character to search. 1379 * @return true if {@code c} contains a line break character 1380 */ 1381 private static boolean contains(final String source, final char searchCh) { 1382 return Objects.requireNonNull(source, "source").indexOf(searchCh) >= 0; 1383 } 1384 1385 /** 1386 * Returns true if the given string contains a line break character. 1387 * 1388 * @param source the string to check. 1389 * @return true if {@code c} contains a line break character. 1390 */ 1391 private static boolean containsLineBreak(final String source) { 1392 return contains(source, Constants.CR) || contains(source, Constants.LF); 1393 } 1394 1395 /** 1396 * Creates a null-safe copy of the given instance. 1397 * 1398 * @return a copy of the given instance or null if the input is null. 1399 */ 1400 static CSVFormat copy(final CSVFormat format) { 1401 return format != null ? 
format.copy() : null; 1402 } 1403 1404 static boolean isBlank(final String value) { 1405 return value == null || value.trim().isEmpty(); 1406 } 1407 1408 /** 1409 * Returns true if the given character is a line break character. 1410 * 1411 * @param c the character to check. 1412 * @return true if {@code c} is a line break character. 1413 */ 1414 private static boolean isLineBreak(final char c) { 1415 return c == Constants.LF || c == Constants.CR; 1416 } 1417 1418 /** 1419 * Returns true if the given character is a line break character. 1420 * 1421 * @param c the character to check, may be null. 1422 * @return true if {@code c} is a line break character (and not null). 1423 */ 1424 private static boolean isLineBreak(final Character c) { 1425 return c != null && isLineBreak(c.charValue()); // N.B. Explicit (un)boxing is intentional 1426 } 1427 1428 /** Same test as in as {@link String#trim()}. */ 1429 private static boolean isTrimChar(final char ch) { 1430 return ch <= Constants.SP; 1431 } 1432 1433 /** Same test as in as {@link String#trim()}. */ 1434 private static boolean isTrimChar(final CharSequence charSequence, final int pos) { 1435 return isTrimChar(charSequence.charAt(pos)); 1436 } 1437 1438 /** 1439 * Creates a new CSV format with the specified delimiter. 1440 * 1441 * <p> 1442 * Use this method if you want to create a CSVFormat from scratch. All fields but the delimiter will be initialized with null/false. 1443 * </p> 1444 * 1445 * @param delimiter the char used for value separation, must not be a line break character 1446 * @return a new CSV format. 
1447 * @throws IllegalArgumentException if the delimiter is a line break character 1448 * @see #DEFAULT 1449 * @see #RFC4180 1450 * @see #MYSQL 1451 * @see #EXCEL 1452 * @see #TDF 1453 */ 1454 public static CSVFormat newFormat(final char delimiter) { 1455 return new CSVFormat(new Builder().setDelimiter(delimiter)); 1456 } 1457 1458 static String[] toStringArray(final Object[] values) { 1459 if (values == null) { 1460 return null; 1461 } 1462 final String[] strings = new String[values.length]; 1463 Arrays.setAll(strings, i -> Objects.toString(values[i], null)); 1464 return strings; 1465 } 1466 1467 static CharSequence trim(final CharSequence charSequence) { 1468 if (charSequence instanceof String) { 1469 return ((String) charSequence).trim(); 1470 } 1471 final int count = charSequence.length(); 1472 int len = count; 1473 int pos = 0; 1474 1475 while (pos < len && isTrimChar(charSequence, pos)) { 1476 pos++; 1477 } 1478 while (pos < len && isTrimChar(charSequence, len - 1)) { 1479 len--; 1480 } 1481 return pos > 0 || len < count ? charSequence.subSequence(pos, len) : charSequence; 1482 } 1483 1484 /** 1485 * Gets one of the predefined formats from {@link CSVFormat.Predefined}. 1486 * 1487 * @param format name 1488 * @return one of the predefined formats 1489 * @since 1.2 1490 */ 1491 public static CSVFormat valueOf(final String format) { 1492 return CSVFormat.Predefined.valueOf(format).getFormat(); 1493 } 1494 1495 /** How duplicate headers are handled. */ 1496 private final DuplicateHeaderMode duplicateHeaderMode; 1497 1498 /** Whether missing column names are allowed when parsing the header line. */ 1499 private final boolean allowMissingColumnNames; 1500 1501 /** Whether to flush on close. */ 1502 private final boolean autoFlush; 1503 1504 /** Set to null if commenting is disabled. */ 1505 private final Character commentMarker; 1506 1507 /** The character delimiting the values (typically ";", "," or "\t"). 
*/
    private final String delimiter;

    /** The escape character; set to null if escaping is disabled. */
    private final Character escapeCharacter;

    /** Array of header column names. */
    private final String[] headers;

    /** Array of header comment lines. */
    private final String[] headerComments;

    /** Whether empty lines between records are ignored when parsing input. */
    private final boolean ignoreEmptyLines;

    /** Whether header name case is ignored. */
    private final boolean ignoreHeaderCase;

    /** Whether leading/trailing spaces around values are ignored. */
    private final boolean ignoreSurroundingSpaces;

    /** The string to be used for null values. */
    private final String nullString;

    /** The quote character; set to null if quoting is disabled. */
    private final Character quoteCharacter;

    /** Set to {@code quoteCharacter + nullString + quoteCharacter}. */
    private final String quotedNullString;

    /** The quote policy for output fields. */
    private final QuoteMode quoteMode;

    /** The record separator, used for output. */
    private final String recordSeparator;

    /** Whether to skip the header record. */
    private final boolean skipHeaderRecord;

    /** Whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility. */
    private final boolean lenientEof;

    /** Whether reading trailing data is allowed in records, helps Excel compatibility. */
    private final boolean trailingData;

    /** Whether to add a trailing delimiter. */
    private final boolean trailingDelimiter;

    /** Whether to trim leading and trailing blanks.
*/
    private final boolean trim;

    /**
     * Creates a format by copying every setting from the given builder, then calls {@code validate()}.
     *
     * @param builder the source of all settings, must not be {@code null}.
     */
    private CSVFormat(final Builder builder) {
        this.delimiter = builder.delimiter;
        this.quoteCharacter = builder.quoteCharacter;
        this.quoteMode = builder.quoteMode;
        this.commentMarker = builder.commentMarker;
        this.escapeCharacter = builder.escapeCharacter;
        this.ignoreSurroundingSpaces = builder.ignoreSurroundingSpaces;
        this.allowMissingColumnNames = builder.allowMissingColumnNames;
        this.ignoreEmptyLines = builder.ignoreEmptyLines;
        this.recordSeparator = builder.recordSeparator;
        this.nullString = builder.nullString;
        this.headerComments = builder.headerComments;
        this.headers = builder.headers;
        this.skipHeaderRecord = builder.skipHeaderRecord;
        this.ignoreHeaderCase = builder.ignoreHeaderCase;
        this.lenientEof = builder.lenientEof;
        this.trailingData = builder.trailingData;
        this.trailingDelimiter = builder.trailingDelimiter;
        this.trim = builder.trim;
        this.autoFlush = builder.autoFlush;
        this.quotedNullString = builder.quotedNullString;
        this.duplicateHeaderMode = builder.duplicateHeaderMode;
        // Runs last so that it sees the fully assigned settings.
        validate();
    }

    /**
     * Appends a single character to the given output.
     *
     * @param c          the character to append.
     * @param appendable where to append.
     * @throws IOException if the {@code appendable} fails.
     */
    private void append(final char c, final Appendable appendable) throws IOException {
        appendable.append(c);
    }

    /**
     * Appends a character sequence to the given output.
     *
     * @param csq        the characters to append.
     * @param appendable where to append.
     * @throws IOException if the {@code appendable} fails.
     */
    private void append(final CharSequence csq, final Appendable appendable) throws IOException {
        appendable.append(csq);
    }

    /**
     * Creates a new Builder for this instance.
     *
     * @return a new Builder.
     */
    public Builder builder() {
        return Builder.create(this);
    }

    /**
     * Creates a copy of this instance.
     *
     * @return a copy of this instance.
*/
    CSVFormat copy() {
        // Round-trips through a Builder seeded from this instance.
        return builder().get();
    }

    /**
     * Compares this format with another object for equality.
     *
     * <p>
     * Two formats are equal when they are of the same class and every setting matches; the header and header comment arrays are compared element by element.
     * </p>
     *
     * @param obj the object to compare with, may be {@code null}.
     * @return {@code true} if {@code obj} is a format with identical settings.
     */
    @Override
    public boolean equals(final Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null) {
            return false;
        }
        if (getClass() != obj.getClass()) {
            return false;
        }
        final CSVFormat other = (CSVFormat) obj;
        return allowMissingColumnNames == other.allowMissingColumnNames && autoFlush == other.autoFlush &&
                Objects.equals(commentMarker, other.commentMarker) && Objects.equals(delimiter, other.delimiter) &&
                duplicateHeaderMode == other.duplicateHeaderMode && Objects.equals(escapeCharacter, other.escapeCharacter) &&
                Arrays.equals(headerComments, other.headerComments) && Arrays.equals(headers, other.headers) &&
                ignoreEmptyLines == other.ignoreEmptyLines && ignoreHeaderCase == other.ignoreHeaderCase &&
                ignoreSurroundingSpaces == other.ignoreSurroundingSpaces && lenientEof == other.lenientEof &&
                Objects.equals(nullString, other.nullString) && Objects.equals(quoteCharacter, other.quoteCharacter) &&
                quoteMode == other.quoteMode && Objects.equals(quotedNullString, other.quotedNullString) &&
                Objects.equals(recordSeparator, other.recordSeparator) && skipHeaderRecord == other.skipHeaderRecord &&
                trailingData == other.trailingData && trailingDelimiter == other.trailingDelimiter && trim == other.trim;
    }

    /**
     * Appends the escape character followed by the given character.
     *
     * <p>
     * The escape character is dereferenced without a null check, so callers must only use this when an escape character is set.
     * </p>
     *
     * @param c          the character to write after the escape character.
     * @param appendable where to append.
     * @throws IOException if the {@code appendable} fails.
     */
    private void escape(final char c, final Appendable appendable) throws IOException {
        append(escapeCharacter.charValue(), appendable); // N.B. Explicit (un)boxing is intentional
        append(c, appendable);
    }

    /**
     * Formats the specified values.
     *
     * @param values the values to format
     * @return the formatted values
     */
    public String format(final Object... values) {
        return Uncheck.get(() -> format_(values));
    }

    /**
     * Formats the values as one record through a {@link CSVPrinter} writing to an in-memory buffer.
     *
     * @param values the values to format.
     * @return the printed record without its trailing record separator.
     * @throws IOException if printing fails.
     */
    private String format_(final Object... values) throws IOException {
        final StringWriter out = new StringWriter();
        try (CSVPrinter csvPrinter = new CSVPrinter(out, this)) {
            csvPrinter.printRecord(values);
            final String res = out.toString();
            // Drop the record separator from the end of the output, when one is configured.
            final int len = recordSeparator != null ? res.length() - recordSeparator.length() : res.length();
            return res.substring(0, len);
        }
    }

    /**
     * Gets whether duplicate names are allowed in the headers.
     *
     * @return {@code true} only if the duplicate header mode is {@link DuplicateHeaderMode#ALLOW_ALL}.
     * @since 1.7
     * @deprecated Use {@link #getDuplicateHeaderMode()}.
     */
    @Deprecated
    public boolean getAllowDuplicateHeaderNames() {
        return duplicateHeaderMode == DuplicateHeaderMode.ALLOW_ALL;
    }

    /**
     * Gets whether missing column names are allowed when parsing the header line.
     *
     * @return {@code true} if missing column names are allowed when parsing the header line, {@code false} to throw an {@link IllegalArgumentException}.
     */
    public boolean getAllowMissingColumnNames() {
        return allowMissingColumnNames;
    }

    /**
     * Gets whether to flush on close.
     *
     * @return whether to flush on close.
     * @since 1.6
     */
    public boolean getAutoFlush() {
        return autoFlush;
    }

    /**
     * Gets the comment marker character, {@code null} disables comments.
     * <p>
     * The comment start character is only recognized at the start of a line.
     * </p>
     * <p>
     * Comments are printed first, before headers.
     * </p>
     * <p>
     * Use {@link Builder#setCommentMarker(char)} or {@link Builder#setCommentMarker(Character)} to set the comment marker written at the start of each comment
     * line.
     * </p>
     * <p>
     * If the comment marker is not set, then the header comments are ignored.
* </p>
     * <p>
     * For example:
     * </p>
     *
     * <pre>
     * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
     * </pre>
     * <p>
     * writes:
     * </p>
     *
     * <pre>
     * # Generated by Apache Commons CSV
     * # 1970-01-01T00:00:00Z
     * </pre>
     *
     * @return the comment start marker, may be {@code null}
     */
    public Character getCommentMarker() {
        return commentMarker;
    }

    /**
     * Gets the first character delimiting the values (typically ';', ',' or '\t').
     *
     * @return the first delimiter character.
     * @deprecated Use {@link #getDelimiterString()}.
     */
    @Deprecated
    public char getDelimiter() {
        return delimiter.charAt(0);
    }

    /**
     * Gets the delimiter string (typically ";", "," or "\t") as a new character array.
     *
     * @return the characters of the delimiter.
     */
    char[] getDelimiterCharArray() {
        return delimiter.toCharArray();
    }

    /**
     * Gets the string delimiting the values (typically ";", "," or "\t").
     *
     * @return the delimiter.
     * @since 1.9.0
     */
    public String getDelimiterString() {
        return delimiter;
    }

    /**
     * Gets how duplicate headers are handled.
     *
     * @return if duplicate header values are allowed, allowed conditionally, or disallowed.
     * @since 1.10.0
     */
    public DuplicateHeaderMode getDuplicateHeaderMode() {
        return duplicateHeaderMode;
    }

    /**
     * Gets the escape character as a primitive.
     *
     * @return the escape character, or {@code 0} if no escape character is set.
     */
    char getEscapeChar() {
        return escapeCharacter != null ? escapeCharacter.charValue() : 0; // N.B. Explicit (un)boxing is intentional
    }

    /**
     * Gets the escape character.
*
     * @return the escape character, may be {@code null}
     */
    public Character getEscapeCharacter() {
        return escapeCharacter;
    }

    /**
     * Gets a copy of the header array.
     *
     * @return a copy of the header array; {@code null} if disabled, the empty array if to be read from the file
     */
    public String[] getHeader() {
        // Defensive copy so callers cannot modify this format's headers.
        return headers != null ? headers.clone() : null;
    }

    /**
     * Gets a copy of the header comment array to write before the CSV data.
     * <p>
     * This setting is ignored by the parser.
     * </p>
     * <p>
     * Comments are printed first, before headers.
     * </p>
     * <p>
     * Use {@link Builder#setCommentMarker(char)} or {@link Builder#setCommentMarker(Character)} to set the comment marker written at the start of each comment
     * line.
     * </p>
     * <p>
     * If the comment marker is not set, then the header comments are ignored.
     * </p>
     * <p>
     * For example:
     * </p>
     *
     * <pre>
     * builder.setCommentMarker('#').setHeaderComments("Generated by Apache Commons CSV", Instant.ofEpochMilli(0));
     * </pre>
     * <p>
     * writes:
     * </p>
     *
     * <pre>
     * # Generated by Apache Commons CSV
     * # 1970-01-01T00:00:00Z
     * </pre>
     *
     * @return a copy of the header comment array; {@code null} if disabled.
     */
    public String[] getHeaderComments() {
        // Defensive copy so callers cannot modify this format's header comments.
        return headerComments != null ? headerComments.clone() : null;
    }

    /**
     * Gets whether empty lines between records are ignored when parsing input.
     *
     * @return {@code true} if empty lines between records are ignored, {@code false} if they are turned into empty records.
     */
    public boolean getIgnoreEmptyLines() {
        return ignoreEmptyLines;
    }

    /**
     * Gets whether header names will be accessed ignoring case when parsing input.
*
     * @return {@code true} if header name case is ignored, {@code false} if header names are case-sensitive.
     * @since 1.3
     */
    public boolean getIgnoreHeaderCase() {
        return ignoreHeaderCase;
    }

    /**
     * Gets whether spaces around values are ignored when parsing input.
     *
     * @return {@code true} if spaces around values are ignored, {@code false} if they are treated as part of the value.
     */
    public boolean getIgnoreSurroundingSpaces() {
        return ignoreSurroundingSpaces;
    }

    /**
     * Gets whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility.
     *
     * @return whether reading end-of-file is allowed even when input is malformed, helps Excel compatibility.
     * @since 1.11.0
     */
    public boolean getLenientEof() {
        return lenientEof;
    }

    /**
     * Gets the String to convert to and from {@code null}.
     * <ul>
     * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading records.</li>
     * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
     * </ul>
     *
     * @return the String to convert to and from {@code null}. No substitution occurs if {@code null}
     */
    public String getNullString() {
        return nullString;
    }

    /**
     * Gets the character used to encapsulate values containing special characters.
     *
     * @return the quote character, may be {@code null}
     */
    public Character getQuoteCharacter() {
        return quoteCharacter;
    }

    /**
     * Gets the quote policy for output fields.
     *
     * @return the quote policy
     */
    public QuoteMode getQuoteMode() {
        return quoteMode;
    }

    /**
     * Gets the record separator delimiting output records.
1909 * 1910 * @return the record separator 1911 */ 1912 public String getRecordSeparator() { 1913 return recordSeparator; 1914 } 1915 1916 /** 1917 * Gets whether to skip the header record. 1918 * 1919 * @return whether to skip the header record. 1920 */ 1921 public boolean getSkipHeaderRecord() { 1922 return skipHeaderRecord; 1923 } 1924 1925 /** 1926 * Gets whether reading trailing data is allowed in records, helps Excel compatibility. 1927 * 1928 * @return whether reading trailing data is allowed in records, helps Excel compatibility. 1929 * @since 1.11.0 1930 */ 1931 public boolean getTrailingData() { 1932 return trailingData; 1933 } 1934 1935 /** 1936 * Gets whether to add a trailing delimiter. 1937 * 1938 * @return whether to add a trailing delimiter. 1939 * @since 1.3 1940 */ 1941 public boolean getTrailingDelimiter() { 1942 return trailingDelimiter; 1943 } 1944 1945 /** 1946 * Gets whether to trim leading and trailing blanks. This is used by {@link #print(Object, Appendable, boolean)} Also by {CSVParser#addRecordValue(boolean)} 1947 * 1948 * @return whether to trim leading and trailing blanks. 1949 */ 1950 public boolean getTrim() { 1951 return trim; 1952 } 1953 1954 @Override 1955 public int hashCode() { 1956 final int prime = 31; 1957 int result = 1; 1958 result = prime * result + Arrays.hashCode(headerComments); 1959 result = prime * result + Arrays.hashCode(headers); 1960 result = prime * result + Objects.hash(allowMissingColumnNames, autoFlush, commentMarker, delimiter, duplicateHeaderMode, escapeCharacter, 1961 ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, lenientEof, nullString, quoteCharacter, quoteMode, quotedNullString, 1962 recordSeparator, skipHeaderRecord, trailingData, trailingDelimiter, trim); 1963 return result; 1964 } 1965 1966 /** 1967 * Tests whether comments are supported by this format. 1968 * 1969 * Note that the comment introducer character is only recognized at the start of a line. 
1970 * 1971 * @return {@code true} is comments are supported, {@code false} otherwise 1972 */ 1973 public boolean isCommentMarkerSet() { 1974 return commentMarker != null; 1975 } 1976 1977 /** 1978 * Tests whether the next characters constitute a delimiter 1979 * 1980 * @param ch0 the first char (index 0). 1981 * @param charSeq the match char sequence 1982 * @param startIndex where start to match 1983 * @param delimiter the delimiter 1984 * @param delimiterLength the delimiter length 1985 * @return true if the match is successful 1986 */ 1987 private boolean isDelimiter(final char ch0, final CharSequence charSeq, final int startIndex, final char[] delimiter, final int delimiterLength) { 1988 if (ch0 != delimiter[0]) { 1989 return false; 1990 } 1991 final int len = charSeq.length(); 1992 if (startIndex + delimiterLength > len) { 1993 return false; 1994 } 1995 for (int i = 1; i < delimiterLength; i++) { 1996 if (charSeq.charAt(startIndex + i) != delimiter[i]) { 1997 return false; 1998 } 1999 } 2000 return true; 2001 } 2002 2003 /** 2004 * Tests whether escapes are being processed. 2005 * 2006 * @return {@code true} if escapes are processed 2007 */ 2008 public boolean isEscapeCharacterSet() { 2009 return escapeCharacter != null; 2010 } 2011 2012 /** 2013 * Tests whether a null string has been defined. 2014 * 2015 * @return {@code true} if a nullString is defined 2016 */ 2017 public boolean isNullStringSet() { 2018 return nullString != null; 2019 } 2020 2021 /** 2022 * Tests whether a quoteChar has been defined. 2023 * 2024 * @return {@code true} if a quoteChar is defined 2025 */ 2026 public boolean isQuoteCharacterSet() { 2027 return quoteCharacter != null; 2028 } 2029 2030 /** 2031 * Parses the specified content. 2032 * 2033 * <p> 2034 * See also the various static parse methods on {@link CSVParser}. 2035 * </p> 2036 * 2037 * @param reader the input stream 2038 * @return a parser over a stream of {@link CSVRecord}s. 
2039 * @throws IOException If an I/O error occurs 2040 * @throws CSVException Thrown on invalid input. 2041 */ 2042 public CSVParser parse(final Reader reader) throws IOException { 2043 return CSVParser.builder().setReader(reader).setFormat(this).get(); 2044 } 2045 2046 /** 2047 * Prints to the specified output. 2048 * 2049 * <p> 2050 * See also {@link CSVPrinter}. 2051 * </p> 2052 * 2053 * @param out the output. 2054 * @return a printer to an output. 2055 * @throws IOException thrown if the optional header cannot be printed. 2056 */ 2057 public CSVPrinter print(final Appendable out) throws IOException { 2058 return new CSVPrinter(out, this); 2059 } 2060 2061 /** 2062 * Prints to the specified {@code File} with given {@code Charset}. 2063 * 2064 * <p> 2065 * See also {@link CSVPrinter}. 2066 * </p> 2067 * 2068 * @param out the output. 2069 * @param charset A charset. 2070 * @return a printer to an output. 2071 * @throws IOException thrown if the optional header cannot be printed. 2072 * @since 1.5 2073 */ 2074 public CSVPrinter print(final File out, final Charset charset) throws IOException { 2075 return print(out.toPath(), charset); 2076 } 2077 2078 private void print(final InputStream inputStream, final Appendable out, final boolean newRecord) throws IOException { 2079 // InputStream is never null here 2080 // There is nothing to escape when quoting is used which is the default. 2081 if (!newRecord) { 2082 append(getDelimiterString(), out); 2083 } 2084 final boolean quoteCharacterSet = isQuoteCharacterSet(); 2085 if (quoteCharacterSet) { 2086 append(getQuoteCharacter().charValue(), out); // N.B. Explicit (un)boxing is intentional 2087 } 2088 // Stream the input to the output without reading or holding the whole value in memory. 2089 // AppendableOutputStream cannot "close" an Appendable. 
2090 try (OutputStream outputStream = new Base64OutputStream(new AppendableOutputStream<>(out))) { 2091 IOUtils.copy(inputStream, outputStream); 2092 } 2093 if (quoteCharacterSet) { 2094 append(getQuoteCharacter().charValue(), out); // N.B. Explicit (un)boxing is intentional 2095 } 2096 } 2097 2098 /** 2099 * Prints the {@code value} as the next value on the line to {@code out}. The value will be escaped or encapsulated as needed. Useful when one wants to 2100 * avoid creating CSVPrinters. Trims the value if {@link #getTrim()} is true. 2101 * 2102 * @param value value to output. 2103 * @param out where to print the value. 2104 * @param newRecord if this a new record. 2105 * @throws IOException If an I/O error occurs. 2106 * @since 1.4 2107 */ 2108 public synchronized void print(final Object value, final Appendable out, final boolean newRecord) throws IOException { 2109 // null values are considered empty 2110 // Only call CharSequence.toString() if you have to, helps GC-free use cases. 2111 CharSequence charSequence; 2112 if (value == null) { 2113 // https://issues.apache.org/jira/browse/CSV-203 2114 if (null == nullString) { 2115 charSequence = Constants.EMPTY; 2116 } else if (QuoteMode.ALL == quoteMode) { 2117 charSequence = quotedNullString; 2118 } else { 2119 charSequence = nullString; 2120 } 2121 } else if (value instanceof CharSequence) { 2122 charSequence = (CharSequence) value; 2123 } else if (value instanceof Reader) { 2124 print((Reader) value, out, newRecord); 2125 return; 2126 } else if (value instanceof InputStream) { 2127 print((InputStream) value, out, newRecord); 2128 return; 2129 } else { 2130 charSequence = value.toString(); 2131 } 2132 charSequence = getTrim() ? 
trim(charSequence) : charSequence; 2133 print(value, charSequence, out, newRecord); 2134 } 2135 2136 private synchronized void print(final Object object, final CharSequence value, final Appendable out, final boolean newRecord) throws IOException { 2137 final int offset = 0; 2138 final int len = value.length(); 2139 if (!newRecord) { 2140 out.append(getDelimiterString()); 2141 } 2142 if (object == null) { 2143 out.append(value); 2144 } else if (isQuoteCharacterSet()) { 2145 // The original object is needed so can check for Number 2146 printWithQuotes(object, value, out, newRecord); 2147 } else if (isEscapeCharacterSet()) { 2148 printWithEscapes(value, out); 2149 } else { 2150 out.append(value, offset, len); 2151 } 2152 } 2153 2154 /** 2155 * Prints to the specified {@code Path} with given {@code Charset}, returns a {@code CSVPrinter} which the caller MUST close. 2156 * 2157 * <p> 2158 * See also {@link CSVPrinter}. 2159 * </p> 2160 * 2161 * @param out the output. 2162 * @param charset A charset. 2163 * @return a printer to an output. 2164 * @throws IOException thrown if the optional header cannot be printed. 2165 * @since 1.5 2166 */ 2167 @SuppressWarnings("resource") 2168 public CSVPrinter print(final Path out, final Charset charset) throws IOException { 2169 return print(Files.newBufferedWriter(out, charset)); 2170 } 2171 2172 private void print(final Reader reader, final Appendable out, final boolean newRecord) throws IOException { 2173 // Reader is never null here 2174 if (!newRecord) { 2175 append(getDelimiterString(), out); 2176 } 2177 if (isQuoteCharacterSet()) { 2178 printWithQuotes(reader, out); 2179 } else if (isEscapeCharacterSet()) { 2180 printWithEscapes(reader, out); 2181 } else if (out instanceof Writer) { 2182 IOUtils.copyLarge(reader, (Writer) out); 2183 } else { 2184 IOUtils.copy(reader, out); 2185 } 2186 } 2187 2188 /** 2189 * Prints to the {@link System#out}. 2190 * 2191 * <p> 2192 * See also {@link CSVPrinter}. 
2193 * </p> 2194 * 2195 * @return a printer to {@link System#out}. 2196 * @throws IOException thrown if the optional header cannot be printed. 2197 * @since 1.5 2198 */ 2199 public CSVPrinter printer() throws IOException { 2200 return new CSVPrinter(System.out, this); 2201 } 2202 2203 /** 2204 * Outputs the trailing delimiter (if set) followed by the record separator (if set). 2205 * 2206 * @param appendable where to write 2207 * @throws IOException If an I/O error occurs. 2208 * @since 1.4 2209 */ 2210 public synchronized void println(final Appendable appendable) throws IOException { 2211 if (getTrailingDelimiter()) { 2212 append(getDelimiterString(), appendable); 2213 } 2214 if (recordSeparator != null) { 2215 append(recordSeparator, appendable); 2216 } 2217 } 2218 2219 /** 2220 * Prints the given {@code values} to {@code out} as a single record of delimiter-separated values followed by the record separator. 2221 * 2222 * <p> 2223 * The values will be quoted if needed. Quotes and new-line characters will be escaped. This method adds the record separator to the output after printing 2224 * the record, so there is no need to call {@link #println(Appendable)}. 2225 * </p> 2226 * 2227 * @param appendable where to write. 2228 * @param values values to output. 2229 * @throws IOException If an I/O error occurs. 2230 * @since 1.4 2231 */ 2232 public synchronized void printRecord(final Appendable appendable, final Object... values) throws IOException { 2233 for (int i = 0; i < values.length; i++) { 2234 print(values[i], appendable, i == 0); 2235 } 2236 println(appendable); 2237 } 2238 2239 /* 2240 * Note: Must only be called if escaping is enabled, otherwise can throw exceptions. 
2241 */ 2242 private void printWithEscapes(final CharSequence charSeq, final Appendable appendable) throws IOException { 2243 int start = 0; 2244 int pos = 0; 2245 final int end = charSeq.length(); 2246 final char[] delimArray = getDelimiterCharArray(); 2247 final int delimLength = delimArray.length; 2248 final char escape = getEscapeChar(); 2249 while (pos < end) { 2250 char c = charSeq.charAt(pos); 2251 final boolean isDelimiterStart = isDelimiter(c, charSeq, pos, delimArray, delimLength); 2252 final boolean isCr = c == Constants.CR; 2253 final boolean isLf = c == Constants.LF; 2254 if (isCr || isLf || c == escape || isDelimiterStart) { 2255 // write out segment up until this char 2256 if (pos > start) { 2257 appendable.append(charSeq, start, pos); 2258 } 2259 if (isLf) { 2260 c = 'n'; 2261 } else if (isCr) { 2262 c = 'r'; 2263 } 2264 escape(c, appendable); 2265 if (isDelimiterStart) { 2266 for (int i = 1; i < delimLength; i++) { 2267 pos++; 2268 escape(charSeq.charAt(pos), appendable); 2269 } 2270 } 2271 start = pos + 1; // start on the current char after this one 2272 } 2273 pos++; 2274 } 2275 2276 // write last segment 2277 if (pos > start) { 2278 appendable.append(charSeq, start, pos); 2279 } 2280 } 2281 2282 /* 2283 * Note: Must only be called if escaping is enabled, otherwise can throw exceptions. 2284 */ 2285 private void printWithEscapes(final Reader reader, final Appendable appendable) throws IOException { 2286 int start = 0; 2287 int pos = 0; 2288 @SuppressWarnings("resource") // Temp reader on input reader. 
2289 final ExtendedBufferedReader bufferedReader = new ExtendedBufferedReader(reader); 2290 final char[] delimArray = getDelimiterCharArray(); 2291 final int delimLength = delimArray.length; 2292 final char escape = getEscapeChar(); 2293 final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE); 2294 int c; 2295 final char[] lookAheadBuffer = new char[delimLength - 1]; 2296 while (EOF != (c = bufferedReader.read())) { 2297 builder.append((char) c); 2298 Arrays.fill(lookAheadBuffer, (char) 0); 2299 bufferedReader.peek(lookAheadBuffer); 2300 final String test = builder.toString() + new String(lookAheadBuffer); 2301 final boolean isDelimiterStart = isDelimiter((char) c, test, pos, delimArray, delimLength); 2302 final boolean isCr = c == Constants.CR; 2303 final boolean isLf = c == Constants.LF; 2304 if (isCr || isLf || c == escape || isDelimiterStart) { 2305 // write out segment up until this char 2306 if (pos > start) { 2307 append(builder.substring(start, pos), appendable); 2308 builder.setLength(0); 2309 pos = -1; 2310 } 2311 if (isLf) { 2312 c = 'n'; 2313 } else if (isCr) { 2314 c = 'r'; 2315 } 2316 escape((char) c, appendable); 2317 if (isDelimiterStart) { 2318 for (int i = 1; i < delimLength; i++) { 2319 escape((char) bufferedReader.read(), appendable); 2320 } 2321 } 2322 start = pos + 1; // start on the current char after this one 2323 } 2324 pos++; 2325 } 2326 // write last segment 2327 if (pos > start) { 2328 appendable.append(builder, start, pos); 2329 } 2330 } 2331 2332 /* 2333 * Note: must only be called if quoting is enabled, otherwise will generate NPE 2334 */ 2335 // the original object is needed so can check for Number 2336 private void printWithQuotes(final Object object, final CharSequence charSeq, final Appendable out, final boolean newRecord) throws IOException { 2337 boolean quote = false; 2338 int start = 0; 2339 int pos = 0; 2340 final int len = charSeq.length(); 2341 final char[] delim = getDelimiterCharArray(); 2342 final 
int delimLength = delim.length; 2343 final char quoteChar = getQuoteCharacter().charValue(); // N.B. Explicit (un)boxing is intentional 2344 // If escape char not specified, default to the quote char 2345 // This avoids having to keep checking whether there is an escape character 2346 // at the cost of checking against quote twice 2347 final char escapeChar = isEscapeCharacterSet() ? getEscapeChar() : quoteChar; 2348 QuoteMode quoteModePolicy = getQuoteMode(); 2349 if (quoteModePolicy == null) { 2350 quoteModePolicy = QuoteMode.MINIMAL; 2351 } 2352 switch (quoteModePolicy) { 2353 case ALL: 2354 case ALL_NON_NULL: 2355 quote = true; 2356 break; 2357 case NON_NUMERIC: 2358 quote = !(object instanceof Number); 2359 break; 2360 case NONE: 2361 // Use the existing escaping code 2362 printWithEscapes(charSeq, out); 2363 return; 2364 case MINIMAL: 2365 if (len <= 0) { 2366 // Always quote an empty token that is the first 2367 // on the line, as it may be the only thing on the 2368 // line. If it were not quoted in that case, 2369 // an empty line has no tokens. 2370 if (newRecord) { 2371 quote = true; 2372 } 2373 } else { 2374 char c = charSeq.charAt(pos); 2375 if (c <= Constants.COMMENT) { 2376 // Some other chars at the start of a value caused the parser to fail, so for now 2377 // encapsulate if we start in anything less than '#'. We are being conservative 2378 // by including the default comment char too. 
2379 quote = true; 2380 } else { 2381 while (pos < len) { 2382 c = charSeq.charAt(pos); 2383 if (c == Constants.LF || c == Constants.CR || c == quoteChar || c == escapeChar || isDelimiter(c, charSeq, pos, delim, delimLength)) { 2384 quote = true; 2385 break; 2386 } 2387 pos++; 2388 } 2389 2390 if (!quote) { 2391 pos = len - 1; 2392 c = charSeq.charAt(pos); 2393 // Some other chars at the end caused the parser to fail, so for now 2394 // encapsulate if we end in anything less than ' ' 2395 if (isTrimChar(c)) { 2396 quote = true; 2397 } 2398 } 2399 } 2400 } 2401 if (!quote) { 2402 // No encapsulation needed - write out the original value 2403 out.append(charSeq, start, len); 2404 return; 2405 } 2406 break; 2407 default: 2408 throw new IllegalStateException("Unexpected Quote value: " + quoteModePolicy); 2409 } 2410 if (!quote) { 2411 // No encapsulation needed - write out the original value 2412 out.append(charSeq, start, len); 2413 return; 2414 } 2415 // We hit something that needed encapsulation 2416 out.append(quoteChar); 2417 // Pick up where we left off: pos should be positioned on the first character that caused 2418 // the need for encapsulation. 2419 while (pos < len) { 2420 final char c = charSeq.charAt(pos); 2421 if (c == quoteChar || c == escapeChar) { 2422 // write out the chunk up until this point 2423 out.append(charSeq, start, pos); 2424 out.append(escapeChar); // now output the escape 2425 start = pos; // and restart with the matched char 2426 } 2427 pos++; 2428 } 2429 // Write the last segment 2430 out.append(charSeq, start, pos); 2431 out.append(quoteChar); 2432 } 2433 2434 /** 2435 * Always use quotes unless QuoteMode is NONE, so we do not have to look ahead. 
2436 * 2437 * @param reader What to print 2438 * @param appendable Where to print it 2439 * @throws IOException If an I/O error occurs 2440 */ 2441 private void printWithQuotes(final Reader reader, final Appendable appendable) throws IOException { 2442 if (getQuoteMode() == QuoteMode.NONE) { 2443 printWithEscapes(reader, appendable); 2444 return; 2445 } 2446 final char quote = getQuoteCharacter().charValue(); // N.B. Explicit (un)boxing is intentional 2447 // (1) Append opening quote 2448 append(quote, appendable); 2449 // (2) Append Reader contents, doubling quotes 2450 int c; 2451 while (EOF != (c = reader.read())) { 2452 append((char) c, appendable); 2453 if (c == quote) { 2454 append(quote, appendable); 2455 } 2456 } 2457 // (3) Append closing quote 2458 append(quote, appendable); 2459 } 2460 2461 @Override 2462 public String toString() { 2463 final StringBuilder sb = new StringBuilder(); 2464 sb.append("Delimiter=<").append(delimiter).append('>'); 2465 if (isEscapeCharacterSet()) { 2466 sb.append(' '); 2467 sb.append("Escape=<").append(escapeCharacter).append('>'); 2468 } 2469 if (isQuoteCharacterSet()) { 2470 sb.append(' '); 2471 sb.append("QuoteChar=<").append(quoteCharacter).append('>'); 2472 } 2473 if (quoteMode != null) { 2474 sb.append(' '); 2475 sb.append("QuoteMode=<").append(quoteMode).append('>'); 2476 } 2477 if (isCommentMarkerSet()) { 2478 sb.append(' '); 2479 sb.append("CommentStart=<").append(commentMarker).append('>'); 2480 } 2481 if (isNullStringSet()) { 2482 sb.append(' '); 2483 sb.append("NullString=<").append(nullString).append('>'); 2484 } 2485 if (recordSeparator != null) { 2486 sb.append(' '); 2487 sb.append("RecordSeparator=<").append(recordSeparator).append('>'); 2488 } 2489 if (getIgnoreEmptyLines()) { 2490 sb.append(" EmptyLines:ignored"); 2491 } 2492 if (getIgnoreSurroundingSpaces()) { 2493 sb.append(" SurroundingSpaces:ignored"); 2494 } 2495 if (getIgnoreHeaderCase()) { 2496 sb.append(" IgnoreHeaderCase:ignored"); 2497 } 2498 
sb.append(" SkipHeaderRecord:").append(skipHeaderRecord); 2499 if (headerComments != null) { 2500 sb.append(' '); 2501 sb.append("HeaderComments:").append(Arrays.toString(headerComments)); 2502 } 2503 if (headers != null) { 2504 sb.append(' '); 2505 sb.append("Header:").append(Arrays.toString(headers)); 2506 } 2507 return sb.toString(); 2508 } 2509 2510 String trim(final String value) { 2511 return getTrim() ? value.trim() : value; 2512 } 2513 2514 /** 2515 * Verifies the validity and consistency of the attributes, and throws an {@link IllegalArgumentException} if necessary. 2516 * <p> 2517 * Because an instance can be used for both writing and parsing, not all conditions can be tested here. For example, allowMissingColumnNames is only used 2518 * for parsing, so it cannot be used here. 2519 * </p> 2520 * 2521 * @throws IllegalArgumentException Throw when any attribute is invalid or inconsistent with other attributes. 2522 */ 2523 private void validate() throws IllegalArgumentException { 2524 if (containsLineBreak(delimiter)) { 2525 throw new IllegalArgumentException("The delimiter cannot be a line break"); 2526 } 2527 if (quoteCharacter != null && contains(delimiter, quoteCharacter.charValue())) { // N.B. Explicit (un)boxing is intentional 2528 throw new IllegalArgumentException("The quoteChar character and the delimiter cannot be the same ('" + quoteCharacter + "')"); 2529 } 2530 if (escapeCharacter != null && contains(delimiter, escapeCharacter.charValue())) { // N.B. Explicit (un)boxing is intentional 2531 throw new IllegalArgumentException("The escape character and the delimiter cannot be the same ('" + escapeCharacter + "')"); 2532 } 2533 if (commentMarker != null && contains(delimiter, commentMarker.charValue())) { // N.B. 
Explicit (un)boxing is intentional 2534 throw new IllegalArgumentException("The comment start character and the delimiter cannot be the same ('" + commentMarker + "')"); 2535 } 2536 if (quoteCharacter != null && quoteCharacter.equals(commentMarker)) { 2537 throw new IllegalArgumentException("The comment start character and the quoteChar cannot be the same ('" + commentMarker + "')"); 2538 } 2539 if (escapeCharacter != null && escapeCharacter.equals(commentMarker)) { 2540 throw new IllegalArgumentException("The comment start and the escape character cannot be the same ('" + commentMarker + "')"); 2541 } 2542 if (escapeCharacter == null && quoteMode == QuoteMode.NONE) { 2543 throw new IllegalArgumentException("Quote mode set to NONE but no escape character is set"); 2544 } 2545 // Validate headers 2546 if (headers != null && duplicateHeaderMode != DuplicateHeaderMode.ALLOW_ALL) { 2547 final Set<String> dupCheckSet = new HashSet<>(headers.length); 2548 final boolean emptyDuplicatesAllowed = duplicateHeaderMode == DuplicateHeaderMode.ALLOW_EMPTY; 2549 for (final String header : headers) { 2550 final boolean blank = isBlank(header); 2551 // Sanitize all empty headers to the empty string "" when checking duplicates 2552 final boolean containsHeader = !dupCheckSet.add(blank ? "" : header); 2553 if (containsHeader && !(blank && emptyDuplicatesAllowed)) { 2554 throw new IllegalArgumentException(String.format( 2555 "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().", header, 2556 Arrays.toString(headers))); 2557 } 2558 } 2559 } 2560 } 2561 2562 /** 2563 * Builds a new {@code CSVFormat} that allows duplicate header names. 
2564 * 2565 * @return a new {@code CSVFormat} that allows duplicate header names 2566 * @since 1.7 2567 * @deprecated Use {@link Builder#setAllowDuplicateHeaderNames(boolean) Builder#setAllowDuplicateHeaderNames(true)} 2568 */ 2569 @Deprecated 2570 public CSVFormat withAllowDuplicateHeaderNames() { 2571 return builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL).get(); 2572 } 2573 2574 /** 2575 * Builds a new {@code CSVFormat} with duplicate header names behavior set to the given value. 2576 * 2577 * @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow. 2578 * @return a new {@code CSVFormat} with duplicate header names behavior set to the given value. 2579 * @since 1.7 2580 * @deprecated Use {@link Builder#setAllowDuplicateHeaderNames(boolean)} 2581 */ 2582 @Deprecated 2583 public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) { 2584 final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY; 2585 return builder().setDuplicateHeaderMode(mode).get(); 2586 } 2587 2588 /** 2589 * Builds a new {@code CSVFormat} with the missing column names behavior of the format set to {@code true}. 2590 * 2591 * @return A new CSVFormat that is equal to this but with the specified missing column names behavior. 2592 * @see Builder#setAllowMissingColumnNames(boolean) 2593 * @since 1.1 2594 * @deprecated Use {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)} 2595 */ 2596 @Deprecated 2597 public CSVFormat withAllowMissingColumnNames() { 2598 return builder().setAllowMissingColumnNames(true).get(); 2599 } 2600 2601 /** 2602 * Builds a new {@code CSVFormat} with the missing column names behavior of the format set to the given value. 
2603 * 2604 * @param allowMissingColumnNames the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause 2605 * an {@link IllegalArgumentException} to be thrown. 2606 * @return A new CSVFormat that is equal to this but with the specified missing column names behavior. 2607 * @deprecated Use {@link Builder#setAllowMissingColumnNames(boolean)} 2608 */ 2609 @Deprecated 2610 public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) { 2611 return builder().setAllowMissingColumnNames(allowMissingColumnNames).get(); 2612 } 2613 2614 /** 2615 * Builds a new {@code CSVFormat} with whether to flush on close. 2616 * 2617 * @param autoFlush whether to flush on close. 2618 * @return A new CSVFormat that is equal to this but with the specified autoFlush setting. 2619 * @since 1.6 2620 * @deprecated Use {@link Builder#setAutoFlush(boolean)} 2621 */ 2622 @Deprecated 2623 public CSVFormat withAutoFlush(final boolean autoFlush) { 2624 return builder().setAutoFlush(autoFlush).get(); 2625 } 2626 2627 /** 2628 * Builds a new {@code CSVFormat} with the comment start marker of the format set to the specified character. 2629 * 2630 * Note that the comment start character is only recognized at the start of a line. 2631 * 2632 * @param commentMarker the comment start marker 2633 * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker 2634 * @throws IllegalArgumentException thrown if the specified character is a line break 2635 * @deprecated Use {@link Builder#setCommentMarker(char)} 2636 */ 2637 @Deprecated 2638 public CSVFormat withCommentMarker(final char commentMarker) { 2639 return builder().setCommentMarker(commentMarker).get(); 2640 } 2641 2642 /** 2643 * Builds a new {@code CSVFormat} with the comment start marker of the format set to the specified character. 
2644 * 2645 * Note that the comment start character is only recognized at the start of a line. 2646 * 2647 * @param commentMarker the comment start marker, use {@code null} to disable 2648 * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker 2649 * @throws IllegalArgumentException thrown if the specified character is a line break 2650 * @deprecated Use {@link Builder#setCommentMarker(Character)} 2651 */ 2652 @Deprecated 2653 public CSVFormat withCommentMarker(final Character commentMarker) { 2654 return builder().setCommentMarker(commentMarker).get(); 2655 } 2656 2657 /** 2658 * Builds a new {@code CSVFormat} with the delimiter of the format set to the specified character. 2659 * 2660 * @param delimiter the delimiter character 2661 * @return A new CSVFormat that is equal to this with the specified character as a delimiter 2662 * @throws IllegalArgumentException thrown if the specified character is a line break 2663 * @deprecated Use {@link Builder#setDelimiter(char)} 2664 */ 2665 @Deprecated 2666 public CSVFormat withDelimiter(final char delimiter) { 2667 return builder().setDelimiter(delimiter).get(); 2668 } 2669 2670 /** 2671 * Builds a new {@code CSVFormat} with the escape character of the format set to the specified character. 2672 * 2673 * @param escape the escape character 2674 * @return A new CSVFormat that is equal to this but with the specified character as the escape character 2675 * @throws IllegalArgumentException thrown if the specified character is a line break 2676 * @deprecated Use {@link Builder#setEscape(char)} 2677 */ 2678 @Deprecated 2679 public CSVFormat withEscape(final char escape) { 2680 return builder().setEscape(escape).get(); 2681 } 2682 2683 /** 2684 * Builds a new {@code CSVFormat} with the escape character of the format set to the specified character. 
2685 * 2686 * @param escape the escape character, use {@code null} to disable 2687 * @return A new CSVFormat that is equal to this but with the specified character as the escape character 2688 * @throws IllegalArgumentException thrown if the specified character is a line break 2689 * @deprecated Use {@link Builder#setEscape(Character)} 2690 */ 2691 @Deprecated 2692 public CSVFormat withEscape(final Character escape) { 2693 return builder().setEscape(escape).get(); 2694 } 2695 2696 // @formatter:off 2697 /** 2698 * Builds a new {@code CSVFormat} using the first record as header. 2699 * 2700 * <p> 2701 * Calling this method is equivalent to calling: 2702 * </p> 2703 * 2704 * <pre> 2705 * CSVFormat format = aFormat.builder() 2706 * .setHeader() 2707 * .setSkipHeaderRecord(true) 2708 * .get(); 2709 * </pre> 2710 * 2711 * @return A new CSVFormat that is equal to this but using the first record as header. 2712 * @see Builder#setSkipHeaderRecord(boolean) 2713 * @see Builder#setHeader(String...) 2714 * @since 1.3 2715 * @deprecated Use {@link Builder#setHeader(String...) Builder#setHeader()}.{@link Builder#setSkipHeaderRecord(boolean) setSkipHeaderRecord(true)}. 2716 */ 2717 // @formatter:on 2718 @Deprecated 2719 public CSVFormat withFirstRecordAsHeader() { 2720 // @formatter:off 2721 return builder() 2722 .setHeader() 2723 .setSkipHeaderRecord(true) 2724 .get(); 2725 // @formatter:on 2726 } 2727 2728 /** 2729 * Builds a new {@code CSVFormat} with the header of the format defined by the enum class. 2730 * 2731 * <p> 2732 * Example: 2733 * </p> 2734 * 2735 * <pre> 2736 * public enum MyHeader { 2737 * Name, Email, Phone 2738 * } 2739 * ... 2740 * CSVFormat format = aFormat.builder().setHeader(MyHeader.class).get(); 2741 * </pre> 2742 * <p> 2743 * The header is also used by the {@link CSVPrinter}. 2744 * </p> 2745 * 2746 * @param headerEnum the enum defining the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. 
2747 * @return A new CSVFormat that is equal to this but with the specified header 2748 * @see Builder#setHeader(String...) 2749 * @see Builder#setSkipHeaderRecord(boolean) 2750 * @since 1.3 2751 * @deprecated Use {@link Builder#setHeader(Class)} 2752 */ 2753 @Deprecated 2754 public CSVFormat withHeader(final Class<? extends Enum<?>> headerEnum) { 2755 return builder().setHeader(headerEnum).get(); 2756 } 2757 2758 /** 2759 * Builds a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can either be parsed automatically from the 2760 * input file with: 2761 * 2762 * <pre> 2763 * CSVFormat format = aFormat.builder().setHeader().get(); 2764 * </pre> 2765 * 2766 * or specified manually with: 2767 * 2768 * <pre> 2769 * CSVFormat format = aFormat.builder().setHeader(resultSet).get(); 2770 * </pre> 2771 * <p> 2772 * The header is also used by the {@link CSVPrinter}. 2773 * </p> 2774 * 2775 * @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. 2776 * @return A new CSVFormat that is equal to this but with the specified header 2777 * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set. 2778 * @since 1.1 2779 * @deprecated Use {@link Builder#setHeader(ResultSet)} 2780 */ 2781 @Deprecated 2782 public CSVFormat withHeader(final ResultSet resultSet) throws SQLException { 2783 return builder().setHeader(resultSet).get(); 2784 } 2785 2786 /** 2787 * Builds a new {@code CSVFormat} with the header of the format set from the result set metadata. 
The header can either be parsed automatically from the 2788 * input file with: 2789 * 2790 * <pre> 2791 * CSVFormat format = aFormat.builder().setHeader().get() 2792 * </pre> 2793 * 2794 * or specified manually with: 2795 * 2796 * <pre> 2797 * CSVFormat format = aFormat.builder().setHeader(resultSetMetaData).get() 2798 * </pre> 2799 * <p> 2800 * The header is also used by the {@link CSVPrinter}. 2801 * </p> 2802 * 2803 * @param resultSetMetaData the metaData for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. 2804 * @return A new CSVFormat that is equal to this but with the specified header 2805 * @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set. 2806 * @since 1.1 2807 * @deprecated Use {@link Builder#setHeader(ResultSetMetaData)} 2808 */ 2809 @Deprecated 2810 public CSVFormat withHeader(final ResultSetMetaData resultSetMetaData) throws SQLException { 2811 return builder().setHeader(resultSetMetaData).get(); 2812 } 2813 2814 /** 2815 * Builds a new {@code CSVFormat} with the header of the format set to the given values. The header can either be parsed automatically from the input file 2816 * with: 2817 * 2818 * <pre> 2819 * CSVFormat format = aFormat.builder().setHeader().get(); 2820 * </pre> 2821 * 2822 * or specified manually with: 2823 * 2824 * <pre>{@code 2825 * CSVFormat format = aFormat.builder().setHeader("name", "email", "phone").get(); 2826 * }</pre> 2827 * <p> 2828 * The header is also used by the {@link CSVPrinter}. 2829 * </p> 2830 * 2831 * @param header the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise. 2832 * @return A new CSVFormat that is equal to this but with the specified header 2833 * @see Builder#setSkipHeaderRecord(boolean) 2834 * @deprecated Use {@link Builder#setHeader(String...)} 2835 */ 2836 @Deprecated 2837 public CSVFormat withHeader(final String... 
header) { 2838 return builder().setHeader(header).get(); 2839 } 2840 2841 /** 2842 * Builds a new {@code CSVFormat} with the header comments of the format set to the given values. The comments will be printed first, before the headers. 2843 * This setting is ignored by the parser. 2844 * 2845 * <pre>{@code 2846 * CSVFormat format = aFormat.builder().setHeaderComments("Generated by Apache Commons CSV.", Instant.now()).get(); 2847 * }</pre> 2848 * 2849 * @param headerComments the headerComments which will be printed by the Printer before the actual CSV data. 2850 * @return A new CSVFormat that is equal to this but with the specified header 2851 * @see Builder#setSkipHeaderRecord(boolean) 2852 * @since 1.1 2853 * @deprecated Use {@link Builder#setHeaderComments(Object...)} 2854 */ 2855 @Deprecated 2856 public CSVFormat withHeaderComments(final Object... headerComments) { 2857 return builder().setHeaderComments(headerComments).get(); 2858 } 2859 2860 /** 2861 * Builds a new {@code CSVFormat} with the empty line skipping behavior of the format set to {@code true}. 2862 * 2863 * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior. 2864 * @see Builder#setIgnoreEmptyLines(boolean) 2865 * @since 1.1 2866 * @deprecated Use {@link Builder#setIgnoreEmptyLines(boolean) Builder#setIgnoreEmptyLines(true)} 2867 */ 2868 @Deprecated 2869 public CSVFormat withIgnoreEmptyLines() { 2870 return builder().setIgnoreEmptyLines(true).get(); 2871 } 2872 2873 /** 2874 * Builds a new {@code CSVFormat} with the empty line skipping behavior of the format set to the given value. 2875 * 2876 * @param ignoreEmptyLines the empty line skipping behavior, {@code true} to ignore the empty lines between the records, {@code false} to translate empty 2877 * lines to empty records. 2878 * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior. 
2879 * @deprecated Use {@link Builder#setIgnoreEmptyLines(boolean)} 2880 */ 2881 @Deprecated 2882 public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) { 2883 return builder().setIgnoreEmptyLines(ignoreEmptyLines).get(); 2884 } 2885 2886 /** 2887 * Builds a new {@code CSVFormat} with the header ignore case behavior set to {@code true}. 2888 * 2889 * @return A new CSVFormat that will ignore the new case header name behavior. 2890 * @see Builder#setIgnoreHeaderCase(boolean) 2891 * @since 1.3 2892 * @deprecated Use {@link Builder#setIgnoreHeaderCase(boolean) Builder#setIgnoreHeaderCase(true)} 2893 */ 2894 @Deprecated 2895 public CSVFormat withIgnoreHeaderCase() { 2896 return builder().setIgnoreHeaderCase(true).get(); 2897 } 2898 2899 /** 2900 * Builds a new {@code CSVFormat} with whether header names should be accessed ignoring case. 2901 * 2902 * @param ignoreHeaderCase the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as is. 2903 * @return A new CSVFormat that will ignore case header name if specified as {@code true} 2904 * @since 1.3 2905 * @deprecated Use {@link Builder#setIgnoreHeaderCase(boolean)} 2906 */ 2907 @Deprecated 2908 public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) { 2909 return builder().setIgnoreHeaderCase(ignoreHeaderCase).get(); 2910 } 2911 2912 /** 2913 * Builds a new {@code CSVFormat} with the parser trimming behavior of the format set to {@code true}. 2914 * 2915 * @return A new CSVFormat that is equal to this but with the specified parser trimming behavior. 
2916 * @see Builder#setIgnoreSurroundingSpaces(boolean) 2917 * @since 1.1 2918 * @deprecated Use {@link Builder#setIgnoreSurroundingSpaces(boolean) Builder#setIgnoreSurroundingSpaces(true)} 2919 */ 2920 @Deprecated 2921 public CSVFormat withIgnoreSurroundingSpaces() { 2922 return builder().setIgnoreSurroundingSpaces(true).get(); 2923 } 2924 2925 /** 2926 * Builds a new {@code CSVFormat} with the parser trimming behavior of the format set to the given value. 2927 * 2928 * @param ignoreSurroundingSpaces the parser trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the spaces as is. 2929 * @return A new CSVFormat that is equal to this but with the specified trimming behavior. 2930 * @deprecated Use {@link Builder#setIgnoreSurroundingSpaces(boolean)} 2931 */ 2932 @Deprecated 2933 public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) { 2934 return builder().setIgnoreSurroundingSpaces(ignoreSurroundingSpaces).get(); 2935 } 2936 2937 /** 2938 * Builds a new {@code CSVFormat} with conversions to and from null for strings on input and output. 2939 * <ul> 2940 * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading records.</li> 2941 * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li> 2942 * </ul> 2943 * 2944 * @param nullString the String to convert to and from {@code null}. No substitution occurs if {@code null} 2945 * @return A new CSVFormat that is equal to this but with the specified null conversion string. 2946 * @deprecated Use {@link Builder#setNullString(String)} 2947 */ 2948 @Deprecated 2949 public CSVFormat withNullString(final String nullString) { 2950 return builder().setNullString(nullString).get(); 2951 } 2952 2953 /** 2954 * Builds a new {@code CSVFormat} with the quoteChar of the format set to the specified character. 
2955 * 2956 * @param quoteChar the quote character 2957 * @return A new CSVFormat that is equal to this but with the specified character as quoteChar 2958 * @throws IllegalArgumentException thrown if the specified character is a line break 2959 * @deprecated Use {@link Builder#setQuote(char)} 2960 */ 2961 @Deprecated 2962 public CSVFormat withQuote(final char quoteChar) { 2963 return builder().setQuote(quoteChar).get(); 2964 } 2965 2966 /** 2967 * Builds a new {@code CSVFormat} with the quoteChar of the format set to the specified character. 2968 * 2969 * @param quoteChar the quote character, use {@code null} to disable. 2970 * @return A new CSVFormat that is equal to this but with the specified character as quoteChar 2971 * @throws IllegalArgumentException thrown if the specified character is a line break 2972 * @deprecated Use {@link Builder#setQuote(Character)} 2973 */ 2974 @Deprecated 2975 public CSVFormat withQuote(final Character quoteChar) { 2976 return builder().setQuote(quoteChar).get(); 2977 } 2978 2979 /** 2980 * Builds a new {@code CSVFormat} with the output quote policy of the format set to the specified value. 2981 * 2982 * @param quoteMode the quote policy to use for output. 2983 * @return A new CSVFormat that is equal to this but with the specified quote policy 2984 * @deprecated Use {@link Builder#setQuoteMode(QuoteMode)} 2985 */ 2986 @Deprecated 2987 public CSVFormat withQuoteMode(final QuoteMode quoteMode) { 2988 return builder().setQuoteMode(quoteMode).get(); 2989 } 2990 2991 /** 2992 * Builds a new {@code CSVFormat} with the record separator of the format set to the specified character. 2993 * 2994 * <p> 2995 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' and 2996 * "\r\n" 2997 * </p> 2998 * 2999 * @param recordSeparator the record separator to use for output. 
3000 * @return A new CSVFormat that is equal to this but with the specified output record separator 3001 * @deprecated Use {@link Builder#setRecordSeparator(char)} 3002 */ 3003 @Deprecated 3004 public CSVFormat withRecordSeparator(final char recordSeparator) { 3005 return builder().setRecordSeparator(recordSeparator).get(); 3006 } 3007 3008 /** 3009 * Builds a new {@code CSVFormat} with the record separator of the format set to the specified String. 3010 * 3011 * <p> 3012 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' and 3013 * "\r\n" 3014 * </p> 3015 * 3016 * @param recordSeparator the record separator to use for output. 3017 * @return A new CSVFormat that is equal to this but with the specified output record separator 3018 * @throws IllegalArgumentException if recordSeparator is none of CR, LF or CRLF 3019 * @deprecated Use {@link Builder#setRecordSeparator(String)} 3020 */ 3021 @Deprecated 3022 public CSVFormat withRecordSeparator(final String recordSeparator) { 3023 return builder().setRecordSeparator(recordSeparator).get(); 3024 } 3025 3026 /** 3027 * Builds a new {@code CSVFormat} with skipping the header record set to {@code true}. 3028 * 3029 * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting. 3030 * @see Builder#setSkipHeaderRecord(boolean) 3031 * @see Builder#setHeader(String...) 3032 * @since 1.1 3033 * @deprecated Use {@link Builder#setSkipHeaderRecord(boolean) Builder#setSkipHeaderRecord(true)} 3034 */ 3035 @Deprecated 3036 public CSVFormat withSkipHeaderRecord() { 3037 return builder().setSkipHeaderRecord(true).get(); 3038 } 3039 3040 /** 3041 * Builds a new {@code CSVFormat} with whether to skip the header record. 3042 * 3043 * @param skipHeaderRecord whether to skip the header record. 3044 * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting. 
3045 * @see Builder#setHeader(String...) 3046 * @deprecated Use {@link Builder#setSkipHeaderRecord(boolean)} 3047 */ 3048 @Deprecated 3049 public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) { 3050 return builder().setSkipHeaderRecord(skipHeaderRecord).get(); 3051 } 3052 3053 /** 3054 * Builds a new {@code CSVFormat} with the record separator of the format set to the operating system's line separator string, typically CR+LF on Windows 3055 * and LF on Linux. 3056 * 3057 * <p> 3058 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently only works for inputs with '\n', '\r' and 3059 * "\r\n" 3060 * </p> 3061 * 3062 * @return A new CSVFormat that is equal to this but with the operating system's line separator string. 3063 * @since 1.6 3064 * @deprecated Use {@link Builder#setRecordSeparator(String) setRecordSeparator(System.lineSeparator())} 3065 */ 3066 @Deprecated 3067 public CSVFormat withSystemRecordSeparator() { 3068 return builder().setRecordSeparator(System.lineSeparator()).get(); 3069 } 3070 3071 /** 3072 * Builds a new {@code CSVFormat} to add a trailing delimiter. 3073 * 3074 * @return A new CSVFormat that is equal to this but with the trailing delimiter setting. 3075 * @since 1.3 3076 * @deprecated Use {@link Builder#setTrailingDelimiter(boolean) Builder#setTrailingDelimiter(true)} 3077 */ 3078 @Deprecated 3079 public CSVFormat withTrailingDelimiter() { 3080 return builder().setTrailingDelimiter(true).get(); 3081 } 3082 3083 /** 3084 * Builds a new {@code CSVFormat} with whether to add a trailing delimiter. 3085 * 3086 * @param trailingDelimiter whether to add a trailing delimiter. 3087 * @return A new CSVFormat that is equal to this but with the specified trailing delimiter setting. 
3088 * @since 1.3 3089 * @deprecated Use {@link Builder#setTrailingDelimiter(boolean)} 3090 */ 3091 @Deprecated 3092 public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) { 3093 return builder().setTrailingDelimiter(trailingDelimiter).get(); 3094 } 3095 3096 /** 3097 * Builds a new {@code CSVFormat} to trim leading and trailing blanks. See {@link #getTrim()} for details of where this is used. 3098 * 3099 * @return A new CSVFormat that is equal to this but with the trim setting on. 3100 * @since 1.3 3101 * @deprecated Use {@link Builder#setTrim(boolean) Builder#setTrim(true)} 3102 */ 3103 @Deprecated 3104 public CSVFormat withTrim() { 3105 return builder().setTrim(true).get(); 3106 } 3107 3108 /** 3109 * Builds a new {@code CSVFormat} with whether to trim leading and trailing blanks. See {@link #getTrim()} for details of where this is used. 3110 * 3111 * @param trim whether to trim leading and trailing blanks. 3112 * @return A new CSVFormat that is equal to this but with the specified trim setting. 3113 * @since 1.3 3114 * @deprecated Use {@link Builder#setTrim(boolean)} 3115 */ 3116 @Deprecated 3117 public CSVFormat withTrim(final boolean trim) { 3118 return builder().setTrim(trim).get(); 3119 } 3120}