001package org.jsoup.helper; 002 003import org.jsoup.Connection; 004import org.jsoup.HttpStatusException; 005import org.jsoup.UncheckedIOException; 006import org.jsoup.UnsupportedMimeTypeException; 007import org.jsoup.internal.ControllableInputStream; 008import org.jsoup.internal.SharedConstants; 009import org.jsoup.internal.StringUtil; 010import org.jsoup.nodes.Document; 011import org.jsoup.parser.Parser; 012import org.jsoup.parser.TokenQueue; 013import org.jspecify.annotations.Nullable; 014 015import javax.net.ssl.HttpsURLConnection; 016import javax.net.ssl.SSLSocketFactory; 017import java.io.BufferedInputStream; 018import java.io.BufferedWriter; 019import java.io.ByteArrayInputStream; 020import java.io.IOException; 021import java.io.InputStream; 022import java.io.OutputStream; 023import java.io.OutputStreamWriter; 024import java.net.CookieManager; 025import java.net.CookieStore; 026import java.net.HttpURLConnection; 027import java.net.InetSocketAddress; 028import java.net.MalformedURLException; 029import java.net.Proxy; 030import java.net.URL; 031import java.net.URLEncoder; 032import java.nio.Buffer; 033import java.nio.ByteBuffer; 034import java.nio.charset.Charset; 035import java.nio.charset.IllegalCharsetNameException; 036import java.util.ArrayList; 037import java.util.Collection; 038import java.util.Collections; 039import java.util.LinkedHashMap; 040import java.util.List; 041import java.util.Map; 042import java.util.regex.Pattern; 043import java.util.zip.GZIPInputStream; 044import java.util.zip.Inflater; 045import java.util.zip.InflaterInputStream; 046 047import static org.jsoup.Connection.Method.HEAD; 048import static org.jsoup.helper.DataUtil.UTF_8; 049import static org.jsoup.internal.Normalizer.lowerCase; 050 051/** 052 * Implementation of {@link Connection}. 
053 * @see org.jsoup.Jsoup#connect(String) 054 */ 055@SuppressWarnings("CharsetObjectCanBeUsed") 056public class HttpConnection implements Connection { 057 public static final String CONTENT_ENCODING = "Content-Encoding"; 058 /** 059 * Many users would get caught by not setting a user-agent and therefore getting different responses on their desktop 060 * vs in jsoup, which would otherwise default to {@code Java}. So by default, use a desktop UA. 061 */ 062 public static final String DEFAULT_UA = 063 "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"; 064 private static final String USER_AGENT = "User-Agent"; 065 public static final String CONTENT_TYPE = "Content-Type"; 066 public static final String MULTIPART_FORM_DATA = "multipart/form-data"; 067 public static final String FORM_URL_ENCODED = "application/x-www-form-urlencoded"; 068 private static final int HTTP_TEMP_REDIR = 307; // http/1.1 temporary redirect, not in Java's set. 069 private static final String DefaultUploadType = "application/octet-stream"; 070 private static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1"); 071 072 /** 073 Create a new Connection, with the request URL specified. 074 @param url the URL to fetch from 075 @return a new Connection object 076 */ 077 public static Connection connect(String url) { 078 Connection con = new HttpConnection(); 079 con.url(url); 080 return con; 081 } 082 083 /** 084 Create a new Connection, with the request URL specified. 085 @param url the URL to fetch from 086 @return a new Connection object 087 */ 088 public static Connection connect(URL url) { 089 Connection con = new HttpConnection(); 090 con.url(url); 091 return con; 092 } 093 094 /** 095 Create a new, empty HttpConnection. 096 */ 097 public HttpConnection() { 098 req = new Request(); 099 } 100 101 /** 102 Create a new Request by deep-copying an existing Request. Note that the data and body of the original are not 103 copied. 
All other settings (proxy, parser, cookies, etc) are copied. 104 @param copy the request to copy 105 */ 106 HttpConnection(Request copy) { 107 req = new Request(copy); 108 } 109 110 private static String encodeMimeName(String val) { 111 return val.replace("\"", "%22"); 112 } 113 114 private HttpConnection.Request req; 115 private Connection.@Nullable Response res; 116 117 @Override 118 public Connection newRequest() { 119 // copy the prototype request for the different settings, cookie manager, etc 120 return new HttpConnection(req); 121 } 122 123 /** Create a new Connection that just wraps the provided Request and Response */ 124 private HttpConnection(Request req, Response res) { 125 this.req = req; 126 this.res = res; 127 } 128 129 @Override 130 public Connection url(URL url) { 131 req.url(url); 132 return this; 133 } 134 135 @Override 136 public Connection url(String url) { 137 Validate.notEmptyParam(url, "url"); 138 try { 139 req.url(new URL(url)); 140 } catch (MalformedURLException e) { 141 throw new IllegalArgumentException(String.format("The supplied URL, '%s', is malformed. Make sure it is an absolute URL, and starts with 'http://' or 'https://'. 
See https://jsoup.org/cookbook/extracting-data/working-with-urls", url), e); 142 } 143 return this; 144 } 145 146 @Override 147 public Connection proxy(@Nullable Proxy proxy) { 148 req.proxy(proxy); 149 return this; 150 } 151 152 @Override 153 public Connection proxy(String host, int port) { 154 req.proxy(host, port); 155 return this; 156 } 157 158 @Override 159 public Connection userAgent(String userAgent) { 160 Validate.notNullParam(userAgent, "userAgent"); 161 req.header(USER_AGENT, userAgent); 162 return this; 163 } 164 165 @Override 166 public Connection timeout(int millis) { 167 req.timeout(millis); 168 return this; 169 } 170 171 @Override 172 public Connection maxBodySize(int bytes) { 173 req.maxBodySize(bytes); 174 return this; 175 } 176 177 @Override 178 public Connection followRedirects(boolean followRedirects) { 179 req.followRedirects(followRedirects); 180 return this; 181 } 182 183 @Override 184 public Connection referrer(String referrer) { 185 Validate.notNullParam(referrer, "referrer"); 186 req.header("Referer", referrer); 187 return this; 188 } 189 190 @Override 191 public Connection method(Method method) { 192 req.method(method); 193 return this; 194 } 195 196 @Override 197 public Connection ignoreHttpErrors(boolean ignoreHttpErrors) { 198 req.ignoreHttpErrors(ignoreHttpErrors); 199 return this; 200 } 201 202 @Override 203 public Connection ignoreContentType(boolean ignoreContentType) { 204 req.ignoreContentType(ignoreContentType); 205 return this; 206 } 207 208 @Override 209 public Connection data(String key, String value) { 210 req.data(KeyVal.create(key, value)); 211 return this; 212 } 213 214 @Override 215 public Connection sslSocketFactory(SSLSocketFactory sslSocketFactory) { 216 req.sslSocketFactory(sslSocketFactory); 217 return this; 218 } 219 220 @Override 221 public Connection data(String key, String filename, InputStream inputStream) { 222 req.data(KeyVal.create(key, filename, inputStream)); 223 return this; 224 } 225 226 @Override 227 
public Connection data(String key, String filename, InputStream inputStream, String contentType) { 228 req.data(KeyVal.create(key, filename, inputStream).contentType(contentType)); 229 return this; 230 } 231 232 @Override 233 public Connection data(Map<String, String> data) { 234 Validate.notNullParam(data, "data"); 235 for (Map.Entry<String, String> entry : data.entrySet()) { 236 req.data(KeyVal.create(entry.getKey(), entry.getValue())); 237 } 238 return this; 239 } 240 241 @Override 242 public Connection data(String... keyvals) { 243 Validate.notNullParam(keyvals, "keyvals"); 244 Validate.isTrue(keyvals.length %2 == 0, "Must supply an even number of key value pairs"); 245 for (int i = 0; i < keyvals.length; i += 2) { 246 String key = keyvals[i]; 247 String value = keyvals[i+1]; 248 Validate.notEmpty(key, "Data key must not be empty"); 249 Validate.notNull(value, "Data value must not be null"); 250 req.data(KeyVal.create(key, value)); 251 } 252 return this; 253 } 254 255 @Override 256 public Connection data(Collection<Connection.KeyVal> data) { 257 Validate.notNullParam(data, "data"); 258 for (Connection.KeyVal entry: data) { 259 req.data(entry); 260 } 261 return this; 262 } 263 264 @Override 265 public Connection.KeyVal data(String key) { 266 Validate.notEmptyParam(key, "key"); 267 for (Connection.KeyVal keyVal : request().data()) { 268 if (keyVal.key().equals(key)) 269 return keyVal; 270 } 271 return null; 272 } 273 274 @Override 275 public Connection requestBody(String body) { 276 req.requestBody(body); 277 return this; 278 } 279 280 @Override 281 public Connection header(String name, String value) { 282 req.header(name, value); 283 return this; 284 } 285 286 @Override 287 public Connection headers(Map<String,String> headers) { 288 Validate.notNullParam(headers, "headers"); 289 for (Map.Entry<String,String> entry : headers.entrySet()) { 290 req.header(entry.getKey(),entry.getValue()); 291 } 292 return this; 293 } 294 295 @Override 296 public Connection 
cookie(String name, String value) { 297 req.cookie(name, value); 298 return this; 299 } 300 301 @Override 302 public Connection cookies(Map<String, String> cookies) { 303 Validate.notNullParam(cookies, "cookies"); 304 for (Map.Entry<String, String> entry : cookies.entrySet()) { 305 req.cookie(entry.getKey(), entry.getValue()); 306 } 307 return this; 308 } 309 310 @Override 311 public Connection cookieStore(CookieStore cookieStore) { 312 // create a new cookie manager using the new store 313 req.cookieManager = new CookieManager(cookieStore, null); 314 return this; 315 } 316 317 @Override 318 public CookieStore cookieStore() { 319 return req.cookieManager.getCookieStore(); 320 } 321 322 @Override 323 public Connection parser(Parser parser) { 324 req.parser(parser); 325 return this; 326 } 327 328 @Override 329 public Document get() throws IOException { 330 req.method(Method.GET); 331 execute(); 332 Validate.notNull(res); 333 return res.parse(); 334 } 335 336 @Override 337 public Document post() throws IOException { 338 req.method(Method.POST); 339 execute(); 340 Validate.notNull(res); 341 return res.parse(); 342 } 343 344 @Override 345 public Connection.Response execute() throws IOException { 346 res = Response.execute(req); 347 return res; 348 } 349 350 @Override 351 public Connection.Request request() { 352 return req; 353 } 354 355 @Override 356 public Connection request(Connection.Request request) { 357 req = (HttpConnection.Request) request; // will throw a class-cast exception if the user has extended some but not all of Connection; that's desired 358 return this; 359 } 360 361 @Override 362 public Connection.Response response() { 363 if (res == null) { 364 throw new IllegalArgumentException("You must execute the request before getting a response."); 365 } 366 return res; 367 } 368 369 @Override 370 public Connection response(Connection.Response response) { 371 res = response; 372 return this; 373 } 374 375 @Override 376 public Connection postDataCharset(String 
charset) { 377 req.postDataCharset(charset); 378 return this; 379 } 380 381 @Override public Connection auth(RequestAuthenticator authenticator) { 382 req.auth(authenticator); 383 return this; 384 } 385 386 @SuppressWarnings("unchecked") 387 private static abstract class Base<T extends Connection.Base<T>> implements Connection.Base<T> { 388 private static final URL UnsetUrl; // only used if you created a new Request() 389 static { 390 try { 391 UnsetUrl = new URL("http://undefined/"); 392 } catch (MalformedURLException e) { 393 throw new IllegalStateException(e); 394 } 395 } 396 397 URL url = UnsetUrl; 398 Method method = Method.GET; 399 Map<String, List<String>> headers; 400 Map<String, String> cookies; 401 402 private Base() { 403 headers = new LinkedHashMap<>(); 404 cookies = new LinkedHashMap<>(); 405 } 406 407 private Base(Base<T> copy) { 408 url = copy.url; // unmodifiable object 409 method = copy.method; 410 headers = new LinkedHashMap<>(); 411 for (Map.Entry<String, List<String>> entry : copy.headers.entrySet()) { 412 headers.put(entry.getKey(), new ArrayList<>(entry.getValue())); 413 } 414 cookies = new LinkedHashMap<>(); cookies.putAll(copy.cookies); // just holds strings 415 } 416 417 @Override 418 public URL url() { 419 if (url == UnsetUrl) 420 throw new IllegalArgumentException("URL not set. Make sure to call #url(...) 
before executing the request."); 421 return url; 422 } 423 424 @Override 425 public T url(URL url) { 426 Validate.notNullParam(url, "url"); 427 this.url = new UrlBuilder(url).build(); 428 return (T) this; 429 } 430 431 @Override 432 public Method method() { 433 return method; 434 } 435 436 @Override 437 public T method(Method method) { 438 Validate.notNullParam(method, "method"); 439 this.method = method; 440 return (T) this; 441 } 442 443 @Override 444 public String header(String name) { 445 Validate.notNullParam(name, "name"); 446 List<String> vals = getHeadersCaseInsensitive(name); 447 if (vals.size() > 0) { 448 // https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 449 return StringUtil.join(vals, ", "); 450 } 451 452 return null; 453 } 454 455 @Override 456 public T addHeader(String name, @Nullable String value) { 457 Validate.notEmptyParam(name, "name"); 458 //noinspection ConstantConditions 459 value = value == null ? "" : value; 460 461 List<String> values = headers(name); 462 if (values.isEmpty()) { 463 values = new ArrayList<>(); 464 headers.put(name, values); 465 } 466 values.add(value); 467 468 return (T) this; 469 } 470 471 @Override 472 public List<String> headers(String name) { 473 Validate.notEmptyParam(name, "name"); 474 return getHeadersCaseInsensitive(name); 475 } 476 477 @Override 478 public T header(String name, String value) { 479 Validate.notEmptyParam(name, "name"); 480 removeHeader(name); // ensures we don't get an "accept-encoding" and a "Accept-Encoding" 481 addHeader(name, value); 482 return (T) this; 483 } 484 485 @Override 486 public boolean hasHeader(String name) { 487 Validate.notEmptyParam(name, "name"); 488 return !getHeadersCaseInsensitive(name).isEmpty(); 489 } 490 491 /** 492 * Test if the request has a header with this value (case insensitive). 
493 */ 494 @Override 495 public boolean hasHeaderWithValue(String name, String value) { 496 Validate.notEmpty(name); 497 Validate.notEmpty(value); 498 List<String> values = headers(name); 499 for (String candidate : values) { 500 if (value.equalsIgnoreCase(candidate)) 501 return true; 502 } 503 return false; 504 } 505 506 @Override 507 public T removeHeader(String name) { 508 Validate.notEmptyParam(name, "name"); 509 Map.Entry<String, List<String>> entry = scanHeaders(name); // remove is case-insensitive too 510 if (entry != null) 511 headers.remove(entry.getKey()); // ensures correct case 512 return (T) this; 513 } 514 515 @Override 516 public Map<String, String> headers() { 517 LinkedHashMap<String, String> map = new LinkedHashMap<>(headers.size()); 518 for (Map.Entry<String, List<String>> entry : headers.entrySet()) { 519 String header = entry.getKey(); 520 List<String> values = entry.getValue(); 521 if (values.size() > 0) 522 map.put(header, values.get(0)); 523 } 524 return map; 525 } 526 527 @Override 528 public Map<String, List<String>> multiHeaders() { 529 return headers; 530 } 531 532 private List<String> getHeadersCaseInsensitive(String name) { 533 Validate.notNull(name); 534 535 for (Map.Entry<String, List<String>> entry : headers.entrySet()) { 536 if (name.equalsIgnoreCase(entry.getKey())) 537 return entry.getValue(); 538 } 539 540 return Collections.emptyList(); 541 } 542 543 private Map.@Nullable Entry<String, List<String>> scanHeaders(String name) { 544 String lc = lowerCase(name); 545 for (Map.Entry<String, List<String>> entry : headers.entrySet()) { 546 if (lowerCase(entry.getKey()).equals(lc)) 547 return entry; 548 } 549 return null; 550 } 551 552 @Override 553 public String cookie(String name) { 554 Validate.notEmptyParam(name, "name"); 555 return cookies.get(name); 556 } 557 558 @Override 559 public T cookie(String name, String value) { 560 Validate.notEmptyParam(name, "name"); 561 Validate.notNullParam(value, "value"); 562 cookies.put(name, 
value); 563 return (T) this; 564 } 565 566 @Override 567 public boolean hasCookie(String name) { 568 Validate.notEmptyParam(name, "name"); 569 return cookies.containsKey(name); 570 } 571 572 @Override 573 public T removeCookie(String name) { 574 Validate.notEmptyParam(name, "name"); 575 cookies.remove(name); 576 return (T) this; 577 } 578 579 @Override 580 public Map<String, String> cookies() { 581 return cookies; 582 } 583 } 584 585 public static class Request extends HttpConnection.Base<Connection.Request> implements Connection.Request { 586 static { 587 System.setProperty("sun.net.http.allowRestrictedHeaders", "true"); 588 // make sure that we can send Sec-Fetch-Site headers etc. 589 } 590 591 private @Nullable Proxy proxy; 592 private int timeoutMilliseconds; 593 private int maxBodySizeBytes; 594 private boolean followRedirects; 595 private final Collection<Connection.KeyVal> data; 596 private @Nullable String body = null; 597 private boolean ignoreHttpErrors = false; 598 private boolean ignoreContentType = false; 599 private Parser parser; 600 private boolean parserDefined = false; // called parser(...) 
vs initialized in ctor 601 private String postDataCharset = DataUtil.defaultCharsetName; 602 private @Nullable SSLSocketFactory sslSocketFactory; 603 private CookieManager cookieManager; 604 private @Nullable RequestAuthenticator authenticator; 605 private volatile boolean executing = false; 606 607 Request() { 608 super(); 609 timeoutMilliseconds = 30000; // 30 seconds 610 maxBodySizeBytes = 1024 * 1024 * 2; // 2MB 611 followRedirects = true; 612 data = new ArrayList<>(); 613 method = Method.GET; 614 addHeader("Accept-Encoding", "gzip"); 615 addHeader(USER_AGENT, DEFAULT_UA); 616 parser = Parser.htmlParser(); 617 cookieManager = new CookieManager(); // creates a default InMemoryCookieStore 618 } 619 620 Request(Request copy) { 621 super(copy); 622 proxy = copy.proxy; 623 postDataCharset = copy.postDataCharset; 624 timeoutMilliseconds = copy.timeoutMilliseconds; 625 maxBodySizeBytes = copy.maxBodySizeBytes; 626 followRedirects = copy.followRedirects; 627 data = new ArrayList<>(); // data not copied 628 //body not copied 629 ignoreHttpErrors = copy.ignoreHttpErrors; 630 ignoreContentType = copy.ignoreContentType; 631 parser = copy.parser.newInstance(); // parsers and their tree-builders maintain state, so need a fresh copy 632 parserDefined = copy.parserDefined; 633 sslSocketFactory = copy.sslSocketFactory; // these are all synchronized so safe to share 634 cookieManager = copy.cookieManager; 635 authenticator = copy.authenticator; 636 executing = false; 637 } 638 639 @Override 640 public Proxy proxy() { 641 return proxy; 642 } 643 644 @Override 645 public Request proxy(@Nullable Proxy proxy) { 646 this.proxy = proxy; 647 return this; 648 } 649 650 @Override 651 public Request proxy(String host, int port) { 652 this.proxy = new Proxy(Proxy.Type.HTTP, InetSocketAddress.createUnresolved(host, port)); 653 return this; 654 } 655 656 @Override 657 public int timeout() { 658 return timeoutMilliseconds; 659 } 660 661 @Override 662 public Request timeout(int millis) { 663 
Validate.isTrue(millis >= 0, "Timeout milliseconds must be 0 (infinite) or greater"); 664 timeoutMilliseconds = millis; 665 return this; 666 } 667 668 @Override 669 public int maxBodySize() { 670 return maxBodySizeBytes; 671 } 672 673 @Override 674 public Connection.Request maxBodySize(int bytes) { 675 Validate.isTrue(bytes >= 0, "maxSize must be 0 (unlimited) or larger"); 676 maxBodySizeBytes = bytes; 677 return this; 678 } 679 680 @Override 681 public boolean followRedirects() { 682 return followRedirects; 683 } 684 685 @Override 686 public Connection.Request followRedirects(boolean followRedirects) { 687 this.followRedirects = followRedirects; 688 return this; 689 } 690 691 @Override 692 public boolean ignoreHttpErrors() { 693 return ignoreHttpErrors; 694 } 695 696 @Override 697 public SSLSocketFactory sslSocketFactory() { 698 return sslSocketFactory; 699 } 700 701 @Override 702 public void sslSocketFactory(SSLSocketFactory sslSocketFactory) { 703 this.sslSocketFactory = sslSocketFactory; 704 } 705 706 @Override 707 public Connection.Request ignoreHttpErrors(boolean ignoreHttpErrors) { 708 this.ignoreHttpErrors = ignoreHttpErrors; 709 return this; 710 } 711 712 @Override 713 public boolean ignoreContentType() { 714 return ignoreContentType; 715 } 716 717 @Override 718 public Connection.Request ignoreContentType(boolean ignoreContentType) { 719 this.ignoreContentType = ignoreContentType; 720 return this; 721 } 722 723 @Override 724 public Request data(Connection.KeyVal keyval) { 725 Validate.notNullParam(keyval, "keyval"); 726 data.add(keyval); 727 return this; 728 } 729 730 @Override 731 public Collection<Connection.KeyVal> data() { 732 return data; 733 } 734 735 @Override 736 public Connection.Request requestBody(@Nullable String body) { 737 this.body = body; 738 return this; 739 } 740 741 @Override 742 public String requestBody() { 743 return body; 744 } 745 746 @Override 747 public Request parser(Parser parser) { 748 this.parser = parser; 749 parserDefined = 
true; 750 return this; 751 } 752 753 @Override 754 public Parser parser() { 755 return parser; 756 } 757 758 @Override 759 public Connection.Request postDataCharset(String charset) { 760 Validate.notNullParam(charset, "charset"); 761 if (!Charset.isSupported(charset)) throw new IllegalCharsetNameException(charset); 762 this.postDataCharset = charset; 763 return this; 764 } 765 766 @Override 767 public String postDataCharset() { 768 return postDataCharset; 769 } 770 771 CookieManager cookieManager() { 772 return cookieManager; 773 } 774 775 @Override public Connection.Request auth(@Nullable RequestAuthenticator authenticator) { 776 this.authenticator = authenticator; 777 return this; 778 } 779 780 @Override @Nullable public RequestAuthenticator auth() { 781 return authenticator; 782 } 783 } 784 785 public static class Response extends HttpConnection.Base<Connection.Response> implements Connection.Response { 786 private static final int MAX_REDIRECTS = 20; 787 private static final String LOCATION = "Location"; 788 private final int statusCode; 789 private final String statusMessage; 790 private @Nullable ByteBuffer byteData; 791 private @Nullable ControllableInputStream bodyStream; 792 private @Nullable HttpURLConnection conn; 793 private @Nullable String charset; 794 private @Nullable final String contentType; 795 private boolean executed = false; 796 private boolean inputStreamRead = false; 797 private int numRedirects = 0; 798 private final HttpConnection.Request req; 799 800 /* 801 * Matches XML content types (like text/xml, image/svg+xml, application/xhtml+xml;charset=UTF8, etc) 802 */ 803 private static final Pattern xmlContentTypeRxp = Pattern.compile("(\\w+)/\\w*\\+?xml.*"); 804 805 /** 806 <b>Internal only! </b>Creates a dummy HttpConnection.Response, useful for testing. All actual responses 807 are created from the HttpURLConnection and fields defined. 
808 */ 809 Response() { 810 super(); 811 statusCode = 400; 812 statusMessage = "Request not made"; 813 req = new Request(); 814 contentType = null; 815 } 816 817 static Response execute(HttpConnection.Request req) throws IOException { 818 return execute(req, null); 819 } 820 821 static Response execute(HttpConnection.Request req, @Nullable Response previousResponse) throws IOException { 822 synchronized (req) { 823 Validate.isFalse(req.executing, "Multiple threads were detected trying to execute the same request concurrently. Make sure to use Connection#newRequest() and do not share an executing request between threads."); 824 req.executing = true; 825 } 826 Validate.notNullParam(req, "req"); 827 URL url = req.url(); 828 Validate.notNull(url, "URL must be specified to connect"); 829 String protocol = url.getProtocol(); 830 if (!protocol.equals("http") && !protocol.equals("https")) 831 throw new MalformedURLException("Only http & https protocols supported"); 832 final boolean methodHasBody = req.method().hasBody(); 833 final boolean hasRequestBody = req.requestBody() != null; 834 if (!methodHasBody) 835 Validate.isFalse(hasRequestBody, "Cannot set a request body for HTTP method " + req.method()); 836 837 // set up the request for execution 838 String mimeBoundary = null; 839 if (req.data().size() > 0 && (!methodHasBody || hasRequestBody)) 840 serialiseRequestUrl(req); 841 else if (methodHasBody) 842 mimeBoundary = setOutputContentType(req); 843 844 long startTime = System.nanoTime(); 845 HttpURLConnection conn = createConnection(req); 846 Response res = null; 847 try { 848 conn.connect(); 849 if (conn.getDoOutput()) { 850 OutputStream out = conn.getOutputStream(); 851 try { writePost(req, out, mimeBoundary); } 852 catch (IOException e) { conn.disconnect(); throw e; } 853 finally { out.close(); } 854 } 855 856 int status = conn.getResponseCode(); 857 res = new Response(conn, req, previousResponse); 858 859 // redirect if there's a location header (from 3xx, or 201 
etc) 860 if (res.hasHeader(LOCATION) && req.followRedirects()) { 861 if (status != HTTP_TEMP_REDIR) { 862 req.method(Method.GET); // always redirect with a get. any data param from original req are dropped. 863 req.data().clear(); 864 req.requestBody(null); 865 req.removeHeader(CONTENT_TYPE); 866 } 867 868 String location = res.header(LOCATION); 869 Validate.notNull(location); 870 if (location.startsWith("http:/") && location.charAt(6) != '/') // fix broken Location: http:/temp/AAG_New/en/index.php 871 location = location.substring(6); 872 URL redir = StringUtil.resolve(req.url(), location); 873 req.url(redir); 874 875 req.executing = false; 876 return execute(req, res); 877 } 878 if ((status < 200 || status >= 400) && !req.ignoreHttpErrors()) 879 throw new HttpStatusException("HTTP error fetching URL", status, req.url().toString()); 880 881 // check that we can handle the returned content type; if not, abort before fetching it 882 String contentType = res.contentType(); 883 if (contentType != null 884 && !req.ignoreContentType() 885 && !contentType.startsWith("text/") 886 && !xmlContentTypeRxp.matcher(contentType).matches() 887 ) 888 throw new UnsupportedMimeTypeException("Unhandled content type. Must be text/*, */xml, or */*+xml", 889 contentType, req.url().toString()); 890 891 // switch to the XML parser if content type is xml and not parser not explicitly set 892 if (contentType != null && xmlContentTypeRxp.matcher(contentType).matches()) { 893 if (!req.parserDefined) req.parser(Parser.xmlParser()); 894 } 895 896 res.charset = DataUtil.getCharsetFromContentType(res.contentType); // may be null, readInputStream deals with it 897 if (conn.getContentLength() != 0 && req.method() != HEAD) { // -1 means unknown, chunked. sun throws an IO exception on 500 response with no content when trying to read body 898 InputStream stream = conn.getErrorStream() != null ? 
conn.getErrorStream() : conn.getInputStream(); 899 if (res.hasHeaderWithValue(CONTENT_ENCODING, "gzip")) 900 stream = new GZIPInputStream(stream); 901 else if (res.hasHeaderWithValue(CONTENT_ENCODING, "deflate")) 902 stream = new InflaterInputStream(stream, new Inflater(true)); 903 904 res.bodyStream = ControllableInputStream.wrap( 905 stream, SharedConstants.DefaultBufferSize, req.maxBodySize()) 906 .timeout(startTime, req.timeout()); 907 } else { 908 res.byteData = DataUtil.emptyByteBuffer(); 909 } 910 } catch (IOException e) { 911 if (res != null) res.safeClose(); // will be non-null if got to conn 912 throw e; 913 } finally { 914 req.executing = false; 915 916 // detach any thread local auth delegate 917 if (req.authenticator != null) 918 AuthenticationHandler.handler.remove(); 919 } 920 921 res.executed = true; 922 return res; 923 } 924 925 @Override 926 public int statusCode() { 927 return statusCode; 928 } 929 930 @Override 931 public String statusMessage() { 932 return statusMessage; 933 } 934 935 @Override 936 public String charset() { 937 return charset; 938 } 939 940 @Override 941 public Response charset(String charset) { 942 this.charset = charset; 943 return this; 944 } 945 946 @Override 947 public String contentType() { 948 return contentType; 949 } 950 951 public Document parse() throws IOException { 952 Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before parsing response"); 953 InputStream stream = bodyStream; 954 if (byteData != null) { // bytes have been read in to the buffer, parse that 955 stream = new ByteArrayInputStream(byteData.array()); 956 inputStreamRead = false; // ok to reparse if in bytes 957 } 958 Validate.isFalse(inputStreamRead, "Input stream already read and parsed, cannot re-read."); 959 Document doc = DataUtil.parseInputStream(stream, charset, url.toExternalForm(), req.parser()); 960 doc.connection(new HttpConnection(req, this)); // because we're static, don't have the connection obj. 
// todo - maybe hold in the req? 961 charset = doc.outputSettings().charset().name(); // update charset from meta-equiv, possibly 962 inputStreamRead = true; 963 safeClose(); 964 return doc; 965 } 966 967 private void prepareByteData() { 968 Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body"); 969 if (bodyStream != null && byteData == null) { 970 Validate.isFalse(inputStreamRead, "Request has already been read (with .parse())"); 971 try { 972 byteData = DataUtil.readToByteBuffer(bodyStream, req.maxBodySize()); 973 } catch (IOException e) { 974 throw new UncheckedIOException(e); 975 } finally { 976 inputStreamRead = true; 977 safeClose(); 978 } 979 } 980 } 981 982 @Override 983 public String body() { 984 prepareByteData(); 985 Validate.notNull(byteData); 986 // charset gets set from header on execute, and from meta-equiv on parse. parse may not have happened yet 987 String body = (charset == null ? UTF_8 : Charset.forName(charset)) 988 .decode(byteData).toString(); 989 ((Buffer)byteData).rewind(); // cast to avoid covariant return type change in jdk9 990 return body; 991 } 992 993 @Override 994 public byte[] bodyAsBytes() { 995 prepareByteData(); 996 Validate.notNull(byteData); 997 return byteData.array(); 998 } 999 1000 @Override 1001 public Connection.Response bufferUp() { 1002 prepareByteData(); 1003 return this; 1004 } 1005 1006 @Override 1007 public BufferedInputStream bodyStream() { 1008 Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body"); 1009 1010 // if we have read to bytes (via buffer up), return those as a stream. 
1011 if (byteData != null) { 1012 return new BufferedInputStream(new ByteArrayInputStream(byteData.array()), SharedConstants.DefaultBufferSize); 1013 } 1014 1015 Validate.isFalse(inputStreamRead, "Request has already been read"); 1016 Validate.notNull(bodyStream); 1017 inputStreamRead = true; 1018 return bodyStream.inputStream(); 1019 } 1020 1021 // set up connection defaults, and details from request 1022 private static HttpURLConnection createConnection(HttpConnection.Request req) throws IOException { 1023 Proxy proxy = req.proxy(); 1024 final HttpURLConnection conn = (HttpURLConnection) ( 1025 proxy == null ? 1026 req.url().openConnection() : 1027 req.url().openConnection(proxy) 1028 ); 1029 1030 conn.setRequestMethod(req.method().name()); 1031 conn.setInstanceFollowRedirects(false); // don't rely on native redirection support 1032 conn.setConnectTimeout(req.timeout()); 1033 conn.setReadTimeout(req.timeout() / 2); // gets reduced after connection is made and status is read 1034 1035 if (req.sslSocketFactory() != null && conn instanceof HttpsURLConnection) 1036 ((HttpsURLConnection) conn).setSSLSocketFactory(req.sslSocketFactory()); 1037 if (req.authenticator != null) 1038 AuthenticationHandler.handler.enable(req.authenticator, conn); // removed in finally 1039 if (req.method().hasBody()) 1040 conn.setDoOutput(true); 1041 CookieUtil.applyCookiesToRequest(req, conn); // from the Request key/val cookies and the Cookie Store 1042 for (Map.Entry<String, List<String>> header : req.multiHeaders().entrySet()) { 1043 for (String value : header.getValue()) { 1044 conn.addRequestProperty(header.getKey(), value); 1045 } 1046 } 1047 return conn; 1048 } 1049 1050 /** 1051 * Call on completion of stream read, to close the body (or error) stream. The connection.disconnect allows 1052 * keep-alives to work (as the underlying connection is actually held open, despite the name). 
/**
 * Releases the resources held by this response once the body (or error) stream is finished with: closes the body
 * stream, ignoring any failure to close, and disconnects the connection. Calling disconnect is what lets
 * keep-alives work (the underlying connection is actually held open, despite the name).
 */
private void safeClose() {
    try {
        if (bodyStream != null)
            bodyStream.close();
    } catch (IOException ignored) {
        // best-effort close; nothing useful to do if it fails
    } finally {
        bodyStream = null;
    }

    if (conn == null)
        return;
    conn.disconnect();
    conn = null;
}

/**
 * Builds a Response from an open connection: records the method, URL, status and content type, reads the response
 * headers (including cookies), and stores any set cookies in the cookie store. When this response follows a
 * redirect, cookies from the previous response are carried over (without overriding cookies set here), the
 * previous response is closed, and the redirect count is checked.
 * @param conn the connection the response is read from
 * @param request the request that was executed
 * @param previousResponse the response that redirected here, or null if this is the first response
 * @throws IOException if the connection cannot be read, or if too many redirects have occurred
 */
private Response(HttpURLConnection conn, HttpConnection.Request request, HttpConnection.@Nullable Response previousResponse) throws IOException {
    this.conn = conn;
    this.req = request;
    method = Method.valueOf(conn.getRequestMethod());
    url = conn.getURL();
    statusCode = conn.getResponseCode();
    statusMessage = conn.getResponseMessage();
    contentType = conn.getContentType();

    final Map<String, List<String>> headerFields = createHeaderMap(conn);
    processResponseHeaders(headerFields); // also picks up cookie key/vals while scanning the headers
    CookieUtil.storeCookies(req, url, headerFields); // set cookies go into the cookie store

    if (previousResponse == null)
        return; // not a redirect; nothing further to inherit

    // carry the previous response's cookies into this response's cookies(), keeping any set here
    for (Map.Entry<String, String> inherited : previousResponse.cookies().entrySet()) {
        final String cookieName = inherited.getKey();
        if (hasCookie(cookieName))
            continue;
        cookie(cookieName, inherited.getValue());
    }
    previousResponse.safeClose();

    // guard against redirect loops
    numRedirects = previousResponse.numRedirects + 1;
    if (numRedirects >= MAX_REDIRECTS)
        throw new IOException(String.format("Too many redirects occurred trying to load URL %s", previousResponse.url()));
}

/**
 * Collects the connection's response headers into a map that preserves the order in which they were received.
 * The headers are read by index because the default sun implementation of {@code conn.getHeaderFields()} returns
 * header values out of order. Entries with a null key or null value (such as the http/1.1 status line) are skipped.
 * @param conn the connection to read headers from
 * @return header names mapped to their values, in received order
 */
private static LinkedHashMap<String, List<String>> createHeaderMap(HttpURLConnection conn) {
    final LinkedHashMap<String, List<String>> ordered = new LinkedHashMap<>();
    for (int index = 0; ; index++) {
        final String name = conn.getHeaderFieldKey(index);
        final String value = conn.getHeaderField(index);
        if (name == null && value == null)
            break; // ran past the last header
        if (name != null && value != null)
            ordered.computeIfAbsent(name, unused -> new ArrayList<>()).add(value);
        // otherwise: half-empty entry (the http1.1 line), skip it
    }
    return ordered;
}

/**
 * Copies the supplied response headers into this response. {@code Set-Cookie} headers are additionally parsed into
 * cookie name/value pairs; when a cookie name repeats, only the first value is kept. Every header value is passed
 * through {@code fixHeaderEncoding} before being added.
 * @param resHeaders the response headers, keyed by header name
 */
void processResponseHeaders(Map<String, List<String>> resHeaders) {
    for (Map.Entry<String, List<String>> header : resHeaders.entrySet()) {
        final String headerName = header.getKey();
        if (headerName == null)
            continue; // http/1.1 line

        final List<String> headerValues = header.getValue();
        if (headerName.equalsIgnoreCase("Set-Cookie")) {
            for (String rawCookie : headerValues) {
                if (rawCookie == null)
                    continue;
                final TokenQueue tokens = new TokenQueue(rawCookie);
                final String cookieName = tokens.chompTo("=").trim();
                final String cookieVal = tokens.consumeTo(";").trim();
                // path, date, domain, validateTLSCertificates et al. are ignored here; full details are available
                // in the cookiestore if required
                final boolean blankName = cookieName.isEmpty();
                final boolean alreadySeen = cookies.containsKey(cookieName);
                if (!blankName && !alreadySeen) // on duplicates, the first one wins
                    cookie(cookieName, cookieVal);
            }
        }

        for (String headerValue : headerValues) {
            addHeader(headerName, fixHeaderEncoding(headerValue));
        }
    }
}
1154 */ 1155 @Nullable 1156 private static String fixHeaderEncoding(@Nullable String val) { 1157 if (val == null) return val; 1158 byte[] bytes = val.getBytes(ISO_8859_1); 1159 if (looksLikeUtf8(bytes)) 1160 return new String(bytes, UTF_8); 1161 else 1162 return val; 1163 } 1164 1165 private static boolean looksLikeUtf8(byte[] input) { 1166 int i = 0; 1167 // BOM: 1168 if (input.length >= 3 1169 && (input[0] & 0xFF) == 0xEF 1170 && (input[1] & 0xFF) == 0xBB 1171 && (input[2] & 0xFF) == 0xBF) { 1172 i = 3; 1173 } 1174 1175 int end; 1176 boolean foundNonAscii = false; 1177 for (int j = input.length; i < j; ++i) { 1178 int o = input[i]; 1179 if ((o & 0x80) == 0) { 1180 continue; // ASCII 1181 } 1182 foundNonAscii = true; 1183 1184 // UTF-8 leading: 1185 if ((o & 0xE0) == 0xC0) { 1186 end = i + 1; 1187 } else if ((o & 0xF0) == 0xE0) { 1188 end = i + 2; 1189 } else if ((o & 0xF8) == 0xF0) { 1190 end = i + 3; 1191 } else { 1192 return false; 1193 } 1194 1195 if (end >= input.length) 1196 return false; 1197 1198 while (i < end) { 1199 i++; 1200 o = input[i]; 1201 if ((o & 0xC0) != 0x80) { 1202 return false; 1203 } 1204 } 1205 } 1206 return foundNonAscii; 1207 } 1208 1209 private @Nullable static String setOutputContentType(final Connection.Request req) { 1210 final String contentType = req.header(CONTENT_TYPE); 1211 String bound = null; 1212 if (contentType != null) { 1213 // no-op; don't add content type as already set (e.g. for requestBody()) 1214 // todo - if content type already set, we could add charset 1215 1216 // if user has set content type to multipart/form-data, auto add boundary. 
1217 if(contentType.contains(MULTIPART_FORM_DATA) && !contentType.contains("boundary")) { 1218 bound = DataUtil.mimeBoundary(); 1219 req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound); 1220 } 1221 1222 } 1223 else if (needsMultipart(req)) { 1224 bound = DataUtil.mimeBoundary(); 1225 req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound); 1226 } else { 1227 req.header(CONTENT_TYPE, FORM_URL_ENCODED + "; charset=" + req.postDataCharset()); 1228 } 1229 return bound; 1230 } 1231 1232 private static void writePost(final Connection.Request req, final OutputStream outputStream, @Nullable final String boundary) throws IOException { 1233 final Collection<Connection.KeyVal> data = req.data(); 1234 final BufferedWriter w = new BufferedWriter(new OutputStreamWriter(outputStream, Charset.forName(req.postDataCharset()))); 1235 1236 if (boundary != null) { 1237 // boundary will be set if we're in multipart mode 1238 for (Connection.KeyVal keyVal : data) { 1239 w.write("--"); 1240 w.write(boundary); 1241 w.write("\r\n"); 1242 w.write("Content-Disposition: form-data; name=\""); 1243 w.write(encodeMimeName(keyVal.key())); // encodes " to %22 1244 w.write("\""); 1245 final InputStream input = keyVal.inputStream(); 1246 if (input != null) { 1247 w.write("; filename=\""); 1248 w.write(encodeMimeName(keyVal.value())); 1249 w.write("\"\r\nContent-Type: "); 1250 String contentType = keyVal.contentType(); 1251 w.write(contentType != null ? 
contentType : DefaultUploadType); 1252 w.write("\r\n\r\n"); 1253 w.flush(); // flush 1254 DataUtil.crossStreams(input, outputStream); 1255 outputStream.flush(); 1256 } else { 1257 w.write("\r\n\r\n"); 1258 w.write(keyVal.value()); 1259 } 1260 w.write("\r\n"); 1261 } 1262 w.write("--"); 1263 w.write(boundary); 1264 w.write("--"); 1265 } else { 1266 String body = req.requestBody(); 1267 if (body != null) { 1268 // data will be in query string, we're sending a plaintext body 1269 w.write(body); 1270 } 1271 else { 1272 // regular form data (application/x-www-form-urlencoded) 1273 boolean first = true; 1274 for (Connection.KeyVal keyVal : data) { 1275 if (!first) 1276 w.append('&'); 1277 else 1278 first = false; 1279 1280 w.write(URLEncoder.encode(keyVal.key(), req.postDataCharset())); 1281 w.write('='); 1282 w.write(URLEncoder.encode(keyVal.value(), req.postDataCharset())); 1283 } 1284 } 1285 } 1286 w.close(); 1287 } 1288 1289 // for get url reqs, serialise the data map into the url 1290 private static void serialiseRequestUrl(Connection.Request req) throws IOException { 1291 UrlBuilder in = new UrlBuilder(req.url()); 1292 1293 for (Connection.KeyVal keyVal : req.data()) { 1294 Validate.isFalse(keyVal.hasInputStream(), "InputStream data not supported in URL query string."); 1295 in.appendKeyVal(keyVal); 1296 } 1297 req.url(in.build()); 1298 req.data().clear(); // moved into url as get params 1299 } 1300 } 1301 1302 private static boolean needsMultipart(Connection.Request req) { 1303 // multipart mode, for files. 
add the header if we see something with an inputstream, and return a non-null boundary 1304 for (Connection.KeyVal keyVal : req.data()) { 1305 if (keyVal.hasInputStream()) 1306 return true; 1307 } 1308 return false; 1309 } 1310 1311 public static class KeyVal implements Connection.KeyVal { 1312 private String key; 1313 private String value; 1314 private @Nullable InputStream stream; 1315 private @Nullable String contentType; 1316 1317 public static KeyVal create(String key, String value) { 1318 return new KeyVal(key, value); 1319 } 1320 1321 public static KeyVal create(String key, String filename, InputStream stream) { 1322 return new KeyVal(key, filename) 1323 .inputStream(stream); 1324 } 1325 1326 private KeyVal(String key, String value) { 1327 Validate.notEmptyParam(key, "key"); 1328 Validate.notNullParam(value, "value"); 1329 this.key = key; 1330 this.value = value; 1331 } 1332 1333 @Override 1334 public KeyVal key(String key) { 1335 Validate.notEmptyParam(key, "key"); 1336 this.key = key; 1337 return this; 1338 } 1339 1340 @Override 1341 public String key() { 1342 return key; 1343 } 1344 1345 @Override 1346 public KeyVal value(String value) { 1347 Validate.notNullParam(value, "value"); 1348 this.value = value; 1349 return this; 1350 } 1351 1352 @Override 1353 public String value() { 1354 return value; 1355 } 1356 1357 public KeyVal inputStream(InputStream inputStream) { 1358 Validate.notNullParam(value, "inputStream"); 1359 this.stream = inputStream; 1360 return this; 1361 } 1362 1363 @Override 1364 public InputStream inputStream() { 1365 return stream; 1366 } 1367 1368 @Override 1369 public boolean hasInputStream() { 1370 return stream != null; 1371 } 1372 1373 @Override 1374 public Connection.KeyVal contentType(String contentType) { 1375 Validate.notEmpty(contentType); 1376 this.contentType = contentType; 1377 return this; 1378 } 1379 1380 @Override 1381 public String contentType() { 1382 return contentType; 1383 } 1384 1385 @Override 1386 public String 
toString() { 1387 return key + "=" + value; 1388 } 1389 } 1390}