001package org.jsoup; 002 003import org.jsoup.helper.RequestAuthenticator; 004import org.jsoup.nodes.Document; 005import org.jsoup.parser.Parser; 006import org.jsoup.parser.StreamParser; 007import org.jspecify.annotations.Nullable; 008 009import javax.net.ssl.SSLSocketFactory; 010import java.io.BufferedInputStream; 011import java.io.IOException; 012import java.io.InputStream; 013import java.io.UncheckedIOException; 014import java.net.Authenticator; 015import java.net.CookieStore; 016import java.net.Proxy; 017import java.net.URL; 018import java.util.Collection; 019import java.util.List; 020import java.util.Map; 021 022/** 023 The Connection interface is a convenient HTTP client and session object to fetch content from the web, and parse them 024 into Documents. 025 <p>To start a new session, use either {@link org.jsoup.Jsoup#newSession()} or {@link org.jsoup.Jsoup#connect(String)}. 026 Connections contain {@link Connection.Request} and {@link Connection.Response} objects (once executed). Configuration 027 settings (URL, timeout, useragent, etc) set on a session will be applied by default to each subsequent request.</p> 028 <p>To start a new request from the session, use {@link #newRequest()}.</p> 029 <p>Cookies are stored in memory for the duration of the session. For that reason, do not use one single session for all 030 requests in a long-lived application, or you are likely to run out of memory, unless care is taken to clean up the 031 cookie store. The cookie store for the session is available via {@link #cookieStore()}. You may provide your own 032 implementation via {@link #cookieStore(java.net.CookieStore)} before making requests.</p> 033 <p>Request configuration can be made using either the shortcut methods in Connection (e.g. {@link #userAgent(String)}), 034 or by methods in the {@link Connection.Request} object directly. All request configuration must be made before the request is 035 executed. 
When used as an ongoing session, initialize all defaults prior to making multi-threaded {@link 036#newRequest()}s.</p> 037 <p>Note that the term "Connection" used here does not mean that a long-lived connection is held against a server for 038 the lifetime of the Connection object. A socket connection is only made at the point of request execution ({@link 039#execute()}, {@link #get()}, or {@link #post()}), and the server's response consumed.</p> 040 <p>For multi-threaded implementations, it is important to use a {@link #newRequest()} for each request. The session may 041 be shared across concurrent threads, but a not a specific request.</p> 042 <p><b>HTTP/2</b> support: On JDK/JRE 11 and above, requests use {@link java.net.http.HttpClient}, which supports 043 HTTP/2. To use the legacy {@link java.net.HttpURLConnection} instead, set 044 <code>System.setProperty("jsoup.useHttpClient", "false")</code>.</p> 045 */ 046@SuppressWarnings("unused") 047public interface Connection { 048 049 /** 050 * GET and POST http methods. 051 */ 052 enum Method { 053 GET(false), 054 POST(true), 055 PUT(true), 056 DELETE(true), 057 /** 058 Note that unfortunately, PATCH is not supported in many JDKs. 059 */ 060 PATCH(true), 061 HEAD(false), 062 OPTIONS(false), 063 TRACE(false); 064 065 private final boolean hasBody; 066 067 Method(boolean hasBody) { 068 this.hasBody = hasBody; 069 } 070 071 /** 072 * Check if this HTTP method has/needs a request body 073 * @return if body needed 074 */ 075 public final boolean hasBody() { 076 return hasBody; 077 } 078 } 079 080 /** 081 Creates a new request, using this Connection as the session-state and to initialize the connection settings (which 082 may then be independently changed on the returned {@link Connection.Request} object). 
083 @return a new Connection object, with a shared Cookie Store and initialized settings from this Connection and Request 084 @since 1.14.1 085 */ 086 Connection newRequest(); 087 088 /** 089 Creates a new request, using this Connection as the session-state and to initialize the connection settings (which 090 may then be independently changed on the returned {@link Connection.Request} object). 091 @return a new Connection object, with a shared Cookie Store and initialized settings from this Connection and Request 092 @param url URL for the new request 093 @since 1.17.1 094 */ 095 default Connection newRequest(String url) { 096 return newRequest().url(url); 097 } 098 099 /** 100 Creates a new request, using this Connection as the session-state and to initialize the connection settings (which 101 may then be independently changed on the returned {@link Connection.Request} object). 102 @return a new Connection object, with a shared Cookie Store and initialized settings from this Connection and Request 103 @param url URL for the new request 104 @since 1.17.1 105 */ 106 default Connection newRequest(URL url) { 107 return newRequest().url(url); 108 } 109 110 /** 111 * Set the request URL to fetch. The protocol must be HTTP or HTTPS. 112 * @param url URL to connect to 113 * @return this Connection, for chaining 114 */ 115 Connection url(URL url); 116 117 /** 118 * Set the request URL to fetch. The protocol must be HTTP or HTTPS. 119 * @param url URL to connect to 120 * @return this Connection, for chaining 121 */ 122 Connection url(String url); 123 124 /** 125 * Set the proxy to use for this request. Set to <code>null</code> to disable a previously set proxy. 126 * @param proxy proxy to use 127 * @return this Connection, for chaining 128 */ 129 Connection proxy(@Nullable Proxy proxy); 130 131 /** 132 * Set the HTTP proxy to use for this request. 
133 * @param host the proxy hostname 134 * @param port the proxy port 135 * @return this Connection, for chaining 136 */ 137 Connection proxy(String host, int port); 138 139 /** 140 * Set the request user-agent header. 141 * @param userAgent user-agent to use 142 * @return this Connection, for chaining 143 * @see org.jsoup.helper.HttpConnection#DEFAULT_UA 144 */ 145 Connection userAgent(String userAgent); 146 147 /** 148 Set the total maximum request duration. If a timeout occurs, an {@link java.net.SocketTimeoutException} will be 149 thrown. 150 <p>The default timeout is <b>30 seconds</b> (30,000 millis). A timeout of zero is treated as an infinite timeout.</p> 151 <p>This timeout specifies the combined maximum duration of the connection time and the time to read 152 the full response.</p> 153 <p>Implementation note: when this <code>Connection</code> is backed by <code>HttpURLConnection</code> (rather than <code>HttpClient</code>, as used in JRE/JDK 11+), this timeout is implemented by setting both the socket connect and read timeouts to half of the specified value.</p> 154 155 @param millis number of milliseconds (thousandths of a second) before timing out connects or reads. 156 @return this Connection, for chaining 157 @see #maxBodySize(int) 158 */ 159 Connection timeout(int millis); 160 161 /** 162 * Set the maximum bytes to read from the (uncompressed) connection into the body, before the connection is closed, 163 * and the input truncated (i.e. the body content will be trimmed). <b>The default maximum is 2MB</b>. A max size of 164 * <code>0</code> is treated as an infinite amount (bounded only by your patience and the memory available on your 165 * machine). 166 * 167 * @param bytes number of bytes to read from the input before truncating 168 * @return this Connection, for chaining 169 */ 170 Connection maxBodySize(int bytes); 171 172 /** 173 * Set the request referrer (aka "referer") header. 
174 * @param referrer referrer to use 175 * @return this Connection, for chaining 176 */ 177 Connection referrer(String referrer); 178 179 /** 180 * Configures the connection to (not) follow server redirects. By default, this is <b>true</b>. 181 * @param followRedirects true if server redirects should be followed. 182 * @return this Connection, for chaining 183 */ 184 Connection followRedirects(boolean followRedirects); 185 186 /** 187 * Set the request method to use, GET or POST. Default is GET. 188 * @param method HTTP request method 189 * @return this Connection, for chaining 190 */ 191 Connection method(Method method); 192 193 /** 194 * Configures the connection to not throw exceptions when an HTTP error occurs. (4xx - 5xx, e.g. 404 or 500). By 195 * default, this is <b>false</b>; an IOException is thrown if an error is encountered. If set to <b>true</b>, the 196 * response is populated with the error body, and the status message will reflect the error. 197 * @param ignoreHttpErrors - false (default) if HTTP errors should be ignored. 198 * @return this Connection, for chaining 199 */ 200 Connection ignoreHttpErrors(boolean ignoreHttpErrors); 201 202 /** 203 * Ignore the document's Content-Type when parsing the response. By default, this is <b>false</b>, an unrecognised 204 * content-type will cause an IOException to be thrown. (This is to prevent producing garbage by attempting to parse 205 * a JPEG binary image, for example.) Set to true to force a parse attempt regardless of content type. 206 * @param ignoreContentType set to true if you would like the content type ignored on parsing the response into a 207 * Document. 
208 * @return this Connection, for chaining 209 */ 210 Connection ignoreContentType(boolean ignoreContentType); 211 212 /** 213 * Set custom SSL socket factory 214 * @param sslSocketFactory custom SSL socket factory 215 * @return this Connection, for chaining 216 */ 217 Connection sslSocketFactory(SSLSocketFactory sslSocketFactory); 218 219 /** 220 * Add a request data parameter. Request parameters are sent in the request query string for GETs, and in the 221 * request body for POSTs. A request may have multiple values of the same name. 222 * @param key data key 223 * @param value data value 224 * @return this Connection, for chaining 225 */ 226 Connection data(String key, String value); 227 228 /** 229 * Add an input stream as a request data parameter. For GETs, has no effect, but for POSTS this will upload the 230 * input stream. 231 * <p>Use the {@link #data(String, String, InputStream, String)} method to set the uploaded file's mimetype.</p> 232 * @param key data key (form item name) 233 * @param filename the name of the file to present to the remove server. Typically just the name, not path, 234 * component. 235 * @param inputStream the input stream to upload, that you probably obtained from a {@link java.io.FileInputStream}. 236 * You must close the InputStream in a {@code finally} block. 237 * @return this Connection, for chaining 238 * @see #data(String, String, InputStream, String) 239 */ 240 Connection data(String key, String filename, InputStream inputStream); 241 242 /** 243 * Add an input stream as a request data parameter. For GETs, has no effect, but for POSTS this will upload the 244 * input stream. 245 * @param key data key (form item name) 246 * @param filename the name of the file to present to the remove server. Typically just the name, not path, 247 * component. 248 * @param inputStream the input stream to upload, that you probably obtained from a {@link java.io.FileInputStream}. 
249 * @param contentType the Content Type (aka mimetype) to specify for this file. 250 * You must close the InputStream in a {@code finally} block. 251 * @return this Connection, for chaining 252 */ 253 Connection data(String key, String filename, InputStream inputStream, String contentType); 254 255 /** 256 * Adds all of the supplied data to the request data parameters 257 * @param data collection of data parameters 258 * @return this Connection, for chaining 259 */ 260 Connection data(Collection<KeyVal> data); 261 262 /** 263 * Adds all of the supplied data to the request data parameters 264 * @param data map of data parameters 265 * @return this Connection, for chaining 266 */ 267 Connection data(Map<String, String> data); 268 269 /** 270 Add one or more request {@code key, val} data parameter pairs. 271 <p>Multiple parameters may be set at once, e.g.: 272 <code>.data("name", "jsoup", "language", "Java", "language", "English");</code> creates a query string like: 273 <code>{@literal ?name=jsoup&language=Java&language=English}</code></p> 274 <p>For GET requests, data parameters will be sent on the request query string. For POST (and other methods that 275 contain a body), they will be sent as body form parameters, unless the body is explicitly set by 276 {@link #requestBody(String)}, in which case they will be query string parameters.</p> 277 278 @param keyvals a set of key value pairs. 279 @return this Connection, for chaining 280 */ 281 Connection data(String... keyvals); 282 283 /** 284 * Get the data KeyVal for this key, if any 285 * @param key the data key 286 * @return null if not set 287 */ 288 @Nullable KeyVal data(String key); 289 290 /** 291 * Set a POST (or PUT) request body. Useful when a server expects a plain request body (such as JSON), and not a set 292 * of URL encoded form key/value pairs. 
E.g.: 293 * <code><pre>Jsoup.connect(url) 294 * .requestBody(json) 295 * .header("Content-Type", "application/json") 296 * .post();</pre></code> 297 * If any data key/vals are supplied, they will be sent as URL query params. 298 * @see #requestBodyStream(InputStream) 299 * @return this Request, for chaining 300 */ 301 Connection requestBody(String body); 302 303 /** 304 Set the request body. Useful for posting data such as byte arrays or files, and the server expects a single request 305 body (and not a multipart upload). E.g.: 306 <code><pre> Jsoup.connect(url) 307 .requestBody(new ByteArrayInputStream(bytes)) 308 .header("Content-Type", "application/octet-stream") 309 .post(); 310 </pre></code> 311 <p>Or, use a FileInputStream to data from disk.</p> 312 <p>You should close the stream in a finally block.</p> 313 314 @param stream the input stream to send. 315 @return this Request, for chaining 316 @see #requestBody(String) 317 @since 1.20.1 318 */ 319 default Connection requestBodyStream(InputStream stream) { 320 throw new UnsupportedOperationException(); 321 } 322 323 /** 324 * Set a request header. Replaces any existing header with the same case-insensitive name. 325 * @param name header name 326 * @param value header value 327 * @return this Connection, for chaining 328 * @see org.jsoup.Connection.Request#header(String, String) 329 * @see org.jsoup.Connection.Request#headers() 330 */ 331 Connection header(String name, String value); 332 333 /** 334 * Sets each of the supplied headers on the request. Existing headers with the same case-insensitive name will be 335 * replaced with the new value. 336 * @param headers map of headers name {@literal ->} value pairs 337 * @return this Connection, for chaining 338 * @see org.jsoup.Connection.Request#headers() 339 */ 340 Connection headers(Map<String,String> headers); 341 342 /** 343 * Set a cookie to be sent in the request. 
344 * @param name name of cookie 345 * @param value value of cookie 346 * @return this Connection, for chaining 347 */ 348 Connection cookie(String name, String value); 349 350 /** 351 * Adds each of the supplied cookies to the request. 352 * @param cookies map of cookie name {@literal ->} value pairs 353 * @return this Connection, for chaining 354 */ 355 Connection cookies(Map<String, String> cookies); 356 357 /** 358 Provide a custom or pre-filled CookieStore to be used on requests made by this Connection. 359 @param cookieStore a cookie store to use for subsequent requests 360 @return this Connection, for chaining 361 @since 1.14.1 362 */ 363 Connection cookieStore(CookieStore cookieStore); 364 365 /** 366 Get the cookie store used by this Connection. 367 @return the cookie store 368 @since 1.14.1 369 */ 370 CookieStore cookieStore(); 371 372 /** 373 * Provide a specific parser to use when parsing the response to a Document. If not set, jsoup defaults to the 374 * {@link Parser#htmlParser() HTML parser}, unless the response content-type is XML, in which case the 375 * {@link Parser#xmlParser() XML parser} is used. 376 * @param parser alternate parser 377 * @return this Connection, for chaining 378 */ 379 Connection parser(Parser parser); 380 381 /** 382 * Set the character-set used to encode the request body. Defaults to {@code UTF-8}. 383 * @param charset character set to encode the request body 384 * @return this Connection, for chaining 385 */ 386 Connection postDataCharset(String charset); 387 388 /** 389 Set the authenticator to use for this connection, enabling requests to URLs, and via proxies, that require 390 authentication credentials. 391 <p>The authentication scheme used is automatically detected during the request execution. 
392 Supported schemes (subject to the platform) are {@code basic}, {@code digest}, {@code NTLM}, 393 and {@code Kerberos}.</p> 394 395 <p>To use, supply a {@link RequestAuthenticator} function that: 396 <ol> 397 <li>validates the URL that is requesting authentication, and</li> 398 <li>returns the appropriate credentials (username and password)</li> 399 </ol> 400 </p> 401 402 <p>For example, to authenticate both to a proxy and a downstream web server: 403 <code><pre> 404 Connection session = Jsoup.newSession() 405 .proxy("proxy.example.com", 8080) 406 .auth(auth -> { 407 if (auth.isServer()) { // provide credentials for the request url 408 Validate.isTrue(auth.url().getHost().equals("example.com")); 409 // check that we're sending credentials were we expect, and not redirected out 410 return auth.credentials("username", "password"); 411 } else { // auth.isProxy() 412 return auth.credentials("proxy-user", "proxy-password"); 413 } 414 }); 415 416 Connection.Response response = session.newRequest("https://example.com/adminzone/").execute(); 417 </pre></code> 418 </p> 419 420 <p>The system may cache the authentication and use it for subsequent requests to the same resource.</p> 421 422 <p><b>Implementation notes</b></p> 423 <p>For compatibility, on a Java 8 platform, authentication is set up via the system-wide default 424 {@link java.net.Authenticator#setDefault(Authenticator)} method via a ThreadLocal delegator. Whilst the 425 authenticator used is request specific and thread-safe, if you have other calls to {@code setDefault}, they will be 426 incompatible with this implementation.</p> 427 <p>On Java 9 and above, the preceding note does not apply; authenticators are directly set on the request. 
</p> 428 <p>If you are attempting to authenticate to a proxy that uses the {@code basic} scheme and will be fetching HTTPS 429 URLs, you need to configure your Java platform to enable that, by setting the 430 {@code jdk.http.auth.tunneling.disabledSchemes} system property to {@code ""}. 431 This must be executed prior to any authorization attempts. E.g.: 432 <code><pre> 433 static { 434 System.setProperty("jdk.http.auth.tunneling.disabledSchemes", ""); 435 // removes Basic, which is otherwise excluded from auth for CONNECT tunnels 436 }</pre></code> 437 </p> 438 * @param authenticator the authenticator to use in this connection 439 * @return this Connection, for chaining 440 * @since 1.17.1 441 */ 442 default Connection auth(@Nullable RequestAuthenticator authenticator) { 443 throw new UnsupportedOperationException(); 444 } 445 446 /** 447 * Execute the request as a GET, and parse the result. 448 * @return parsed Document 449 * @throws java.net.MalformedURLException if the request URL is not an HTTP or HTTPS URL, or is otherwise malformed 450 * @throws HttpStatusException if the response is not OK and HTTP response errors are not ignored 451 * @throws UnsupportedMimeTypeException if the response mime type is not supported and those errors are not ignored 452 * @throws java.net.SocketTimeoutException if the connection times out 453 * @throws IOException on error 454 */ 455 Document get() throws IOException; 456 457 /** 458 * Execute the request as a POST, and parse the result. 
459 * @return parsed Document 460 * @throws java.net.MalformedURLException if the request URL is not a HTTP or HTTPS URL, or is otherwise malformed 461 * @throws HttpStatusException if the response is not OK and HTTP response errors are not ignored 462 * @throws UnsupportedMimeTypeException if the response mime type is not supported and those errors are not ignored 463 * @throws java.net.SocketTimeoutException if the connection times out 464 * @throws IOException on error 465 */ 466 Document post() throws IOException; 467 468 /** 469 * Execute the request. 470 * @return the executed {@link Response} 471 * @throws java.net.MalformedURLException if the request URL is not a HTTP or HTTPS URL, or is otherwise malformed 472 * @throws HttpStatusException if the response is not OK and HTTP response errors are not ignored 473 * @throws UnsupportedMimeTypeException if the response mime type is not supported and those errors are not ignored 474 * @throws java.net.SocketTimeoutException if the connection times out 475 * @throws IOException on error 476 */ 477 Response execute() throws IOException; 478 479 /** 480 * Get the request object associated with this connection 481 * @return request 482 */ 483 Request request(); 484 485 /** 486 * Set the connection's request 487 * @param request new request object 488 * @return this Connection, for chaining 489 */ 490 Connection request(Request request); 491 492 /** 493 * Get the response, once the request has been executed. 494 * @return response 495 * @throws IllegalArgumentException if called before the response has been executed. 496 */ 497 Response response(); 498 499 /** 500 * Set the connection's response 501 * @param response new response 502 * @return this Connection, for chaining 503 */ 504 Connection response(Response response); 505 506 /** 507 Set the response progress handler, which will be called periodically as the response body is downloaded. 
Since 508 documents are parsed as they are downloaded, this is also a good proxy for the parse progress. 509 <p>The Response object is supplied as the progress context, and may be read from to obtain headers etc.</p> 510 @param handler the progress handler 511 @return this Connection, for chaining 512 @since 1.18.1 513 */ 514 default Connection onResponseProgress(Progress<Response> handler) { 515 throw new UnsupportedOperationException(); 516 } 517 518 /** 519 * Common methods for Requests and Responses 520 * @param <T> Type of Base, either Request or Response 521 */ 522 @SuppressWarnings("UnusedReturnValue") 523 interface Base<T extends Base<T>> { 524 /** 525 * Get the URL of this Request or Response. For redirected responses, this will be the final destination URL. 526 * @return URL 527 * @throws IllegalArgumentException if called on a Request that was created without a URL. 528 */ 529 URL url(); 530 531 /** 532 * Set the URL 533 * @param url new URL 534 * @return this, for chaining 535 */ 536 T url(URL url); 537 538 /** 539 * Get the request method, which defaults to <code>GET</code> 540 * @return method 541 */ 542 Method method(); 543 544 /** 545 * Set the request method 546 * @param method new method 547 * @return this, for chaining 548 */ 549 T method(Method method); 550 551 /** 552 * Get the value of a header. If there is more than one header value with the same name, the headers are returned 553 * comma separated, per <a href="https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2">rfc2616-sec4</a>. 554 * <p> 555 * Header names are case-insensitive. 556 * </p> 557 * @param name name of header (case-insensitive) 558 * @return value of header, or null if not set. 559 * @see #hasHeader(String) 560 * @see #cookie(String) 561 */ 562 @Nullable String header(String name); 563 564 /** 565 * Get the values of a header. 566 * @param name header name, case-insensitive. 567 * @return a list of values for this header, or an empty list if not set. 
568 */ 569 List<String> headers(String name); 570 571 /** 572 * Set a header. This method will overwrite any existing header with the same case-insensitive name. If there 573 * is more than one value for this header, this method will update the first matching header. 574 * <p>For compatibility, if the content of the header includes text that cannot be represented by ISO-8859-1, 575 * then it should be encoded first per <a href="https://www.ietf.org/rfc/rfc2047.txt">RFC 2047</a>.</p> 576 * @param name Name of header 577 * @param value Value of header 578 * @return this, for chaining 579 * @see #addHeader(String, String) 580 */ 581 T header(String name, String value); 582 583 /** 584 * Add a header. The header will be added regardless of whether a header with the same name already exists. 585 * <p>For compatibility, if the content of the header includes text that cannot be represented by ISO-8859-1, 586 * then it should be encoded first per <a href="https://www.ietf.org/rfc/rfc2047.txt">RFC 2047</a>.</p> 587 * @param name Name of new header 588 * @param value Value of new header 589 * @return this, for chaining 590 */ 591 T addHeader(String name, String value); 592 593 /** 594 * Check if a header is present 595 * @param name name of header (case-insensitive) 596 * @return if the header is present in this request/response 597 */ 598 boolean hasHeader(String name); 599 600 /** 601 * Check if a header is present, with the given value 602 * @param name header name (case-insensitive) 603 * @param value value (case-insensitive) 604 * @return if the header and value pair are set in this req/res 605 */ 606 boolean hasHeaderWithValue(String name, String value); 607 608 /** 609 * Remove headers by name. If there is more than one header with this name, they will all be removed. 
610 * @param name name of header to remove (case-insensitive) 611 * @return this, for chaining 612 */ 613 T removeHeader(String name); 614 615 /** 616 * Retrieve all of the request/response header names and corresponding values as a map. For headers with multiple 617 * values, only the first header is returned. 618 * <p>Note that this is a view of the headers only, and changes made to this map will not be reflected in the 619 * request/response object.</p> 620 * @return headers 621 * @see #multiHeaders() 622 623 */ 624 Map<String, String> headers(); 625 626 /** 627 * Retreive all of the headers, keyed by the header name, and with a list of values per header. 628 * @return a list of multiple values per header. 629 */ 630 Map<String, List<String>> multiHeaders(); 631 632 /** 633 * Get a cookie value by name from this request/response. 634 * @param name name of cookie to retrieve. 635 * @return value of cookie, or null if not set 636 */ 637 @Nullable String cookie(String name); 638 639 /** 640 * Set a cookie in this request/response. 641 * @param name name of cookie 642 * @param value value of cookie 643 * @return this, for chaining 644 */ 645 T cookie(String name, String value); 646 647 /** 648 * Check if a cookie is present 649 * @param name name of cookie 650 * @return if the cookie is present in this request/response 651 */ 652 boolean hasCookie(String name); 653 654 /** 655 * Remove a cookie by name 656 * @param name name of cookie to remove 657 * @return this, for chaining 658 */ 659 T removeCookie(String name); 660 661 /** 662 Retrieve the request/response cookies as a map. For response cookies, if duplicate cookie names were sent, the 663 last one set will be the one included. For session management, rather than using these response cookies, prefer 664 to use {@link Jsoup#newSession()} and related methods. 665 666 @return simple cookie map 667 @see #cookieStore() 668 */ 669 Map<String, String> cookies(); 670 } 671 672 /** 673 * Represents a HTTP request. 
674 */ 675 @SuppressWarnings("UnusedReturnValue") 676 interface Request extends Base<Request> { 677 /** 678 * Get the proxy used for this request. 679 * @return the proxy; <code>null</code> if not enabled. 680 */ 681 @Nullable Proxy proxy(); 682 683 /** 684 * Update the proxy for this request. 685 * @param proxy the proxy ot use; <code>null</code> to disable. 686 * @return this Request, for chaining 687 */ 688 Request proxy(@Nullable Proxy proxy); 689 690 /** 691 * Set the HTTP proxy to use for this request. 692 * @param host the proxy hostname 693 * @param port the proxy port 694 * @return this Connection, for chaining 695 */ 696 Request proxy(String host, int port); 697 698 /** 699 * Get the request timeout, in milliseconds. 700 * @return the timeout in milliseconds. 701 */ 702 int timeout(); 703 704 /** 705 * Update the request timeout. 706 * @param millis timeout, in milliseconds 707 * @return this Request, for chaining 708 */ 709 Request timeout(int millis); 710 711 /** 712 * Get the maximum body size, in bytes. 713 * @return the maximum body size, in bytes. 714 */ 715 int maxBodySize(); 716 717 /** 718 * Update the maximum body size, in bytes. 719 * @param bytes maximum body size, in bytes. 720 * @return this Request, for chaining 721 */ 722 Request maxBodySize(int bytes); 723 724 /** 725 * Get the current followRedirects configuration. 726 * @return true if followRedirects is enabled. 727 */ 728 boolean followRedirects(); 729 730 /** 731 * Configures the request to (not) follow server redirects. By default this is <b>true</b>. 732 * @param followRedirects true if server redirects should be followed. 733 * @return this Request, for chaining 734 */ 735 Request followRedirects(boolean followRedirects); 736 737 /** 738 * Get the current ignoreHttpErrors configuration. 739 * @return true if errors will be ignored; false (default) if HTTP errors will cause an IOException to be 740 * thrown. 
741 */ 742 boolean ignoreHttpErrors(); 743 744 /** 745 * Configures the request to ignore HTTP errors in the response. 746 * @param ignoreHttpErrors set to true to ignore HTTP errors. 747 * @return this Request, for chaining 748 */ 749 Request ignoreHttpErrors(boolean ignoreHttpErrors); 750 751 /** 752 * Get the current ignoreContentType configuration. 753 * @return true if invalid content-types will be ignored; false (default) if they will cause an IOException to 754 * be thrown. 755 */ 756 boolean ignoreContentType(); 757 758 /** 759 * Configures the request to ignore the Content-Type of the response. 760 * @param ignoreContentType set to true to ignore the content type. 761 * @return this Request, for chaining 762 */ 763 Request ignoreContentType(boolean ignoreContentType); 764 765 /** 766 * Get the current custom SSL socket factory, if any. 767 * @return custom SSL socket factory if set, null otherwise 768 */ 769 @Nullable SSLSocketFactory sslSocketFactory(); 770 771 /** 772 * Set a custom SSL socket factory. 773 * @param sslSocketFactory SSL socket factory 774 */ 775 void sslSocketFactory(SSLSocketFactory sslSocketFactory); 776 777 /** 778 * Add a data parameter to the request 779 * @param keyval data to add. 780 * @return this Request, for chaining 781 */ 782 Request data(KeyVal keyval); 783 784 /** 785 * Get all of the request's data parameters 786 * @return collection of keyvals 787 */ 788 Collection<KeyVal> data(); 789 790 /** 791 * Set a POST (or PUT) request body. Useful when a server expects a plain request body, not a set of URL 792 * encoded form key/value pairs. E.g.: 793 * <code><pre>Jsoup.connect(url) 794 * .requestBody(json) 795 * .header("Content-Type", "application/json") 796 * .post();</pre></code> 797 * <p>If any data key/vals are supplied, they will be sent as URL query params.</p> 798 * @param body to use as the request body. Set to null to clear a previously set body. 
799 * @return this Request, for chaining 800 * @see #requestBodyStream(InputStream) 801 */ 802 Request requestBody(@Nullable String body); 803 804 /** 805 * Get the current request body. 806 * @return null if not set. 807 */ 808 @Nullable String requestBody(); 809 810 /** 811 Set the request body. Useful for posting data such as byte arrays or files, and the server expects a single 812 request body (and not a multipart upload). E.g.: 813 <code><pre> Jsoup.connect(url) 814 .requestBody(new ByteArrayInputStream(bytes)) 815 .header("Content-Type", "application/octet-stream") 816 .post(); 817 </pre></code> 818 <p>Or, use a FileInputStream to data from disk.</p> 819 <p>You should close the stream in a finally block.</p> 820 821 @param stream the input stream to send. 822 @return this Request, for chaining 823 @see #requestBody(String) 824 @since 1.20.1 825 */ 826 default Request requestBodyStream(InputStream stream) { 827 throw new UnsupportedOperationException(); 828 } 829 830 /** 831 * Specify the parser to use when parsing the document. 832 * @param parser parser to use. 833 * @return this Request, for chaining 834 */ 835 Request parser(Parser parser); 836 837 /** 838 * Get the current parser to use when parsing the document. 839 * @return current Parser 840 */ 841 Parser parser(); 842 843 /** 844 * Sets the post data character set for x-www-form-urlencoded post data 845 * @param charset character set to encode post data 846 * @return this Request, for chaining 847 */ 848 Request postDataCharset(String charset); 849 850 /** 851 * Gets the post data character set for x-www-form-urlencoded post data 852 * @return character set to encode post data 853 */ 854 String postDataCharset(); 855 856 /** 857 Set the authenticator to use for this request. 858 See {@link Connection#auth(RequestAuthenticator) Connection.auth(authenticator)} for examples and 859 implementation notes. 860 * @param authenticator the authenticator 861 * @return this Request, for chaining. 
862 * @since 1.17.1 863 */ 864 default Request auth(@Nullable RequestAuthenticator authenticator) { 865 throw new UnsupportedOperationException(); 866 } 867 868 /** 869 Get the RequestAuthenticator, if any, that will be used on this request. The default implementation throws an UnsupportedOperationException. 870 * @return the RequestAuthenticator, or {@code null} if not set 871 * @since 1.17.1 872 */ 873 @Nullable 874 default RequestAuthenticator auth() { 875 throw new UnsupportedOperationException(); 876 } 877 } 878 879 /** 880 * Represents an HTTP response. 881 */ 882 interface Response extends Base<Response> { 883 884 /** 885 * Get the status code of the response. 886 * @return status code 887 */ 888 int statusCode(); 889 890 /** 891 * Get the status message of the response. 892 * @return status message 893 */ 894 String statusMessage(); 895 896 /** 897 * Get the character set name of the response, derived from the content-type header. 898 * @return character set name if set, <b>null</b> if not 899 */ 900 @Nullable String charset(); 901 902 /** 903 * Set / override the response character set. When the document body is parsed it will be with this charset. 904 * @param charset to decode body as 905 * @return this Response, for chaining 906 */ 907 Response charset(String charset); 908 909 /** 910 * Get the response content type (e.g. "text/html"). 911 * @return the response content type, or <b>null</b> if one was not set 912 */ 913 @Nullable String contentType(); 914 915 /** 916 Read and parse the body of the response as a Document. If you intend to parse the same response multiple times, 917 you should {@link #readFully()} first, which will buffer the body into memory. 918 919 @return a parsed Document 920 @throws IOException if an IO exception occurs whilst reading the body. 921 @see #readFully() 922 */ 923 Document parse() throws IOException; 924 925 /** 926 Reads the response body, and returns it as a plain String. The default implementation throws an UnsupportedOperationException. 927 928 @return body 929 @throws IOException if an IO exception occurs whilst reading the body.
930 @since 1.21.1 931 */ 932 default String readBody() throws IOException { 933 throw new UnsupportedOperationException(); 934 } 935 936 /** 937 Get the body of the response as a plain String. 938 939 <p>Will throw an UncheckedIOException if the body has not been buffered and an error occurs whilst reading the 940 body; use {@link #readFully()} first to buffer the body and catch any exceptions explicitly. Or more simply, use 941 {@link #readBody()}.</p> 942 943 @return body 944 @throws UncheckedIOException if an IO exception occurs whilst reading the body. 945 @see #readBody() 946 @see #readFully() 947 */ 948 String body(); 949 950 /** 951 Get the body of the response as an array of bytes. 952 953 <p>Will throw an UncheckedIOException if the body has not been buffered and an error occurs whilst reading the 954 body; use {@link #readFully()} first to buffer the body and catch any exceptions explicitly.</p> 955 956 @return body bytes 957 @throws UncheckedIOException if an IO exception occurs whilst reading the body. 958 @see #readFully() 959 */ 960 byte[] bodyAsBytes(); 961 962 /** 963 Read the body of the response into a local buffer, so that {@link #parse()} may be called repeatedly on the same 964 connection response. Otherwise, once the response is read, its InputStream will have been drained and may not be 965 re-read. 966 967 <p>Subsequent calls to methods that consume the body, such as {@link #parse()}, {@link #body()}, 968 {@link #bodyAsBytes()}, will not need to read the body again, and will not throw exceptions.</p> 969 <p>Calling {@link #readBody()} has the same effect.</p> 970 <p>The default implementation throws an UnsupportedOperationException.</p> 971 @return this response, for chaining 972 @throws IOException if an IO exception occurs during buffering.
973 @since 1.21.1 974 */ 975 default Response readFully() throws IOException { 976 throw new UnsupportedOperationException(); 977 } 978 979 /** 980 * Read the body of the response into a local buffer, so that {@link #parse()} may be called repeatedly on the 981 * same connection response. Otherwise, once the response is read, its InputStream will have been drained and 982 * may not be re-read. 983 * <p>Calling {@link #body()} or {@link #bodyAsBytes()} has the same effect.</p> 984 * @return this response, for chaining 985 * @throws UncheckedIOException if an IO exception occurs during buffering. 986 * @deprecated use {@link #readFully()} instead (for the checked exception). Will be removed in a future version. 987 */ 988 @Deprecated 989 Response bufferUp(); 990 991 /** 992 Get the body of the response as a (buffered) InputStream. You should close the input stream when you're done 993 with it. 994 <p>Other body methods (like readFully, body, parse, etc) will generally not work in conjunction with this method, 995 as it consumes the InputStream.</p> 996 <p>Any configured max size or maximum read timeout applied to the connection will not be applied to this stream, 997 unless {@link #readFully()} is called prior.</p> 998 <p>This method is useful for writing large responses to disk, without buffering them completely into memory 999 first.</p> 1000 @return the response body input stream 1001 */ 1002 BufferedInputStream bodyStream(); 1003 1004 /** 1005 Returns a {@link StreamParser} that will parse the Response progressively. The default implementation throws an UnsupportedOperationException. 1006 * @return a StreamParser, prepared to parse this response. 1007 * @throws IOException if an IO exception occurs preparing the parser. 1008 */ 1009 default StreamParser streamParser() throws IOException { 1010 throw new UnsupportedOperationException(); 1011 } 1012 } 1013 1014 /** 1015 * A Key:Value tuple(+), used for form data.
1016 */ 1017 interface KeyVal { 1018 1019 /** 1020 * Update the key of a keyval. 1021 * @param key new key 1022 * @return this KeyVal, for chaining 1023 */ 1024 KeyVal key(String key); 1025 1026 /** 1027 * Get the key of a keyval. 1028 * @return the key 1029 */ 1030 String key(); 1031 1032 /** 1033 * Update the value of a keyval. 1034 * @param value the new value 1035 * @return this KeyVal, for chaining 1036 */ 1037 KeyVal value(String value); 1038 1039 /** 1040 * Get the value of a keyval. 1041 * @return the value 1042 */ 1043 String value(); 1044 1045 /** 1046 * Add or update the input stream of this keyval. 1047 * @param inputStream new input stream 1048 * @return this KeyVal, for chaining 1049 */ 1050 KeyVal inputStream(InputStream inputStream); 1051 1052 /** 1053 * Get the input stream associated with this keyval, if any. 1054 * @return input stream if set, or {@code null} 1055 */ 1056 @Nullable InputStream inputStream(); 1057 1058 /** 1059 * Does this keyval have an input stream? 1060 * @return true if this keyval does indeed have an input stream 1061 */ 1062 boolean hasInputStream(); 1063 1064 /** 1065 * Set the Content Type header used in the MIME body (aka mimetype) when uploading files. 1066 * Only useful if {@link #inputStream(InputStream)} is set. 1067 * <p>Will default to {@code application/octet-stream}.</p> 1068 * @param contentType the new content type 1069 * @return this KeyVal, for chaining 1070 */ 1071 KeyVal contentType(String contentType); 1072 1073 /** 1074 * Get the current Content Type, or {@code null} if not set. 1075 * @return the current Content Type, or {@code null} if not set. 1076 */ 1077 @Nullable String contentType(); 1078 } 1079}