001package org.jsoup.helper;
002
003import org.jsoup.Connection;
004import org.jsoup.HttpStatusException;
005import org.jsoup.UncheckedIOException;
006import org.jsoup.UnsupportedMimeTypeException;
007import org.jsoup.internal.ControllableInputStream;
008import org.jsoup.internal.SharedConstants;
009import org.jsoup.internal.StringUtil;
010import org.jsoup.nodes.Document;
011import org.jsoup.parser.Parser;
012import org.jsoup.parser.TokenQueue;
013import org.jspecify.annotations.Nullable;
014
015import javax.net.ssl.HttpsURLConnection;
016import javax.net.ssl.SSLSocketFactory;
017import java.io.BufferedInputStream;
018import java.io.BufferedWriter;
019import java.io.ByteArrayInputStream;
020import java.io.IOException;
021import java.io.InputStream;
022import java.io.OutputStream;
023import java.io.OutputStreamWriter;
024import java.net.CookieManager;
025import java.net.CookieStore;
026import java.net.HttpURLConnection;
027import java.net.InetSocketAddress;
028import java.net.MalformedURLException;
029import java.net.Proxy;
030import java.net.URL;
031import java.net.URLEncoder;
032import java.nio.Buffer;
033import java.nio.ByteBuffer;
034import java.nio.charset.Charset;
035import java.nio.charset.IllegalCharsetNameException;
036import java.util.ArrayList;
037import java.util.Collection;
038import java.util.Collections;
039import java.util.LinkedHashMap;
040import java.util.List;
041import java.util.Map;
042import java.util.regex.Pattern;
043import java.util.zip.GZIPInputStream;
044import java.util.zip.Inflater;
045import java.util.zip.InflaterInputStream;
046
047import static org.jsoup.Connection.Method.HEAD;
048import static org.jsoup.helper.DataUtil.UTF_8;
049import static org.jsoup.internal.Normalizer.lowerCase;
050
051/**
052 * Implementation of {@link Connection}.
053 * @see org.jsoup.Jsoup#connect(String)
054 */
055@SuppressWarnings("CharsetObjectCanBeUsed")
056public class HttpConnection implements Connection {
057    public static final String CONTENT_ENCODING = "Content-Encoding"; // response header inspected for gzip / deflate when wrapping the body stream
058    /**
059     * Many users would get caught by not setting a user-agent and therefore getting different responses on their desktop
060     * vs in jsoup, which would otherwise default to {@code Java}. So by default, use a desktop UA.
061     */
062    public static final String DEFAULT_UA =
063        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36";
064    private static final String USER_AGENT = "User-Agent"; // request header name written by userAgent(...) and pre-populated by Request()
065    public static final String CONTENT_TYPE = "Content-Type"; // header name; stripped from the request when a redirect is re-issued as a GET
066    public static final String MULTIPART_FORM_DATA = "multipart/form-data"; // MIME type literal for multipart form bodies
067    public static final String FORM_URL_ENCODED = "application/x-www-form-urlencoded"; // MIME type literal for URL-encoded form bodies
068    private static final int HTTP_TEMP_REDIR = 307; // http/1.1 temporary redirect, not in Java's set. On a 307 the method, data and body are kept when following the redirect.
069    private static final String DefaultUploadType = "application/octet-stream"; // NOTE(review): presumably the fallback content type for uploaded parts; the usage is outside this view
070    private static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1"); // NOTE(review): not referenced in the visible part of the file; confirm the consumer further down
071
072    /**
073     Create a new Connection, with the request URL specified.
074     @param url the URL to fetch from
075     @return a new Connection object
076     */
077    public static Connection connect(String url) {
078        Connection con = new HttpConnection();
079        con.url(url);
080        return con;
081    }
082
083    /**
084     Create a new Connection, with the request URL specified.
085     @param url the URL to fetch from
086     @return a new Connection object
087     */
088    public static Connection connect(URL url) {
089        Connection con = new HttpConnection();
090        con.url(url);
091        return con;
092    }
093
/**
 Creates an HttpConnection backed by a brand new Request; no response is available until one is executed or supplied.
 */
public HttpConnection() {
    this.req = new Request();
}
100
/**
 Creates a connection whose Request is a copy of an existing Request. The data and body of the original are not
 carried over; all other settings (proxy, parser, cookies, etc) are.
 @param copy the request to copy settings from
 */
HttpConnection(Request copy) {
    this.req = new Request(copy);
}
109
110    private static String encodeMimeName(String val) {
111        return val.replace("\"", "%22");
112    }
113
114    private HttpConnection.Request req; // the request this connection configures and executes; replaceable via request(...)
115    private Connection.@Nullable Response res; // set by execute() or response(...); null until one of those has happened
116
117    @Override
118    public Connection newRequest() {
119        // copy the prototype request for the different settings, cookie manager, etc
120        return new HttpConnection(req);
121    }
122
/**
 Creates a Connection that simply wraps the supplied Request and Response, without copying either.
 @param req the request to wrap
 @param res the response to wrap
 */
private HttpConnection(Request req, Response res) {
    this.res = res;
    this.req = req;
}
128
129    @Override
130    public Connection url(URL url) {
131        req.url(url);
132        return this;
133    }
134
135    @Override
136    public Connection url(String url) {
137        Validate.notEmptyParam(url, "url");
138        try {
139            req.url(new URL(url));
140        } catch (MalformedURLException e) {
141            throw new IllegalArgumentException(String.format("The supplied URL, '%s', is malformed. Make sure it is an absolute URL, and starts with 'http://' or 'https://'. See https://jsoup.org/cookbook/extracting-data/working-with-urls", url), e);
142        }
143        return this;
144    }
145
146    @Override
147    public Connection proxy(@Nullable Proxy proxy) {
148        req.proxy(proxy);
149        return this;
150    }
151
152    @Override
153    public Connection proxy(String host, int port) {
154        req.proxy(host, port);
155        return this;
156    }
157
158    @Override
159    public Connection userAgent(String userAgent) {
160        Validate.notNullParam(userAgent, "userAgent");
161        req.header(USER_AGENT, userAgent);
162        return this;
163    }
164
165    @Override
166    public Connection timeout(int millis) {
167        req.timeout(millis);
168        return this;
169    }
170
171    @Override
172    public Connection maxBodySize(int bytes) {
173        req.maxBodySize(bytes);
174        return this;
175    }
176
177    @Override
178    public Connection followRedirects(boolean followRedirects) {
179        req.followRedirects(followRedirects);
180        return this;
181    }
182
183    @Override
184    public Connection referrer(String referrer) {
185        Validate.notNullParam(referrer, "referrer");
186        req.header("Referer", referrer);
187        return this;
188    }
189
190    @Override
191    public Connection method(Method method) {
192        req.method(method);
193        return this;
194    }
195
196    @Override
197    public Connection ignoreHttpErrors(boolean ignoreHttpErrors) {
198                req.ignoreHttpErrors(ignoreHttpErrors);
199                return this;
200        }
201
202    @Override
203    public Connection ignoreContentType(boolean ignoreContentType) {
204        req.ignoreContentType(ignoreContentType);
205        return this;
206    }
207
208    @Override
209    public Connection data(String key, String value) {
210        req.data(KeyVal.create(key, value));
211        return this;
212    }
213
214    @Override
215    public Connection sslSocketFactory(SSLSocketFactory sslSocketFactory) {
216            req.sslSocketFactory(sslSocketFactory);
217            return this;
218    }
219
220    @Override
221    public Connection data(String key, String filename, InputStream inputStream) {
222        req.data(KeyVal.create(key, filename, inputStream));
223        return this;
224    }
225
226    @Override
227    public Connection data(String key, String filename, InputStream inputStream, String contentType) {
228        req.data(KeyVal.create(key, filename, inputStream).contentType(contentType));
229        return this;
230    }
231
232    @Override
233    public Connection data(Map<String, String> data) {
234        Validate.notNullParam(data, "data");
235        for (Map.Entry<String, String> entry : data.entrySet()) {
236            req.data(KeyVal.create(entry.getKey(), entry.getValue()));
237        }
238        return this;
239    }
240
241    @Override
242    public Connection data(String... keyvals) {
243        Validate.notNullParam(keyvals, "keyvals");
244        Validate.isTrue(keyvals.length %2 == 0, "Must supply an even number of key value pairs");
245        for (int i = 0; i < keyvals.length; i += 2) {
246            String key = keyvals[i];
247            String value = keyvals[i+1];
248            Validate.notEmpty(key, "Data key must not be empty");
249            Validate.notNull(value, "Data value must not be null");
250            req.data(KeyVal.create(key, value));
251        }
252        return this;
253    }
254
255    @Override
256    public Connection data(Collection<Connection.KeyVal> data) {
257        Validate.notNullParam(data, "data");
258        for (Connection.KeyVal entry: data) {
259            req.data(entry);
260        }
261        return this;
262    }
263
264    @Override
265    public Connection.KeyVal data(String key) {
266        Validate.notEmptyParam(key, "key");
267        for (Connection.KeyVal keyVal : request().data()) {
268            if (keyVal.key().equals(key))
269                return keyVal;
270        }
271        return null;
272    }
273
274    @Override
275    public Connection requestBody(String body) {
276        req.requestBody(body);
277        return this;
278    }
279
280    @Override
281    public Connection header(String name, String value) {
282        req.header(name, value);
283        return this;
284    }
285
286    @Override
287    public Connection headers(Map<String,String> headers) {
288        Validate.notNullParam(headers, "headers");
289        for (Map.Entry<String,String> entry : headers.entrySet()) {
290            req.header(entry.getKey(),entry.getValue());
291        }
292        return this;
293    }
294
295    @Override
296    public Connection cookie(String name, String value) {
297        req.cookie(name, value);
298        return this;
299    }
300
301    @Override
302    public Connection cookies(Map<String, String> cookies) {
303        Validate.notNullParam(cookies, "cookies");
304        for (Map.Entry<String, String> entry : cookies.entrySet()) {
305            req.cookie(entry.getKey(), entry.getValue());
306        }
307        return this;
308    }
309
310    @Override
311    public Connection cookieStore(CookieStore cookieStore) {
312        // create a new cookie manager using the new store
313        req.cookieManager = new CookieManager(cookieStore, null);
314        return this;
315    }
316
317    @Override
318    public CookieStore cookieStore() {
319        return req.cookieManager.getCookieStore();
320    }
321
322    @Override
323    public Connection parser(Parser parser) {
324        req.parser(parser);
325        return this;
326    }
327
328    @Override
329    public Document get() throws IOException {
330        req.method(Method.GET);
331        execute();
332        Validate.notNull(res);
333        return res.parse();
334    }
335
336    @Override
337    public Document post() throws IOException {
338        req.method(Method.POST);
339        execute();
340        Validate.notNull(res);
341        return res.parse();
342    }
343
344    @Override
345    public Connection.Response execute() throws IOException {
346        res = Response.execute(req);
347        return res;
348    }
349
350    @Override
351    public Connection.Request request() {
352        return req;
353    }
354
355    @Override
356    public Connection request(Connection.Request request) {
357        req = (HttpConnection.Request) request; // will throw a class-cast exception if the user has extended some but not all of Connection; that's desired
358        return this;
359    }
360
361    @Override
362    public Connection.Response response() {
363        if (res == null) {
364            throw new IllegalArgumentException("You must execute the request before getting a response.");
365        }
366        return res;
367    }
368
369    @Override
370    public Connection response(Connection.Response response) {
371        res = response;
372        return this;
373    }
374
375    @Override
376    public Connection postDataCharset(String charset) {
377        req.postDataCharset(charset);
378        return this;
379    }
380
381    @Override public Connection auth(RequestAuthenticator authenticator) {
382        req.auth(authenticator);
383        return this;
384    }
385
386    @SuppressWarnings("unchecked")
387    private static abstract class Base<T extends Connection.Base<T>> implements Connection.Base<T> {
388        private static final URL UnsetUrl; // only used if you created a new Request()
389        static {
390            try {
391                UnsetUrl = new URL("http://undefined/");
392            } catch (MalformedURLException e) {
393                throw new IllegalStateException(e);
394            }
395        }
396
397        URL url = UnsetUrl;
398        Method method = Method.GET;
399        Map<String, List<String>> headers;
400        Map<String, String> cookies;
401
402        private Base() {
403            headers = new LinkedHashMap<>();
404            cookies = new LinkedHashMap<>();
405        }
406
407        private Base(Base<T> copy) {
408            url = copy.url; // unmodifiable object
409            method = copy.method;
410            headers = new LinkedHashMap<>();
411            for (Map.Entry<String, List<String>> entry : copy.headers.entrySet()) {
412                headers.put(entry.getKey(), new ArrayList<>(entry.getValue()));
413            }
414            cookies = new LinkedHashMap<>(); cookies.putAll(copy.cookies); // just holds strings
415        }
416
417        @Override
418        public URL url() {
419            if (url == UnsetUrl)
420                throw new IllegalArgumentException("URL not set. Make sure to call #url(...) before executing the request.");
421            return url;
422        }
423
424        @Override
425        public T url(URL url) {
426            Validate.notNullParam(url, "url");
427            this.url = new UrlBuilder(url).build();
428            return (T) this;
429        }
430
431        @Override
432        public Method method() {
433            return method;
434        }
435
436        @Override
437        public T method(Method method) {
438            Validate.notNullParam(method, "method");
439            this.method = method;
440            return (T) this;
441        }
442
443        @Override
444        public String header(String name) {
445            Validate.notNullParam(name, "name");
446            List<String> vals = getHeadersCaseInsensitive(name);
447            if (vals.size() > 0) {
448                // https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2
449                return StringUtil.join(vals, ", ");
450            }
451
452            return null;
453        }
454
455        @Override
456        public T addHeader(String name, @Nullable String value) {
457            Validate.notEmptyParam(name, "name");
458            //noinspection ConstantConditions
459            value = value == null ? "" : value;
460
461            List<String> values = headers(name);
462            if (values.isEmpty()) {
463                values = new ArrayList<>();
464                headers.put(name, values);
465            }
466            values.add(value);
467
468            return (T) this;
469        }
470
471        @Override
472        public List<String> headers(String name) {
473            Validate.notEmptyParam(name, "name");
474            return getHeadersCaseInsensitive(name);
475        }
476
477        @Override
478        public T header(String name, String value) {
479            Validate.notEmptyParam(name, "name");
480            removeHeader(name); // ensures we don't get an "accept-encoding" and a "Accept-Encoding"
481            addHeader(name, value);
482            return (T) this;
483        }
484
485        @Override
486        public boolean hasHeader(String name) {
487            Validate.notEmptyParam(name, "name");
488            return !getHeadersCaseInsensitive(name).isEmpty();
489        }
490
491        /**
492         * Test if the request has a header with this value (case insensitive).
493         */
494        @Override
495        public boolean hasHeaderWithValue(String name, String value) {
496            Validate.notEmpty(name);
497            Validate.notEmpty(value);
498            List<String> values = headers(name);
499            for (String candidate : values) {
500                if (value.equalsIgnoreCase(candidate))
501                    return true;
502            }
503            return false;
504        }
505
506        @Override
507        public T removeHeader(String name) {
508            Validate.notEmptyParam(name, "name");
509            Map.Entry<String, List<String>> entry = scanHeaders(name); // remove is case-insensitive too
510            if (entry != null)
511                headers.remove(entry.getKey()); // ensures correct case
512            return (T) this;
513        }
514
515        @Override
516        public Map<String, String> headers() {
517            LinkedHashMap<String, String> map = new LinkedHashMap<>(headers.size());
518            for (Map.Entry<String, List<String>> entry : headers.entrySet()) {
519                String header = entry.getKey();
520                List<String> values = entry.getValue();
521                if (values.size() > 0)
522                    map.put(header, values.get(0));
523            }
524            return map;
525        }
526
527        @Override
528        public Map<String, List<String>> multiHeaders() {
529            return headers;
530        }
531
532        private List<String> getHeadersCaseInsensitive(String name) {
533            Validate.notNull(name);
534
535            for (Map.Entry<String, List<String>> entry : headers.entrySet()) {
536                if (name.equalsIgnoreCase(entry.getKey()))
537                    return entry.getValue();
538            }
539
540            return Collections.emptyList();
541        }
542
543        private Map.@Nullable Entry<String, List<String>> scanHeaders(String name) {
544            String lc = lowerCase(name);
545            for (Map.Entry<String, List<String>> entry : headers.entrySet()) {
546                if (lowerCase(entry.getKey()).equals(lc))
547                    return entry;
548            }
549            return null;
550        }
551
552        @Override
553        public String cookie(String name) {
554            Validate.notEmptyParam(name, "name");
555            return cookies.get(name);
556        }
557
558        @Override
559        public T cookie(String name, String value) {
560            Validate.notEmptyParam(name, "name");
561            Validate.notNullParam(value, "value");
562            cookies.put(name, value);
563            return (T) this;
564        }
565
566        @Override
567        public boolean hasCookie(String name) {
568            Validate.notEmptyParam(name, "name");
569            return cookies.containsKey(name);
570        }
571
572        @Override
573        public T removeCookie(String name) {
574            Validate.notEmptyParam(name, "name");
575            cookies.remove(name);
576            return (T) this;
577        }
578
579        @Override
580        public Map<String, String> cookies() {
581            return cookies;
582        }
583    }
584
585    public static class Request extends HttpConnection.Base<Connection.Request> implements Connection.Request {
586        static {
587            System.setProperty("sun.net.http.allowRestrictedHeaders", "true");
588            // make sure that we can send Sec-Fetch-Site headers etc.
589        }
590
591        private @Nullable Proxy proxy;
592        private int timeoutMilliseconds;
593        private int maxBodySizeBytes;
594        private boolean followRedirects;
595        private final Collection<Connection.KeyVal> data;
596        private @Nullable String body = null;
597        private boolean ignoreHttpErrors = false;
598        private boolean ignoreContentType = false;
599        private Parser parser;
600        private boolean parserDefined = false; // called parser(...) vs initialized in ctor
601        private String postDataCharset = DataUtil.defaultCharsetName;
602        private @Nullable SSLSocketFactory sslSocketFactory;
603        private CookieManager cookieManager;
604        private @Nullable RequestAuthenticator authenticator;
605        private volatile boolean executing = false;
606
607        Request() {
608            super();
609            timeoutMilliseconds = 30000; // 30 seconds
610            maxBodySizeBytes = 1024 * 1024 * 2; // 2MB
611            followRedirects = true;
612            data = new ArrayList<>();
613            method = Method.GET;
614            addHeader("Accept-Encoding", "gzip");
615            addHeader(USER_AGENT, DEFAULT_UA);
616            parser = Parser.htmlParser();
617            cookieManager = new CookieManager(); // creates a default InMemoryCookieStore
618        }
619
620        Request(Request copy) {
621            super(copy);
622            proxy = copy.proxy;
623            postDataCharset = copy.postDataCharset;
624            timeoutMilliseconds = copy.timeoutMilliseconds;
625            maxBodySizeBytes = copy.maxBodySizeBytes;
626            followRedirects = copy.followRedirects;
627            data = new ArrayList<>(); // data not copied
628            //body not copied
629            ignoreHttpErrors = copy.ignoreHttpErrors;
630            ignoreContentType = copy.ignoreContentType;
631            parser = copy.parser.newInstance(); // parsers and their tree-builders maintain state, so need a fresh copy
632            parserDefined = copy.parserDefined;
633            sslSocketFactory = copy.sslSocketFactory; // these are all synchronized so safe to share
634            cookieManager = copy.cookieManager;
635            authenticator = copy.authenticator;
636            executing = false;
637        }
638
639        @Override
640        public Proxy proxy() {
641            return proxy;
642        }
643
644        @Override
645        public Request proxy(@Nullable Proxy proxy) {
646            this.proxy = proxy;
647            return this;
648        }
649
650        @Override
651        public Request proxy(String host, int port) {
652            this.proxy = new Proxy(Proxy.Type.HTTP, InetSocketAddress.createUnresolved(host, port));
653            return this;
654        }
655
656        @Override
657        public int timeout() {
658            return timeoutMilliseconds;
659        }
660
661        @Override
662        public Request timeout(int millis) {
663            Validate.isTrue(millis >= 0, "Timeout milliseconds must be 0 (infinite) or greater");
664            timeoutMilliseconds = millis;
665            return this;
666        }
667
668        @Override
669        public int maxBodySize() {
670            return maxBodySizeBytes;
671        }
672
673        @Override
674        public Connection.Request maxBodySize(int bytes) {
675            Validate.isTrue(bytes >= 0, "maxSize must be 0 (unlimited) or larger");
676            maxBodySizeBytes = bytes;
677            return this;
678        }
679
680        @Override
681        public boolean followRedirects() {
682            return followRedirects;
683        }
684
685        @Override
686        public Connection.Request followRedirects(boolean followRedirects) {
687            this.followRedirects = followRedirects;
688            return this;
689        }
690
691        @Override
692        public boolean ignoreHttpErrors() {
693            return ignoreHttpErrors;
694        }
695
696        @Override
697        public SSLSocketFactory sslSocketFactory() {
698            return sslSocketFactory;
699        }
700
701        @Override
702        public void sslSocketFactory(SSLSocketFactory sslSocketFactory) {
703            this.sslSocketFactory = sslSocketFactory;
704        }
705
706        @Override
707        public Connection.Request ignoreHttpErrors(boolean ignoreHttpErrors) {
708            this.ignoreHttpErrors = ignoreHttpErrors;
709            return this;
710        }
711
712        @Override
713        public boolean ignoreContentType() {
714            return ignoreContentType;
715        }
716
717        @Override
718        public Connection.Request ignoreContentType(boolean ignoreContentType) {
719            this.ignoreContentType = ignoreContentType;
720            return this;
721        }
722
723        @Override
724        public Request data(Connection.KeyVal keyval) {
725            Validate.notNullParam(keyval, "keyval");
726            data.add(keyval);
727            return this;
728        }
729
730        @Override
731        public Collection<Connection.KeyVal> data() {
732            return data;
733        }
734
735        @Override
736        public Connection.Request requestBody(@Nullable String body) {
737            this.body = body;
738            return this;
739        }
740
741        @Override
742        public String requestBody() {
743            return body;
744        }
745
746        @Override
747        public Request parser(Parser parser) {
748            this.parser = parser;
749            parserDefined = true;
750            return this;
751        }
752
753        @Override
754        public Parser parser() {
755            return parser;
756        }
757
758        @Override
759        public Connection.Request postDataCharset(String charset) {
760            Validate.notNullParam(charset, "charset");
761            if (!Charset.isSupported(charset)) throw new IllegalCharsetNameException(charset);
762            this.postDataCharset = charset;
763            return this;
764        }
765
766        @Override
767        public String postDataCharset() {
768            return postDataCharset;
769        }
770
771        CookieManager cookieManager() {
772            return cookieManager;
773        }
774
775        @Override public Connection.Request auth(@Nullable RequestAuthenticator authenticator) {
776            this.authenticator = authenticator;
777            return this;
778        }
779
780        @Override @Nullable public RequestAuthenticator auth() {
781            return authenticator;
782        }
783    }
784
785    public static class Response extends HttpConnection.Base<Connection.Response> implements Connection.Response {
786        private static final int MAX_REDIRECTS = 20; // NOTE(review): presumably the cap on followed redirects; the enforcing code is outside this view
787        private static final String LOCATION = "Location"; // response header whose presence triggers a redirect in execute(...)
788        private final int statusCode;
789        private final String statusMessage;
790        private @Nullable ByteBuffer byteData; // set to an empty buffer by execute(...) when there is no body to read
791        private @Nullable ControllableInputStream bodyStream; // set by execute(...) when there is a body: size-limited, timeout-aware wrapper over the (possibly decompressed) stream
792        private @Nullable HttpURLConnection conn;
793        private @Nullable String charset; // derived from the content type by execute(...); may remain null
794        private @Nullable final String contentType;
795        private boolean executed = false;
796        private boolean inputStreamRead = false;
797        private int numRedirects = 0;
798        private final HttpConnection.Request req;
799
800        /*
801         * Matches XML content types (like text/xml, image/svg+xml, application/xhtml+xml;charset=UTF8, etc)
802         */
803        private static final Pattern xmlContentTypeRxp = Pattern.compile("(\\w+)/\\w*\\+?xml.*");
804
/**
 <b>Internal only! </b>Builds a placeholder HttpConnection.Response, useful for testing: status 400 with the
 message "Request not made", a brand new Request, and no content type. Real responses are populated from the
 HttpURLConnection instead.
 */
Response() {
    super();
    this.req = new Request();
    this.contentType = null;
    this.statusMessage = "Request not made";
    this.statusCode = 400;
}
816
817        static Response execute(HttpConnection.Request req) throws IOException {
818            return execute(req, null);
819        }
820
821        static Response execute(HttpConnection.Request req, @Nullable Response previousResponse) throws IOException {
822            synchronized (req) {
823                Validate.isFalse(req.executing, "Multiple threads were detected trying to execute the same request concurrently. Make sure to use Connection#newRequest() and do not share an executing request between threads.");
824                req.executing = true;
825            }
826            Validate.notNullParam(req, "req");
827            URL url = req.url();
828            Validate.notNull(url, "URL must be specified to connect");
829            String protocol = url.getProtocol();
830            if (!protocol.equals("http") && !protocol.equals("https"))
831                throw new MalformedURLException("Only http & https protocols supported");
832            final boolean methodHasBody = req.method().hasBody();
833            final boolean hasRequestBody = req.requestBody() != null;
834            if (!methodHasBody)
835                Validate.isFalse(hasRequestBody, "Cannot set a request body for HTTP method " + req.method());
836
837            // set up the request for execution
838            String mimeBoundary = null;
839            if (req.data().size() > 0 && (!methodHasBody || hasRequestBody))
840                serialiseRequestUrl(req);
841            else if (methodHasBody)
842                mimeBoundary = setOutputContentType(req);
843
844            long startTime = System.nanoTime();
845            HttpURLConnection conn = createConnection(req);
846            Response res = null;
847            try {
848                conn.connect();
849                if (conn.getDoOutput()) {
850                    OutputStream out = conn.getOutputStream();
851                    try { writePost(req, out, mimeBoundary); }
852                    catch (IOException e) { conn.disconnect(); throw e; }
853                    finally { out.close(); }
854                }
855
856                int status = conn.getResponseCode();
857                res = new Response(conn, req, previousResponse);
858
859                // redirect if there's a location header (from 3xx, or 201 etc)
860                if (res.hasHeader(LOCATION) && req.followRedirects()) {
861                    if (status != HTTP_TEMP_REDIR) {
862                        req.method(Method.GET); // always redirect with a get. any data param from original req are dropped.
863                        req.data().clear();
864                        req.requestBody(null);
865                        req.removeHeader(CONTENT_TYPE);
866                    }
867
868                    String location = res.header(LOCATION);
869                    Validate.notNull(location);
870                    if (location.startsWith("http:/") && location.charAt(6) != '/') // fix broken Location: http:/temp/AAG_New/en/index.php
871                        location = location.substring(6);
872                    URL redir = StringUtil.resolve(req.url(), location);
873                    req.url(redir);
874
875                    req.executing = false;
876                    return execute(req, res);
877                }
878                if ((status < 200 || status >= 400) && !req.ignoreHttpErrors())
879                        throw new HttpStatusException("HTTP error fetching URL", status, req.url().toString());
880
881                // check that we can handle the returned content type; if not, abort before fetching it
882                String contentType = res.contentType();
883                if (contentType != null
884                        && !req.ignoreContentType()
885                        && !contentType.startsWith("text/")
886                        && !xmlContentTypeRxp.matcher(contentType).matches()
887                        )
888                    throw new UnsupportedMimeTypeException("Unhandled content type. Must be text/*, */xml, or */*+xml",
889                            contentType, req.url().toString());
890
                // switch to the XML parser if the content type is XML and a parser was not explicitly set
892                if (contentType != null && xmlContentTypeRxp.matcher(contentType).matches()) {
893                    if (!req.parserDefined) req.parser(Parser.xmlParser());
894                }
895
896                res.charset = DataUtil.getCharsetFromContentType(res.contentType); // may be null, readInputStream deals with it
897                if (conn.getContentLength() != 0 && req.method() != HEAD) { // -1 means unknown, chunked. sun throws an IO exception on 500 response with no content when trying to read body
898                    InputStream stream = conn.getErrorStream() != null ? conn.getErrorStream() : conn.getInputStream();
899                    if (res.hasHeaderWithValue(CONTENT_ENCODING, "gzip"))
900                        stream = new GZIPInputStream(stream);
901                    else if (res.hasHeaderWithValue(CONTENT_ENCODING, "deflate"))
902                        stream = new InflaterInputStream(stream, new Inflater(true));
903                    
904                    res.bodyStream = ControllableInputStream.wrap(
905                        stream, SharedConstants.DefaultBufferSize, req.maxBodySize())
906                        .timeout(startTime, req.timeout());
907                } else {
908                    res.byteData = DataUtil.emptyByteBuffer();
909                }
910            } catch (IOException e) {
911                if (res != null) res.safeClose(); // will be non-null if got to conn
912                throw e;
913            } finally {
914                req.executing = false;
915
916                // detach any thread local auth delegate
917                if (req.authenticator != null)
918                    AuthenticationHandler.handler.remove();
919            }
920
921            res.executed = true;
922            return res;
923        }
924
925        @Override
926        public int statusCode() {
927            return statusCode;
928        }
929
930        @Override
931        public String statusMessage() {
932            return statusMessage;
933        }
934
935        @Override
936        public String charset() {
937            return charset;
938        }
939
940        @Override
941        public Response charset(String charset) {
942            this.charset = charset;
943            return this;
944        }
945
946        @Override
947        public String contentType() {
948            return contentType;
949        }
950
951        public Document parse() throws IOException {
952            Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before parsing response");
953            InputStream stream = bodyStream;
954            if (byteData != null) { // bytes have been read in to the buffer, parse that
955                stream = new ByteArrayInputStream(byteData.array());
956                inputStreamRead = false; // ok to reparse if in bytes
957            }
958            Validate.isFalse(inputStreamRead, "Input stream already read and parsed, cannot re-read.");
959            Document doc = DataUtil.parseInputStream(stream, charset, url.toExternalForm(), req.parser());
960            doc.connection(new HttpConnection(req, this)); // because we're static, don't have the connection obj. // todo - maybe hold in the req?
961            charset = doc.outputSettings().charset().name(); // update charset from meta-equiv, possibly
962            inputStreamRead = true;
963            safeClose();
964            return doc;
965        }
966
967        private void prepareByteData() {
968            Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body");
969            if (bodyStream != null && byteData == null) {
970                Validate.isFalse(inputStreamRead, "Request has already been read (with .parse())");
971                try {
972                    byteData = DataUtil.readToByteBuffer(bodyStream, req.maxBodySize());
973                } catch (IOException e) {
974                    throw new UncheckedIOException(e);
975                } finally {
976                    inputStreamRead = true;
977                    safeClose();
978                }
979            }
980        }
981
982        @Override
983        public String body() {
984            prepareByteData();
985            Validate.notNull(byteData);
986            // charset gets set from header on execute, and from meta-equiv on parse. parse may not have happened yet
987            String body = (charset == null ? UTF_8 : Charset.forName(charset))
988                .decode(byteData).toString();
989            ((Buffer)byteData).rewind(); // cast to avoid covariant return type change in jdk9
990            return body;
991        }
992
993        @Override
994        public byte[] bodyAsBytes() {
995            prepareByteData();
996            Validate.notNull(byteData);
997            return byteData.array();
998        }
999
1000        @Override
1001        public Connection.Response bufferUp() {
1002            prepareByteData();
1003            return this;
1004        }
1005
1006        @Override
1007        public BufferedInputStream bodyStream() {
1008            Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body");
1009
1010            // if we have read to bytes (via buffer up), return those as a stream.
1011            if (byteData != null) {
1012                return new BufferedInputStream(new ByteArrayInputStream(byteData.array()), SharedConstants.DefaultBufferSize);
1013            }
1014
1015            Validate.isFalse(inputStreamRead, "Request has already been read");
1016            Validate.notNull(bodyStream);
1017            inputStreamRead = true;
1018            return bodyStream.inputStream();
1019        }
1020
1021        // set up connection defaults, and details from request
1022        private static HttpURLConnection createConnection(HttpConnection.Request req) throws IOException {
1023            Proxy proxy = req.proxy();
1024            final HttpURLConnection conn = (HttpURLConnection) (
1025                proxy == null ?
1026                req.url().openConnection() :
1027                req.url().openConnection(proxy)
1028            );
1029
1030            conn.setRequestMethod(req.method().name());
1031            conn.setInstanceFollowRedirects(false); // don't rely on native redirection support
1032            conn.setConnectTimeout(req.timeout());
1033            conn.setReadTimeout(req.timeout() / 2); // gets reduced after connection is made and status is read
1034
1035            if (req.sslSocketFactory() != null && conn instanceof HttpsURLConnection)
1036                ((HttpsURLConnection) conn).setSSLSocketFactory(req.sslSocketFactory());
1037            if (req.authenticator != null)
1038                AuthenticationHandler.handler.enable(req.authenticator, conn); // removed in finally
1039            if (req.method().hasBody())
1040                conn.setDoOutput(true);
1041            CookieUtil.applyCookiesToRequest(req, conn); // from the Request key/val cookies and the Cookie Store
1042            for (Map.Entry<String, List<String>> header : req.multiHeaders().entrySet()) {
1043                for (String value : header.getValue()) {
1044                    conn.addRequestProperty(header.getKey(), value);
1045                }
1046            }
1047            return conn;
1048        }
1049
1050        /**
1051         * Call on completion of stream read, to close the body (or error) stream. The connection.disconnect allows
1052         * keep-alives to work (as the underlying connection is actually held open, despite the name).
1053         */
1054        private void safeClose() {
1055            if (bodyStream != null) {
1056                try {
1057                    bodyStream.close();
1058                } catch (IOException e) {
1059                    // no-op
1060                } finally {
1061                    bodyStream = null;
1062                }
1063            }
1064            if (conn != null) {
1065                conn.disconnect();
1066                conn = null;
1067            }
1068        }
1069
1070        // set up url, method, header, cookies
1071        private Response(HttpURLConnection conn, HttpConnection.Request request, HttpConnection.@Nullable Response previousResponse) throws IOException {
1072            this.conn = conn;
1073            this.req = request;
1074            method = Method.valueOf(conn.getRequestMethod());
1075            url = conn.getURL();
1076            statusCode = conn.getResponseCode();
1077            statusMessage = conn.getResponseMessage();
1078            contentType = conn.getContentType();
1079
1080            Map<String, List<String>> resHeaders = createHeaderMap(conn);
1081            processResponseHeaders(resHeaders); // includes cookie key/val read during header scan
1082            CookieUtil.storeCookies(req, url, resHeaders); // add set cookies to cookie store
1083
1084            if (previousResponse != null) { // was redirected
1085                // map previous response cookies into this response cookies() object
1086                for (Map.Entry<String, String> prevCookie : previousResponse.cookies().entrySet()) {
1087                    if (!hasCookie(prevCookie.getKey()))
1088                        cookie(prevCookie.getKey(), prevCookie.getValue());
1089                }
1090                previousResponse.safeClose();
1091
1092                // enforce too many redirects:
1093                numRedirects = previousResponse.numRedirects + 1;
1094                if (numRedirects >= MAX_REDIRECTS)
1095                    throw new IOException(String.format("Too many redirects occurred trying to load URL %s", previousResponse.url()));
1096            }
1097        }
1098
1099        private static LinkedHashMap<String, List<String>> createHeaderMap(HttpURLConnection conn) {
1100            // the default sun impl of conn.getHeaderFields() returns header values out of order
1101            final LinkedHashMap<String, List<String>> headers = new LinkedHashMap<>();
1102            int i = 0;
1103            while (true) {
1104                final String key = conn.getHeaderFieldKey(i);
1105                final String val = conn.getHeaderField(i);
1106                if (key == null && val == null)
1107                    break;
1108                i++;
1109                if (key == null || val == null)
1110                    continue; // skip http1.1 line
1111
1112                if (headers.containsKey(key))
1113                    headers.get(key).add(val);
1114                else {
1115                    final ArrayList<String> vals = new ArrayList<>();
1116                    vals.add(val);
1117                    headers.put(key, vals);
1118                }
1119            }
1120            return headers;
1121        }
1122
1123        void processResponseHeaders(Map<String, List<String>> resHeaders) {
1124            for (Map.Entry<String, List<String>> entry : resHeaders.entrySet()) {
1125                String name = entry.getKey();
1126                if (name == null)
1127                    continue; // http/1.1 line
1128
1129                List<String> values = entry.getValue();
1130                if (name.equalsIgnoreCase("Set-Cookie")) {
1131                    for (String value : values) {
1132                        if (value == null)
1133                            continue;
1134                        TokenQueue cd = new TokenQueue(value);
1135                        String cookieName = cd.chompTo("=").trim();
1136                        String cookieVal = cd.consumeTo(";").trim();
1137                        // ignores path, date, domain, validateTLSCertificates et al. full details will be available in cookiestore if required
1138                        // name not blank, value not null
1139                        if (cookieName.length() > 0 && !cookies.containsKey(cookieName)) // if duplicates, only keep the first
1140                            cookie(cookieName, cookieVal);
1141                    }
1142                }
1143                for (String value : values) {
1144                    addHeader(name, fixHeaderEncoding(value));
1145                }
1146            }
1147        }
1148
1149        /**
1150         Servers may encode response headers in UTF-8 instead of RFC defined 8859. This method attempts to detect that
1151         and re-decode the string as UTF-8.
1152         * @param val a header value string that may have been incorrectly decoded as 8859.
1153         * @return a potentially re-decoded string.
1154         */
1155        @Nullable
1156        private static String fixHeaderEncoding(@Nullable String val) {
1157            if (val == null) return val;
1158            byte[] bytes = val.getBytes(ISO_8859_1);
1159            if (looksLikeUtf8(bytes))
1160                return new String(bytes, UTF_8);
1161            else
1162                return val;
1163        }
1164
1165        private static boolean looksLikeUtf8(byte[] input) {
1166            int i = 0;
1167            // BOM:
1168            if (input.length >= 3
1169                && (input[0] & 0xFF) == 0xEF
1170                && (input[1] & 0xFF) == 0xBB
1171                && (input[2] & 0xFF) == 0xBF) {
1172                i = 3;
1173            }
1174
1175            int end;
1176            boolean foundNonAscii = false;
1177            for (int j = input.length; i < j; ++i) {
1178                int o = input[i];
1179                if ((o & 0x80) == 0) {
1180                    continue; // ASCII
1181                }
1182                foundNonAscii = true;
1183
1184                // UTF-8 leading:
1185                if ((o & 0xE0) == 0xC0) {
1186                    end = i + 1;
1187                } else if ((o & 0xF0) == 0xE0) {
1188                    end = i + 2;
1189                } else if ((o & 0xF8) == 0xF0) {
1190                    end = i + 3;
1191                } else {
1192                    return false;
1193                }
1194
1195                if (end >= input.length)
1196                    return false;
1197
1198                while (i < end) {
1199                    i++;
1200                    o = input[i];
1201                    if ((o & 0xC0) != 0x80) {
1202                        return false;
1203                    }
1204                }
1205            }
1206            return foundNonAscii;
1207        }
1208
1209        private @Nullable static String setOutputContentType(final Connection.Request req) {
1210            final String contentType = req.header(CONTENT_TYPE);
1211            String bound = null;
1212            if (contentType != null) {
1213                // no-op; don't add content type as already set (e.g. for requestBody())
1214                // todo - if content type already set, we could add charset
1215
1216                // if user has set content type to multipart/form-data, auto add boundary.
1217                if(contentType.contains(MULTIPART_FORM_DATA) && !contentType.contains("boundary")) {
1218                    bound = DataUtil.mimeBoundary();
1219                    req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound);
1220                }
1221
1222            }
1223            else if (needsMultipart(req)) {
1224                bound = DataUtil.mimeBoundary();
1225                req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound);
1226            } else {
1227                req.header(CONTENT_TYPE, FORM_URL_ENCODED + "; charset=" + req.postDataCharset());
1228            }
1229            return bound;
1230        }
1231
1232        private static void writePost(final Connection.Request req, final OutputStream outputStream, @Nullable final String boundary) throws IOException {
1233            final Collection<Connection.KeyVal> data = req.data();
1234            final BufferedWriter w = new BufferedWriter(new OutputStreamWriter(outputStream, Charset.forName(req.postDataCharset())));
1235
1236            if (boundary != null) {
1237                // boundary will be set if we're in multipart mode
1238                for (Connection.KeyVal keyVal : data) {
1239                    w.write("--");
1240                    w.write(boundary);
1241                    w.write("\r\n");
1242                    w.write("Content-Disposition: form-data; name=\"");
1243                    w.write(encodeMimeName(keyVal.key())); // encodes " to %22
1244                    w.write("\"");
1245                    final InputStream input = keyVal.inputStream();
1246                    if (input != null) {
1247                        w.write("; filename=\"");
1248                        w.write(encodeMimeName(keyVal.value()));
1249                        w.write("\"\r\nContent-Type: ");
1250                        String contentType = keyVal.contentType();
1251                        w.write(contentType != null ? contentType : DefaultUploadType);
1252                        w.write("\r\n\r\n");
1253                        w.flush(); // flush
1254                        DataUtil.crossStreams(input, outputStream);
1255                        outputStream.flush();
1256                    } else {
1257                        w.write("\r\n\r\n");
1258                        w.write(keyVal.value());
1259                    }
1260                    w.write("\r\n");
1261                }
1262                w.write("--");
1263                w.write(boundary);
1264                w.write("--");
1265            } else {
1266                String body = req.requestBody();
1267                if (body != null) {
1268                    // data will be in query string, we're sending a plaintext body
1269                    w.write(body);
1270                }
1271                else {
1272                    // regular form data (application/x-www-form-urlencoded)
1273                    boolean first = true;
1274                    for (Connection.KeyVal keyVal : data) {
1275                        if (!first)
1276                            w.append('&');
1277                        else
1278                            first = false;
1279
1280                        w.write(URLEncoder.encode(keyVal.key(), req.postDataCharset()));
1281                        w.write('=');
1282                        w.write(URLEncoder.encode(keyVal.value(), req.postDataCharset()));
1283                    }
1284                }
1285            }
1286            w.close();
1287        }
1288
1289        // for get url reqs, serialise the data map into the url
1290        private static void serialiseRequestUrl(Connection.Request req) throws IOException {
1291            UrlBuilder in = new UrlBuilder(req.url());
1292
1293            for (Connection.KeyVal keyVal : req.data()) {
1294                Validate.isFalse(keyVal.hasInputStream(), "InputStream data not supported in URL query string.");
1295                in.appendKeyVal(keyVal);
1296            }
1297            req.url(in.build());
1298            req.data().clear(); // moved into url as get params
1299        }
1300    }
1301
1302    private static boolean needsMultipart(Connection.Request req) {
1303        // multipart mode, for files. add the header if we see something with an inputstream, and return a non-null boundary
1304        for (Connection.KeyVal keyVal : req.data()) {
1305            if (keyVal.hasInputStream())
1306                return true;
1307        }
1308        return false;
1309    }
1310
1311    public static class KeyVal implements Connection.KeyVal {
1312        private String key;
1313        private String value;
1314        private @Nullable InputStream stream;
1315        private @Nullable String contentType;
1316
1317        public static KeyVal create(String key, String value) {
1318            return new KeyVal(key, value);
1319        }
1320
1321        public static KeyVal create(String key, String filename, InputStream stream) {
1322            return new KeyVal(key, filename)
1323                .inputStream(stream);
1324        }
1325
1326        private KeyVal(String key, String value) {
1327            Validate.notEmptyParam(key, "key");
1328            Validate.notNullParam(value, "value");
1329            this.key = key;
1330            this.value = value;
1331        }
1332
1333        @Override
1334        public KeyVal key(String key) {
1335            Validate.notEmptyParam(key, "key");
1336            this.key = key;
1337            return this;
1338        }
1339
1340        @Override
1341        public String key() {
1342            return key;
1343        }
1344
1345        @Override
1346        public KeyVal value(String value) {
1347            Validate.notNullParam(value, "value");
1348            this.value = value;
1349            return this;
1350        }
1351
1352        @Override
1353        public String value() {
1354            return value;
1355        }
1356
1357        public KeyVal inputStream(InputStream inputStream) {
1358            Validate.notNullParam(value, "inputStream");
1359            this.stream = inputStream;
1360            return this;
1361        }
1362
1363        @Override
1364        public InputStream inputStream() {
1365            return stream;
1366        }
1367
1368        @Override
1369        public boolean hasInputStream() {
1370            return stream != null;
1371        }
1372
1373        @Override
1374        public Connection.KeyVal contentType(String contentType) {
1375            Validate.notEmpty(contentType);
1376            this.contentType = contentType;
1377            return this;
1378        }
1379
1380        @Override
1381        public String contentType() {
1382            return contentType;
1383        }
1384
1385        @Override
1386        public String toString() {
1387            return key + "=" + value;
1388        }
1389    }
1390}