View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2002-2024, University of Amsterdam
    7                              VU University Amsterdam
    8                              CWI, Amsterdam
    9                              SWI-Prolog Solutions b.v.
   10    All rights reserved.
   11
   12    Redistribution and use in source and binary forms, with or without
   13    modification, are permitted provided that the following conditions
   14    are met:
   15
   16    1. Redistributions of source code must retain the above copyright
   17       notice, this list of conditions and the following disclaimer.
   18
   19    2. Redistributions in binary form must reproduce the above copyright
   20       notice, this list of conditions and the following disclaimer in
   21       the documentation and/or other materials provided with the
   22       distribution.
   23
   24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   25    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   26    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   27    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   28    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   29    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   30    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   31    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   32    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   34    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   35    POSSIBILITY OF SUCH DAMAGE.
   36*/
   37
   38:- module(http_open,
   39          [ http_open/3,                % +URL, -Stream, +Options
   40            http_set_authorization/2,   % +URL, +Authorization
   41            http_close_keep_alive/1     % +Address
   42          ]).   43:- autoload(library(aggregate),[aggregate_all/3]).   44:- autoload(library(apply),[foldl/4,include/3]).   45:- autoload(library(base64),[base64/3]).   46:- use_module(library(debug),[debug/3,debugging/1]).   47:- autoload(library(error),
   48	    [ domain_error/2, must_be/2, existence_error/2, instantiation_error/1
   49	    ]).   50:- autoload(library(lists),[last/2,member/2]).   51:- autoload(library(option),
   52	    [ meta_options/3, option/2, select_option/4, merge_options/3,
   53	      option/3, select_option/3
   54	    ]).   55:- autoload(library(readutil),[read_line_to_codes/2]).   56:- autoload(library(uri),
   57	    [ uri_resolve/3, uri_components/2, uri_data/3,
   58              uri_authority_components/2, uri_authority_data/3,
   59	      uri_encoded/3, uri_query_components/2, uri_is_global/1
   60	    ]).   61:- autoload(library(http/http_header),
   62            [ http_parse_header/2, http_post_data/3 ]).   63:- autoload(library(http/http_stream),[stream_range_open/3]).   64:- if(exists_source(library(ssl))).   65:- autoload(library(ssl), [ssl_upgrade_legacy_options/2]).   66:- endif.   67:- use_module(library(socket)).   68:- use_module(library(settings)).   69
   70:- setting(http:max_keep_alive_idle, number, 2,
   71           "Time to keep idle keep alive connections around").   72:- setting(http:max_keep_alive_connections, integer, 10,
   73           "Maximum number of client keep alive connections").   74:- setting(http:max_keep_alive_host_connections, integer, 2,
   75           "Maximum number of client keep alive to a single host").   76
   77/** <module> HTTP client library
   78
   79This library defines http_open/3, which opens an URL as a Prolog stream.
   80The functionality of the  library  can   be  extended  by  loading two
   81additional modules that act as plugins:
   82
   83    * library(http/http_ssl_plugin)
   84    Loading this library causes http_open/3 to handle HTTPS connections.
   85    Relevant options for SSL certificate handling are handed to
   86    ssl_context/3. This plugin is loaded automatically if the scheme
   87    `https` is requested using a default SSL context. See the plugin for
   88    additional information regarding security.
   89
   90    * library(zlib)
   91    Loading this library supports the `gzip` transfer encoding.  This
   92    plugin is lazily loaded if a connection is opened that claims this
   93    transfer encoding.
   94
   95    * library(http/http_cookie)
   96    Loading this library adds tracking cookies to http_open/3. Returned
   97    cookies are collected in the Prolog database and supplied for
   98    subsequent requests.
   99
  100    * library(http/http_stream)
  101    This library adds support for _chunked_ encoding. It is lazily
  102    loaded if the server sends a ``Transfer-encoding: chunked`` header.
  103
  104
  105Here is a simple example to fetch a web-page:
  106
  107```
  108?- http_open('http://www.google.com/search?q=prolog', In, []),
  109   copy_stream_data(In, user_output),
  110   close(In).
  111<!doctype html><head><title>prolog - Google Search</title><script>
  112...
  113```
  114
  115The example below fetches the modification time of a web-page. Note that
  116=|Modified|= is =|''|= (the empty atom) if the  web-server does not provide a
  117time-stamp for the resource. See also parse_time/2.
  118
  119```
  120modified(URL, Stamp) :-
  121       http_open(URL, In,
  122                 [ method(head),
  123                   header(last_modified, Modified)
  124                 ]),
  125       close(In),
  126       Modified \== '',
  127       parse_time(Modified, Stamp).
  128```
  129
  130Then next example uses Google search. It exploits library(uri) to manage
  131URIs, library(sgml) to load  an  HTML   document  and  library(xpath) to
  132navigate the parsed HTML. Note that  you   may  need to adjust the XPath
  133queries if the data returned by Google changes (this example indeed
  134no longer works and currently fails at the first xpath/3 call)
  135
  136```
  137:- use_module(library(http/http_open)).
  138:- use_module(library(xpath)).
  139:- use_module(library(sgml)).
  140:- use_module(library(uri)).
  141
  142google(For, Title, HREF) :-
  143        uri_encoded(query_value, For, Encoded),
  144        atom_concat('http://www.google.com/search?q=', Encoded, URL),
  145        http_open(URL, In, []),
  146        call_cleanup(
  147            load_html(In, DOM, []),
  148            close(In)),
  149        xpath(DOM, //h3(@class=r), Result),
  150        xpath(Result, //a(@href=HREF0, text), Title),
  151        uri_components(HREF0, Components),
  152        uri_data(search, Components, Query),
  153        uri_query_components(Query, Parts),
  154        memberchk(q=HREF, Parts).
  155```
  156
  157An example query is below:
  158
  159```
  160?- google(prolog, Title, HREF).
  161Title = 'SWI-Prolog',
  162HREF = 'http://www.swi-prolog.org/' ;
  163Title = 'Prolog - Wikipedia',
  164HREF = 'https://nl.wikipedia.org/wiki/Prolog' ;
  165Title = 'Prolog - Wikipedia, the free encyclopedia',
  166HREF = 'https://en.wikipedia.org/wiki/Prolog' ;
  167Title = 'Pro-Log is logistiek dienstverlener m.b.t. vervoer over water.',
  168HREF = 'http://www.pro-log.nl/' ;
  169Title = 'Learn Prolog Now!',
  170HREF = 'http://www.learnprolognow.org/' ;
  171Title = 'Free Online Version - Learn Prolog
  172...
  173```
  174
  175@see load_html/3 and xpath/3 can be used to parse and navigate HTML
  176     documents.
  177@see http_get/3 and http_post/4 provide an alternative interface that
  178     convert the reply depending on the =|Content-Type|= header.
  179*/
  180
  181:- multifile
  182    http:encoding_filter/3,           % +Encoding, +In0, -In
  183    http:current_transfer_encoding/1, % ?Encoding
  184    http:disable_encoding_filter/1,   % +ContentType
  185    http:http_protocol_hook/5,        % +Protocol, +Parts, +StreamPair,
  186                                      % -NewStreamPair, +Options
  187    http:open_options/2,              % +Parts, -Options
  188    http:write_cookies/3,             % +Out, +Parts, +Options
  189    http:update_cookies/3,            % +CookieLine, +Parts, +Options
  190    http:authenticate_client/2,       % +URL, +Action
  191    http:http_connection_over_proxy/6.  192
  193:- meta_predicate
  194    http_open(+,-,:).  195
  196:- predicate_options(http_open/3, 3,
  197                     [ authorization(compound),
  198                       final_url(-atom),
  199                       header(+atom, -atom),
  200                       headers(-list),
  201                       raw_headers(-list(string)),
  202                       connection(+atom),
  203                       method(oneof([delete,get,put,purge,head,
  204                                     post,patch,options])),
  205                       size(-integer),
  206                       status_code(-integer),
  207                       output(-stream),
  208                       timeout(number),
  209                       unix_socket(+atom),
  210                       proxy(atom, integer),
  211                       proxy_authorization(compound),
  212                       bypass_proxy(boolean),
  213                       request_header(any),
  214                       user_agent(atom),
  215                       version(-compound),
  216        % The option below applies if library(http/http_header) is loaded
  217                       post(any),
  218        % The options below apply if library(http/http_ssl_plugin)) is loaded
  219                       pem_password_hook(callable),
  220                       cacert_file(atom),
  221                       cert_verify_hook(callable)
  222                     ]).  223
  224%!  user_agent(-Agent) is det.
  225%
  226%   Default value for =|User-Agent|=,  can   be  overruled using the
  227%   option user_agent(Agent) of http_open/3.
  228
  229user_agent('SWI-Prolog').
  230
  231%!  http_open(+URL, -Stream, +Options) is det.
  232%
  233%   Open the data at the HTTP  server   as  a  Prolog stream. URL is
  234%   either an atom  specifying  a  URL   or  a  list  representing a
  235%   broken-down  URL  as  specified  below.   After  this  predicate
  236%   succeeds the data can be read from Stream. After completion this
  237%   stream must be  closed  using   the  built-in  Prolog  predicate
  238%   close/1. Options provides additional options:
  239%
  240%     * authenticate(+Boolean)
  241%     If `false` (default `true`), do _not_ try to automatically
  242%     authenticate the client if a 401 (Unauthorized) status code
  243%     is received.
  244%
  245%     * authorization(+Term)
  246%     Send authorization. See also http_set_authorization/2. Supported
  247%     schemes:
  248%
  249%       - basic(+User, +Password)
  250%       HTTP Basic authentication.
  251%       - bearer(+Token)
  252%       HTTP Bearer authentication.
  253%       - digest(+User, +Password)
  254%       HTTP Digest authentication.  This option is only provided
  255%       if the plugin library(http/http_digest) is also loaded.
  256%
  257%     * unix_socket(+Path)
  258%     Connect to the given Unix domain socket.  In this scenario
  259%     the host name and port or ignored.  If the server replies
  260%     with a _redirect_ message and the host differs from the
  261%     original host as normal TCP connection is used to handle
  262%     the redirect.  This option is inspired by curl(1)'s option
  263%     `--unix-socket`.
  264%
  265%     * connection(+Connection)
  266%     Specify the =Connection= header.  Default is =close=.  The
  267%     alternative is =|Keep-alive|=.  This maintains a pool of
  268%     available connections as determined by keep_connection/1.
  269%     The library(http/websockets) uses =|Keep-alive, Upgrade|=.
  270%     Keep-alive connections can be closed explicitly using
  271%     http_close_keep_alive/1. Keep-alive connections may
  272%     significantly improve repetitive requests on the same server,
  273%     especially if the IP route is long, HTTPS is used or the
  274%     connection uses a proxy.
  275%
  276%     * final_url(-FinalURL)
  277%     Unify FinalURL with the final   destination. This differs from
  278%     the  original  URL  if  the  returned  head  of  the  original
  279%     indicates an HTTP redirect (codes 301,  302 or 303). Without a
  280%     redirect, FinalURL is the same as URL if  URL is an atom, or a
  281%     URL constructed from the parts.
  282%
  283%     * header(Name, -AtomValue)
  284%     If provided, AtomValue is  unified  with   the  value  of  the
  285%     indicated  field  in  the  reply    header.  Name  is  matched
  286%     case-insensitive and the underscore  (_)   matches  the hyphen
  287%     (-). Multiple of these options  may   be  provided  to extract
  288%     multiple  header  fields.  If  the  header  is  not  available
  289%     AtomValue is unified to the empty atom ('').
  290%
  291%     * headers(-List)
  292%     If provided,  List is unified  with a list of  Name(Value) pairs
  293%     corresponding to  fields in  the reply  header.  Name  and Value
  294%     follow  the  same  conventions used  by  the  header(Name,Value)
  295%     option.  A  pseudo header status_code(Code) is  added to provide
  296%     the  HTTP status  as  an integer.   See also  raw_headers(-List)
  297%     which  provides  the  entire   HTTP  reply  header  in  unparsed
  298%     representation.
  299%
  300%     * method(+Method)
  301%     One of =get= (default), =head=, =delete=, =post=,   =put=   or
  302%     =patch=.
  303%     The  =head= message can be
  304%     used in combination with  the   header(Name,  Value) option to
  305%     access information on the resource   without actually fetching
  306%     the resource itself.  The  returned   stream  must  be  closed
  307%     immediately.
  308%
  309%     If post(Data) is provided, the default is =post=.
  310%
  311%     * size(-Size)
  312%     Size is unified with the   integer value of =|Content-Length|=
  313%     in the reply header.
  314%
  315%     * version(-Version)
  316%     Version is a _pair_ `Major-Minor`, where `Major` and `Minor`
  317%     are integers representing the HTTP version in the reply header.
  318%
  319%     * range(+Range)
  320%     Ask for partial content. Range   is  a term _|Unit(From,To)|_,
  321%     where `From` is an integer and `To`   is  either an integer or
  322%     the atom `end`. HTTP 1.1 only   supports Unit = `bytes`. E.g.,
  323%     to   ask   for    bytes    1000-1999,     use    the    option
  324%     range(bytes(1000,1999))
  325%
  326%     * raw_encoding(+Encoding)
  327%     Do not install a decoding filter for Encoding.  For example,
  328%     using raw_encoding('applocation/gzip') the system will not
  329%     decompress the stream if it is compressed using `gzip`.
  330%
  331%     * raw_headers(-Lines)
  332%     Unify Lines with a list of strings that represents the complete
  333%     reply header returned by the server.  See also headers(-List).
  334%
  335%     * redirect(+Boolean)
  336%     If `false` (default `true`), do _not_ automatically redirect
  337%     if a 3XX code is received.  Must be combined with
  338%     status_code(Code) and one of the header options to read the
  339%     redirect reply. In particular, without status_code(Code) a
  340%     redirect is mapped to an exception.
  341%
  342%     * status_code(-Code)
  343%     If this option is  present  and   Code  unifies  with the HTTP
  344%     status code, do *not* translate errors (4xx, 5xx) into an
  345%     exception. Instead, http_open/3 behaves as if 2xx (success) is
  346%     returned, providing the application to read the error document
  347%     from the returned stream.
  348%
  349%     * output(-Out)
  350%     Unify the output stream with Out and do not close it. This can
  351%     be used to upgrade a connection.
  352%
  353%     * timeout(+Timeout)
  354%     If provided, set a timeout on   the stream using set_stream/2.
  355%     With this option if no new data arrives within Timeout seconds
  356%     the stream raises an exception.  Default   is  to wait forever
  357%     (=infinite=).
  358%
  359%     * post(+Data)
  360%     Issue a =POST= request on the HTTP server.  Data is
  361%     handed to http_post_data/3.
  362%
  363%     * proxy(+Host:Port)
  364%     Use an HTTP proxy to connect to the outside world.  See also
  365%     socket:proxy_for_url/3.  This option overrules the proxy
  366%     specification defined by socket:proxy_for_url/3.
  367%
  368%     * proxy(+Host, +Port)
  369%     Synonym for proxy(+Host:Port).  Deprecated.
  370%
  371%     * proxy_authorization(+Authorization)
  372%     Send authorization to the proxy.  Otherwise   the  same as the
  373%     =authorization= option.
  374%
  375%     * bypass_proxy(+Boolean)
  376%     If =true=, bypass proxy hooks.  Default is =false=.
  377%
  378%     * request_header(Name = Value)
  379%     Additional  name-value  parts  are  added   in  the  order  of
  380%     appearance to the HTTP request   header.  No interpretation is
  381%     done.
  382%
  383%     * max_redirect(+Max)
  384%     Sets the maximum length of a redirection chain.  This is needed
  385%     for some IRIs that redirect indefinitely to other IRIs without
  386%     looping (e.g., redirecting to IRIs with a random element in them).
  387%     Max must be either a non-negative integer or the atom `infinite`.
  388%     The default value is `10`.
  389%
  390%     * user_agent(+Agent)
  391%     Defines the value of the  =|User-Agent|=   field  of  the HTTP
  392%     header. Default is =SWI-Prolog=.
  393%
  394%   The hook http:open_options/2 can  be   used  to  provide default
  395%   options   based   on   the   broken-down     URL.   The   option
  396%   status_code(-Code)  is  particularly  useful   to  query  *REST*
  397%   interfaces that commonly return status   codes  other than `200`
  398%   that need to be be processed by the client code.
  399%
  400%   @param URL is either an atom or string (url) or a list of _parts_.
  401%
  402%               When provided, this list may contain the fields
  403%               =scheme=, =user=, =password=, =host=, =port=, =path=
  404%               and either =query_string= (whose argument is an atom)
  405%               or =search= (whose argument is a list of
  406%               =|Name(Value)|= or =|Name=Value|= compound terms).
  407%               Only =host= is mandatory.  The example below opens the
  408%               URL =|http://www.example.com/my/path?q=Hello%20World&lang=en|=.
  409%               Note that values must *not* be quoted because the
  410%               library inserts the required quotes.
  411%
  412%               ```
  413%               http_open([ host('www.example.com'),
  414%                           path('/my/path'),
  415%                           search([ q='Hello world',
  416%                                    lang=en
  417%                                  ])
  418%                         ])
  419%               ```
  420%
  421%   @throws error(existence_error(url, Id),Context) is raised if the
  422%   HTTP result code is not in the range 200..299. Context has the
  423%   shape context(Message, status(Code, TextCode)), where `Code` is the
  424%   numeric HTTP code and `TextCode` is the textual description thereof
  425%   provided by the server. `Message` may provide additional details or
  426%   may be unbound.
  427%
  428%   @see ssl_context/3 for SSL related options if
  429%   library(http/http_ssl_plugin) is loaded.
  430
  431:- multifile
  432    socket:proxy_for_url/3.           % +URL, +Host, -ProxyList
  433
  434http_open(URL, Stream, QOptions) :-
  435    meta_options(is_meta, QOptions, Options0),
  436    (   atomic(URL)
  437    ->  parse_url_ex(URL, Parts)
  438    ;   Parts = URL
  439    ),
  440    autoload_https(Parts),
  441    upgrade_ssl_options(Parts, Options0, Options),
  442    add_authorization(Parts, Options, Options1),
  443    findall(HostOptions, hooked_options(Parts, HostOptions), AllHostOptions),
  444    foldl(merge_options_rev, AllHostOptions, Options1, Options2),
  445    (   option(bypass_proxy(true), Options)
  446    ->  try_http_proxy(direct, Parts, Stream, Options2)
  447    ;   term_variables(Options2, Vars2),
  448        findall(Result-Vars2,
  449                try_a_proxy(Parts, Result, Options2),
  450                ResultList),
  451        last(ResultList, Status-Vars2)
  452    ->  (   Status = true(_Proxy, Stream)
  453        ->  true
  454        ;   throw(error(proxy_error(tried(ResultList)), _))
  455        )
  456    ;   try_http_proxy(direct, Parts, Stream, Options2)
  457    ).
  458
  459try_a_proxy(Parts, Result, Options) :-
  460    parts_uri(Parts, AtomicURL),
  461    option(host(Host), Parts),
  462    (   option(unix_socket(Path), Options)
  463    ->  Proxy = unix_socket(Path)
  464    ;   (   option(proxy(ProxyHost:ProxyPort), Options)
  465        ;   is_list(Options),
  466            memberchk(proxy(ProxyHost,ProxyPort), Options)
  467        )
  468    ->  Proxy = proxy(ProxyHost, ProxyPort)
  469    ;   socket:proxy_for_url(AtomicURL, Host, Proxy)
  470    ),
  471    debug(http(proxy),
  472          'http_open: Connecting via ~w to ~w', [Proxy, AtomicURL]),
  473    (   catch(try_http_proxy(Proxy, Parts, Stream, Options), E, true)
  474    ->  (   var(E)
  475        ->  !, Result = true(Proxy, Stream)
  476        ;   Result = error(Proxy, E)
  477        )
  478    ;   Result = false(Proxy)
  479    ),
  480    debug(http(proxy), 'http_open: ~w: ~p', [Proxy, Result]).
  481
  482try_http_proxy(Method, Parts, Stream, Options0) :-
  483    option(host(Host), Parts),
  484    proxy_request_uri(Method, Parts, RequestURI),
  485    select_option(visited(Visited0), Options0, OptionsV, []),
  486    Options = [visited([Parts|Visited0])|OptionsV],
  487    parts_scheme(Parts, Scheme),
  488    default_port(Scheme, DefPort),
  489    url_part(port(Port), Parts, DefPort),
  490    host_and_port(Host, DefPort, Port, HostPort),
  491    (   option(connection(Connection), Options0),
  492        keep_alive(Connection),
  493        get_from_pool(Host:Port, StreamPair),
  494        debug(http(keep_alive), 'Trying Keep-alive to ~p using ~p',
  495              [ Host:Port, StreamPair ]),
  496        catch(send_rec_header(StreamPair, Stream, HostPort,
  497                              RequestURI, Parts, Options),
  498              Error,
  499              keep_alive_error(Error, StreamPair))
  500    ->  true
  501    ;   http:http_connection_over_proxy(Method, Parts, Host:Port,
  502                                        SocketStreamPair, Options, Options1),
  503        (   catch(http:http_protocol_hook(Scheme, Parts,
  504                                          SocketStreamPair,
  505                                          StreamPair, Options),
  506                  Error,
  507                  ( close(SocketStreamPair, [force(true)]),
  508                    throw(Error)))
  509        ->  true
  510        ;   StreamPair = SocketStreamPair
  511        ),
  512        send_rec_header(StreamPair, Stream, HostPort,
  513                        RequestURI, Parts, Options1)
  514    ),
  515    return_final_url(Options).
  516
  517proxy_request_uri(direct, Parts, RequestURI) :-
  518    !,
  519    parts_request_uri(Parts, RequestURI).
  520proxy_request_uri(unix_socket(_), Parts, RequestURI) :-
  521    !,
  522    parts_request_uri(Parts, RequestURI).
  523proxy_request_uri(_, Parts, RequestURI) :-
  524    parts_uri(Parts, RequestURI).
  525
  526http:http_connection_over_proxy(unix_socket(Path), _, _,
  527                                StreamPair, Options, Options) :-
  528    !,
  529    unix_domain_socket(Socket),
  530    tcp_connect(Socket, Path),
  531    tcp_open_socket(Socket, In, Out),
  532    stream_pair(StreamPair, In, Out).
  533http:http_connection_over_proxy(direct, _, Host:Port,
  534                                StreamPair, Options, Options) :-
  535    !,
  536    open_socket(Host:Port, StreamPair, Options).
  537http:http_connection_over_proxy(proxy(ProxyHost, ProxyPort), Parts, _,
  538                                StreamPair, Options, Options) :-
  539    \+ ( memberchk(scheme(Scheme), Parts),
  540         secure_scheme(Scheme)
  541       ),
  542    !,
  543    % We do not want any /more/ proxy after this
  544    open_socket(ProxyHost:ProxyPort, StreamPair,
  545                [bypass_proxy(true)|Options]).
  546http:http_connection_over_proxy(socks(SocksHost, SocksPort), _Parts, Host:Port,
  547                                StreamPair, Options, Options) :-
  548    !,
  549    tcp_connect(SocksHost:SocksPort, StreamPair, [bypass_proxy(true)]),
  550    catch(negotiate_socks_connection(Host:Port, StreamPair),
  551          Error,
  552          ( close(StreamPair, [force(true)]),
  553            throw(Error)
  554          )).
  555
  556%!  hooked_options(+Parts, -Options) is nondet.
  557%
  558%   Calls  http:open_options/2  and  if  necessary    upgrades  old  SSL
  559%   cacerts_file(File) option to a cacerts(List) option to ensure proper
  560%   merging of options.
  561
  562hooked_options(Parts, Options) :-
  563    http:open_options(Parts, Options0),
  564    upgrade_ssl_options(Parts, Options0, Options).
  565
  566:- if(current_predicate(ssl_upgrade_legacy_options/2)).  567upgrade_ssl_options(Parts, Options0, Options) :-
  568    requires_ssl(Parts),
  569    !,
  570    ssl_upgrade_legacy_options(Options0, Options).
  571:- endif.  572upgrade_ssl_options(_, Options, Options).
  573
  574merge_options_rev(Old, New, Merged) :-
  575    merge_options(New, Old, Merged).
  576
  577is_meta(pem_password_hook).             % SSL plugin callbacks
  578is_meta(cert_verify_hook).
  579
  580
  581http:http_protocol_hook(http, _, StreamPair, StreamPair, _).
  582
  583default_port(https, 443) :- !.
  584default_port(wss,   443) :- !.
  585default_port(_,     80).
  586
  587host_and_port(Host, DefPort, DefPort, Host) :- !.
  588host_and_port(Host, _,       Port,    Host:Port).
  589
  590%!  autoload_https(+Parts) is det.
  591%
  592%   If the requested scheme is https or wss, load the HTTPS plugin.
  593
  594autoload_https(Parts) :-
  595    requires_ssl(Parts),
  596    memberchk(scheme(S), Parts),
  597    \+ clause(http:http_protocol_hook(S, _, StreamPair, StreamPair, _),_),
  598    exists_source(library(http/http_ssl_plugin)),
  599    !,
  600    use_module(library(http/http_ssl_plugin)).
  601autoload_https(_).
  602
  603requires_ssl(Parts) :-
  604    memberchk(scheme(S), Parts),
  605    secure_scheme(S).
  606
  607secure_scheme(https).
  608secure_scheme(wss).
  609
  610%!  send_rec_header(+StreamPair, -Stream,
  611%!                  +Host, +RequestURI, +Parts, +Options) is det.
  612%
  613%   Send header to Out and process reply.  If there is an error or
  614%   failure, close In and Out and return the error or failure.
  615
  616send_rec_header(StreamPair, Stream, Host, RequestURI, Parts, Options) :-
  617    (   catch(guarded_send_rec_header(StreamPair, Stream,
  618                                      Host, RequestURI, Parts, Options),
  619              E, true)
  620    ->  (   var(E)
  621        ->  (   option(output(StreamPair), Options)
  622            ->  true
  623            ;   true
  624            )
  625        ;   close(StreamPair, [force(true)]),
  626            throw(E)
  627        )
  628    ;   close(StreamPair, [force(true)]),
  629        fail
  630    ).
  631
  632guarded_send_rec_header(StreamPair, Stream, Host, RequestURI, Parts, Options) :-
  633    user_agent(Agent, Options),
  634    method(Options, MNAME),
  635    http_version(Version),
  636    option(connection(Connection), Options, close),
  637    debug(http(send_request), "> ~w ~w HTTP/~w", [MNAME, RequestURI, Version]),
  638    debug(http(send_request), "> Host: ~w", [Host]),
  639    debug(http(send_request), "> User-Agent: ~w", [Agent]),
  640    debug(http(send_request), "> Connection: ~w", [Connection]),
  641    format(StreamPair,
  642           '~w ~w HTTP/~w\r\n\c
  643               Host: ~w\r\n\c
  644               User-Agent: ~w\r\n\c
  645               Connection: ~w\r\n',
  646           [MNAME, RequestURI, Version, Host, Agent, Connection]),
  647    parts_uri(Parts, URI),
  648    x_headers(Options, URI, StreamPair),
  649    write_cookies(StreamPair, Parts, Options),
  650    (   option(post(PostData), Options)
  651    ->  http_post_data(PostData, StreamPair, [])
  652    ;   format(StreamPair, '\r\n', [])
  653    ),
  654    flush_output(StreamPair),
  655                                    % read the reply header
  656    read_header(StreamPair, Parts, ReplyVersion, Code, Comment, Lines),
  657    update_cookies(Lines, Parts, Options),
  658    reply_header(Lines, Options),
  659    do_open(ReplyVersion, Code, Comment, Lines, Options, Parts, Host,
  660            StreamPair, Stream).
  661
  662
  663%!  http_version(-Version:atom) is det.
  664%
  665%   HTTP version we publish. We  can  only   use  1.1  if we support
  666%   chunked encoding.
  667
  668http_version('1.1') :-
  669    http:current_transfer_encoding(chunked),
  670    !.
  671http_version('1.1') :-
  672    autoload_encoding(chunked),
  673    !.
  674http_version('1.0').
  675
  676method(Options, MNAME) :-
  677    option(post(_), Options),
  678    !,
  679    option(method(M), Options, post),
  680    (   map_method(M, MNAME0)
  681    ->  MNAME = MNAME0
  682    ;   domain_error(method, M)
  683    ).
  684method(Options, MNAME) :-
  685    option(method(M), Options, get),
  686    (   map_method(M, MNAME0)
  687    ->  MNAME = MNAME0
  688    ;   map_method(_, M)
  689    ->  MNAME = M
  690    ;   domain_error(method, M)
  691    ).
  692
  693%!  map_method(+MethodID, -Method)
  694%
  695%   Support additional ``METHOD`` keywords.  Default   are  the official
  696%   HTTP methods as defined by the various RFCs.
  697
  698:- multifile
  699    map_method/2.  700
  701map_method(delete,  'DELETE').
  702map_method(get,     'GET').
  703map_method(head,    'HEAD').
  704map_method(post,    'POST').
  705map_method(put,     'PUT').
  706map_method(patch,   'PATCH').
  707map_method(options, 'OPTIONS').
  708
  709%!  x_headers(+Options, +URI, +Out) is det.
  710%
  711%   Emit extra headers from   request_header(Name=Value)  options in
  712%   Options.
  713%
  714%   @tbd Use user/password fields
  715
  716x_headers(Options, URI, Out) :-
  717    x_headers_(Options, [url(URI)|Options], Out).
  718
  719x_headers_([], _, _).
  720x_headers_([H|T], Options, Out) :-
  721    x_header(H, Options, Out),
  722    x_headers_(T, Options, Out).
  723
  724x_header(request_header(Name=Value), _, Out) :-
  725    !,
  726    debug(http(send_request), "> ~w: ~w", [Name, Value]),
  727    format(Out, '~w: ~w\r\n', [Name, Value]).
  728x_header(proxy_authorization(ProxyAuthorization), Options, Out) :-
  729    !,
  730    auth_header(ProxyAuthorization, Options, 'Proxy-Authorization', Out).
  731x_header(authorization(Authorization), Options, Out) :-
  732    !,
  733    auth_header(Authorization, Options, 'Authorization', Out).
  734x_header(range(Spec), _, Out) :-
  735    !,
  736    Spec =.. [Unit, From, To],
  737    (   To == end
  738    ->  ToT = ''
  739    ;   must_be(integer, To),
  740        ToT = To
  741    ),
  742    debug(http(send_request), "> Range: ~w=~d-~w", [Unit, From, ToT]),
  743    format(Out, 'Range: ~w=~d-~w\r\n', [Unit, From, ToT]).
  744x_header(_, _, _).
  745
  746%!  auth_header(+AuthOption, +Options, +HeaderName, +Out)
  747
  748auth_header(basic(User, Password), _, Header, Out) :-
  749    !,
  750    format(codes(Codes), '~w:~w', [User, Password]),
  751    phrase(base64(Codes), Base64Codes),
  752    debug(http(send_request), "> ~w: Basic ~s", [Header, Base64Codes]),
  753    format(Out, '~w: Basic ~s\r\n', [Header, Base64Codes]).
  754auth_header(bearer(Token), _, Header, Out) :-
  755    !,
  756    debug(http(send_request), "> ~w: Bearer ~w", [Header,Token]),
  757    format(Out, '~w: Bearer ~w\r\n', [Header, Token]).
  758auth_header(Auth, Options, _, Out) :-
  759    option(url(URL), Options),
  760    add_method(Options, Options1),
  761    http:authenticate_client(URL, send_auth_header(Auth, Out, Options1)),
  762    !.
  763auth_header(Auth, _, _, _) :-
  764    domain_error(authorization, Auth).
  765
  766user_agent(Agent, Options) :-
  767    (   option(user_agent(Agent), Options)
  768    ->  true
  769    ;   user_agent(Agent)
  770    ).
  771
  772add_method(Options0, Options) :-
  773    option(method(_), Options0),
  774    !,
  775    Options = Options0.
  776add_method(Options0, Options) :-
  777    option(post(_), Options0),
  778    !,
  779    Options = [method(post)|Options0].
  780add_method(Options0, [method(get)|Options0]).
  781
  782%!  do_open(+HTTPVersion, +HTTPStatusCode, +HTTPStatusComment, +Header,
  783%!          +Options, +Parts, +Host, +In, -FinalIn) is det.
  784%
  785%   Handle the HTTP status once available. If   200-299, we are ok. If a
  786%   redirect, redo the open,  returning  a   new  stream.  Else issue an
  787%   error.
  788%
  789%   @error  existence_error(url, URL)
  790
  791                                        % Redirections
  792do_open(_, Code, _, Lines, Options0, Parts, _, In, Stream) :-
  793    redirect_code(Code),
  794    option(redirect(true), Options0, true),
  795    location(Lines, RequestURI),
  796    !,
  797    debug(http(redirect), 'http_open: redirecting to ~w', [RequestURI]),
  798    close(In),
  799    parts_uri(Parts, Base),
  800    uri_resolve(RequestURI, Base, Redirected),
  801    parse_url_ex(Redirected, RedirectedParts),
  802    (   redirect_limit_exceeded(Options0, Max)
  803    ->  format(atom(Comment), 'max_redirect (~w) limit exceeded', [Max]),
  804        throw(error(permission_error(redirect, http, Redirected),
  805                    context(_, Comment)))
  806    ;   redirect_loop(RedirectedParts, Options0)
  807    ->  throw(error(permission_error(redirect, http, Redirected),
  808                    context(_, 'Redirection loop')))
  809    ;   true
  810    ),
  811    redirect_options(Parts, RedirectedParts, Options0, Options),
  812    http_open(RedirectedParts, Stream, Options).
  813                                        % Need authentication
  814do_open(_Version, Code, _Comment, Lines, Options0, Parts, _Host, In0, Stream) :-
  815    authenticate_code(Code),
  816    option(authenticate(true), Options0, true),
  817    parts_uri(Parts, URI),
  818    parse_headers(Lines, Headers),
  819    http:authenticate_client(
  820             URI,
  821             auth_reponse(Headers, Options0, Options)),
  822    !,
  823    close(In0),
  824    http_open(Parts, Stream, Options).
  825                                        % Accepted codes
  826do_open(Version, Code, _, Lines, Options, Parts, Host, In0, In) :-
  827    (   option(status_code(Code), Options),
  828        Lines \== []
  829    ->  true
  830    ;   successful_code(Code)
  831    ),
  832    !,
  833    parts_uri(Parts, URI),
  834    parse_headers(Lines, Headers),
  835    return_version(Options, Version),
  836    return_size(Options, Headers),
  837    return_fields(Options, Headers),
  838    return_headers(Options, [status_code(Code)|Headers]),
  839    consider_keep_alive(Lines, Parts, Host, In0, In1, Options),
  840    transfer_encoding_filter(Lines, In1, In, Options),
  841                                    % properly re-initialise the stream
  842    set_stream(In, file_name(URI)),
  843    set_stream(In, record_position(true)).
  844do_open(_, _, _, [], Options, _, _, _, _) :-
  845    option(connection(Connection), Options),
  846    keep_alive(Connection),
  847    !,
  848    throw(error(keep_alive(closed),_)).
  849                                        % report anything else as error
  850do_open(_Version, Code, Comment, _,  _, Parts, _, _, _) :-
  851    parts_uri(Parts, URI),
  852    (   map_error_code(Code, Error)
  853    ->  Formal =.. [Error, url, URI]
  854    ;   Formal = existence_error(url, URI)
  855    ),
  856    throw(error(Formal, context(_, status(Code, Comment)))).
  857
  858
  859successful_code(Code) :-
  860    between(200, 299, Code).
  861
  862%!  redirect_limit_exceeded(+Options:list(compound), -Max:nonneg) is semidet.
  863%
  864%   True if we have exceeded the maximum redirection length (default 10).
  865
  866redirect_limit_exceeded(Options, Max) :-
  867    option(visited(Visited), Options, []),
  868    length(Visited, N),
  869    option(max_redirect(Max), Options, 10),
  870    (Max == infinite -> fail ; N > Max).
  871
  872
  873%!  redirect_loop(+Parts, +Options) is semidet.
  874%
  875%   True if we are in  a  redirection   loop.  Note  that some sites
  876%   redirect once to the same place using  cookies or similar, so we
  877%   allow for two tries. In fact,   we  should probably test whether
  878%   authorization or cookie headers have changed.
  879
  880redirect_loop(Parts, Options) :-
  881    option(visited(Visited), Options, []),
  882    include(==(Parts), Visited, Same),
  883    length(Same, Count),
  884    Count > 2.
  885
  886
  887%!  redirect_options(+Parts, +RedirectedParts, +Options0, -Options) is det.
  888%
  889%   A redirect from a POST should do  a   GET  on the returned URI. This
  890%   means we must remove the method(post)   and  post(Data) options from
  891%   the original option-list.
  892%
  893%   If we are connecting over a Unix   domain socket we drop this option
  894%   if the redirect host does not match the initial host.
  895
  896redirect_options(Parts, RedirectedParts, Options0, Options) :-
  897    select_option(unix_socket(_), Options0, Options1),
  898    memberchk(host(Host), Parts),
  899    memberchk(host(RHost), RedirectedParts),
  900    debug(http(redirect), 'http_open: redirecting AF_UNIX ~w to ~w',
  901          [Host, RHost]),
  902    Host \== RHost,
  903    !,
  904    redirect_options(Options1, Options).
  905redirect_options(_, _, Options0, Options) :-
  906    redirect_options(Options0, Options).
  907
  908redirect_options(Options0, Options) :-
  909    (   select_option(post(_), Options0, Options1)
  910    ->  true
  911    ;   Options1 = Options0
  912    ),
  913    (   select_option(method(Method), Options1, Options),
  914        \+ redirect_method(Method)
  915    ->  true
  916    ;   Options = Options1
  917    ).
  918
  919redirect_method(delete).
  920redirect_method(get).
  921redirect_method(head).
  922
  923
  924%!  map_error_code(+HTTPCode, -PrologError) is semidet.
  925%
  926%   Map HTTP error codes to Prolog errors.
  927%
  928%   @tbd    Many more maps. Unfortunately many have no sensible Prolog
  929%           counterpart.
  930
  931map_error_code(401, permission_error).
  932map_error_code(403, permission_error).
  933map_error_code(404, existence_error).
  934map_error_code(405, permission_error).
  935map_error_code(407, permission_error).
  936map_error_code(410, existence_error).
  937
  938redirect_code(301).                     % Moved Permanently
  939redirect_code(302).                     % Found (previously "Moved Temporary")
  940redirect_code(303).                     % See Other
  941redirect_code(307).                     % Temporary Redirect
  942
  943authenticate_code(401).
  944
  945%!  open_socket(+Address, -StreamPair, +Options) is det.
  946%
  947%   Create and connect a client socket to Address.  Options
  948%
  949%       * timeout(+Timeout)
  950%       Sets timeout on the stream, *after* connecting the
  951%       socket.
  952%
  953%   @tbd    Make timeout also work on tcp_connect/4.
  954%   @tbd    This is the same as do_connect/4 in http_client.pl
  955
  956open_socket(Address, StreamPair, Options) :-
  957    debug(http(open), 'http_open: Connecting to ~p ...', [Address]),
  958    tcp_connect(Address, StreamPair, Options),
  959    stream_pair(StreamPair, In, Out),
  960    debug(http(open), '\tok ~p ---> ~p', [In, Out]),
  961    set_stream(In, record_position(false)),
  962    (   option(timeout(Timeout), Options)
  963    ->  set_stream(In, timeout(Timeout))
  964    ;   true
  965    ).
  966
  967
  968return_version(Options, Major-Minor) :-
  969    option(version(Major-Minor), Options, _).
  970
  971return_size(Options, Headers) :-
  972    (   memberchk(content_length(Size), Headers)
  973    ->  option(size(Size), Options, _)
  974    ;   true
  975    ).
  976
  977return_fields([], _).
  978return_fields([header(Name, Value)|T], Headers) :-
  979    !,
  980    (   Term =.. [Name,Value],
  981        memberchk(Term, Headers)
  982    ->  true
  983    ;   Value = ''
  984    ),
  985    return_fields(T, Headers).
  986return_fields([_|T], Lines) :-
  987    return_fields(T, Lines).
  988
  989return_headers(Options, Headers) :-
  990    option(headers(Headers), Options, _).
  991
  992%!  parse_headers(+Lines, -Headers:list(compound)) is det.
  993%
  994%   Parse the header lines for   the  headers(-List) option. Invalid
  995%   header   lines   are   skipped,   printing   a   warning   using
  996%   print_message/2.
  997
  998parse_headers([], []) :- !.
  999parse_headers([Line|Lines], Headers) :-
 1000    catch(http_parse_header(Line, [Header]), Error, true),
 1001    (   var(Error)
 1002    ->  Headers = [Header|More]
 1003    ;   print_message(warning, Error),
 1004        Headers = More
 1005    ),
 1006    parse_headers(Lines, More).
 1007
 1008
 1009%!  return_final_url(+Options) is semidet.
 1010%
 1011%   If Options contains final_url(URL), unify URL with the final
 1012%   URL after redirections.
 1013
 1014return_final_url(Options) :-
 1015    option(final_url(URL), Options),
 1016    var(URL),
 1017    !,
 1018    option(visited([Parts|_]), Options),
 1019    parts_uri(Parts, URL).
 1020return_final_url(_).
 1021
 1022
 1023%!  transfer_encoding_filter(+Lines, +In0, -In, +Options) is det.
 1024%
 1025%   Install filters depending on the transfer  encoding. If In0 is a
 1026%   stream-pair, we close the output   side. If transfer-encoding is
 1027%   not specified, the content-encoding is  interpreted as a synonym
 1028%   for transfer-encoding, because many   servers incorrectly depend
 1029%   on  this.  Exceptions  to  this   are  content-types  for  which
 1030%   disable_encoding_filter/1 holds.
 1031
 1032transfer_encoding_filter(Lines, In0, In, Options) :-
 1033    transfer_encoding(Lines, Encoding),
 1034    !,
 1035    transfer_encoding_filter_(Encoding, In0, In, Options).
 1036transfer_encoding_filter(Lines, In0, In, Options) :-
 1037    content_encoding(Lines, Encoding),
 1038    content_type(Lines, Type),
 1039    \+ http:disable_encoding_filter(Type),
 1040    !,
 1041    transfer_encoding_filter_(Encoding, In0, In, Options).
 1042transfer_encoding_filter(_, In, In, _Options).
 1043
 1044transfer_encoding_filter_(Encoding, In0, In, Options) :-
 1045    option(raw_encoding(Encoding), Options),
 1046    !,
 1047    In = In0.
 1048transfer_encoding_filter_(Encoding, In0, In, _Options) :-
 1049    stream_pair(In0, In1, Out),
 1050    (   http:encoding_filter(Encoding, In1, In2)
 1051    ->  true
 1052    ;   autoload_encoding(Encoding),
 1053        http:encoding_filter(Encoding, In1, In2)
 1054    ->  true
 1055    ;   domain_error(http_encoding, Encoding)
 1056    ),
 1057    (   var(Out)
 1058    ->  In = In2
 1059    ;   stream_pair(In, In2, Out)
 1060    ).
 1061
 1062:- multifile
 1063    autoload_encoding/1. 1064
 1065:- if(exists_source(library(zlib))). 1066autoload_encoding(gzip) :-
 1067    use_module(library(zlib)).
 1068:- endif. 1069:- if(exists_source(library(http/http_stream))). 1070autoload_encoding(chunked) :-
 1071    use_module(library(http/http_stream)).
 1072:- endif. 1073
 1074content_type(Lines, Type) :-
 1075    member(Line, Lines),
 1076    phrase(field('content-type'), Line, Rest),
 1077    !,
 1078    atom_codes(Type, Rest).
 1079
 1080%!  http:disable_encoding_filter(+ContentType) is semidet.
 1081%
 1082%   Do not use  the   =|Content-encoding|=  as =|Transfer-encoding|=
 1083%   encoding for specific values of   ContentType. This predicate is
 1084%   multifile and can thus be extended by the user.
 1085
 1086http:disable_encoding_filter('application/x-gzip').
 1087http:disable_encoding_filter('application/x-tar').
 1088http:disable_encoding_filter('x-world/x-vrml').
 1089http:disable_encoding_filter('application/zip').
 1090http:disable_encoding_filter('application/x-gzip').
 1091http:disable_encoding_filter('application/x-zip-compressed').
 1092http:disable_encoding_filter('application/x-compress').
 1093http:disable_encoding_filter('application/x-compressed').
 1094http:disable_encoding_filter('application/x-spoon').
 1095
 1096%!  transfer_encoding(+Lines, -Encoding) is semidet.
 1097%
 1098%   True if Encoding  is  the   value  of  the =|Transfer-encoding|=
 1099%   header.
 1100
 1101transfer_encoding(Lines, Encoding) :-
 1102    what_encoding(transfer_encoding, Lines, Encoding).
 1103
 1104what_encoding(What, Lines, Encoding) :-
 1105    member(Line, Lines),
 1106    phrase(encoding_(What, Debug), Line, Rest),
 1107    !,
 1108    atom_codes(Encoding, Rest),
 1109    debug(http(What), '~w: ~p', [Debug, Rest]).
 1110
 1111encoding_(content_encoding, 'Content-encoding') -->
 1112    field('content-encoding').
 1113encoding_(transfer_encoding, 'Transfer-encoding') -->
 1114    field('transfer-encoding').
 1115
 1116%!  content_encoding(+Lines, -Encoding) is semidet.
 1117%
 1118%   True if Encoding is the value of the =|Content-encoding|=
 1119%   header.
 1120
 1121content_encoding(Lines, Encoding) :-
 1122    what_encoding(content_encoding, Lines, Encoding).
 1123
 1124%!  read_header(+In:stream, +Parts, -Version, -Code:int,
 1125%!  -Comment:atom, -Lines:list) is det.
 1126%
 1127%   Read the HTTP reply-header.  If the reply is completely empty
 1128%   an existence error is thrown.  If the replied header is
 1129%   otherwise invalid a 500 HTTP error is simulated, having the
 1130%   comment =|Invalid reply header|=.
 1131%
 1132%   @param Parts    A list of compound terms that describe the
 1133%                   parsed request URI.
 1134%   @param Version  HTTP reply version as Major-Minor pair
 1135%   @param Code     Numeric HTTP reply-code
 1136%   @param Comment  Comment of reply-code as atom
 1137%   @param Lines    Remaining header lines as code-lists.
 1138%
 1139%   @error existence_error(http_reply, Uri)
 1140
 1141read_header(In, Parts, Major-Minor, Code, Comment, Lines) :-
 1142    read_line_to_codes(In, Line),
 1143    (   Line == end_of_file
 1144    ->  parts_uri(Parts, Uri),
 1145        existence_error(http_reply,Uri)
 1146    ;   true
 1147    ),
 1148    Line \== end_of_file,
 1149    phrase(first_line(Major-Minor, Code, Comment), Line),
 1150    debug(http(open), 'HTTP/~d.~d ~w ~w', [Major, Minor, Code, Comment]),
 1151    read_line_to_codes(In, Line2),
 1152    rest_header(Line2, In, Lines),
 1153    !,
 1154    (   debugging(http(open))
 1155    ->  forall(member(HL, Lines),
 1156               debug(http(open), '~s', [HL]))
 1157    ;   true
 1158    ).
 1159read_header(_, _, 1-1, 500, 'Invalid reply header', []).
 1160
 1161rest_header([], _, []) :- !.            % blank line: end of header
 1162rest_header(L0, In, [L0|L]) :-
 1163    read_line_to_codes(In, L1),
 1164    rest_header(L1, In, L).
 1165
 1166%!  content_length(+Header, -Length:int) is semidet.
 1167%
 1168%   Find the Content-Length in an HTTP reply-header.
 1169
 1170content_length(Lines, Length) :-
 1171    member(Line, Lines),
 1172    phrase(content_length(Length0), Line),
 1173    !,
 1174    Length = Length0.
 1175
 1176location(Lines, RequestURI) :-
 1177    member(Line, Lines),
 1178    phrase(atom_field(location, RequestURI), Line),
 1179    !.
 1180
 1181connection(Lines, Connection) :-
 1182    member(Line, Lines),
 1183    phrase(atom_field(connection, Connection0), Line),
 1184    !,
 1185    Connection = Connection0.
 1186
 1187first_line(Major-Minor, Code, Comment) -->
 1188    "HTTP/", integer(Major), ".", integer(Minor),
 1189    skip_blanks,
 1190    integer(Code),
 1191    skip_blanks,
 1192    rest(Comment).
 1193
 1194atom_field(Name, Value) -->
 1195    field(Name),
 1196    rest(Value).
 1197
 1198content_length(Len) -->
 1199    field('content-length'),
 1200    integer(Len).
 1201
 1202field(Name) -->
 1203    { atom_codes(Name, Codes) },
 1204    field_codes(Codes).
 1205
 1206field_codes([]) -->
 1207    ":",
 1208    skip_blanks.
 1209field_codes([H|T]) -->
 1210    [C],
 1211    { match_header_char(H, C)
 1212    },
 1213    field_codes(T).
 1214
 1215match_header_char(C, C) :- !.
 1216match_header_char(C, U) :-
 1217    code_type(C, to_lower(U)),
 1218    !.
 1219match_header_char(0'_, 0'-).
 1220
 1221
 1222skip_blanks -->
 1223    [C],
 1224    { code_type(C, white)
 1225    },
 1226    !,
 1227    skip_blanks.
 1228skip_blanks -->
 1229    [].
 1230
 1231%!  integer(-Int)//
 1232%
 1233%   Read 1 or more digits and return as integer.
 1234
 1235integer(Code) -->
 1236    digit(D0),
 1237    digits(D),
 1238    { number_codes(Code, [D0|D])
 1239    }.
 1240
 1241digit(C) -->
 1242    [C],
 1243    { code_type(C, digit)
 1244    }.
 1245
 1246digits([D0|D]) -->
 1247    digit(D0),
 1248    !,
 1249    digits(D).
 1250digits([]) -->
 1251    [].
 1252
 1253%!  rest(-Atom:atom)//
 1254%
 1255%   Get rest of input as an atom.
 1256
 1257rest(Atom) --> call(rest_(Atom)).
 1258
 1259rest_(Atom, L, []) :-
 1260    atom_codes(Atom, L).
 1261
 1262
 1263%!  reply_header(+Lines, +Options) is det.
 1264%
 1265%   Return the entire reply header as  a list of strings to the option
 1266%   raw_headers(-Headers).
 1267
 1268reply_header(Lines, Options) :-
 1269    option(raw_headers(Headers), Options),
 1270    !,
 1271    maplist(string_codes, Headers, Lines).
 1272reply_header(_, _).
 1273
 1274
 1275                 /*******************************
 1276                 *   AUTHORIZATION MANAGEMENT   *
 1277                 *******************************/
 1278
 1279%!  http_set_authorization(+URL, +Authorization) is det.
 1280%
 1281%   Set user/password to supply with URLs   that have URL as prefix.
 1282%   If  Authorization  is  the   atom    =|-|=,   possibly   defined
 1283%   authorization is cleared.  For example:
 1284%
 1285%   ```
 1286%   ?- http_set_authorization('http://www.example.com/private/',
 1287%                             basic('John', 'Secret'))
 1288%   ```
 1289%
 1290%   @tbd    Move to a separate module, so http_get/3, etc. can use this
 1291%           too.
 1292
 1293:- dynamic
 1294    stored_authorization/2,
 1295    cached_authorization/2. 1296
 1297http_set_authorization(URL, Authorization) :-
 1298    must_be(atom, URL),
 1299    retractall(stored_authorization(URL, _)),
 1300    (   Authorization = (-)
 1301    ->  true
 1302    ;   check_authorization(Authorization),
 1303        assert(stored_authorization(URL, Authorization))
 1304    ),
 1305    retractall(cached_authorization(_,_)).
 1306
 1307check_authorization(Var) :-
 1308    var(Var),
 1309    !,
 1310    instantiation_error(Var).
 1311check_authorization(basic(User, Password)) :-
 1312    must_be(atom, User),
 1313    must_be(text, Password).
 1314check_authorization(digest(User, Password)) :-
 1315    must_be(atom, User),
 1316    must_be(text, Password).
 1317
 1318%!  authorization(+URL, -Authorization) is semidet.
 1319%
 1320%   True if Authorization must be supplied for URL.
 1321%
 1322%   @tbd    Cleanup cache if it gets too big.
 1323
 1324authorization(_, _) :-
 1325    \+ stored_authorization(_, _),
 1326    !,
 1327    fail.
 1328authorization(URL, Authorization) :-
 1329    cached_authorization(URL, Authorization),
 1330    !,
 1331    Authorization \== (-).
 1332authorization(URL, Authorization) :-
 1333    (   stored_authorization(Prefix, Authorization),
 1334        sub_atom(URL, 0, _, _, Prefix)
 1335    ->  assert(cached_authorization(URL, Authorization))
 1336    ;   assert(cached_authorization(URL, -)),
 1337        fail
 1338    ).
 1339
 1340add_authorization(_, Options, Options) :-
 1341    option(authorization(_), Options),
 1342    !.
 1343add_authorization(Parts, Options0, Options) :-
 1344    url_part(user(User), Parts),
 1345    url_part(password(Passwd), Parts),
 1346    !,
 1347    Options = [authorization(basic(User,Passwd))|Options0].
 1348add_authorization(Parts, Options0, Options) :-
 1349    stored_authorization(_, _) ->   % quick test to avoid work
 1350    parts_uri(Parts, URL),
 1351    authorization(URL, Auth),
 1352    !,
 1353    Options = [authorization(Auth)|Options0].
 1354add_authorization(_, Options, Options).
 1355
 1356
 1357%!  parse_url_ex(+URL, -Parts)
 1358%
 1359%   Parts:  Scheme,  Host,  Port,    User:Password,  RequestURI  (no
 1360%   fragment).
 1361
 1362parse_url_ex(URL, [uri(URL)|Parts]) :-
 1363    uri_components(URL, Components),
 1364    phrase(components(Components), Parts),
 1365    (   option(host(_), Parts)
 1366    ->  true
 1367    ;   domain_error(url, URL)
 1368    ).
 1369
 1370components(Components) -->
 1371    uri_scheme(Components),
 1372    uri_path(Components),
 1373    uri_authority(Components),
 1374    uri_request_uri(Components).
 1375
 1376uri_scheme(Components) -->
 1377    { uri_data(scheme, Components, Scheme), nonvar(Scheme) },
 1378    !,
 1379    [ scheme(Scheme)
 1380    ].
 1381uri_scheme(_) --> [].
 1382
 1383uri_path(Components) -->
 1384    { uri_data(path, Components, Path0), nonvar(Path0),
 1385      (   Path0 == ''
 1386      ->  Path = (/)
 1387      ;   Path = Path0
 1388      )
 1389    },
 1390    !,
 1391    [ path(Path)
 1392    ].
 1393uri_path(_) --> [].
 1394
 1395uri_authority(Components) -->
 1396    { uri_data(authority, Components, Auth), nonvar(Auth),
 1397      !,
 1398      uri_authority_components(Auth, Data)
 1399    },
 1400    [ authority(Auth) ],
 1401    auth_field(user, Data),
 1402    auth_field(password, Data),
 1403    auth_field(host, Data),
 1404    auth_field(port, Data).
 1405uri_authority(_) --> [].
 1406
 1407auth_field(Field, Data) -->
 1408    { uri_authority_data(Field, Data, EncValue), nonvar(EncValue),
 1409      !,
 1410      (   atom(EncValue)
 1411      ->  uri_encoded(query_value, Value, EncValue)
 1412      ;   Value = EncValue
 1413      ),
 1414      Part =.. [Field,Value]
 1415    },
 1416    [ Part ].
 1417auth_field(_, _) --> [].
 1418
 1419uri_request_uri(Components) -->
 1420    { uri_data(path, Components, Path0),
 1421      uri_data(search, Components, Search),
 1422      (   Path0 == ''
 1423      ->  Path = (/)
 1424      ;   Path = Path0
 1425      ),
 1426      uri_data(path, Components2, Path),
 1427      uri_data(search, Components2, Search),
 1428      uri_components(RequestURI, Components2)
 1429    },
 1430    [ request_uri(RequestURI)
 1431    ].
 1432
 1433%!  parts_scheme(+Parts, -Scheme) is det.
 1434%!  parts_uri(+Parts, -URI) is det.
 1435%!  parts_request_uri(+Parts, -RequestURI) is det.
 1436%!  parts_search(+Parts, -Search) is det.
 1437%!  parts_authority(+Parts, -Authority) is semidet.
 1438
 1439parts_scheme(Parts, Scheme) :-
 1440    url_part(scheme(Scheme), Parts),
 1441    !.
 1442parts_scheme(Parts, Scheme) :-          % compatibility with library(url)
 1443    url_part(protocol(Scheme), Parts),
 1444    !.
 1445parts_scheme(_, http).
 1446
 1447parts_authority(Parts, Auth) :-
 1448    url_part(authority(Auth), Parts),
 1449    !.
 1450parts_authority(Parts, Auth) :-
 1451    url_part(host(Host), Parts, _),
 1452    url_part(port(Port), Parts, _),
 1453    url_part(user(User), Parts, _),
 1454    url_part(password(Password), Parts, _),
 1455    uri_authority_components(Auth,
 1456                             uri_authority(User, Password, Host, Port)).
 1457
 1458parts_request_uri(Parts, RequestURI) :-
 1459    option(request_uri(RequestURI), Parts),
 1460    !.
 1461parts_request_uri(Parts, RequestURI) :-
 1462    url_part(path(Path), Parts, /),
 1463    ignore(parts_search(Parts, Search)),
 1464    uri_data(path, Data, Path),
 1465    uri_data(search, Data, Search),
 1466    uri_components(RequestURI, Data).
 1467
 1468parts_search(Parts, Search) :-
 1469    option(query_string(Search), Parts),
 1470    !.
 1471parts_search(Parts, Search) :-
 1472    option(search(Fields), Parts),
 1473    !,
 1474    uri_query_components(Search, Fields).
 1475
 1476
 1477parts_uri(Parts, URI) :-
 1478    option(uri(URI), Parts),
 1479    !.
 1480parts_uri(Parts, URI) :-
 1481    parts_scheme(Parts, Scheme),
 1482    ignore(parts_authority(Parts, Auth)),
 1483    parts_request_uri(Parts, RequestURI),
 1484    uri_components(RequestURI, Data),
 1485    uri_data(scheme, Data, Scheme),
 1486    uri_data(authority, Data, Auth),
 1487    uri_components(URI, Data).
 1488
 1489parts_port(Parts, Port) :-
 1490    parts_scheme(Parts, Scheme),
 1491    default_port(Scheme, DefPort),
 1492    url_part(port(Port), Parts, DefPort).
 1493
 1494url_part(Part, Parts) :-
 1495    Part =.. [Name,Value],
 1496    Gen =.. [Name,RawValue],
 1497    option(Gen, Parts),
 1498    !,
 1499    Value = RawValue.
 1500
 1501url_part(Part, Parts, Default) :-
 1502    Part =.. [Name,Value],
 1503    Gen =.. [Name,RawValue],
 1504    (   option(Gen, Parts)
 1505    ->  Value = RawValue
 1506    ;   Value = Default
 1507    ).
 1508
 1509
 1510                 /*******************************
 1511                 *            COOKIES           *
 1512                 *******************************/
 1513
 1514write_cookies(Out, Parts, Options) :-
 1515    http:write_cookies(Out, Parts, Options),
 1516    !.
 1517write_cookies(_, _, _).
 1518
 1519update_cookies(_, _, _) :-
 1520    predicate_property(http:update_cookies(_,_,_), number_of_clauses(0)),
 1521    !.
 1522update_cookies(Lines, Parts, Options) :-
 1523    (   member(Line, Lines),
 1524        phrase(atom_field('set_cookie', CookieData), Line),
 1525        http:update_cookies(CookieData, Parts, Options),
 1526        fail
 1527    ;   true
 1528    ).
 1529
 1530
 1531                 /*******************************
 1532                 *           OPEN ANY           *
 1533                 *******************************/
 1534
 1535:- multifile iostream:open_hook/6. 1536
 1537%!  iostream:open_hook(+Spec, +Mode, -Stream, -Close,
 1538%!                     +Options0, -Options) is semidet.
 1539%
 1540%   Hook implementation that makes  open_any/5   support  =http= and
 1541%   =https= URLs for =|Mode == read|=.
 1542
 1543iostream:open_hook(URL, read, Stream, Close, Options0, Options) :-
 1544    (atom(URL) -> true ; string(URL)),
 1545    uri_is_global(URL),
 1546    uri_components(URL, Components),
 1547    uri_data(scheme, Components, Scheme),
 1548    http_scheme(Scheme),
 1549    !,
 1550    Options = Options0,
 1551    Close = close(Stream),
 1552    http_open(URL, Stream, Options0).
 1553
 1554http_scheme(http).
 1555http_scheme(https).
 1556
 1557
 1558                 /*******************************
 1559                 *          KEEP-ALIVE          *
 1560                 *******************************/
 1561
 1562%!  consider_keep_alive(+HeaderLines, +Parts, +Host,
 1563%!                      +Stream0, -Stream,
 1564%!                      +Options) is det.
 1565%
 1566%   If we have agree on a Keep-alive   connection, return a range stream
 1567%   rather than the original stream. We also  use the content length and
 1568%   a range stream if we are dealing   with an HTTPS connection. This is
 1569%   because not all servers seem to  complete the TLS closing handshake.
 1570%   If the server does not complete  this   we  receive  a TLS handshake
 1571%   error on end-of-file, causing the read to fail.
 1572
 1573consider_keep_alive(Lines, Parts, Host, StreamPair, In, Options) :-
 1574    option(connection(Asked), Options),
 1575    keep_alive(Asked),
 1576    connection(Lines, Given),
 1577    keep_alive(Given),
 1578    content_length(Lines, Bytes),
 1579    !,
 1580    stream_pair(StreamPair, In0, _),
 1581    connection_address(Host, Parts, HostPort),
 1582    debug(http(keep_alive),
 1583          'Keep-alive to ~w (~D bytes)', [HostPort, Bytes]),
 1584    stream_range_open(In0, In,
 1585                      [ size(Bytes),
 1586                        onclose(keep_alive(StreamPair, HostPort))
 1587                      ]).
 1588consider_keep_alive(Lines, Parts, _Host, StreamPair, In, _Options) :-
 1589    memberchk(scheme(https), Parts),
 1590    content_length(Lines, Bytes),
 1591    !,
 1592    stream_pair(StreamPair, In0, _),
 1593    stream_range_open(In0, In,
 1594                      [ size(Bytes),
 1595                        onclose(close_range(StreamPair))
 1596                      ]).
 1597consider_keep_alive(_, _, _, Stream, Stream, _).
 1598
 1599connection_address(Host, _, Host) :-
 1600    Host = _:_,
 1601    !.
 1602connection_address(Host, Parts, Host:Port) :-
 1603    parts_port(Parts, Port).
 1604
 1605keep_alive(keep_alive) :- !.
 1606keep_alive(Connection) :-
 1607    downcase_atom(Connection, 'keep-alive').
 1608
 1609%!  keep_alive(+StreamPair, +Host, +In, -Left) is det.
 1610%
 1611%   Callback when closing the range stream   used to process the content
 1612%   of the reply. This callback makes   the  stream available for future
 1613%   keep-alive connections or closes the stream. The stream is closed if
 1614%
 1615%     - There are too many bytes left unprocessed in the range stream.
 1616%     - There are too many pooled connections.
 1617
 1618:- public keep_alive/4. 1619:- det(keep_alive/4). 1620
 1621keep_alive(StreamPair, Host, _In, 0) :-
 1622    !,
 1623    add_to_pool_or_close(Host, StreamPair).
 1624keep_alive(StreamPair, Host, In, Left) :-
 1625    (   Left < 100,
 1626        debug(http(connection), 'Reading ~D left bytes', [Left]),
 1627        read_incomplete(In, Left)
 1628    ->  add_to_pool_or_close(Host, StreamPair)
 1629    ;   debug(http(connection),
 1630              'Closing connection due to excessive unprocessed input', []),
 1631        close_keep_alive(StreamPair)
 1632    ).
 1633
 1634add_to_pool_or_close(Host, StreamPair) :-
 1635    add_to_pool(Host, StreamPair),
 1636    !,
 1637    debug(http(connection), 'Added connection to ~p to pool', [Host]).
 1638add_to_pool_or_close(Host, StreamPair) :-
 1639    close_keep_alive(StreamPair),
 1640    debug(http(connection), 'Closed connection to ~p [pool full]', [Host]).
 1641
 1642close_keep_alive(StreamPair) :-
 1643    (   debugging(http(connection))
 1644    ->  catch(close(StreamPair), E,
 1645              print_message(warning, E))
 1646    ;   close(StreamPair, [force(true)])
 1647    ).
 1648
 1649:- public close_range/3. 1650close_range(StreamPair, _Raw, _BytesLeft) :-
 1651    close(StreamPair, [force(true)]).
 1652
 1653%!  read_incomplete(+In, +Left) is semidet.
 1654%
 1655%   If we have not all input from  a Keep-alive connection, read the
 1656%   remainder if it is short. Else, we fail and close the stream.
 1657
 1658read_incomplete(In, Left) :-
 1659    catch(setup_call_cleanup(
 1660              open_null_stream(Null),
 1661              copy_stream_data(In, Null, Left),
 1662              close(Null)),
 1663          error(_,_),
 1664          fail).
 1665
 1666:- dynamic
 1667    connection_pool/4,              % Hash, Address, Stream, Time
 1668    connection_gc_time/1. 1669
 1670%!  add_to_pool(+Address, +StreamPair) is semidet.
 1671%
 1672%   Add a connection  to  the  keep-alive   pool  after  completing  the
 1673%   interaction. Fails if there are already  too many connections in the
 1674%   pool.
 1675
 1676add_to_pool(Address, StreamPair) :-
 1677    keep_connection(Address),
 1678    get_time(Now),
 1679    term_hash(Address, Hash),
 1680    assertz(connection_pool(Hash, Address, StreamPair, Now)).
 1681
 1682%!  get_from_pool(+Address, -StreamPair) is nondet.
 1683%
 1684%   Get an existing Keep-Alive connection to  Address as StreamPair. The
 1685%   caller relies on non-determinism of this   predicate  to try another
 1686%   connection if the returned one is  already   closed  by the peer. We
 1687%   cannot rely on the non-determinism of retract/1 as that respects the
 1688%   _logical update view_. Therefore, we must   use retract/1 and commit
 1689%   as retract/1 guarantees that  the  first   retracted  clause  is not
 1690%   already retracted.
 1691
 1692get_from_pool(Address, StreamPair) :-
 1693    term_hash(Address, Hash),
 1694    repeat,
 1695    (   retract(connection_pool(Hash, Address, StreamPair, _))
 1696    ->  true
 1697    ;   !,
 1698        fail
 1699    ).
 1700
 1701%!  keep_connection(+Address) is semidet.
 1702%
 1703%   Succeeds if we want to keep the connection open. We currently keep a
 1704%   maximum of `http:max_keep_alive_connections` connections waiting and
 1705%   a maximum of `http:max_keep_alive_host_connections`  waiting for the
 1706%   same  address.  Connections  older  than  `http:max_keep_alive_idle`
 1707%   seconds are closed.
 1708
 1709keep_connection(Address) :-
 1710    setting(http:max_keep_alive_idle, Time),
 1711    close_old_connections(Time),
 1712    predicate_property(connection_pool(_,_,_,_), number_of_clauses(C)),
 1713    setting(http:max_keep_alive_connections, MaxConnections),
 1714    C =< MaxConnections,
 1715    term_hash(Address, Hash),
 1716    aggregate_all(count, connection_pool(Hash, Address, _, _), Count),
 1717    setting(http:max_keep_alive_host_connections, MaxHostConnections),
 1718    Count =< MaxHostConnections.
 1719
 1720close_old_connections(Timeout) :-
 1721    get_time(Now),
 1722    Before is Now - Timeout,
 1723    (   connection_gc_time(GC),
 1724        GC > Before
 1725    ->  true
 1726    ;   (   retractall(connection_gc_time(_)),
 1727            asserta(connection_gc_time(Now)),
 1728            connection_pool(Hash, Address, StreamPair, Added),
 1729            Added < Before,
 1730            retract(connection_pool(Hash, Address, StreamPair, Added)),
 1731            debug(http(connection),
 1732                  'Closing inactive keep-alive to ~p', [Address]),
 1733            close(StreamPair, [force(true)]),
 1734            fail
 1735        ;   true
 1736        )
 1737    ).
 1738
 1739
 1740%!  http_close_keep_alive(+Address) is det.
 1741%
 1742%   Close all keep-alive connections matching Address. Address is of
 1743%   the  form  Host:Port.  In  particular,  http_close_keep_alive(_)
 1744%   closes all currently known keep-alive connections.
 1745
 1746http_close_keep_alive(Address) :-
 1747    forall(get_from_pool(Address, StreamPair),
 1748           close(StreamPair, [force(true)])).
 1749
 1750%!  keep_alive_error(+Error, +StreamPair)
 1751%
 1752%   Deal with an error from  reusing   a  keep-alive  connection. If the
 1753%   error is due to an I/O error  or end-of-file, fail to backtrack over
 1754%   get_from_pool/2. Otherwise it is a real   error and we thus re-raise
 1755%   it. In all cases we close StreamPair rather than returning it to the
 1756%   pool as we may have done a partial read and thus be out of sync wrt.
 1757%   the HTTP protocol.
 1758
 1759keep_alive_error(error(keep_alive(closed), _), _) :-
 1760    !,
 1761    debug(http(connection), 'Keep-alive connection was closed', []),
 1762    fail.
 1763keep_alive_error(error(io_error(_,_), _), StreamPair) :-
 1764    !,
 1765    close(StreamPair, [force(true)]),
 1766    debug(http(connection), 'IO error on Keep-alive connection', []),
 1767    fail.
 1768keep_alive_error(error(existence_error(http_reply, _URL), _), _) :-
 1769    !,
 1770    debug(http(connection), 'Got empty reply on Keep-alive connection', []),
 1771    fail.
 1772keep_alive_error(Error, StreamPair) :-
 1773    close(StreamPair, [force(true)]),
 1774    throw(Error).
 1775
 1776
 1777                 /*******************************
 1778                 *     HOOK DOCUMENTATION       *
 1779                 *******************************/
 1780
 1781%!  http:open_options(+Parts, -Options) is nondet.
 1782%
 1783%   This hook is used by the HTTP   client library to define default
 1784%   options based on the the broken-down request-URL.  The following
 1785%   example redirects all traffic, except for localhost over a proxy:
 1786%
 1787%       ```
 1788%       :- multifile
 1789%           http:open_options/2.
 1790%
 1791%       http:open_options(Parts, Options) :-
 1792%           option(host(Host), Parts),
 1793%           Host \== localhost,
 1794%           Options = [proxy('proxy.local', 3128)].
 1795%       ```
 1796%
 1797%   This hook may return multiple   solutions.  The returned options
 1798%   are  combined  using  merge_options/3  where  earlier  solutions
 1799%   overrule later solutions.
 1800
 1801%!  http:write_cookies(+Out, +Parts, +Options) is semidet.
 1802%
 1803%   Emit a =|Cookie:|= header for the  current connection. Out is an
 1804%   open stream to the HTTP server, Parts is the broken-down request
 1805%   (see uri_components/2) and Options is the list of options passed
 1806%   to http_open.  The predicate is called as if using ignore/1.
 1807%
 1808%   @see complements http:update_cookies/3.
 1809%   @see library(http/http_cookie) implements cookie handling on
 1810%   top of these hooks.
 1811
 1812%!  http:update_cookies(+CookieData, +Parts, +Options) is semidet.
 1813%
 1814%   Update the cookie database.  CookieData  is   the  value  of the
 1815%   =|Set-Cookie|= field, Parts is  the   broken-down  request  (see
 1816%   uri_components/2) and Options is the list   of options passed to
 1817%   http_open.
 1818%
 1819%   @see complements http:write_cookies
 1820%   @see library(http/http_cookies) implements cookie handling on
 1821%   top of these hooks.