1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker 4 E-mail: J.Wielemaker@vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2009-2023, VU University Amsterdam 7 SWI-Prolog Solutions b.v. 8 All rights reserved. 9 10 Redistribution and use in source and binary forms, with or without 11 modification, are permitted provided that the following conditions 12 are met: 13 14 1. Redistributions of source code must retain the above copyright 15 notice, this list of conditions and the following disclaimer. 16 17 2. Redistributions in binary form must reproduce the above copyright 18 notice, this list of conditions and the following disclaimer in 19 the documentation and/or other materials provided with the 20 distribution. 21 22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 32 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 POSSIBILITY OF SUCH DAMAGE. 34*/ 35 36:- module(uri, 37 [ uri_components/2, % ?URI, ?Components 38 uri_data/3, % ?Field, +Components, ?Data 39 uri_data/4, % +Field, +Components, -Data, -New 40 uri_edit/3, % +Actions,+URI0,-URI 41 42 uri_normalized/2, % +URI, -NormalizedURI 43 iri_normalized/2, % +IRI, -NormalizedIRI 44 uri_normalized_iri/2, % +URI, -NormalizedIRI 45 uri_normalized/3, % +URI, +Base, -NormalizedURI 46 iri_normalized/3, % +IRI, +Base, -NormalizedIRI 47 uri_normalized_iri/3, % +URI, +Base, -NormalizedIRI 48 uri_resolve/3, % +URI, +Base, -AbsURI 49 uri_is_global/1, % +URI 50 uri_query_components/2, % ?QueryString, ?NameValueList 51 uri_authority_components/2, % ?Authority, ?Components 52 uri_authority_data/3, % ?Field, ?Components, ?Data 53 % Encoding 54 uri_encoded/3, % +Component, ?Value, ?Encoded 55 uri_file_name/2, % ?URI, ?Path 56 uri_iri/2 % ?URI, ?IRI 57 ]). 58:- autoload(library(error), [domain_error/2]). 59:- if(exists_source(library(socket))). 60:- autoload(library(socket), [gethostname/1]). 61:- endif. 62 63:- use_foreign_library(foreign(uri)).
^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? 12 3 4 5 6 7 8 9
scheme
, authority
, path
, search
and fragment
106uri_data(scheme, uri_components(S, _, _, _, _), S). 107uri_data(authority, uri_components(_, A, _, _, _), A). 108uri_data(path, uri_components(_, _, P, _, _), P). 109uri_data(search, uri_components(_, _, _, S, _), S). 110uri_data(fragment, uri_components(_, _, _, _, F), F).
116uri_data(scheme, uri_components(_, A, P, Q, F), S, 117 uri_components(S, A, P, Q, F)). 118uri_data(authority, uri_components(S, _, P, Q, F), A, 119 uri_components(S, A, P, Q, F)). 120uri_data(path, uri_components(S, A, _, Q, F), P, 121 uri_components(S, A, P, Q, F)). 122uri_data(search, uri_components(S, A, P, _, F), Q, 123 uri_components(S, A, P, Q, F)). 124uri_data(fragment, uri_components(S, A, P, Q, _), F, 125 uri_components(S, A, P, Q, F)).
uri_is_global(URI) :- uri_components(URI, Components), uri_data(scheme, Components, Scheme), nonvar(Scheme), atom_length(Scheme, Len), Len > 1.
uri_normalized(URI, Base, NormalizedGlobalURI) :- uri_resolve(URI, Base, GlobalURI), uri_normalized(GlobalURI, NormalizedGlobalURI).
uri_normalized(URI, Base, NormalizedGlobalIRI) :- uri_resolve(URI, Base, GlobalURI), uri_normalized_iri(GlobalURI, NormalizedGlobalIRI).
?- uri_query_components(QS, [a=b, c('d+w'), n-'VU Amsterdam']). QS = 'a=b&c=d%2Bw&n=VU%20Amsterdam'. ?- uri_query_components('a=b&c=d%2Bw&n=VU%20Amsterdam', Q). Q = [a=b, c='d+w', n='VU Amsterdam'].
[ip]
,
returning the ip as host
, without the enclosing []
. When
constructing an authority string and the host contains :
, the
host is embraced in []
. If []
is not used correctly, the
behavior should be considered poorly defined. If there is no
balancing `]` or the host part does not end with `]`, these
characters are considered normal characters and part of the
(invalid) host name.user
, password
, host
and port
246uri_authority_data(user, uri_authority(U, _, _, _), U). 247uri_authority_data(password, uri_authority(_, P, _, _), P). 248uri_authority_data(host, uri_authority(_, _, H, _), H). 249uri_authority_data(port, uri_authority(_, _, _, P), P).
query_value
, fragment
, path
or
segment
. Besides alphanumerical characters, the following
characters are passed verbatim (the set is split in logical groups
according to RFC3986).
288uri_file_name(URI, FileName) :- 289 nonvar(URI), 290 !, 291 uri_components(URI, Components), 292 uri_data(scheme, Components, File), File == file, 293 uri_data(authority, Components, Host), 294 my_host(Host), 295 uri_data(path, Components, FileNameEnc), 296 uri_encoded(path, FileName0, FileNameEnc), 297 delete_leading_slash(FileName0, FileName). 298uri_file_name(URI, FileName) :- 299 nonvar(FileName), 300 !, 301 absolute_file_name(FileName, Path0), 302 ensure_leading_slash(Path0, Path), 303 uri_encoded(path, Path, PathEnc), 304 uri_data(scheme, Components, file), 305 uri_data(authority, Components, ''), 306 uri_data(path, Components, PathEnc), 307 uri_components(URI, Components). 308 309my_host('') :- !. 310my_host(localhost) :- !. 311:- if(exists_source(library(socket))). 312my_host(Host) :- 313 gethostname(Host). 314:- endif.
323ensure_leading_slash(Path, SlashPath) :- 324 ( sub_atom(Path, 0, _, _, /) 325 -> SlashPath = Path 326 ; atom_concat(/, Path, SlashPath) 327 ). 328 329:- if(current_prolog_flag(windows, true)). 330delete_leading_slash(Path, WinPath) :- 331 atom_concat(/, WinPath, Path), 332 is_absolute_file_name(WinPath), 333 !. 334:- endif. 335delete_leading_slash(Path, Path). 336 337 338 /******************************* 339 * MODIFYING * 340 *******************************/
http
, https
, etc.)path
component. If Path is not absolute it
is taken relative to the path of URI0.Key=Value
pairs of the current search (query)
component. New values replace existing values. If KeyValues
is written as =(KeyValues) the current search component is
ignored. KeyValues is a list, whose elements are one of
Key=Value
, Key-Value
or `Key(Value)`.
Components can be removed by using a variable as value, except
from path
which can be reset using path(/)
and query which can
be dropped using query(=([]))
.
376uri_edit(Actions, URI0, URI) :- 377 ( var(URI0) 378 -> URI1 = '/' 379 ; URI1 = URI0 380 ), 381 uri_components(URI1, Comp0), 382 edit_components(Actions, Comp0, Comp), 383 uri_components(URI, Comp). 384 385edit_components([], Comp0, Comp) => 386 Comp = Comp0. 387edit_components([H|T], Comp0, Comp) => 388 edit_components(H, Comp0, Comp1), 389 edit_components(T, Comp1, Comp). 390edit_components(scheme(Scheme), Comp0, Comp) => 391 uri_data(scheme, Comp0, Scheme, Comp). 392edit_components(path(Path), Comp0, Comp) => 393 uri_data(path, Comp0, Path0), 394 ( ( var(Path0) 395 ; Path0 == '' 396 ) 397 -> Path1 = '/' 398 ; Path1 = Path0 399 ), 400 uri_normalized(Path, Path1, Path2), 401 uri_data(path, Comp0, Path2, Comp). 402edit_components(fragment(Fragment), Comp0, Comp) => 403 uri_data(fragment, Comp0, Fragment, Comp). 404edit_components(Authority, Comp0, Comp), 405 authority_field(Authority) => 406 uri_data(authority, Comp0, Auth0), 407 ( var(Auth0) 408 -> true 409 ; uri_authority_components(Auth0, AComp0) 410 ), 411 edit_auth_components(Authority, AComp0, AComp), 412 uri_authority_components(Auth, AComp), 413 uri_data(authority, Comp0, Auth, Comp). 414edit_components(query(Search), Comp0, Comp) => 415 edit_components(search(Search), Comp0, Comp). 416edit_components(search(=(Search)), Comp0, Comp) => 417 uri_query_components(String, Search), 418 uri_data(search, Comp0, String, Comp). 419edit_components(search(Search), Comp0, Comp) => 420 uri_data(search, Comp0, SS0), 421 ( var(SS0) 422 -> Search0 = [] 423 ; uri_query_components(SS0, Search0) 424 ), 425 join_search(Search0, Search, Search1), 426 uri_query_components(SS1, Search1), 427 uri_data(search, Comp0, SS1, Comp). 428edit_components(Other, _, _) => 429 domain_error(uri_edit, Other). 430 user(_)) (. 432authority_field(password(_)). 433authority_field(host(_)). 434authority_field(port(_)). 435 436edit_auth_components(user(User), 437 uri_authority(_, Passwd, Host, Port), 438 uri_authority(User, Passwd, Host, Port)). 439edit_auth_components(password(Passwd), 440 uri_authority(User, _, Host, Port), 441 uri_authority(User, Passwd, Host, Port)). 442edit_auth_components(host(Host), 443 uri_authority(User, Passwd, _, Port), 444 uri_authority(User, Passwd, Host, Port)). 445edit_auth_components(port(Port), 446 uri_authority(User, Passwd, Host, _), 447 uri_authority(User, Passwd, Host, Port)). 448 449join_search([], Search, Search). 450join_search([N=_|ST], New, Search) :- 451 ( memberchk(N=_, New) 452 -> true 453 ; functor(T, N, 1), 454 memberchk(T, New) 455 -> true 456 ; memberchk(N-_, New) 457 ), 458 !, 459 join_search(ST, New, Search). 460join_search([H|ST], New, [H|Search]) :- 461 join_search(ST, New, Search). 462 463 464 /******************************* 465 * SANDBOX * 466 *******************************/ 467 468:- multifile sandbox:safe_primitive/1. 469 470sandbox:safe_primitive(uri:uri_components(_,_)). 471sandbox:safe_primitive(uri:uri_normalized(_,_)). 472sandbox:safe_primitive(uri:iri_normalized(_,_)). 473sandbox:safe_primitive(uri:uri_normalized_iri(_,_)). 474sandbox:safe_primitive(uri:uri_normalized(_,_,_)). 475sandbox:safe_primitive(uri:iri_normalized(_,_,_)). 476sandbox:safe_primitive(uri:uri_normalized_iri(_,_,_)). 477sandbox:safe_primitive(uri:uri_resolve(_,_,_)). 478sandbox:safe_primitive(uri:uri_is_global(_)). 479sandbox:safe_primitive(uri:uri_query_components(_,_)). 480sandbox:safe_primitive(uri:uri_authority_components(_,_)). 481sandbox:safe_primitive(uri:uri_encoded(_,_,_)). 482sandbox:safe_primitive(uri:uri_iri(_,_))
Process URIs
This library provides high-performance C-based primitives for manipulating URIs. We decided for a C-based implementation for the much better performance on raw character manipulation. Notably, URI handling primitives are used in time-critical parts of RDF processing. This implementation is based on RFC-3986:
The URI processing in this library is rather liberal. That is, we break URIs according to the rules, but we do not validate that the components are valid. Also, percent-decoding for IRIs is liberal. It first tries UTF-8; then ISO-Latin-1 and finally accepts %-characters verbatim.
Earlier experience has shown that strict enforcement of the URI syntax results in many errors that are accepted by many other web-document processing tools. */