1:- encoding(utf8).
    2:- module(
    3  rdf_deref,
    4  [
    5    rdf_deref_file/2,   % +FileSpec, :Goal_3
    6    rdf_deref_file/3,   % +FileSpec, :Goal_3, +Options
    7    rdf_deref_stream/3, % +Uri, +In, :Goal_3
    8    rdf_deref_stream/4, % +Uri, +In, :Goal_3, +Options
    9    rdf_deref_uri/2,    % +Uri, :Goal_3
   10    rdf_deref_uri/3     % +Uri, :Goal_3, +Options
   11  ]
   12).

/** <module> RDF dereference

*/

   18:- use_module(library(aggregate)).   19:- use_module(library(error)).   20:- use_module(library(lists)).   21:- use_module(library(semweb/rdf_ntriples), []).   22:- use_module(library(semweb/rdfa), []).   23:- use_module(library(semweb/turtle), []).   24:- use_module(library(yall)).   25
   26:- use_module(library(archive_ext)).   27:- use_module(library(atom_ext)).   28:- use_module(library(dict)).   29:- use_module(library(file_ext)).   30:- use_module(library(hash_ext)).   31:- use_module(library(http_client2)).   32:- use_module(library(media_type)).   33:- use_module(library(stream_ext)).   34:- use_module(library(rdf_guess)).   35:- use_module(library(rdf_prefix)).   36:- use_module(library(rdf_term)).   37:- use_module(library(uri_ext)).   38
   39:- meta_predicate
   40    rdf_deref_file(+, 3),
   41    rdf_deref_file(+, 3, +),
   42    rdf_deref_stream(+, +, 3),
   43    rdf_deref_stream(+, +, 3, +),
   44    rdf_deref_uri(+, 3),
   45    rdf_deref_uri(+, 3, +).   46
   47:- rdf_meta
   48   rdf_deref_uri(r, :),   rdf_deref_uri(r, :, +).
 rdf_deref_file(+FileSpec:term, :Goal_3) is det
 rdf_deref_file(+FileSpec:term, :Goal_3, +Options:options) is det
Arguments:
Options- allows the following options to be set.
  • base_iri(+atom) By default, the base IRI is the file URI.
  • Other options are passed to rdf_deref_stream/4.
  • Tries to set the ‘media_type’ option based on the file name, if this option is not explicitly specified by the caller.
   % rdf_deref_file(+FileSpec, :Goal_3)
   %
   % Convenience arity: behaves as rdf_deref_file/3 called with an empty
   % options dict.
   rdf_deref_file(FileSpec, Goal_3) :-
     NoOptions = options{},
     rdf_deref_file(FileSpec, Goal_3, NoOptions).
   70
   71
   % rdf_deref_file(+FileSpec, :Goal_3, +Options)
   %
   % Resolves FileSpec to a readable file, derives the file URI from
   % it, merges that URI into the options dict under the key `base_iri`,
   % and then lets rdf_deref_stream/4 process the stream that
   % read_from_file/2 hands to the closure.
   rdf_deref_file(FileSpec, Goal_3, UserOptions) :-
     % Fails/raises according to absolute_file_name/3 when the file
     % cannot be read.
     absolute_file_name(FileSpec, Path, [access(read)]),
     % The file URI doubles as the base IRI of the document.
     uri_file_name(FileIri, Path),
     merge_dicts(UserOptions, options{base_iri: FileIri}, StreamOptions),
     read_from_file(
       Path,
       {FileIri,Goal_3,StreamOptions}/[Stream]>>
         rdf_deref_stream(FileIri, Stream, Goal_3, StreamOptions)
     ).
 rdf_deref_stream(+BaseIri:iri, +In:istream, :Goal_3) is det
 rdf_deref_stream(+BaseIri:iri, +In:istream, :Goal_3, +Options:options) is det
The following call will be made:
call(:Goal_3, +BaseIri:iri, +Tuples:list(rdf_tuple), ?GraphName:rdf_graph_name)

The following options are supported:

bnode_prefix(+atom)
The default is a well-known IRI as per RDF 1.1.
content_type(+MediaType:media_type)
The parsed value of the HTTP ‘Content-Type’ header, if any.
media_type(+MediaType:media_type)
Overrules the RDF serialization format.
  % rdf_deref_stream(+BaseIri, +In, :Goal_3)
  %
  % Convenience arity: behaves as rdf_deref_stream/4 called with an
  % empty options dict.
  rdf_deref_stream(BaseIri, In, Goal_3) :-
    NoOptions = options{},
    rdf_deref_stream(BaseIri, In, Goal_3, NoOptions).
  109
  110
  % rdf_deref_stream(+BaseIri, +In, :Goal_3, +Options)
  %
  % Determines the RDF serialization of the data on In, parses it with
  % the matching parser, and delivers the result to Goal_3.  BaseIri is
  % appended to Goal_3 as an extra argument; the parser (or, for RDFa,
  % this predicate) supplies the remaining two arguments.
  %
  % The Media Type is chosen in this order:
  %   1. the `media_type` option, when present;
  %   2. the `content_type` option, when present;
  %   3. the Media Type that uri_media_type/2 gives for BaseIri;
  %   4. the Media Type guessed by rdf_guess_stream/3.
  % In cases 2-4 a warning is printed when the guessed Media Type is
  % not 'rdf_media_type_>' the HTTP / IRI Media Type.
  %
  % An unsupported Media Type results in a warning only; Goal_3 is not
  % called in that case.
  rdf_deref_stream(BaseIri, In1, Mod:Goal_3, Options1) :-
    % BUG: https://github.com/SWI-Prolog/swipl-devel/issues/765
    %archive_stream(In1, In2),
    In2 = In1,

    % Determine the serialization format.
    (   % An explicitly specified Media Type overrules everything else.
        options{media_type: MediaType} :< Options1
    ->  true
    ;   % Heuristic 1: guess based on a first chunk of the data.
        rdf_guess_stream(In2, 10 000, MediaTypeGuess),
        % Heuristic 2: the value of the HTTP Content-Type header.
        % The pattern uses the same `options` tag as every other options
        % dict in this module, so that it can match the dict built by
        % rdf_deref_uri/3.
        ignore(options{content_type: MediaTypeHttp} :< Options1),
        % Heuristic 3: the URI path's file name extension.
        ignore(uri_media_type(BaseIri, MediaTypeIri)),
        % Warn when the guess is incompatible with the HTTP header.
        (   nonvar(MediaTypeHttp),
            \+ 'rdf_media_type_>'(MediaTypeGuess, MediaTypeHttp)
        ->  print_message(
              warning,
              inconsistent_media_types(http(MediaTypeHttp),guess(MediaTypeGuess))
            )
        ;   true
        ),
        % Warn when the guess is incompatible with the IRI-based Media
        % Type.  This mirrors the HTTP check: both operands are bound
        % here, and the message only mentions bound Media Types.
        (   nonvar(MediaTypeIri),
            \+ 'rdf_media_type_>'(MediaTypeGuess, MediaTypeIri)
        ->  print_message(
              warning,
              inconsistent_media_types(iri(MediaTypeIri),guess(MediaTypeGuess))
            )
        ;   true
        ),
        % Pick the Media Type: HTTP header, then IRI, then the guess.
        (   nonvar(MediaTypeHttp)
        ->  MediaType = MediaTypeHttp
        ;   nonvar(MediaTypeIri)
        ->  MediaType = MediaTypeIri
        ;   MediaType = MediaTypeGuess
        )
    ),
    % Extend the callback with the base IRI; two more arguments are
    % added when it is called.
    Goal_3 =.. [Pred|Args1],
    append(Args1, [BaseIri], Args2),
    Goal_2 =.. [Pred|Args2],
    % Use a well-known IRI with a UUID as blank node prefix.  The UUID
    % is determined by the base IRI seed.
    md5(BaseIri, Hash),
    well_known_iri([Hash], BNodePrefix),
    % Parse according to the selected Media Type.
    (   % N-Quads
        MediaType = media(application/'n-quads',_)
    ->  merge_dicts(
          options{anon_prefix: BNodePrefix, base_iri: BaseIri, format: nquads},
          Options1,
          Options2
        ),
        dict_terms(Options2, Options3),
        rdf_ntriples:rdf_process_ntriples(In2, Mod:Goal_2, Options3)
    ;   % N-Triples
        MediaType = media(application/'n-triples',_)
    ->  merge_dicts(
          options{anon_prefix: BNodePrefix, base_iri: BaseIri, format: ntriples},
          Options1,
          Options2
        ),
        dict_terms(Options2, Options3),
        rdf_ntriples:rdf_process_ntriples(In2, Mod:Goal_2, Options3)
    ;   % RDF/XML
        % NOTE(review): `base_iri` is passed on unrenamed here, unlike
        % in the TriG/Turtle branches -- confirm which option name
        % process_rdf/3 expects.
        MediaType = media(application/'rdf+xml',_)
    ->  merge_dicts(options{base_iri: BaseIri, max_errors: -1}, Options1, Options2),
        dict_terms(Options2, Options3),
        rdf:process_rdf(In2, Mod:Goal_2, Options3)
    ;   % TriG
        MediaType = media(application/trig,_)
    ->  merge_dicts(
          options{
            anon_prefix: BNodePrefix,
            base_iri: BaseIri,
            format: trig,
            resources: iri
          },
          Options1,
          Options2
        ),
        % The Turtle parser takes its base under the key `base_uri`.
        dict_change_keys(Options2, [base_iri-base_uri], Options3),
        dict_terms(Options3, Options4),
        turtle:rdf_process_turtle(In2, Mod:Goal_2, Options4)
    ;   % Turtle
        MediaType = media(text/turtle,_)
    ->  merge_dicts(
          options{
            anon_prefix: BNodePrefix,
            base_iri: BaseIri,
            format: turtle,
            resources: iri
          },
          Options1,
          Options2
        ),
        % Same parser as TriG, hence the same key rename.
        dict_change_keys(Options2, [base_iri-base_uri], Options3),
        dict_terms(Options3, Options4),
        turtle:rdf_process_turtle(In2, Mod:Goal_2, Options4)
    ;   % RDFa
        memberchk(MediaType, [media(application/'xhtml+xml',_),media(text/html,_)])
    ->  merge_dicts(
          options{anon_prefix: BNodePrefix, base: BaseIri, max_errors: -1},
          Options1,
          Options2
        ),
        dict_terms(Options2, Options3),
        rdfa:read_rdfa(In2, Triples, Options3),
        % RDFa yields all triples at once; the graph argument is left
        % unbound.
        call(Mod:Goal_2, Triples, _)
    %;   % JSON-LD
    %    memberchk(MediaType, [media(application/'ld+json',_)])
    %->  read_jsonld(In2, Triples),
    %    call(Mod:Goal_2, Triples, _)
    ;   % An unsupported Media Type.
        print_message(warning, unsupported_format(MediaType,_))
    ).
 rdf_deref_uri(+Uri:uri, :Goal_3) is det
 rdf_deref_uri(+Uri:uri, :Goal_3, +Options:options) is det
Arguments:
Options- The following options are supported:
accept(+MediaTypes:list(compound))
The value of the HTTP Accept header, from high to low precedence. The default value is a list of all and only standardized Media Types.
  % rdf_deref_uri(+Uri, :Goal_3)
  %
  % Convenience arity: behaves as rdf_deref_uri/3 called with an empty
  % options dict.
  rdf_deref_uri(Uri, Goal_3) :-
    NoOptions = options{},
    rdf_deref_uri(Uri, Goal_3, NoOptions).
  238
  239
  % rdf_deref_uri(+Uri, :Goal_3, +Options)
  %
  % Opens Uri with http_open2/3 and processes the reply stream with
  % rdf_deref_stream/4.  The stream is closed afterwards, whether
  % processing succeeds, fails or raises.
  rdf_deref_uri(Uri, Goal_3, UserOptions) :-
    % `Accept' header: the caller's `accept` option, or else the set of
    % all solutions of media_type_family(_, rdf).
    (   options{accept: Accepted} :< UserOptions
    ->  true
    ;   aggregate_all(set(Candidate), media_type_family(Candidate, rdf), Accepted)
    ),
    HttpOptions = options{accept: Accepted, failure: 404, metadata: Metadata},
    http_open2(Uri, Stream, HttpOptions),
    call_cleanup(
      (
        % Forward the Content-Type found in the HTTP metadata, if any,
        % as the `content_type` option.
        (   http_metadata_content_type(Metadata, ContentType)
        ->  merge_dicts(
              UserOptions,
              options{content_type: ContentType},
              StreamOptions
            )
        ;   StreamOptions = UserOptions
        ),
        rdf_deref_stream(Uri, Stream, Goal_3, StreamOptions)
      ),
      close(Stream)
    ).
  257
  258
  259
  260% GENERICS %
 rdf_media_type_>(+SuperMediaType:media_type, +SubMediaType:media_type) is semidet
Strict ordering over RDF Media Types.

An RDF Media Type A is greater than an RDF Media Type B if all valid documents in B are also valid documents in A, and there are some documents that are valid in A that are not valid in B.

  % 'rdf_media_type_>'(+Super, +Sub)
  %
  % Succeeds when Super and Sub denote the same Media Type, or when Sub
  % can be reached from Super through one or more
  % 'rdf_media_type_strict>'/2 steps.  Note that equal Media Types are
  % accepted as well.
  'rdf_media_type_>'(Super, Sub) :-
    (   'rdf_media_type_='(Super, Sub)
    ->  true
    ;   'rdf_media_type_strict>'(Super, Between),
        'rdf_media_type_>'(Between, Sub)
    ).
  275
  % 'rdf_media_type_='(?MediaType1, ?MediaType2)
  %
  % Two media/2 terms are equal when their Supertype/Subtype pairs
  % unify; the second argument of media/2 (the parameters) is ignored.
  'rdf_media_type_='(media(Type,_), media(Type,_)) :-
    Type = _/_.
  277
  % 'rdf_media_type_strict>'(?Super, ?Sub)
  %
  % Direct (one-step) containment facts between RDF Media Types; the
  % media/2 parameters are ignored.  'rdf_media_type_>'/2 chains these
  % facts.
  'rdf_media_type_strict>'(media(application/trig,_), media(text/turtle,_)).
  'rdf_media_type_strict>'(
    media(text/turtle,_),
    media(application/'n-triples',_)
  ).
  % The terminating full stop is required: without it this last fact
  % is a syntax error.
  'rdf_media_type_strict>'(
    media(application/'n-quads',_),
    media(application/'n-triples',_)
  ).