View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2013-2015, VU University Amsterdam
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   35:- module(rdf_ntriples,
   36          [ rdf_read_ntriples/3,        % +Input, -Triples, +Options
   37            rdf_read_nquads/3,          % +Input, -Quads, +Options
   38            rdf_process_ntriples/3,     % +Input, :CallBack, +Options
   39
   40            read_ntriple/2,             % +Stream, -Triple
   41            read_nquad/2,               % +Stream, -Quad
   42            read_ntuple/2               % +Stream, -TripleOrQuad
   43          ]).   44:- if(exists_source(library(semweb/rdf_db))).   45:- use_module(library(semweb/rdf_db),
   46              [rdf_transaction/2,rdf_set_graph/2,rdf_assert/4]).   47:- endif.   48:- use_module(library(record),[(record)/1, op(_,_,record)]).   49
   50:- autoload(library(error),[domain_error/2]).   51:- autoload(library(memfile),
   52	    [atom_to_memory_file/2,open_memory_file/4]).   53:- autoload(library(option),[option/3,option/2]).   54:- autoload(library(uri),
   55	    [uri_file_name/2,uri_is_global/1,uri_normalized/2]).   56:- autoload(library(http/http_open),[http_open/3]).   57
   58:- use_foreign_library(foreign(ntriples)).   59
   60/** <module> Process files in the RDF N-Triples format
   61
   62The library(semweb/rdf_ntriples) provides a  fast   reader  for  the RDF
   63N-Triples and N-Quads format. N-Triples is   a simple format, originally
   64used to support the W3C RDF  test   suites.  The current format has been
   65extended   and   is   a   subset    of     the    Turtle   format   (see
   66library(semweb/turtle)).
   67
   68The API of this library is   almost identical to library(semweb/turtle).
   69This module provides a plugin  into   rdf_load/2,  making this predicate
   70support the format =ntriples= and =nquads=.
   71
   72@see http://www.w3.org/TR/n-triples/
   73@tbd Sync with RDF 1.1. specification.
   74*/
   75
   76:- predicate_options(rdf_read_ntriples/3, 3,
   77                     [ anon_prefix(any), % atom or node(_)
   78                       base_uri(atom),
   79                       error_count(-integer),
   80                       on_error(oneof([warning,error]))
   81                     ]).   82:- predicate_options(rdf_read_nquads/3, 3,
   83                     [ anon_prefix(any), % atom or node(_)
   84                       base_uri(atom),
   85                       error_count(-integer),
   86                       on_error(oneof([warning,error])),
   87                       graph(atom)
   88                     ]).   89:- predicate_options(rdf_process_ntriples/3, 3,
   90                     [ graph(atom),
   91                       pass_to(rdf_read_ntriples/3, 3)
   92                     ]).   93
   94:- meta_predicate
   95    rdf_process_ntriples(+,2,+).   96
   97
   98%!  read_ntriple(+Stream, -Triple) is det.
   99%
  100%   Read the next triple from Stream as Triple. Stream must have UTF-8
  101%   encoding.
  102%
  103%   @param  Triple is a term triple(Subject,Predicate,Object).
  104%           Arguments follow the normal conventions of the RDF
  105%           libraries.  NodeID elements are mapped to node(Id).
  106%           If end-of-file is reached, Triple is unified with
  107%           =end_of_file=.
  108%   @error  syntax_error(Message) on syntax errors
  109
  110%!  read_nquad(+Stream, -Quad) is det.
  111%
  112%   Read the next quad from Stream as Quad.  Stream must have UTF-8
  113%   encoding.
  114%
  115%   @param  Quad is a term quad(Subject,Predicate,Object,Graph).
  116%           Arguments follow the normal conventions of the RDF
  117%           libraries.  NodeID elements are mapped to node(Id).
  118%           If end-of-file is reached, Quad is unified with
  119%           =end_of_file=.
  120%   @error  syntax_error(Message) on syntax errors
  121
  122%!  read_ntuple(+Stream, -Tuple) is det.
  123%
  124%   Read the next triple or quad from  Stream as Tuple. Tuple is one
  125%   of the terms below.  See   read_ntriple/2  and  read_nquad/2 for
  126%   details.
  127%
  128%     - triple(Subject,Predicate,Object)
  129%     - quad(Subject,Predicate,Object,Graph).
  130
  131:- record nt_state(anon_prefix,
  132               graph,
  133               on_error:oneof([warning,error])=warning,
  134               format:oneof([ntriples,nquads]),
  135               error_count=0).  136
  137
  138%!  rdf_read_ntriples(+Input, -Triples, +Options) is det.
  139%!  rdf_read_nquads(+Input, -Quads, +Options) is det.
  140%
  141%   True when Triples/Quads is a list   of triples/quads from Input.
  142%   Options:
  143%
  144%     * anon_prefix(+AtomOrNode)
  145%     Prefix nodeIDs with this atom.  If AtomOrNode is the term
  146%     node(_), bnodes are returned as node(Id).
  147%     * base_uri(+Atom)
  148%     Defines the default anon_prefix as _:<baseuri>_
  149%     * on_error(Action)
  150%     One of =warning= (default) or =error=
  151%     * error_count(-Count)
  152%     If =on_error= is =warning=, unify Count with th number of
  153%     errors.
  154%     * graph(+Graph)
  155%     For rdf_read_nquads/3, this defines the graph associated
  156%     to _triples_ loaded from the input.  For rdf_read_ntriples/3
  157%     this opion is ignored.
  158%
  159%   @arg Triples is a list of rdf(Subject, Predicate, Object)
  160%   @arg Quads is a list of rdf(Subject, Predicate, Object, Graph)
  161
  162rdf_read_ntriples(Input, Triples, Options) :-
  163    rdf_read_ntuples(Input, Triples, [format(ntriples)|Options]).
  164
  165rdf_read_nquads(Input, Triples, Options) :-
  166    rdf_read_ntuples(Input, Triples, [format(nquads)|Options]).
  167
  168
  169rdf_read_ntuples(Input, Triples, Options) :-
  170    setup_call_cleanup(
  171        open_input(Input, Stream, Close),
  172        (   init_state(Input, Options, State0),
  173            read_ntuples(Stream, Triples, State0, State)
  174        ),
  175        Close),
  176    option(error_count(Count), Options, _),
  177    nt_state_error_count(State, Count).
  178
  179%!  rdf_process_ntriples(+Input, :CallBack, +Options)
  180%
  181%   Call-back interface, compatible with the   other triple readers.
  182%   In  addition  to  the  options  from  rdf_read_ntriples/3,  this
  183%   processes the option graph(Graph).
  184%
  185%   @param  CallBack is called as call(CallBack, Triples, Graph),
  186%           where Triples is a list holding a single rdf(S,P,O)
  187%           triple.  Graph is passed from the =graph= option and
  188%           unbound if this option is omitted.
  189
  190rdf_process_ntriples(Input, CallBack, Options) :-
  191    setup_call_cleanup(
  192        open_input(Input, Stream, Close),
  193        (   init_state(Input, Options, State0),
  194            process_ntriple(Stream, CallBack, State0, State)
  195        ),
  196        Close),
  197    option(error_count(Count), Options, _),
  198    nt_state_error_count(State, Count).
  199
  200
  201%!  read_ntuples(+Stream, -Triples, +State0, -State)
  202
  203read_ntuples(Stream, Triples, State0, State) :-
  204    read_ntuple(Stream, Triple0, State0, State1),
  205    (   Triple0 == end_of_file
  206    ->  Triples = [],
  207        State = State1
  208    ;   map_nodes(Triple0, Triple, State1, State2),
  209        Triples = [Triple|More],
  210        read_ntuples(Stream, More, State2, State)
  211    ).
  212
  213%!  process_ntriple(+Stream, :CallBack, +State0, -State)
  214
  215process_ntriple(Stream, CallBack, State0, State) :-
  216    read_ntuple(Stream, Triple0, State0, State1),
  217    (   Triple0 == end_of_file
  218    ->  State = State1
  219    ;   map_nodes(Triple0, Triple, State1, State2),
  220        nt_state_graph(State2, Graph),
  221        call(CallBack, [Triple], Graph),
  222        process_ntriple(Stream, CallBack, State2, State)
  223    ).
  224
  225%!  read_ntuple(+Stream, -Tuple, +State0, -State) is det.
  226%
  227%   True when Tuple is the next triple on Stream. May increment
  228%   the error count on State.
  229
  230read_ntuple(Stream, Triple, State0, State) :-
  231    nt_state_on_error(State0, error),
  232    !,
  233    read_ntuple(Stream, Triple, State0),
  234    State = State0.
  235read_ntuple(Stream, Triple, State0, State) :-
  236    catch(read_ntuple(Stream, Triple, State0), E, true),
  237    (   var(E)
  238    ->  State = State0
  239    ;   print_message(warning, E),
  240        nt_state_error_count(State0, EC0),
  241        EC is EC0+1,
  242        set_error_count_of_nt_state(EC, State0, State1),
  243        read_ntuple(Stream, Triple, State1, State)
  244    ).
  245
  246read_ntuple(Stream, Triple, State0) :-
  247    nt_state_format(State0, Format),
  248    format_read_ntuple(Format, Stream, Triple, State0).
  249
  250format_read_ntuple(ntriples, Stream, Triple, _) :-
  251    !,
  252    read_ntriple(Stream, Triple).
  253format_read_ntuple(nquads, Stream, Quad, State) :-
  254    !,
  255    read_ntuple(Stream, Tuple),
  256    to_quad(Tuple, Quad, State).
  257
  258to_quad(Quad, Quad, _) :-
  259    functor(Quad, quad, 4),
  260    !.
  261to_quad(triple(S,P,O), quad(S,P,O,Graph), State) :-
  262    nt_state_graph(State, Graph).
  263to_quad(end_of_file, end_of_file, _).
  264
  265
  266map_nodes(triple(S0,P0,O0), rdf(S,P,O), State0, State) :-
  267    map_node(S0, S, State0, State1),
  268    map_node(P0, P, State1, State2),
  269    map_node(O0, O, State2, State).
  270map_nodes(quad(S0,P0,O0,G0), rdf(S,P,O,G), State0, State) :-
  271    map_node(S0, S, State0, State1),
  272    map_node(P0, P, State1, State2),
  273    map_node(O0, O, State2, State3),
  274    map_node(G0, G, State3, State).
  275
  276map_node(node(NodeId), BNode, State, State) :-
  277    nt_state_anon_prefix(State, Prefix),
  278    atom(Prefix),
  279    !,
  280    atom_concat(Prefix, NodeId, BNode).
  281map_node(Node, Node, State, State).
  282
  283
  284%!  open_input(+Input, -Stream, -Close) is det.
  285%
  286%   Open input for reading ntriples. The  default encoding is UTF-8.
  287%   If the input has a different encoding,   Input  must be a stream
  288%   with the correct encoding and the stream type must be =text=.
  289
  290open_input(stream(Stream), Stream, Close) :-
  291    !,
  292    (   stream_property(Stream, type(binary))
  293    ->  set_stream(Stream, encoding(utf8)),
  294        Close = set_stream(Stream, type(binary))
  295    ;   stream_property(Stream, encoding(Old)),
  296        (   n3_encoding(Old)
  297        ->  true
  298        ;   domain_error(ntriples_encoding, Old)
  299        ),
  300        Close = true
  301    ).
  302open_input(Stream, Stream, Close) :-
  303    is_stream(Stream),
  304    !,
  305    open_input(stream(Stream), Stream, Close).
  306open_input(atom(Atom), Stream, close(Stream)) :-
  307    !,
  308    atom_to_memory_file(Atom, MF),
  309    open_memory_file(MF, read, Stream, [free_on_close(true)]).
  310open_input(URL, Stream, close(Stream)) :-
  311    (   sub_atom(URL, 0, _, _, 'http://')
  312    ;   sub_atom(URL, 0, _, _, 'https://')
  313    ),
  314    !,
  315    http_open(URL, Stream, []),
  316    set_stream(Stream, encoding(utf8)).
  317open_input(URL, Stream, close(Stream)) :-
  318    uri_file_name(URL, Path),
  319    !,
  320    open(Path, read, Stream, [encoding(utf8)]).
  321open_input(File, Stream, close(Stream)) :-
  322    absolute_file_name(File, Path,
  323                       [ access(read),
  324                         extensions(['', nt, ntriples])
  325                       ]),
  326    open(Path, read, Stream, [encoding(utf8)]).
  327
  328n3_encoding(octet).
  329n3_encoding(ascii).
  330n3_encoding(iso_latin_1).
  331n3_encoding(utf8).
  332n3_encoding(text).
  333
  334%!  init_state(+Input, +Options, -State) is det.
  335
  336init_state(In, Options, State) :-
  337    (   option(base_uri(BaseURI), Options)
  338    ->  true
  339    ;   In = stream(_)
  340    ->  BaseURI = []
  341    ;   is_stream(In)
  342    ->  BaseURI = []
  343    ;   In = atom(_)
  344    ->  BaseURI = []
  345    ;   uri_is_global(In),
  346        \+ is_absolute_file_name(In)        % Avoid C:Path in Windows
  347    ->  uri_normalized(In, BaseURI)
  348    ;   uri_file_name(BaseURI, In)
  349    ),
  350    (   option(anon_prefix(Prefix), Options)
  351    ->  true
  352    ;   BaseURI == []
  353    ->  Prefix = '_:genid'
  354    ;   atom_concat('_:', BaseURI, Prefix)
  355    ),
  356    option(on_error(OnError), Options, warning),
  357    % If the format is not set explicitly we assume N-Triples.
  358    % The format option _must_ be set before make_nt_state/2.
  359    option(format(Format), Options, ntriples),
  360    rdf_db:graph(Options, Graph),
  361    (   var(Graph)
  362    ->  Graph = user
  363    ;   true
  364    ),
  365    make_nt_state([ anon_prefix(Prefix),
  366                    on_error(OnError),
  367                    format(Format),
  368                    graph(Graph)
  369                  ], State).
  370
  371
  372                 /*******************************
  373                 *          RDF-DB HOOK         *
  374                 *******************************/
  375
  376:- if(current_predicate(rdf_transaction/2)).  377:- multifile
  378    rdf_db:rdf_load_stream/3,
  379    rdf_db:rdf_file_type/2.  380
  381%!  rdf_db:rdf_load_stream(+Format, +Stream, :Options) is semidet.
  382%
  383%   Plugin rule that supports loading   the  =ntriples= and =nquads=
  384%   formats.
  385
  386rdf_db:rdf_load_stream(ntriples, Stream, _Module:Options) :-
  387    rdf_db:graph(Options, Graph),
  388    rdf_transaction((  rdf_process_ntriples(Stream, assert_tuples, Options),
  389                       rdf_set_graph(Graph, modified(false))
  390                    ),
  391                    parse(Graph)).
  392rdf_db:rdf_load_stream(nquads, Stream, _Module:Options) :-
  393    rdf_db:graph(Options, Graph),
  394    (   var(Graph)
  395    ->  Graph = user
  396    ;   true
  397    ),
  398    rdf_transaction((  rdf_process_ntriples(Stream, assert_tuples, Options),
  399                       rdf_set_graph(Graph, modified(false))
  400                    ),
  401                    parse(Graph)).
  402
  403assert_tuples([], _).
  404assert_tuples([H|T], Graph) :-
  405    assert_tuple(H, Graph),
  406    assert_tuples(T, Graph).
  407
  408assert_tuple(rdf(S,P,O), Graph) :-
  409    rdf_assert(S,P,O,Graph).
  410assert_tuple(rdf(S,P,O,Graph), _) :-
  411    rdf_assert(S,P,O,Graph).
  412
  413%!  rdf_db:rdf_file_type(+Extension, -Format)
  414%
  415%   Bind the ntriples reader to  files   with  the  extensions =nt=,
  416%   =ntriples= and =nquads=.
  417
  418rdf_db:rdf_file_type(nt,       ntriples).
  419rdf_db:rdf_file_type(ntriples, ntriples).
  420rdf_db:rdf_file_type(nq,       nquads).
  421rdf_db:rdf_file_type(nquads,   nquads).
  422:- endif.