1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker 4 E-mail: J.Wielemaker@vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2003-2023, University of Amsterdam 7 VU University Amsterdam 8 CWI, Amsterdam 9 SWI-Prolog Solutions b.v. 10 All rights reserved. 11 12 Redistribution and use in source and binary forms, with or without 13 modification, are permitted provided that the following conditions 14 are met: 15 16 1. Redistributions of source code must retain the above copyright 17 notice, this list of conditions and the following disclaimer. 18 19 2. Redistributions in binary form must reproduce the above copyright 20 notice, this list of conditions and the following disclaimer in 21 the documentation and/or other materials provided with the 22 distribution. 23 24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 25 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 26 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 27 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 28 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 29 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 30 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 31 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 32 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 34 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 POSSIBILITY OF SUCH DAMAGE. 
36*/ 37 38:- module(rdf_db, 39 [ rdf_version/1, % -Version 40 41 rdf/3, % ?Subject, ?Predicate, ?Object 42 rdf/4, % ?Subject, ?Predicate, ?Object, ?DB 43 rdf_has/3, % ?Subject, +Pred, ?Obj 44 rdf_has/4, % ?Subject, +Pred, ?Obj, -RealPred 45 rdf_reachable/3, % ?Subject, +Pred, ?Object 46 rdf_reachable/5, % ?Subject, +Pred, ?Object, +MaxD, ?D 47 rdf_resource/1, % ?Resource 48 rdf_subject/1, % ?Subject 49 50 rdf_member_property/2, % ?Property, ?Index 51 52 rdf_assert/3, % +Subject, +Predicate, +Object 53 rdf_assert/4, % +Subject, +Predicate, +Object, +DB 54 rdf_retractall/3, % ?Subject, ?Predicate, ?Object 55 rdf_retractall/4, % ?Subject, ?Predicate, ?Object, +DB 56 rdf_update/4, % +Subject, +Predicate, +Object, +Act 57 rdf_update/5, % +Subject, +Predicate, +Object, +Src, +Act 58 rdf_set_predicate/2, % +Predicate, +Property 59 rdf_predicate_property/2, % +Predicate, ?Property 60 rdf_current_predicate/1, % -Predicate 61 rdf_current_literal/1, % -Literal 62 rdf_transaction/1, % :Goal 63 rdf_transaction/2, % :Goal, +Id 64 rdf_transaction/3, % :Goal, +Id, +Options 65 rdf_active_transaction/1, % ?Id 66 67 rdf_monitor/2, % :Goal, +Options 68 69 rdf_save_db/1, % +File 70 rdf_save_db/2, % +File, +DB 71 rdf_load_db/1, % +File 72 rdf_reset_db/0, 73 74 rdf_node/1, % -Id 75 rdf_bnode/1, % -Id 76 rdf_is_bnode/1, % +Id 77 78 rdf_is_resource/1, % +Term 79 rdf_is_literal/1, % +Term 80 rdf_literal_value/2, % +Term, -Value 81 82 rdf_load/1, % +File 83 rdf_load/2, % +File, +Options 84 rdf_save/1, % +File 85 rdf_save/2, % +File, +Options 86 rdf_unload/1, % +File 87 rdf_unload_graph/1, % +Graph 88 89 rdf_md5/2, % +DB, -MD5 90 rdf_atom_md5/3, % +Text, +Times, -MD5 91 92 rdf_create_graph/1, % ?Graph 93 rdf_graph_property/2, % ?Graph, ?Property 94 rdf_set_graph/2, % +Graph, +Property 95 rdf_graph/1, % ?Graph 96 rdf_source/1, % ?File 97 rdf_source/2, % ?DB, ?SourceURL 98 rdf_make/0, % Reload modified databases 99 rdf_gc/0, % Garbage collection 100 101 rdf_source_location/2, % +Subject, 
-Source 102 rdf_statistics/1, % -Key 103 rdf_set/1, % +Term 104 rdf_generation/1, % -Generation 105 rdf_snapshot/1, % -Snapshot 106 rdf_delete_snapshot/1, % +Snapshot 107 rdf_current_snapshot/1, % +Snapshot 108 rdf_estimate_complexity/4, % +S,+P,+O,-Count 109 110 rdf_save_subject/3, % +Stream, +Subject, +DB 111 rdf_save_header/2, % +Out, +Options 112 rdf_save_footer/1, % +Out 113 114 rdf_equal/2, % ?Resource, ?Resource 115 lang_equal/2, % +Lang1, +Lang2 116 lang_matches/2, % +Lang, +Pattern 117 118 rdf_prefix/2, % :Alias, +URI 119 rdf_current_prefix/2, % :Alias, ?URI 120 rdf_register_prefix/2, % +Alias, +URI 121 rdf_register_prefix/3, % +Alias, +URI, +Options 122 rdf_unregister_prefix/1, % +Alias 123 rdf_current_ns/2, % :Alias, ?URI 124 rdf_register_ns/2, % +Alias, +URI 125 rdf_register_ns/3, % +Alias, +URI, +Options 126 rdf_global_id/2, % ?NS:Name, :Global 127 rdf_global_object/2, % +Object, :NSExpandedObject 128 rdf_global_term/2, % +Term, :WithExpandedNS 129 130 rdf_compare/3, % -Dif, +Object1, +Object2 131 rdf_match_label/3, % +How, +String, +Label 132 rdf_split_url/3, % ?Base, ?Local, ?URL 133 rdf_url_namespace/2, % +URL, ?Base 134 135 rdf_warm_indexes/0, 136 rdf_warm_indexes/1, % +Indexed 137 rdf_update_duplicates/0, 138 139 rdf_debug/1, % Set verbosity 140 141 rdf_new_literal_map/1, % -Handle 142 rdf_destroy_literal_map/1, % +Handle 143 rdf_reset_literal_map/1, % +Handle 144 rdf_insert_literal_map/3, % +Handle, +Key, +Literal 145 rdf_insert_literal_map/4, % +Handle, +Key, +Literal, -NewKeys 146 rdf_delete_literal_map/3, % +Handle, +Key, +Literal 147 rdf_delete_literal_map/2, % +Handle, +Key 148 rdf_find_literal_map/3, % +Handle, +KeyList, -Literals 149 rdf_keys_in_literal_map/3, % +Handle, +Spec, -Keys 150 rdf_statistics_literal_map/2, % +Handle, +Name(-Arg...) 151 152 rdf_graph_prefixes/2, % ?Graph, -Prefixes 153 rdf_graph_prefixes/3, % ?Graph, -Prefixes, :Filter 154 155 (rdf_meta)/1, % +Heads 156 op(1150, fx, (rdf_meta)) 157 ]). 
:- use_module(library(semweb/rdf_prefixes),
              [ (rdf_meta)/1,
                register_file_prefixes/1,
                rdf_global_id/2,
                rdf_register_ns/2,
                                        % re-exported predicates
                rdf_global_object/2,
                rdf_current_ns/2,
                rdf_prefix/2,
                rdf_global_term/2,
                rdf_register_ns/3,
                rdf_register_prefix/3,
                rdf_register_prefix/2,
                rdf_current_prefix/2,
                rdf_unregister_prefix/1
              ]).

:- autoload(library(apply),[maplist/2,maplist/3]).
:- use_module(library(debug),[debug/3,assertion/1]).
:- autoload(library(error),[must_be/2,existence_error/2]).
:- autoload(library(gensym),[gensym/2,reset_gensym/1]).
:- autoload(library(lists),
            [member/2,flatten/2,list_to_set/2,append/3,select/3]).
:- autoload(library(memfile),
            [atom_to_memory_file/2,open_memory_file/4]).
:- autoload(library(option),
            [option/2,option/3,merge_options/3,meta_options/3]).
:- autoload(library(rdf),[process_rdf/3]).
:- autoload(library(sgml),
            [ load_structure/3,
              xml_quote_attribute/3,
              xml_name/1,
              xml_quote_cdata/3,
              xml_is_dom/1,
              iri_xml_namespace/3,
              iri_xml_namespace/2
            ]).
:- autoload(library(sgml_write),[xml_write/3]).
:- autoload(library(uri),
            [ uri_file_name/2,
              uri_is_global/1,
              uri_normalized/2,
              uri_components/2,
              uri_data/3,
              uri_data/4
            ]).
:- autoload(library(xsdp_types),[xsdp_numeric_uri/2]).
:- autoload(library(semweb/rdf_cache),[rdf_cache_file/3]).

% library(thread) is optional: only pull in concurrent/3 when the
% library can be found.
:- if(exists_source(library(thread))).
:- autoload(library(thread), [concurrent/3]).
:- endif.

:- use_foreign_library(foreign(rdf_db)).
:- public rdf_print_predicate_cloud/2.  % print matrix of reachable predicates

% Meta-argument declarations.  Every argument needs a specifier; an
% empty slot such as `rdf_transaction( )` is a syntax error.
%
%   - The goal of rdf_transaction/1,2,3 is documented as `:Goal` in the
%     export list and is called as-is, hence `0`.
%   - rdf_save/2 and rdf_load/2 take `+File, +Options`; the option list
%     is the module-sensitive argument (`:`), which matches the
%     `callable` option declared for rdf_save/2 and the import of
%     meta_options/3.
%   - NOTE(review): rdf_monitor/2 is declared with `1`, i.e. Goal is
%     called with one extra argument (the event).  The call site is not
%     in the visible part of this file; confirm.
:- meta_predicate
    rdf_transaction(0),
    rdf_transaction(0, +),
    rdf_transaction(0, +, +),
    rdf_monitor(1, +),
    rdf_save(+, :),
    rdf_load(+, :).
221 222:- predicate_options(rdf_graph_prefixes/3, 3, 223 [ expand(callable+4), 224 filter(callable+3), 225 get_prefix(callable+2), 226 min_count(nonneg) 227 ]). 228:- predicate_options(rdf_load/2, 2, 229 [ base_uri(atom), 230 blank_nodes(oneof([share,noshare])), 231 cache(boolean), 232 concurrent(positive_integer), 233 db(atom), 234 format(oneof([xml,triples,turtle,trig,nquads,ntriples])), 235 graph(atom), 236 multifile(boolean), 237 if(oneof([true,changed,not_loaded])), 238 modified(-float), 239 prefixes(-list), 240 silent(boolean), 241 register_namespaces(boolean) 242 ]). 243:- predicate_options(rdf_save/2, 2, 244 [ graph(atom), 245 db(atom), 246 anon(boolean), 247 base_uri(atom), 248 write_xml_base(boolean), 249 convert_typed_literal(callable), 250 encoding(encoding), 251 document_language(atom), 252 namespaces(list(atom)), 253 xml_attributes(boolean), 254 inline(boolean) 255 ]). 256:- predicate_options(rdf_save_header/2, 2, 257 [ graph(atom), 258 db(atom), 259 namespaces(list(atom)) 260 ]). 261:- predicate_options(rdf_save_subject/3, 3, 262 [ graph(atom), 263 base_uri(atom), 264 convert_typed_literal(callable), 265 document_language(atom) 266 ]). 267:- predicate_options(rdf_transaction/3, 3, 268 [ snapshot(any) 269 ]). 270 271:- discontiguous 272 term_expansion/2. 273 274/** <module> Core RDF database 275 276The file library(semweb/rdf_db) provides the core of the SWI-Prolog RDF 277store. 278 279@deprecated New applications should use library(semweb/rdf11), which 280 provides a much more intuitive API to the RDF store, notably 281 for handling literals. The library(semweb/rdf11) runs 282 currently on top of this library and both can run side-by-side 283 in the same application. Terms retrieved from the database 284 however have a different shape and can not be exchanged without 285 precautions. 286*/ 287 288 /******************************* 289 * PREFIXES * 290 *******************************/ 291 292% the ns/2 predicate is historically defined in this module. 
% We'll keep that for compatibility reasons.

:- multifile ns/2.
:- dynamic   ns/2.                      % ID, URL

:- multifile
    rdf_prefixes:rdf_empty_prefix_cache/2.

% Hook into library(semweb/rdf_prefixes).  The head must be qualified
% with the `rdf_prefixes` module: the hook is declared multifile in
% that module just above, so an unqualified (or misspelled) head would
% merely define an unrelated local predicate that is never called.
% Both hook arguments are ignored; the clause just calls
% rdf_empty_prefix_cache/0, which is not defined in the visible part
% of this file (presumably provided by the foreign library).

rdf_prefixes:rdf_empty_prefix_cache(_Prefix, _IRI) :-
    rdf_empty_prefix_cache.

:- rdf_meta
    rdf(r,r,o),
    rdf_has(r,r,o,r),
    rdf_has(r,r,o),
    rdf_assert(r,r,o),
    rdf_retractall(r,r,o),
    rdf(r,r,o,?),
    rdf_assert(r,r,o,+),
    rdf_retractall(r,r,o,?),
    rdf_reachable(r,r,o),
    rdf_reachable(r,r,o,+,?),
    rdf_update(r,r,o,t),
    rdf_update(r,r,o,+,t),
    rdf_equal(o,o),
    rdf_source_location(r,-),
    rdf_resource(r),
    rdf_subject(r),
    rdf_create_graph(r),
    rdf_graph(r),
    rdf_graph_property(r,?),
    rdf_set_graph(r,+),
    rdf_unload_graph(r),
    rdf_set_predicate(r, t),
    rdf_predicate_property(r, -),
    rdf_estimate_complexity(r,r,r,-),
    rdf_print_predicate_cloud(r,+).

%!  rdf_equal(?Resource1, ?Resource2)
%
%   Simple equality test to exploit goal-expansion.  Both arguments
%   are declared as `o` in the rdf_meta declaration above, so prefix
%   notation in either argument is expanded before the two are
%   unified.

rdf_equal(Resource, Resource).

%!  lang_equal(+Lang1, +Lang2) is semidet.
%
%   True if two RFC language specifiers denote the same language.
%   The comparison is case-insensitive: identical terms succeed
%   immediately, otherwise both specifiers are lower-cased using
%   downcase_atom/2 and compared.
%
%   @see lang_matches/2.

lang_equal(Lang, Lang) :- !.
lang_equal(Lang1, Lang2) :-
    downcase_atom(Lang1, LangCannon),
    downcase_atom(Lang2, LangCannon).

%!  lang_matches(+Lang, +Pattern) is semidet.
%
%   True if Lang matches Pattern. This implements XML language
%   matching conform RFC 4647. Both Lang and Pattern are
%   dash-separated strings of identifiers or (for Pattern) the
%   wildcard *. Identifiers are matched case-insensitive and a *
%   matches any number of identifiers. A short pattern is the same
%   as *.


                 /*******************************
                 *     BASIC TRIPLE QUERIES     *
                 *******************************/

%!  rdf(?Subject, ?Predicate, ?Object) is nondet.
%
%   Elementary query for triples.
Subject and Predicate are atoms 365% representing the fully qualified URL of the resource. Object is 366% either an atom representing a resource or literal(Value) if the 367% object is a literal value. If a value of the form 368% NameSpaceID:LocalName is provided it is expanded to a ground 369% atom using expand_goal/2. This implies you can use this 370% construct in compiled code without paying a performance penalty. 371% Literal values take one of the following forms: 372% 373% * Atom 374% If the value is a simple atom it is the textual representation 375% of a string literal without explicit type or language 376% qualifier. 377% 378% * lang(LangID, Atom) 379% Atom represents the text of a string literal qualified with 380% the given language. 381% 382% * type(TypeID, Value) 383% Used for attributes qualified using the =|rdf:datatype|= 384% TypeID. The Value is either the textual representation or a 385% natural Prolog representation. See the option 386% convert_typed_literal(:Convertor) of the parser. The storage 387% layer provides efficient handling of atoms, integers (64-bit) 388% and floats (native C-doubles). All other data is represented 389% as a Prolog record. 390% 391% For literal querying purposes, Object can be of the form 392% literal(+Query, -Value), where Query is one of the terms below. 393% If the Query takes a literal argument and the value has a 394% numeric type numerical comparison is performed. 395% 396% * plain(+Text) 397% Perform exact match and demand the language or type qualifiers 398% to match. This query is fully indexed. 399% 400% * icase(+Text) 401% Perform a full but case-insensitive match. This query is 402% fully indexed. 403% 404% * exact(+Text) 405% Same as icase(Text). Backward compatibility. 406% 407% * substring(+Text) 408% Match any literal that contains Text as a case-insensitive 409% substring. The query is not indexed on Object. 
410% 411% * word(+Text) 412% Match any literal that contains Text delimited by a non 413% alpha-numeric character, the start or end of the string. The 414% query is not indexed on Object. 415% 416% * prefix(+Text) 417% Match any literal that starts with Text. This call is intended 418% for completion. The query is indexed using the skip list of 419% literals. 420% 421% * ge(+Literal) 422% Match any literal that is equal or larger than Literal in the 423% ordered set of literals. 424% 425% * gt(+Literal) 426% Match any literal that is larger than Literal in the ordered set 427% of literals. 428% 429% * eq(+Literal) 430% Match any literal that is equal to Literal in the ordered set 431% of literals. 432% 433% * le(+Literal) 434% Match any literal that is equal or smaller than Literal in the 435% ordered set of literals. 436% 437% * lt(+Literal) 438% Match any literal that is smaller than Literal in the ordered set 439% of literals. 440% 441% * between(+Literal1, +Literal2) 442% Match any literal that is between Literal1 and Literal2 in the 443% ordered set of literals. This may include both Literal1 and 444% Literal2. 445% 446% * like(+Pattern) 447% Match any literal that matches Pattern case insensitively, 448% where the `*' character in Pattern matches zero or more 449% characters. 450% 451% Backtracking never returns duplicate triples. Duplicates can be 452% retrieved using rdf/4. The predicate rdf/3 raises a type-error 453% if called with improper arguments. If rdf/3 is called with a 454% term literal(_) as Subject or Predicate object it fails 455% silently. This allows for graph matching goals like 456% rdf(S,P,O),rdf(O,P2,O2) to proceed without errors. 457 458%! rdf(?Subject, ?Predicate, ?Object, ?Source) is nondet. 459% 460% As rdf/3 but in addition query the graph to which the triple 461% belongs. Unlike rdf/3, this predicate does not remove duplicates 462% from the result set. 463% 464% @param Source is a term Graph:Line. 
%          If Source is instantiated, passing an atom is the same as
%          passing Atom:_.


%!  rdf_has(?Subject, +Predicate, ?Object) is nondet.
%
%   Succeeds if the triple rdf(Subject, Predicate, Object) is true
%   exploiting the rdfs:subPropertyOf predicate as well as inverse
%   predicates declared using rdf_set_predicate/2 with the
%   =inverse_of= property.

%!  rdf_has(?Subject, +Predicate, ?Object, -RealPredicate) is nondet.
%
%   Same as rdf_has/3, but RealPredicate is unified to the actual
%   predicate that makes this relation true. RealPredicate must be
%   Predicate or an rdfs:subPropertyOf Predicate. If an inverse
%   match is found, RealPredicate is the term inverse_of(Pred).

%!  rdf_reachable(?Subject, +Predicate, ?Object) is nondet.
%
%   Is true if Object can be reached from Subject following the
%   transitive predicate Predicate or a sub-property thereof, while
%   respecting the symmetric(true) or inverse_of(P2) properties.
%
%   If used with either Subject or Object unbound, it first returns
%   the origin, followed by the reachable nodes in breadth-first
%   search-order. The implementation internally looks one solution
%   ahead and succeeds deterministically on the last solution. This
%   predicate never generates the same node twice and is robust
%   against cycles in the transitive relation.
%
%   With all arguments instantiated, it succeeds deterministically
%   if a path can be found from Subject to Object. Searching starts
%   at Subject, assuming the branching factor is normally lower. A
%   call with both Subject and Object unbound raises an
%   instantiation error. The following example generates all
%   subclasses of rdfs:Resource:
%
%   ==
%   ?- rdf_reachable(X, rdfs:subClassOf, rdfs:'Resource').
%   X = 'http://www.w3.org/2000/01/rdf-schema#Resource' ;
%   X = 'http://www.w3.org/2000/01/rdf-schema#Class' ;
%   X = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Property' ;
%   ...
%   ==


%!  rdf_reachable(?Subject, +Predicate, ?Object, +MaxD, -D) is nondet.
%
%   Same as rdf_reachable/3, but in addition, MaxD limits the number
%   of edges expanded and D is unified with the `distance' between
%   Subject and Object. Distance 0 means Subject and Object are the
%   same resource. MaxD can be the constant =infinite= to impose no
%   distance-limit.

%!  rdf_subject(?Resource) is nondet.
%
%   True if Resource appears as the subject of at least one triple.
%   Candidates are taken from rdf_resource/1 and kept only if rdf/3
%   finds a triple with that subject, so the result respects the
%   visibility rules implied by the logical update view.
%
%   @see rdf_resource/1.

rdf_subject(Resource) :-
    rdf_resource(Resource),
    once(rdf(Resource, _, _)).          % one witness triple suffices

%!  rdf_resource(?Resource) is nondet.
%
%   True when Resource is a resource used as a subject or object in
%   a triple.
%
%   This predicate is primarily intended as a way to process all
%   resources without processing resources twice. The user must be
%   aware that some of the returned resources may not appear in any
%   _visible_ triple.


                 /*******************************
                 *     TRIPLE MODIFICATIONS     *
                 *******************************/

%!  rdf_assert(+Subject, +Predicate, +Object) is det.
%
%   Assert a new triple into the database. This is equivalent to
%   rdf_assert/4 using Graph =user=. Subject and Predicate are
%   resources. Object is either a resource or a term literal(Value).
%   See rdf/3 for an explanation of Value for typed and language
%   qualified literals. All arguments are subject to name-space
%   expansion. Complete duplicates (including the same graph and
%   `line' and with a compatible `lifespan') are not added to the
%   database.

%!  rdf_assert(+Subject, +Predicate, +Object, +Graph) is det.
%
%   As rdf_assert/3, adding the predicate to the indicated named
%   graph.
%
%   @param Graph is either the name of a graph (an atom) or a term
%   Graph:Line, where Line is an integer that denotes a line number.

%!  rdf_retractall(?Subject, ?Predicate, ?Object) is det.
%
%   Remove all matching triples from the database. As
%   rdf_retractall/4 using an unbound graph.

%!  rdf_retractall(?Subject, ?Predicate, ?Object, ?Graph) is det.
%
%   As rdf_retractall/3, also matching Graph. This is particularly
%   useful to remove all triples coming from a loaded file. See also
%   rdf_unload/1.

%!  rdf_update(+Subject, +Predicate, +Object, ++Action) is det.
%!  rdf_update(+Subject, +Predicate, +Object, +Graph, ++Action) is det.
%
%   Replaces one of the three (four) fields on the matching triples
%   depending on Action:
%
%     * subject(Resource)
%     Changes the first field of the triple.
%     * predicate(Resource)
%     Changes the second field of the triple.
%     * object(Object)
%     Changes the last field of the triple to the given resource or
%     literal(Value).
%     * graph(Graph)
%     Moves the triple from its current named graph to Graph.
%     This only works with rdf_update/5 and throws an error when
%     used with rdf_update/4.


                 /*******************************
                 *          COLLECTIONS         *
                 *******************************/

%!  rdf_member_property(?Prop, ?Index)
%
%   Deal with the rdf:_1, ... properties.  Prop is the resource
%   formed by the `rdf` namespace, an underscore and the decimal
%   notation of the integer Index.
%
%     - If Index is an integer, Prop is constructed from it.
%     - Otherwise Prop must be instantiated and Index is extracted
%       from it.  Only the canonical spelling produced by the first
%       mode is accepted, so suffixes that atom_number/2 can read but
%       that are not plain integers (e.g. `_1.5`, `_0x10`, `_01`) no
%       longer yield an Index.

% Compile-time constant: the member_prefix(x) fact below is rewritten
% while loading into member_prefix(Prefix), where Prefix is the URI
% registered for the `rdf` prefix in ns/2 followed by '_'.

term_expansion(member_prefix(x),
               member_prefix(Prefix)) :-
    rdf_db:ns(rdf, NS),
    atom_concat(NS, '_', Prefix).
member_prefix(x).

rdf_member_property(P, N) :-
    integer(N),
    !,
    member_prefix(Prefix),
    atom_concat(Prefix, N, P).
rdf_member_property(P, N) :-
    member_prefix(Prefix),
    atom_concat(Prefix, Sub, P),
    atom_number(Sub, N),
    integer(N),
    atom_concat(Prefix, N, P).          % round-trip: canonical spelling only


                 /*******************************
                 *      ANONYMOUS SUBJECTS      *
                 *******************************/

%!  rdf_node(-Id)
%
%   Generate a unique blank node identifier for a subject.  This
%   simply calls rdf_bnode/1.
%
%   @deprecated New code should use rdf_bnode/1.

rdf_node(Resource) :-
    rdf_bnode(Resource).

%!  rdf_bnode(-Id)
%
%   Generate a unique anonymous identifier for a subject.  New
%   identifiers are generated using gensym/2 from the base
%   =|_:genid|= until one is found that is not used as subject,
%   object or predicate of any triple.

rdf_bnode(Value) :-
    repeat,
    gensym('_:genid', Value),
    \+ rdf(Value, _, _),
    \+ rdf(_, _, Value),
    \+ rdf(_, Value, _),
    !.



                 /*******************************
                 *             TYPES            *
                 *******************************/

%!  rdf_is_bnode(+Id)
%
%   Tests if a resource is a blank node (i.e. is an anonymous
%   resource). A blank node is represented as an atom that starts
%   with =|_:|=. For backward compatibility reason, =|__|= is also
%   considered to be a blank node.
%
%   @see rdf_bnode/1.

%!  rdf_is_resource(@Term) is semidet.
%
%   True if Term is an RDF resource, i.e., an atom. Note that this
%   is merely a type-test; it does not mean this resource is
%   involved in any triple. Blank nodes are also considered
%   resources.
%
%   @see rdf_is_bnode/1

rdf_is_resource(Term) :-
    atom(Term).

%!  rdf_is_literal(@Term) is semidet.
%
%   True if Term is an RDF literal object. Currently only checks for
%   groundness and the literal functor.

rdf_is_literal(literal(Value)) :-
    ground(Value).

                 /*******************************
                 *             LITERALS         *
                 *******************************/

%!  rdf_current_literal(-Literal) is nondet.
%
%   True when Literal is a currently known literal. Enumerates each
%   unique literal exactly once. Note that it is possible that the
%   literal only appears in already deleted triples. Deleted triples
%   may be locked due to active queries, transactions or snapshots
%   or may not yet be reclaimed by the garbage collector.


%!  rdf_literal_value(+Literal, -Value) is semidet.
%
%   True when Value is the appropriate Prolog representation of
%   Literal in the RDF _|value space|_. Current mapping:
%
%     | Plain literals              | Atom                    |
%     | Language tagged literal     | Atom holding plain text |
%     | xsd:string                  | Atom                    |
%     | rdf:XMLLiteral              | XML DOM Tree            |
%     | Numeric XSD type            | Number                  |
%
%   @tbd    Well, this is the long-term idea.
%   @tbd    Add mode (-,+)

:- rdf_meta
    rdf_literal_value(o, -),
    typed_value(r, +, -),
    numeric_value(r, +, -).

rdf_literal_value(literal(Plain), Value) :-
    atom(Plain),
    !,
    Value = Plain.
rdf_literal_value(literal(lang(_Lang, Text)), Text).
rdf_literal_value(literal(type(Type, Lexical)), Value) :-
    typed_value(Type, Lexical, Value).

%   typed_value(+Type, +Lexical, -Value)
%
%   Map a typed literal to its value.  Types recognised by
%   xsdp_numeric_uri/2 are handled by numeric_value/3 (committed
%   choice), xsd:string is returned unchanged and rdf:XMLLiteral is
%   handled by xml_literal_dom/2.

typed_value(Type, Lexical, Value) :-
    xsdp_numeric_uri(Type, Primitive),
    !,
    numeric_value(Primitive, Lexical, Value).
typed_value(xsd:string, Text, Text).
typed_value(rdf:'XMLLiteral', Content, DOM) :-
    xml_literal_dom(Content, DOM).

%   xml_literal_dom(+Content, -DOM)
%
%   If Content is an atom it is parsed as XML; anything else is
%   returned as it is.

xml_literal_dom(Content, DOM) :-
    atom(Content),
    !,
    setup_call_cleanup(
        open_atom_stream(Content, In),
        load_structure(stream(In), DOM, [dialect(xml)]),
        close(In)).
xml_literal_dom(DOM, DOM).

%   open_atom_stream(+Atom, -In)
%
%   Open an input stream on the text of Atom using a memory file that
%   is freed when the stream is closed.

open_atom_stream(Atom, In) :-
    atom_to_memory_file(Atom, MemFile),
    open_memory_file(MemFile, read, In, [free_on_close(true)]).

%   numeric_value(+PrimitiveType, +Lexical, -Number)
%
%   Convert Lexical using atom_number/2.  For xsd:integer the result
%   must be an integer, xsd:float and xsd:double are converted to a
%   float and xsd:decimal accepts whatever number was read.

numeric_value(xsd:integer, Lexical, Int) :-
    atom_number(Lexical, Int),
    integer(Int).
numeric_value(xsd:float, Lexical, Float) :-
    float_value(Lexical, Float).
numeric_value(xsd:double, Lexical, Float) :-
    float_value(Lexical, Float).
numeric_value(xsd:decimal, Lexical, Number) :-
    atom_number(Lexical, Number).

float_value(Lexical, Float) :-
    atom_number(Lexical, Number),
    Float is float(Number).


                 /*******************************
                 *            SOURCE            *
                 *******************************/

%!  rdf_source_location(+Subject, -Location) is nondet.
%
%   True when triples for Subject are loaded from Location.
%
%   @param Location is a term File:Line.

% All sources are collected and sorted first, such that each distinct
% location is reported exactly once.

rdf_source_location(Subject, Source) :-
    findall(Source, rdf(Subject, _, _, Source), Found),
    sort(Found, Distinct),
    member(Source, Distinct).


                 /*******************************
                 *       GARBAGE COLLECT        *
                 *******************************/

%!  rdf_create_gc_thread
%
%   Create the garbage collection thread.  The thread runs
%   rdf_gc_loop/0 and gets the alias =|__rdf_GC|=.

:- public
    rdf_create_gc_thread/0.

rdf_create_gc_thread :-
    thread_create(rdf_gc_loop, _, [alias('__rdf_GC')]).

%!  rdf_gc_loop
%
%   Take care of running the RDF garbage collection.  This predicate
%   is called from a thread started by creating the RDF DB.  If the
%   loop raises an exception it is handled by recover_gc/1: on
%   =|'$aborted'|= the thread detaches itself and stops, any other
%   error is printed and the loop is restarted.

rdf_gc_loop :-
    catch(rdf_gc_loop(0), Ball, recover_gc(Ball)).

recover_gc(Ball) :-
    (   Ball == '$aborted'
    ->  thread_self(Self),
        thread_detach(Self)
    ;   print_message(error, Ball),
        rdf_gc_loop
    ).

%   rdf_gc_loop(+CPU)
%
%   Failure driven loop that never terminates normally.  CPU is
%   passed to consider_gc/1 in every cycle.

rdf_gc_loop(CPU) :-
    repeat,
    gc_cycle(CPU),
    fail.

%   gc_cycle(+CPU)
%
%   Run one cycle: if consider_gc/1 succeeds run the collector and
%   sleep for as long as the collection took, otherwise sleep for 0.1
%   seconds.

gc_cycle(CPU) :-
    (   consider_gc(CPU)
    ->  rdf_gc(Used),
        sleep(Used)
    ;   sleep(0.1)
    ).

%!  rdf_gc(-CPU) is det.
%
%   Run RDF GC one time. CPU is the amount of CPU time spent. We
%   update this in Prolog because portable access to thread specific
%   CPU is really hard in C.  If the collector (rdf_gc_/0) fails,
%   CPU is 0.0 and no time is recorded.

rdf_gc(CPU) :-
    statistics(cputime, Before),
    (   rdf_gc_
    ->  statistics(cputime, After),
        CPU is After-Before,
        rdf_add_gc_time(CPU)
    ;   CPU = 0.0
    ).

%!  rdf_gc is det.
%
%   Run the RDF-DB garbage collector until no garbage is left and all
%   tables are fully optimized. Under normal operation a separate thread
%   with identifier =|__rdf_GC|= performs garbage collection as long as
%   it is considered `useful'.
%
%   Using rdf_gc/0 should only be needed to ensure a fully clean
%   database for analysis purposes such as leak detection.

rdf_gc :-
    (   has_garbage
    ->  rdf_gc(_),
        rdf_gc
    ;   true
    ).

%!  has_garbage is semidet.
%
%   True if there is something to gain using GC.

has_garbage :-
    once(( rdf_gc_info_(Info),
           has_garbage(Info)
         )).

%   has_garbage(+Info)
%
%   True if the 2nd (garbage), 3rd (reindexed) or 4th (optimizable)
%   argument of Info is greater than zero.  The fields are tried in
%   this order.

has_garbage(Info) :-
    between(2, 4, Field),
    arg(Field, Info, Count),
    Count > 0.

%!  consider_gc(+CPU) is semidet.
%
%   True if it is worthwhile to run the garbage collector.  This is
%   the case if five times the sum of the garbage and reindexed
%   triples exceeds the total number of triples or if there are
%   more than 4 optimizable tables.  If the GC information cannot be
%   obtained in the expected format an error is printed and, after
%   sleeping for 10 seconds, the predicate succeeds.
%
%   @param CPU is the amount of CPU time spent in the most recent
%   GC.  It is currently ignored.

consider_gc(_CPU) :-
    (   rdf_gc_info_(gc_info(Triples,       % Total #triples in DB
                             Garbage,       % Garbage triples in DB
                             Reindexed,     % Reindexed & not reclaimed
                             Optimizable,   % Non-optimized tables
                             _KeepGen,      % Oldest active generation
                             _LastGCGen,    % Oldest active gen at last GC
                             _ReindexGen,
                             _LastGCReindexGen))
    ->  gc_worthwhile(Triples, Garbage, Reindexed, Optimizable)
    ;   print_message(error, rdf(invalid_gc_info)),
        sleep(10)
    ),
    !.

gc_worthwhile(Triples, Garbage, Reindexed, _Optimizable) :-
    (Garbage+Reindexed) * 5 > Triples.
gc_worthwhile(_Triples, _Garbage, _Reindexed, Optimizable) :-
    Optimizable > 4.


                 /*******************************
                 *           STATISTICS         *
                 *******************************/

%!  rdf_statistics(?KeyValue) is nondet.
%
%   Obtain statistics on the RDF database. Defined statistics are:
%
%     * graphs(-Count)
%     Number of named graphs.
%
%     * triples(-Count)
%     Total number of triples in the database. This is the number
%     of asserted triples minus the number of retracted ones. The
%     number of _visible_ triples in a particular context may be
%     different due to visibility rules defined by the logical
%     update view and transaction isolation.
%
%     * resources(-Count)
%     Number of resources that appear as subject or object in a
%     triple. See rdf_resource/1.
%
%     * properties(-Count)
%     Number of current predicates. See rdf_current_predicate/1.
%
%     * literals(-Count)
%     Number of current literals. See rdf_current_literal/1.
%
%     * gc(GCCount, ReclaimedTriples, ReindexedTriples, Time)
%     Information about the garbage collector.
%
%     * searched_nodes(-Count)
%     Number of nodes expanded by rdf_reachable/3 and
%     rdf_reachable/5.
%
%     * lookup(rdf(S,P,O,G), Count)
%     Number of queries that have been performed for this particular
%     instantiation pattern. Each of S,P,O,G is either + or -.
%     Fails in case the number of performed queries is zero.
%
%     * hash_quality(rdf(S,P,O,G), Buckets, Quality, PendingResize)
%     Statistics on the index for this pattern. Indices are created
%     lazily on the first relevant query.
%
%     * triples_by_graph(Graph, Count)
%     This statistic is produced for each named graph. See
%     =triples= for the interpretation of this value.
%
%     * duplicates(-Count)
%     * lingering(-Count)
%     These keys are also accepted and passed unchanged to the
%     underlying store.  NOTE(review): presumably the number of
%     duplicate triples and the number of erased triples that are
%     not yet reclaimed; confirm against the C implementation.

rdf_statistics(KeyValue) :-
    simple_statistic(KeyValue, Native),
    rdf_statistics_(Native).
rdf_statistics(lookup(Pattern, Count)) :-
    functor(Counts, indexed, 16),
    rdf_statistics_(Counts),
    index(Pattern, Offset),
    succ(Offset, ArgNo),                % index/2 is 0-based, arg/3 1-based
    arg(ArgNo, Counts, Count),
    Count \== 0.
rdf_statistics(hash_quality(Pattern, Size, Quality, Optimize)) :-
    rdf_statistics_(hash_quality(Hashes)),
    member(hash(Offset, Size, Quality, Optimize), Hashes),
    index(Pattern, Offset).
rdf_statistics(triples_by_graph(Graph, Count)) :-
    rdf_graph_(Graph, Count).

%   simple_statistic(?Public, ?Native)
%
%   Statistics that are answered directly by rdf_statistics_/1.
%   Public is the term used by rdf_statistics/1, Native the term
%   passed to rdf_statistics_/1.  They only differ for `properties`,
%   which the store calls `predicates`.

simple_statistic(graphs(Count),         graphs(Count)).
simple_statistic(triples(Count),        triples(Count)).
simple_statistic(duplicates(Count),     duplicates(Count)).
simple_statistic(lingering(Count),      lingering(Count)).
simple_statistic(resources(Count),      resources(Count)).
simple_statistic(properties(Count),     predicates(Count)).
simple_statistic(literals(Count),       literals(Count)).
simple_statistic(gc(Count, Reclaimed, Reindexed, Time),
                 gc(Count, Reclaimed, Reindexed, Time)).
simple_statistic(searched_nodes(Count), searched_nodes(Count)).

%   index(?Pattern, ?Offset)
%
%   Relate an instantiation pattern rdf(S,P,O,G) to its 0-based
%   offset.  A `+` for S, P, O and G contributes 1, 2, 4 and 8
%   respectively.

index(rdf(-,-,-,-),  0).
index(rdf(+,-,-,-),  1).
index(rdf(-,+,-,-),  2).
index(rdf(+,+,-,-),  3).
index(rdf(-,-,+,-),  4).
index(rdf(+,-,+,-),  5).
index(rdf(-,+,+,-),  6).
index(rdf(+,+,+,-),  7).

index(rdf(-,-,-,+),  8).
index(rdf(+,-,-,+),  9).
index(rdf(-,+,-,+), 10).
index(rdf(+,+,-,+), 11).
index(rdf(-,-,+,+), 12).
index(rdf(+,-,+,+), 13).
index(rdf(-,+,+,+), 14).
index(rdf(+,+,+,+), 15).


                 /*******************************
                 *          PREDICATES          *
                 *******************************/

%!  rdf_current_predicate(?Predicate) is nondet.
%
%   True when Predicate is a currently known predicate. Predicates
%   are created if a triple is created that uses this predicate or
%   a property of the predicate is set using rdf_set_predicate/2.
%   The predicate may (no longer) have triples associated with it.
%
%   Note that resources that have =|rdf:type|= =|rdf:Property|= are
%   not automatically included in the result-set of this predicate,
%   while _all_ resources that appear as the second argument of a
%   triple _are_ included.
%
%   @see rdf_predicate_property/2.

% rdf_current_predicate/2 is not in the export list of this module.
% It is true when P is a current predicate for which rdf/4 finds at
% least one triple matching DB.

rdf_current_predicate(P, DB) :-
    rdf_current_predicate(P),
    once(rdf(_, P, _, DB)).

%!  rdf_predicate_property(?Predicate, ?Property)
%
%   Query properties of a defined predicate. Currently defined
%   properties are given below.
%
%     * symmetric(Bool)
%     True if the predicate is defined to be symmetric. I.e., {A} P
%     {B} implies {B} P {A}. Setting symmetric is equivalent to
%     inverse_of(Self).
%
%     * inverse_of(Inverse)
%     True if this predicate is the inverse of Inverse. This
%     property is used by rdf_has/3, rdf_has/4, rdf_reachable/3 and
%     rdf_reachable/5.
%
%     * transitive(Bool)
%     True if this predicate is transitive. This predicate is
%     currently not used. It might be used to make rdf_has/3 imply
%     rdf_reachable/3 for transitive predicates.
%
%     * triples(Triples)
%     Unify Triples with the number of existing triples using this
%     predicate as second argument.  Reporting the number of triples
%     is intended to support query optimization.
%
%     * rdf_subject_branch_factor(-Float)
%     Unify Float with the average number of triples associated with
%     each unique value for the subject-side of this relation.  If
%     there are no triples the value 0.0 is returned.  This value is
%     cached with the predicate and recomputed only after
%     substantial changes to the triple set associated to this
%     relation.  This property is intended for path optimisation
%     when solving conjunctions of rdf/3 goals.
%
%     * rdf_object_branch_factor(-Float)
%     Unify Float with the average number of triples associated with
%     each unique value for the object-side of this relation.  In
%     addition to the comments with the =rdf_subject_branch_factor=
%     property, uniqueness of the object value is computed from the
%     hash key rather than the actual values.
%
%     * rdfs_subject_branch_factor(-Float)
%     Same as =rdf_subject_branch_factor=, but also considering
%     triples of `subPropertyOf' this relation.  See also rdf_has/3.
%
%     * rdfs_object_branch_factor(-Float)
%     Same as =rdf_object_branch_factor=, but also considering
%     triples of `subPropertyOf' this relation.  See also rdf_has/3.
%
%   @see rdf_set_predicate/2.

% With an unbound predicate, enumerate the known predicates first and
% query the property for each of them.
rdf_predicate_property(P, Prop) :-
    var(P),
    !,
    rdf_current_predicate(P),
    rdf_predicate_property_(P, Prop).
rdf_predicate_property(P, Prop) :-
    rdf_predicate_property_(P, Prop).

%!  rdf_set_predicate(+Predicate, +Property) is det.
%
%   Define a property of the predicate.  This predicate currently
%   supports the following properties:
%
%     - symmetric(+Boolean)
%       Set/unset the predicate as being symmetric.
%       Using symmetric(true) is the same as inverse_of(Predicate),
%       i.e., creating a predicate that is the inverse of
%       itself.
%     - transitive(+Boolean)
%       Sets the transitive property.
%     - inverse_of(+Predicate2)
%       Define Predicate as the inverse of Predicate2.  An inverse
%       relation is deleted using inverse_of([]).
%
%   The `transitive` property is currently not used.  The `symmetric`
%   and `inverse_of` properties are considered by rdf_has/3,4 and
%   rdf_reachable/3.
%
%   @tbd Maintain these properties based on OWL triples.


                 /*******************************
                 *            SNAPSHOTS         *
                 *******************************/

%!  rdf_snapshot(-Snapshot) is det.
%
%   Take a snapshot of the current state of the RDF store.  Later,
%   goals may be executed in the context of the database at this
%   moment using rdf_transaction/3 with the =snapshot= option.  A
%   snapshot created outside a transaction exists until it is
%   deleted.  Snapshots taken inside a transaction can only be used
%   inside this transaction.

%!  rdf_delete_snapshot(+Snapshot) is det.
%
%   Delete a snapshot as obtained from rdf_snapshot/1.  After this
%   call, resources used for maintaining the snapshot become subject
%   to garbage collection.

%!  rdf_current_snapshot(?Term) is nondet.
%
%   True when Term is a currently known snapshot.
%
%   @bug    Enumeration of snapshots is slow.

% Snapshots are found by enumerating blobs of type rdf_snapshot.
rdf_current_snapshot(Term) :-
    current_blob(Term, rdf_snapshot).


                 /*******************************
                 *          TRANSACTION         *
                 *******************************/

%!  rdf_transaction(:Goal) is semidet.
%
%   Same as rdf_transaction(Goal, user, []).  See rdf_transaction/3.

%!  rdf_transaction(:Goal, +Id) is semidet.
%
%   Same as rdf_transaction(Goal, Id, []).  See rdf_transaction/3.

%!  rdf_transaction(:Goal, +Id, +Options) is semidet.
%
%   Run Goal in an RDF transaction.  Compared to the ACID model,
%   RDF transactions have the following properties:
%
%     1. Modifications inside the transactions become all atomically
%        visible to the outside world if Goal succeeds or remain
%        invisible if Goal fails or throws an exception.  I.e.,
%        the _atomicity_ property is fully supported.
%     2. _Consistency_ is not guaranteed.  Later versions may
%        implement consistency constraints that will be checked
%        serialized just before the actual commit of a transaction.
%     3. Concurrently executing transactions do not influence each
%        other.  I.e., the _isolation_ property is fully supported.
%     4. _Durability_ can be activated by loading
%        library(semweb/rdf_persistency).
%
%   Processed options are:
%
%     * snapshot(+Snapshot)
%     Execute Goal using the state of the RDF store as stored in
%     Snapshot.  See rdf_snapshot/1.  Snapshot can also be the
%     atom =true=, which implies that an anonymous snapshot is
%     created at the current state of the store.  Modifications
%     due to executing Goal are only visible to Goal.

% The 1- and 2-argument versions fill in the default id `user` and an
% empty option list.
rdf_transaction(Goal) :-
    rdf_transaction(Goal, user, []).
rdf_transaction(Goal, Id) :-
    rdf_transaction(Goal, Id, []).

%!  rdf_active_transaction(?Id) is nondet.
%
%   True if Id is the identifier of a transaction in the context of
%   which this call is executed.  If Id is not instantiated,
%   backtracking yields transaction identifiers starting with the
%   innermost nested transaction.  Transaction identifier terms are
%   not copied, need not be ground and can be instantiated during
%   the transaction.

% Enumerates (or checks) Id against the list returned by
% rdf_active_transactions_/1.
rdf_active_transaction(Id) :-
    rdf_active_transactions_(List),
    member(Id, List).

%!  rdf_monitor(:Goal, +Options)
%
%   Call Goal if specified actions occur on the database.
rdf_monitor(Goal, Options) :-
    % Start with all event bits set; Options add or remove bits.
    monitor_mask(Options, 0xffff, Mask),
    rdf_monitor_(Goal, Mask).

%   monitor_mask(+Options, +Mask0, -Mask)
%
%   Fold the list Options over the bit mask Mask0 using update_mask/3.

monitor_mask([], Mask, Mask).
monitor_mask([H|T], Mask0, Mask) :-
    update_mask(H, Mask0, Mask1),
    monitor_mask(T, Mask1, Mask).

%   update_mask(+Option, +Mask0, -Mask)
%
%   -Name clears the bit(s) of Name; +Name and a plain Name set them.

update_mask(-X, Mask0, Mask) :-
    !,
    monitor_mask(X, M),
    Mask is Mask0 /\ \M.
update_mask(+X, Mask0, Mask) :-
    !,
    monitor_mask(X, M),
    Mask is Mask0 \/ M.
update_mask(X, Mask0, Mask) :-
    monitor_mask(X, M),
    Mask is Mask0 \/ M.

%!  monitor_mask(Name, Mask)
%
%   Mask bit for the monitor events.  Note that this must be kept
%   consistent with the enum broadcast_id defined in rdf_db.c

                                        % C-defined broadcasts
monitor_mask(assert,       0x0001).
monitor_mask(assert(load), 0x0002).
monitor_mask(retract,      0x0004).
monitor_mask(update,       0x0008).
monitor_mask(new_literal,  0x0010).
monitor_mask(old_literal,  0x0020).
monitor_mask(transaction,  0x0040).
monitor_mask(load,         0x0080).
monitor_mask(create_graph, 0x0100).
monitor_mask(reset,        0x0200).
                                        % prolog defined broadcasts
monitor_mask(parse,        0x1000).
monitor_mask(unload,       0x1000).     % FIXME: Duplicate
                                        % mask for all
monitor_mask(all,          0xffff).

%rdf_broadcast(Term, MaskName) :-
%%      monitor_mask(MaskName, Mask),
%%      rdf_broadcast_(Term, Mask).


                 /*******************************
                 *             WARM             *
                 *******************************/

%!  rdf_warm_indexes
%
%   Warm all indexes.  See rdf_warm_indexes/1.

rdf_warm_indexes :-
    findall(Index, rdf_index(Index), Indexes),
    rdf_warm_indexes(Indexes).

%   rdf_index(?Index)
%
%   Names of the indexes warmed by rdf_warm_indexes/0.  Each name is
%   listed once; the original table listed `o` twice, which made
%   rdf_warm_indexes/0 pass a duplicated entry to rdf_warm_indexes/1.

rdf_index(s).
rdf_index(p).
rdf_index(o).
rdf_index(sp).
rdf_index(po).
rdf_index(spo).
rdf_index(g).
rdf_index(sg).
rdf_index(pg).

%!  rdf_warm_indexes(+Indexes) is det.
%
%   Create the named indexes.
%   Normally, the RDF database creates
%   indexes lazily the first time they are needed.  This predicate
%   serves two purposes: it provides an explicit way to make sure
%   that the required indexes are present and creating multiple
%   indexes at the same time is more efficient.


                 /*******************************
                 *          DUPLICATES          *
                 *******************************/

%!  rdf_update_duplicates is det.
%
%   Update the duplicate administration of the RDF store.  This marks
%   every triple that is potentially a duplicate of another as
%   duplicate.  Being potentially a duplicate means that subject,
%   predicate and object are equivalent and the life-times of the
%   two triples overlap.
%
%   The duplicates marks are used to reduce the administrative load
%   of avoiding duplicate answers.  Normally, the duplicates are
%   marked using a background thread that is started on the first
%   query that produces a substantial amount of duplicates.

:- public
    rdf_update_duplicates_thread/0.

%!  rdf_update_duplicates_thread
%
%   Start a thread to initialize the duplicate administration.

% The thread is detached and runs rdf_update_duplicates/0 under a
% fixed alias.
rdf_update_duplicates_thread :-
    thread_create(rdf_update_duplicates, _,
                  [ detached(true),
                    alias('__rdf_duplicate_detecter')
                  ]).

%!  rdf_update_duplicates is det.
%
%   Update the duplicate administration.  If this administration is
%   up-to-date, each triple that _may_ have a duplicate is flagged.
%   The predicate rdf/3 uses this administration to speedup checking
%   for duplicate answers.
%
%   This predicate is normally executed from a background thread
%   named =__rdf_duplicate_detecter= which is created when a query
%   discovers that checking for duplicates becomes too expensive.


                 /*******************************
                 *    QUICK BINARY LOAD/SAVE    *
                 *******************************/

%!  rdf_save_db(+File) is det.
%!  rdf_save_db(+File, +Graph) is det.
%
%   Save triples into File in a quick-to-load binary format.  If Graph
%   is supplied only triples flagged to originate from that database
%   are added.  Files created this way can be loaded using
%   rdf_load_db/1.

:- create_prolog_flag(rdf_triple_format, 3, [type(integer)]).

% The one-argument version passes an unbound graph, exactly as the
% former stand-alone clause did when it called rdf_save_db_/3 with `_`.
rdf_save_db(File) :-
    rdf_save_db(File, _).

% The format version is read from the flag rdf_triple_format.  The
% output stream is opened in binary mode with position recording
% switched off, and is closed by the cleanup handler in all cases.
rdf_save_db(File, Graph) :-
    current_prolog_flag(rdf_triple_format, Version),
    setup_call_cleanup(
        open(File, write, Out, [type(binary)]),
        (   set_stream(Out, record_position(false)),
            rdf_save_db_(Out, Graph, Version)
        ),
        close(Out)).


%!  rdf_load_db_no_admin(+File, +Id, -Graphs) is det.
%
%   Load triples from a .trp file without updating the source
%   administration.  Id is handled to monitor action.  Graphs is
%   a list of graph-names encountered in File.
%
%   The stream is opened as the setup goal of setup_call_cleanup/3, so
%   it is closed even if set_stream/2 raises or the thread is
%   interrupted before loading starts.  Previously the stream was
%   opened before call_cleanup/2 was entered and could leak.

rdf_load_db_no_admin(File, Id, Graphs) :-
    setup_call_cleanup(
        open(File, read, In, [type(binary)]),
        (   set_stream(In, record_position(false)),
            rdf_load_db_(In, Id, Graphs)
        ),
        close(In)).


%!  check_loaded_cache(+Graph, +Graphs, +Modified) is det.
%
%   Verify the loaded cache file and optionally fix the modification
%   time (new versions save this along with the snapshot).
%
%   @tbd What to do if there is a cache mismatch?  Delete the loaded
%   graphs and fail?

% Silent when the cache holds exactly the expected graph; otherwise a
% warning is printed and the predicate still succeeds.
check_loaded_cache(DB, [DB], _Modified) :- !.
check_loaded_cache(DB, Graphs, _) :-
    print_message(warning, rdf(inconsistent_cache(DB, Graphs))).


%!  rdf_load_db(+File) is det.
%
%   Load triples from a file created using rdf_save_db/2.
% The file's URL (from uri_file_name/2) is passed as the Id argument
% of rdf_load_db_no_admin/3; the list of loaded graphs is ignored.
rdf_load_db(File) :-
    uri_file_name(URL, File),
    rdf_load_db_no_admin(File, URL, _Graphs).


                 /*******************************
                 *          LOADING RDF         *
                 *******************************/

:- multifile
    rdf_open_hook/8,
    rdf_open_decode/4,              % +Encoding, +File, -Stream, -Cleanup
    rdf_load_stream/3,              % +Format, +Stream, +Options
    rdf_file_type/2,                % ?Extension, ?Format
    rdf_storage_encoding/2,         % ?Extension, ?Encoding
    url_protocol/1.                 % ?Protocol

%!  rdf_load(+FileOrList) is det.
%
%   Same as rdf_load(FileOrList, []).  See rdf_load/2.

%!  rdf_load(+FileOrList, :Options) is det.
%
%   Load RDF data.  Options provides additional processing options.
%   Defined options are:
%
%     * blank_nodes(+ShareMode)
%     How to handle equivalent blank nodes.  If =share= (default),
%     equivalent blank nodes are shared in the same resource.
%
%     * base_uri(+URI)
%     URI that is used for rdf:about="" and other RDF constructs
%     that are relative to the base uri.  Default is the source
%     URL.
%
%     * concurrent(+Jobs)
%     If FileOrList is a list of files, process the input files
%     using Jobs threads concurrently.  Default is the minimum
%     of the number of cores and the number of inputs.  Higher
%     values can be useful when loading inputs from (slow)
%     network connections.  Using 1 (one) does not use
%     separate worker threads.
%
%     * format(+Format)
%     Specify the source format explicitly.  Normally this is
%     deduced from the filename extension or the mime-type.  The
%     core library understands the formats xml (RDF/XML) and
%     triples (internal quick load and cache format).  Plugins,
%     such as library(semweb/turtle) extend the set of recognised
%     extensions.
%
%     * graph(?Graph)
%     Named graph in which to load the data.  It is *not* allowed
%     to load two sources into the same named graph.
%     If Graph is
%     unbound, it is unified to the graph into which the data is
%     loaded.  The default graph is a =|file://|= URL when loading
%     a file or, if the specification is a URL, its normalized
%     version without the optional _|#fragment|_.
%
%     * if(Condition)
%     When to load the file.  One of =true=, =changed= (default) or
%     =not_loaded=.
%
%     * modified(-Modified)
%     Unify Modified with one of =not_modified=, cached(File),
%     last_modified(Stamp) or =unknown=.
%
%     * cache(Bool)
%     If =false=, do not use or create a cache file.
%
%     * register_namespaces(Bool)
%     If =true= (default =false=), register =xmlns= namespace
%     declarations or Turtle =|@prefix|= prefixes using
%     rdf_register_prefix/3 if there is no conflict.
%
%     * silent(+Bool)
%     If =true=, the message reporting completion is printed using
%     level =silent=.  Otherwise the level is =informational=.  See
%     also print_message/2.
%
%     * prefixes(-Prefixes)
%     Returns the prefixes defined in the source data file as a list
%     of pairs.
%
%     * multifile(+Boolean)
%     Indicate that the addressed graph may be populated with
%     triples from multiple sources.  This disables caching and
%     avoids that an rdf_load/2 call affecting the specified
%     graph cleans the graph.
%
%   Other options are forwarded to process_rdf/3.  By default,
%   rdf_load/2 only loads RDF/XML from files.  It can be extended to
%   load data from other formats and locations using plugins.  The
%   full set of plugins relevant to support different formats and
%   locations is below:
%
%     ==
%     :- use_module(library(semweb/turtle)).        % Turtle and TriG
%     :- use_module(library(semweb/rdf_ntriples)).
%     :- use_module(library(semweb/rdf_zlib_plugin)).
%     :- use_module(library(semweb/rdf_http_plugin)).
%     :- use_module(library(http/http_ssl_plugin)).
%     ==
%
%   @see    rdf_db:rdf_open_hook/8, library(semweb/rdf_persistency) and
%           library(semweb/rdf_cache)

:- dynamic
    rdf_loading/3.                              % Graph, Queue, Thread

rdf_load(Spec) :-
    rdf_load(Spec, []).

% Fallback used only when no concurrent/3 is available: run the goals
% one after the other, ignoring the job count and options.
:- if(\+current_predicate(concurrent/3)).
concurrent(_, Goals, _) :-
    forall(member(G, Goals), call(G)).
:- endif.

% Note that we kill atom garbage collection.  This improves performance
% with about 15% loading the LUBM Univ_50 benchmark.

% The flag agc_margin is set to 0 for the duration of the load and
% restored to its previous value by the cleanup handler.
rdf_load(Spec, M:Options) :-
    must_be(list, Options),
    current_prolog_flag(agc_margin, Old),
    setup_call_cleanup(
        set_prolog_flag(agc_margin, 0),
        rdf_load_noagc(Spec, M, Options),
        set_prolog_flag(agc_margin, Old)).

% A list of inputs is flattened and each element must be ground.  With
% one job the inputs are loaded sequentially in this thread; otherwise
% one rdf_load_one/3 goal per input is handed to concurrent/3.
rdf_load_noagc(List, M, Options) :-
    is_list(List),
    !,
    flatten(List, Inputs),          % Compatibility: allow nested lists
    maplist(must_be(ground), Inputs),
    length(Inputs, Count),
    load_jobs(Count, Jobs, Options),
    (   Jobs =:= 1
    ->  forall(member(Spec, Inputs),
               rdf_load_one(Spec, M, Options))
    ;   maplist(load_goal(Options, M), Inputs, Goals),
        concurrent(Jobs, Goals, [])
    ).
rdf_load_noagc(One, M, Options) :-
    must_be(ground, One),
    rdf_load_one(One, M, Options).

% Build the goal that loads a single input (argument order suits
% maplist/3 above).
load_goal(Options, M, Spec, rdf_load_one(Spec, M, Options)).

% load_jobs(+Count, -Jobs, +Options)
%
% Jobs is the concurrent(Jobs) option if given (must be a positive
% integer), else min(cpu_count, Count) but at least 1, else 1 when the
% cpu_count flag is unavailable or not positive.
load_jobs(_, Jobs, Options) :-
    option(concurrent(Jobs), Options),
    !,
    must_be(positive_integer, Jobs).
load_jobs(Count, Jobs, _) :-
    current_prolog_flag(cpu_count, CPUs),
    CPUs > 0,
    !,
    Jobs is max(1, min(CPUs, Count)).
load_jobs(_, 1, _).


% Resolve the source and target graph, then load.  rdf_start_load/2
% runs under the mutex rdf_load_file and rdf_end_load/1 is the cleanup
% handler, so it runs however the load terminates.
rdf_load_one(Spec, M, Options) :-
    source_url(Spec, Protocol, SourceURL),
    load_graph(SourceURL, Graph, Options),
    setup_call_cleanup(
        with_mutex(rdf_load_file,
                   rdf_start_load(SourceURL, Loading)),
        rdf_load_file(Loading, Spec, SourceURL, Protocol,
                      Graph, M, Options),
        rdf_end_load(Loading)).
%!  rdf_start_load(+SourceURL, -WhatToDo) is det.
%!  rdf_end_load(+WhatToDo) is det.
%!  rdf_load_file(+WhatToDo, +Spec, +SourceURL, +Protocol, +Graph,
%!                +Module, +Options) is det.
%
%   Of these three predicates, rdf_load_file/7 does the real work.
%   The others deal with the possibility that the graph is being
%   loaded by another thread.  In that case, we wait for the other
%   thread to complete the work.
%
%   @tbd    What if both threads disagree on what is loaded into the
%           graph?
%   @see    Code is modelled closely after how concurrent loading
%           is handled in SWI-Prolog's boot/init.pl

% If another thread has registered SourceURL in rdf_loading/3, return
% queue(Queue) so the caller waits on that thread's queue.
rdf_start_load(SourceURL, queue(Queue)) :-
    rdf_loading(SourceURL, Queue, LoadThread),
    \+ thread_self(LoadThread),
    !,
    debug(rdf(load), '~p is being loaded by thread ~w; waiting ...',
          [ SourceURL, LoadThread]).
% Otherwise register this thread as the loader; WhatToDo is the clause
% reference of the new rdf_loading/3 fact.
rdf_start_load(SourceURL, Ref) :-
    thread_self(Me),
    message_queue_create(Queue),
    assertz(rdf_loading(SourceURL, Queue, Me), Ref).

% A waiter has nothing to clean up.  The loader removes its
% rdf_loading/3 fact, posts `done` on the queue and destroys the queue.
rdf_end_load(queue(_)) :- !.
rdf_end_load(Ref) :-
    clause(rdf_loading(_, Queue, _), _, Ref),
    erase(Ref),
    thread_send_message(Queue, done),
    message_queue_destroy(Queue).

% Waiter: block until a message arrives on the loader's queue.  Any
% exception from thread_get_message/2 is caught and treated as
% completion.
rdf_load_file(queue(Queue), _Spec, _SourceURL, _Protocol, _Graph, _M, _Options) :-
    !,
    catch(thread_get_message(Queue, _), _, true).
% Loader: open the input, then take one of three routes depending on
% Modified:
%
%   - not_modified: nothing is loaded (Action = none).
%   - cached(CacheFile): unload the graph and load the cache file; any
%     exception raised while loading the cache makes this branch fail.
%   - otherwise: parse the source through rdf_load_stream/3.
rdf_load_file(_Ref, _Spec, SourceURL, Protocol, Graph, M, Options) :-
    debug(rdf(load), 'RDF: Loading ~q into ~q', [SourceURL, Graph]),
    statistics(cputime, T0),
    rdf_open_input(SourceURL, Protocol, Graph,
                   In, Cleanup, Modified, Format, Options),
    supported_format(Format, Cleanup),
    return_modified(Modified, Options),
    (   Modified == not_modified
    ->  Action = none
    ;   Modified = cached(CacheFile)
    ->  do_unload(Graph),
        catch(rdf_load_db_no_admin(CacheFile, cache(Graph), Graphs), _, fail),
        check_loaded_cache(Graph, Graphs, Modified),
        Action = load
    ;   % The base URI defaults to Graph, and to SourceURL if that
        % is unbound.
        option(base_uri(BaseURI), Options, Graph),
        (   var(BaseURI)
        ->  BaseURI = SourceURL
        ;   true
        ),
        once(phrase(derived_options(Options, NSList), Extra)),
        merge_options([ base_uri(BaseURI),
                        graph(Graph),
                        format(Format)
                      | Extra
                      ], Options, RDFOptions),
        % Unless multifile(true) is given, the graph is emptied
        % before loading.
        (   option(multifile(true), Options)
        ->  true
        ;   do_unload(Graph)
        ),
        graph_modified(Modified, ModifiedStamp),
        rdf_set_graph_source(Graph, SourceURL, ModifiedStamp),
        % Cleanup (as returned by rdf_open_input/8) runs when the
        % parser finishes, fails or raises.
        call_cleanup(rdf_load_stream(Format, In, M:RDFOptions),
                     Cleanup),
        save_cache(Graph, SourceURL, Options),
        register_file_prefixes(NSList),
        format_action(Format, Action)
    ),
    rdf_statistics_(triples(Graph, Triples)),
    report_loaded(Action, SourceURL, Graph, Triples, T0, Options).

% Succeed if some rdf_file_type/2 fact maps to Format.  Otherwise run
% the input's Cleanup goal and raise an existence error.
supported_format(Format, _Cleanup) :-
    rdf_file_type(_, Format),
    !.
supported_format(Format, Cleanup) :-
    call(Cleanup),
    existence_error(rdf_format_plugin, Format).

% Action reported by report_loaded/6: `load` for the triples format,
% `parsed` for every other format.
format_action(triples, load) :- !.
format_action(_, parsed).

% Write a cache file unless cache(false) is given or no cache file is
% available for writing.  Errors while saving are printed as warnings
% and do not make the load fail.
save_cache(Graph, SourceURL, Options) :-
    option(cache(true), Options, true),
    rdf_cache_file(SourceURL, write, CacheFile),
    !,
    catch(save_cache(Graph, CacheFile), E,
          print_message(warning, E)).
save_cache(_, _, _).

derived_options([], _) -->
    [].
% Recursive clause: emit namespaces(NSList) for an option list element
% that is register_namespaces(true), written in either the Name(Value)
% or the Name = Value form; emit nothing for any other element.
derived_options([Option|Rest], NSList) -->
    (   { Option == register_namespaces(true)
        ; Option == (register_namespaces = true)
        }
    ->  [ namespaces(NSList) ]
    ;   []
    ),
    derived_options(Rest, NSList).

%   graph_modified(+Modified, -Stamp)
%
%   Stamp is the time carried by last_modified(Stamp), or the current
%   time if the modification state is `unknown`.

graph_modified(last_modified(Stamp), Stamp).
graph_modified(unknown, Now) :-
    get_time(Now).

%   return_modified(+Modified, +Options)
%
%   Unify Modified with the argument of the modified(_) option if that
%   option is present.  Fails if the option is present but does not
%   unify; succeeds without effect if it is absent.

return_modified(Modified, Options) :-
    (   option(modified(Requested), Options)
    ->  Requested = Modified
    ;   true
    ).


                 /*******************************
                 *        INPUT HANDLING        *
                 *******************************/

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
This section deals with pluggable input sources.  The task of the input
layer is

    * Decide on the graph-name
    * Decide on the source-location
    * Decide whether loading is needed (if-modified)
    * Decide on the serialization in the input

The protocol must ensure minimal overhead, in particular for network
protocols.  E.g. for HTTP we want to make a single call on the server and
use If-modified-since to verify that we need not reload this file.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

%!  rdf_open_input(+SourceURL, +Protocol, +Graph,
%!                 -Stream, -Cleanup, -Modified, -Format, +Options)
%
%   Open an input source.
%
%   Options processed:
%
%     * graph(Graph)
%     * db(Graph)
%     * if(Condition)
%     * cache(Cache)
%     * format(Format)
%
%   @param  Modified is one of =not_modified=, last_modified(Time),
%           cached(CacheFile) or =unknown=

rdf_open_input(SourceURL, Protocol, Graph,
               Stream, Cleanup, Modified, Format, Options) :-
    % Determine HaveModified, the time stamp of what we already have.
    % It stays unbound for multifile(true), for if(true), and when
    % neither a loaded graph source nor a readable cache file is found.
    (   option(multifile(true), Options)
    ->  true
    ;   option(if(If), Options, changed),
        (   If == true
        ->  true
        ;   rdf_graph_source_(Graph, SourceURL, HaveModified)
        ->  true
        ;   option(cache(true), Options, true),
            rdf_cache_file(SourceURL, read, CacheFile)
        ->  time_file(CacheFile, HaveModified)
        ;   true
        )
    ),
    option(format(Format), Options, _),
    open_input_if_modified(Protocol, SourceURL, HaveModified,
                           Stream, Cleanup, Modified0, Format, Options),
    % If the source is not newer than what we have and the time stamp
    % came from a cache file, tell the caller to load that cache file.
    (   Modified0 == not_modified
    ->  (   nonvar(CacheFile)
        ->  Modified = cached(CacheFile)
        ;   Modified = not_modified
        )
    ;   Modified = Modified0
    ).


%!  source_url(+Spec, -Class, -SourceURL) is det.
%
%   Determine class and url of the source.  Class is one of
%
%     * stream(Stream)
%     * file
%     * a url-protocol (e.g., =http=)

% stream(In): use the stream's file name if it has one, otherwise
% generate a unique `stream://` identifier.
source_url(stream(In), stream(In), SourceURL) :-
    !,
    (   stream_property(In, file_name(File))
    ->  to_url(File, SourceURL)
    ;   gensym('stream://', SourceURL)
    ).
% A bare stream handle is treated as stream(Stream).
source_url(Stream, Class, SourceURL) :-
    is_stream(Stream),
    !,
    source_url(stream(Stream), Class, SourceURL).
% Any other compound term is resolved as a file specification.
source_url(Spec, Protocol, SourceURL) :-
    compound(Spec),
    !,
    source_file(Spec, Protocol, SourceURL).
% A URL for which uri_file_name/2 yields a file is handled as that file.
source_url(FileURL, Protocol, SourceURL) :-         % or return FileURL?
    uri_file_name(FileURL, File),
    !,
    source_file(File, Protocol, SourceURL).
% A URL with a registered protocol (see is_url/3).
source_url(SourceURL0, Protocol, SourceURL) :-
    is_url(SourceURL0, Protocol, SourceURL),
    !.
% Final fallback: treat the specification as a plain file name.
source_url(File, Protocol, SourceURL) :-
    source_file(File, Protocol, SourceURL).

% source_file(+Spec, -Class, -SourceURL)
%
% Resolve Spec to a readable file, trying no extension first and then
% every extension known to valid_extension/1.  Class is file(SExt),
% where SExt is the storage extension found by storage_extension/3
% ('' if none).
source_file(Spec, file(SExt), SourceURL) :-
    findall(Ext, valid_extension(Ext), Exts),
    absolute_file_name(Spec, File, [access(read), extensions([''|Exts])]),
    storage_extension(_Plain, SExt, File),
    uri_file_name(SourceURL, File).

% to_url(+FileOrURL, -URL): a global URI is returned unchanged; anything
% else is made absolute and converted with uri_file_name/2.
to_url(URL, URL) :-
    uri_is_global(URL),
    !.
to_url(File, URL) :-
    absolute_file_name(File, Path),
    uri_file_name(URL, Path).

% storage_extension(-Plain, -SExt, +File)
%
% Split off a non-empty extension of File that is registered in
% rdf_storage_encoding/2.  Otherwise SExt is '' and Plain is File.
storage_extension(Plain, SExt, File) :-
    file_name_extension(Plain, SExt, File),
    SExt \== '',
    rdf_storage_encoding(SExt, _),
    !.
storage_extension(File, '', File).

%!  load_graph(+SourceURL, -Graph, +Options) is det.
%
%   Graph is the graph into which we load the data.  Tries these
%   options:
%
%     1. The graph(Graph) option
%     2. The db(Graph) option (backward compatibility)
%     3. The base_uri(BaseURI) option
%     4. The source URL

% With multifile(true) a ground graph must be given through graph(_)
% or db(_); otherwise an existence error is raised.
load_graph(_Source, Graph, Options) :-
    option(multifile(true), Options),
    !,
    (   (   option(graph(Graph), Options)
        ->  true
        ;   option(db(Graph), Options)
        ),
        ground(Graph)
    ->  true
    ;   throw(error(existence_error(option, graph),
                    context(_, "rdf_load/2: using multifile requires graph")))
    ).
load_graph(Source, Graph, Options) :-
    (   option(graph(Graph), Options)
    ;   option(db(Graph), Options)
    ),
    !,
    load_graph2(Source, Graph, Options).
load_graph(Source, Graph, Options) :-
    load_graph2(Source, Graph, Options).

% load_graph2/3 keeps a graph that is already ground, else tries a
% ground base_uri(_) option other than [], else derives the graph from
% the source using load_graph/2.
load_graph2(_, Graph, _) :-
    ground(Graph),
    !.
load_graph2(_Source, Graph, Options) :-
    option(base_uri(Graph), Options),
    Graph \== [],
    ground(Graph),
    !.
load_graph2(Source, Graph, _) :-
    load_graph(Source, Graph).
% load_graph(+SourceURL, -Graph)
%
% Graph is SourceURL with a storage-encoding extension (one known to
% rdf_storage_encoding/2) removed, or SourceURL itself if it has none.
load_graph(SourceURL, BaseURI) :-
    file_name_extension(BaseURI, Ext, SourceURL),
    rdf_storage_encoding(Ext, _),
    !.
load_graph(SourceURL, SourceURL).


% open_input_if_modified(+Class, +SourceURL, +HaveModified,
%                        -Stream, -Cleanup, -Modified, ?Format, +Options)
%
% Open the source unless HaveModified shows that our copy is current.

% An already open stream: nothing to open or clean up, and the
% modification state is `unknown`.
open_input_if_modified(stream(In), SourceURL, _, In, true,
                       unknown, Format, _) :-
    !,
    (   var(Format)
    ->  guess_format(SourceURL, Format)
    ;   true
    ).
% A local file: compare the file time with HaveModified.  The file is
% only opened if HaveModified is unbound or older than the file.  An
% unbound Format is guessed from the name without its storage
% extension.
open_input_if_modified(file(SExt), SourceURL, HaveModified, Stream, Cleanup,
                       Modified, Format, _) :-
    !,
    uri_file_name(SourceURL, File),
    (   SExt == '' -> Plain = File; file_name_extension(Plain, SExt, File)),
    time_file(File, LastModified),
    (   nonvar(HaveModified),
        HaveModified >= LastModified
    ->  Modified = not_modified,
        Cleanup = true
    ;   storage_open(SExt, File, Stream, Cleanup),
        Modified = last_modified(LastModified),
        (   var(Format)
        ->  guess_format(Plain, Format)
        ;   true
        )
    ).
% Plain `file` is handled as a file without storage extension.
open_input_if_modified(file, SourceURL, HaveModified, Stream, Cleanup,
                       Modified, Format, Options) :-
    !,
    open_input_if_modified(file(''), SourceURL, HaveModified,
                           Stream, Cleanup,
                           Modified, Format, Options).
% Any other protocol is delegated to the multifile hook rdf_open_hook/8.
open_input_if_modified(Protocol, SourceURL, HaveModified, Stream, Cleanup,
                       Modified, Format, Options) :-
    rdf_open_hook(Protocol, SourceURL, HaveModified, Stream, Cleanup,
                  Modified, Format, Options).

% Map the file extension to a format through rdf_file_type/2.  For an
% unknown extension, assume xml and print a warning.
guess_format(File, Format) :-
    file_name_extension(_, Ext, File),
    (   rdf_file_type(Ext, Format)
    ->  true
    ;   Format = xml,
        print_message(warning, rdf(guess_format(Ext)))
    ).

%!  storage_open(+Extension, +File, -Stream, -Cleanup)
%
%   Open the low-level storage.  Note that the file is opened as
%   binary.  This is the same as for HTTP resources.  The correct
%   encoding will be set by the XML parser or the Turtle parser.

storage_open('', File, Stream, close(Stream)) :-
    !,
    open(File, read, Stream, [type(binary)]).
% For a non-empty storage extension, look up its encoding and let the
% multifile hook rdf_open_decode/4 open the file.
storage_open(Ext, File, Stream, Cleanup) :-
    rdf_storage_encoding(Ext, Encoding),
    rdf_open_decode(Encoding, File, Stream, Cleanup).

% valid_extension(?Ext): Ext is a known RDF file-type extension or a
% known storage-encoding extension.
valid_extension(Ext) :-
    rdf_file_type(Ext, _).
valid_extension(Ext) :-
    rdf_storage_encoding(Ext, _).

%!  is_url(@Term, -Scheme, -URL) is semidet.
%
%   True if Term is an atom denoting URL of the given Scheme.  URL is
%   normalized (see uri_normalized/2) and a possible fragment
%   identifier (#fragment) is removed.  This predicate only succeeds
%   if the scheme is registered using the multifile hook
%   url_protocol/1.

is_url(URL, Scheme, FetchURL) :-
    atom(URL),
    uri_is_global(URL),
    uri_normalized(URL, URL1),              % case normalization
    uri_components(URL1, Components),
    uri_data(scheme, Components, Scheme0),
    url_protocol(Scheme0),
    !,
    Scheme = Scheme0,
    % Replace the fragment by an unbound variable and rebuild the URL.
    uri_data(fragment, Components, _, Components1),
    uri_components(FetchURL, Components1).

url_protocol(file).                     % built-in

%!  rdf_file_type(+Extension, -Format) is semidet.
%
%   True if Format is the format belonging to the given file
%   extension.  This predicate is multifile and can thus be extended
%   by plugins.

rdf_file_type(xml,   xml).
rdf_file_type(rdf,   xml).
rdf_file_type(rdfs,  xml).
rdf_file_type(owl,   xml).
rdf_file_type(htm,   xhtml).
rdf_file_type(html,  xhtml).
rdf_file_type(xhtml, xhtml).
rdf_file_type(trp,   triples).


%!  rdf_storage_encoding(+Extension, -Format) is semidet.
%
%   True if Format describes the storage encoding of file.

rdf_storage_encoding('', plain).


%!  rdf_load_stream(+Format, +Stream, :Options)
%
%   Load RDF data from Stream.
%
%   @tbd    Handle mime-types?

% RDF/XML is parsed inside a transaction with id parse(Graph).
rdf_load_stream(xml, Stream, Options) :-
    !,
    graph(Options, Graph),
    rdf_transaction(load_stream(Stream, Options),
                    parse(Graph)).
% XHTML is handled like RDF/XML, with embedded(true) added to the
% options.
rdf_load_stream(xhtml, Stream, M:Options) :-
    !,
    graph(Options, Graph),
    rdf_transaction(load_stream(Stream, M:[embedded(true)|Options]),
                    parse(Graph)).
% The binary triples format is read directly by rdf_load_db_/3.
rdf_load_stream(triples, Stream, Options) :-
    !,
    graph(Options, Graph),
    rdf_load_db_(Stream, Graph, _Graphs).

% Parse Stream with process_rdf/3 using assert_triples as callback,
% then clear the modified flag of the target graph.
load_stream(Stream, M:Options) :-
    process_rdf(Stream, assert_triples, M:Options),
    option(graph(Graph), Options),
    rdf_graph_clear_modified_(Graph).


%!  report_loaded(+Action, +Source, +DB, +Triples, +StartCPU, +Options)
%
%   Print the completion message, unless Action is `none`.  The
%   message level is `silent` if silent(true) is in Options and
%   `informational` otherwise.  The reported time is the CPU time used
%   since StartCPU.

report_loaded(none, _, _, _, _, _) :- !.
report_loaded(Action, Source, DB, Triples, T0, Options) :-
    statistics(cputime, T1),
    Time is T1 - T0,
    (   option(silent(true), Options)
    ->  Level = silent
    ;   Level = informational
    ),
    print_message(Level,
                  rdf(loaded(Action, Source, DB, Triples, Time))).


%!  rdf_unload(+Source) is det.
%
%   Identify the graph loaded from Source and use rdf_unload_graph/1
%   to erase this graph.
%
%   @deprecated     For compatibility, this predicate also accepts a
%                   graph name instead of a source specification.
%                   Please update your code to use
%                   rdf_unload_graph/1.

rdf_unload(Spec) :-
    source_url(Spec, _Protocol, SourceURL),
    rdf_graph_source_(Graph, SourceURL, _),
    !,
    rdf_unload_graph(Graph).
% Deprecated usage: the argument is the name of an existing graph.
rdf_unload(Graph) :-
    atom(Graph),
    rdf_graph(Graph),
    !,
    warn_deprecated_unload(Graph),
    rdf_unload_graph(Graph).
% Nothing matches: succeed silently.
rdf_unload(_).

:- dynamic
    warned/0.

% Print the deprecation warning only once; warned/0 records that it
% has been printed.
warn_deprecated_unload(_) :-
    warned,
    !.
warn_deprecated_unload(Graph) :-
    assertz(warned),
    print_message(warning, rdf(deprecated(rdf_unload(Graph)))).


%!  rdf_unload_graph(+Graph) is det.
%
%   Remove Graph from the RDF store.  Succeeds silently if the named
%   graph does not exist.

rdf_unload_graph(Graph) :-
    must_be(atom, Graph),
    (   rdf_graph(Graph)
    ->  rdf_transaction(do_unload(Graph), unload(Graph))
    ;   true                            % unknown graph: nothing to do
    ).

%   do_unload(+Graph)
%
%   Retract all triples of Graph (if it has any) and destroy the
%   graph itself.

do_unload(Graph) :-
    (   rdf_graph_(Graph, Count),
        Count > 0
    ->  rdf_retractall(_, _, _, Graph)
    ;   true
    ),
    rdf_destroy_graph(Graph).

                 /*******************************
                 *        GRAPH QUERIES         *
                 *******************************/

%!  rdf_create_graph(+Graph) is det.
%
%   Create an RDF graph without triples.  Succeeds silently if the
%   graph already exists.


%!  rdf_graph(?Graph) is nondet.
%
%   True when Graph is an existing graph.

rdf_graph(Graph) :-
    rdf_graph_(Graph, _Triples).

%!  rdf_source(?Graph, ?SourceURL) is nondet.
%
%   True if named Graph is loaded from SourceURL.
%
%   @deprecated Use rdf_graph_property(Graph, source(SourceURL)).

rdf_source(Graph, SourceURL) :-
    rdf_graph(Graph),
    rdf_graph_source_(Graph, SourceURL, _Modified).

%!  rdf_source(?Source)
%
%   True if Source is a loaded source.
%
%   @deprecated     Use rdf_graph/1 or rdf_source/2.

rdf_source(SourceURL) :-
    rdf_source(_Graph, SourceURL).

%!  rdf_make
%
%   Reload all loaded files that have been modified since the last
%   time they were loaded.  Errors raised while reloading a source
%   are printed and do not stop the remaining reloads.

rdf_make :-
    findall(Source-Graph, modified_graph(Source, Graph), Pairs),
    forall(member(Source-Graph, Pairs),
           catch(rdf_load(Source, [graph(Graph), if(changed)]),
                 Error,
                 print_message(error, Error))).

%   modified_graph(-SourceURL, -Graph)
%
%   Enumerate graphs that have a recorded source with a positive
%   modification stamp.  Sources whose URL starts with 'stream://'
%   are skipped.

modified_graph(SourceURL, Graph) :-
    rdf_graph(Graph),
    rdf_graph_source_(Graph, SourceURL, Stamp),
    \+ sub_atom(SourceURL, 0, _, _, 'stream://'),
    Stamp > 0.

%!  rdf_graph_property(?Graph, ?Property) is nondet.
%
%   True when Property is a property of Graph.  Defined properties
%   are:
%
%       * hash(Hash)
%       Hash is the (MD5-)hash for the content of Graph.
%       * modified(Boolean)
%       True if the graph is modified since it was loaded or
%       rdf_set_graph/2 was called with modified(false).
%       * source(Source)
%       The graph is loaded from the Source (a URL)
%       * source_last_modified(?Time)
%       Time is the last-modified timestamp of Source at the moment
%       the graph was loaded from Source.
%       * triples(Count)
%       True when Count is the number of triples in Graph.
%
%    Additional graph properties can be added by defining rules for
%    the multifile predicate property_of_graph/2.  Currently, the
%    following extensions are defined:
%
%       - library(semweb/rdf_persistency)
%         - persistent(Boolean)
%           Boolean is =true= if the graph is persistent.

rdf_graph_property(Graph, Property) :-
    rdf_graph(Graph),
    property_of_graph(Property, Graph).

:- multifile
    property_of_graph/2.

property_of_graph(hash(Hash), Graph) :-
    rdf_md5(Graph, Hash).
property_of_graph(modified(Boolean), Graph) :-
    rdf_graph_modified_(Graph, Boolean, _).
property_of_graph(source(URL), Graph) :-
    rdf_graph_source_(Graph, URL, _).
property_of_graph(source_last_modified(Time), Graph) :-
    rdf_graph_source_(Graph, _, Time),
    Time > 0.0.                         % only when a stamp is recorded
property_of_graph(triples(Count), Graph) :-
    rdf_graph_(Graph, Count).

%!  rdf_set_graph(+Graph, +Property) is det.
%
%   Set properties of Graph.  Defined properties are:
%
%       * modified(false)
%       Set the modified state of Graph to false.

rdf_set_graph(Graph, modified(Modified)) :-
    must_be(oneof([false]), Modified),
    rdf_graph_clear_modified_(Graph).


%!  save_cache(+DB, +Cache) is det.
%
%   Save triples belonging to DB in the file Cache.

save_cache(DB, Cache) :-
    current_prolog_flag(rdf_triple_format, Version),
    setup_call_cleanup(
        % Fail silently if the cache file cannot be opened.
        catch(open(Cache, write, CacheStream, [type(binary)]), _, fail),
        rdf_save_db_(CacheStream, DB, Version),
        close(CacheStream)).

%!  assert_triples(+Triples, +Source)
%
%   Assert a list of triples into the database. For security
%   reasons we check we aren't inserting anything but nice RDF
%   triples.
%
%   @error type_error(rdf_triple, Term) if an element of Triples is
%          not of the shape rdf(S,P,O).

assert_triples([], _).
assert_triples([rdf(S,P,O)|Rest], DB) :-
    !,
    rdf_assert(S, P, O, DB),
    assert_triples(Rest, DB).
assert_triples([Culprit|_], _) :-
    throw(error(type_error(rdf_triple, Culprit), _)).


                 /*******************************
                 *             RESET            *
                 *******************************/

%!  rdf_reset_db
%
%   Remove all triples from the RDF database and reset all its
%   statistics.
%
%   @bug    This predicate checks for active queries, but this check is
%           not properly synchronized and therefore the use of this
%           predicate is unsafe in multi-threaded contexts. It is
%           mainly used to run functionality tests that need to
%           start with an empty database.

rdf_reset_db :-
    reset_gensym('_:genid'),
    rdf_reset_db_.


                 /*******************************
                 *           SAVE RDF           *
                 *******************************/

%!  rdf_save(+Out) is det.
%
%   Same as rdf_save(Out, []).  See rdf_save/2 for details.

%!  rdf_save(+Out, :Options) is det.
%
%   Write RDF data as RDF/XML. Options is a list of one or more of
%   the following options:
%
%           * graph(+Graph)
%           Save only triples associated to the given named Graph.
%
%           * anon(Bool)
%           If =false= (default =true=) do not save blank nodes that do
%           not appear (indirectly) as object of a named resource.
%
%           * base_uri(URI)
%           BaseURI used. If present, all URIs that can be
%           represented relative to this base are written using
%           their shorthand.  See also =write_xml_base= option.
%
%           * convert_typed_literal(:Converter)
%           Call Converter(-Type, -Content, +RDFObject), providing
%           the opposite for the convert_typed_literal option of
%           the RDF parser.
%
%           * document_language(+Lang)
%           Initial =|xml:lang|= saved with rdf:RDF element.
%
%           * encoding(Encoding)
%           Encoding for the output.  Either utf8 or iso_latin_1.
%
%           * inline(+Bool)
%           If =true= (default =false=), inline resources when
%           encountered for the first time. Normally, only bnodes
%           are handled this way.
%
%           * namespaces(+List)
%           Explicitly specify saved namespace declarations. See
%           rdf_save_header/2 option namespaces for details.
%
%           * sorted(+Boolean)
%           If =true= (default =false=), emit subjects sorted on
%           the full URI.  Useful to make file comparison easier.
%
%           * write_xml_base(Bool)
%           If =false=, do _not_ include the =|xml:base|=
%           declaration that is written normally when using the
%           =base_uri= option.
%
%           * xml_attributes(+Bool)
%           If =false= (default =true=), never use xml attributes to
%           save plain literal attributes, i.e., always use an XML
%           element as in =|<name>Joe</name>|=.
%
%   @param Out      Location to save the data.  This can also be a
%                   file-url (=|file://path|=) or a stream wrapped
%                   in a term stream(Out).
%   @see rdf_save_db/1

:- thread_local
    named_anon/2,                   % +Resource, -Id
    inlined/1.                      % +Resource

rdf_save(File) :-
    rdf_save2(File, []).

rdf_save(Spec, M:Options0) :-
    is_list(Options0),
    !,
    % Qualify the meta-options (see save_meta_option/1) with M.
    meta_options(save_meta_option, M:Options0, Options),
    to_file(Spec, File),
    rdf_save2(File, Options).
rdf_save(Spec, _:DB) :-
    atom(DB),                       % backward compatibility
    !,
    to_file(Spec, File),
    rdf_save2(File, [graph(DB)]).

%   save_meta_option(?Name)
%
%   Names of rdf_save/2 options whose value is a goal that must be
%   module-qualified.

save_meta_option(convert_typed_literal).

%   to_file(+Spec, -File)
%
%   Translate a =|file://|= URL into a plain file name.  Any other
%   specification (plain file name, stream(S)) is passed unchanged.

to_file(URL, File) :-
    atom(URL),
    uri_file_name(URL, File),
    !.
to_file(File, File).

%   rdf_save2(+File, +Options)
%
%   Validate the encoding option, open the output and run
%   rdf_do_save/2.  The output is opened and the save counters are
%   zeroed in the Setup goal of setup_call_catcher_cleanup/4, so
%   cleanup_save/5 is guaranteed to run once the output has been
%   acquired.  This replaces the deprecated call_cleanup/3, which
%   left the acquisition outside the protected region.
%
%   @error domain_error(encoding, Enc) if the encoding is not
%          supported (see valid_encoding/1).

rdf_save2(File, Options) :-
    option(encoding(Encoding), Options, utf8),
    valid_encoding(Encoding),
    setup_call_catcher_cleanup(
        (   open_output(File, Encoding, Out, Close),
            flag(rdf_db_saved_subjects, OSavedSubjects, 0),
            flag(rdf_db_saved_triples, OSavedTriples, 0)
        ),
        rdf_do_save(Out, Options),
        Reason,
        cleanup_save(Reason,
                     File,
                     OSavedSubjects,
                     OSavedTriples,
                     Close)).

%   open_output(+Spec, +Encoding, -Out, -Cleanup)
%
%   Out is the stream to write to and Cleanup is the goal that undoes
%   what was done here: restore the previous encoding of a stream
%   passed as stream(Out), or close a file opened by us.

open_output(stream(Out), Encoding, Out, Cleanup) :-
    !,
    stream_property(Out, encoding(Old)),
    (   (   Old == Encoding
        ;   Old == wchar_t          % Internal encoding
        )
    ->  Cleanup = true
    ;   set_stream(Out, encoding(Encoding)),
        Cleanup = set_stream(Out, encoding(Old))
    ).
open_output(File, Encoding, Out,
            close(Out)) :-
    open(File, write, Out, [encoding(Encoding)]).

%   valid_encoding(+Enc)
%
%   @error domain_error(encoding, Enc) if xml_encoding_name/2 does
%          not know Enc.

valid_encoding(Enc) :-
    (   xml_encoding_name(Enc, _)
    ->  true
    ;   throw(error(domain_error(encoding, Enc), _))
    ).


%   cleanup_save(+Reason, +File, +OSavedSubjects, +OSavedTriples, +Close)
%
%   Run the Close goal, restore the two save counters to their old
%   values, wipe the thread-local save state and report.  Reason is
%   the catcher term of the protected goal: on `exit` the saved/3
%   message is printed, otherwise Reason is written to user_error.

cleanup_save(Reason,
             File,
             OSavedSubjects,
             OSavedTriples,
             Close) :-
    call(Close),
    flag(rdf_db_saved_subjects, SavedSubjects, OSavedSubjects),
    flag(rdf_db_saved_triples, SavedTriples, OSavedTriples),
    retractall(named_anon(_, _)),
    retractall(inlined(_)),
    (   Reason == exit
    ->  print_message(informational,
                      rdf(saved(File, SavedSubjects, SavedTriples)))
    ;   format(user_error, 'Reason = ~w~n', [Reason])
    ).

%   rdf_do_save(+Out, +Options0)
%
%   Write the header, all selected subjects and the footer to Out.
%   With sorted(true) the subjects are emitted in standard order of
%   terms.  The subject list is collected with findall/3 + sort/2
%   rather than setof/3: setof/3 fails if there are no solutions,
%   which made saving an empty store with sorted(true) fail after the
%   header had already been written.

rdf_do_save(Out, Options0) :-
    rdf_save_header(Out, Options0, Options),
    graph(Options, DB),
    (   option(sorted(true), Options, false)
    ->  (   var(DB)
        ->  findall(Subject, rdf_subject(Subject), SubjectList)
        ;   findall(Subject, rdf(Subject, _, _, DB:_), SubjectList)
        ),
        sort(SubjectList, Subjects),    % [] for an empty store
        forall(member(Subject, Subjects),
               rdf_save_non_anon_subject(Out, Subject, Options))
    ;   forall(rdf_subject_in_graph(Subject, DB),
               rdf_save_non_anon_subject(Out, Subject, Options))
    ),
    rdf_save_footer(Out),
    !.                              % dubious cut; the original author
                                    % notes that without it the cleanup
                                    % handler is not called

%!  rdf_subject_in_graph(-Subject, ?DB) is nondet.
%
%   True when Subject is a subject in the graph DB. If DB is unbound,
%   all subjects are enumerated. Otherwise we have two options:
%   enumerate all resources and filter by graph, or collect all
%   triples of the graph and get the unique subjects. The first is
%   attractive if the graph is big compared to the DB, also because
%   it does not require memory, the second if the graph is small
%   compared to the DB.  The first strategy is used when the graph
%   holds more than one tenth of all triples.

rdf_subject_in_graph(Subject, DB) :-
    var(DB),
    !,
    rdf_subject(Subject).
rdf_subject_in_graph(Subject, DB) :-
    rdf_statistics(triples(AllTriples)),
    rdf_graph_property(DB, triples(DBTriples)),
    DBTriples > AllTriples // 10,
    !,
    rdf_resource(Subject),
    (   rdf(Subject, _, _, DB:_)
    ->  true                        % one witness triple is enough
    ).
rdf_subject_in_graph(Subject, DB) :-
    findall(Subject, rdf(Subject, _, _, DB:_), SubjectList),
    list_to_set(SubjectList, Subjects),
    member(Subject, Subjects).


%   graph(+Options, -DB)
%
%   DB is the value of the graph(DB) option or, failing that, of the
%   db(DB) option.  DB is left unbound if neither is present.
%   Options may be module-qualified.

graph(Options0, DB) :-
    strip_module(Options0, _, Options),
    (   memberchk(graph(DB0), Options)
    ->  DB = DB0
    ;   memberchk(db(DB0), Options)
    ->  DB = DB0
    ;   true                            % leave unbound
    ).


%!  rdf_save_header(+Fd, +Options)
%
%   Save XML document header, doctype and open the RDF environment.
%   This predicate also sets up the namespace notation.
%
%   Save an RDF header, with the XML header, DOCTYPE, ENTITY and
%   opening the rdf:RDF element with appropriate namespace
%   declarations. It uses the primitives from section 3.5 to
%   generate the required namespaces and desired short-name. Options
%   is one of:
%
%     * graph(+URI)
%     Only search for namespaces used in triples that belong to the
%     given named graph.
%
%     * namespaces(+List)
%     Where List is a list of namespace abbreviations. With this
%     option, the expensive search for all namespaces that may be
%     used by your data is omitted. The namespaces =rdf= and =rdfs=
%     are added to the provided List. If a namespace is not
%     declared, the resource is emitted in non-abbreviated form.

rdf_save_header(Out, Options) :-
    rdf_save_header(Out, Options, _).

%   rdf_save_header(+Out, +Options, -OptionsOut)
%
%   Write the XML declaration, the DOCTYPE with one ENTITY per
%   namespace and the opening rdf:RDF tag.  OptionsOut is Options
%   extended with nsmap(NsMap), the namespace map that was written.
%   For compatibility, Options may also be an atom naming a graph.

rdf_save_header(Out, Options, OptionsOut) :-
    is_list(Options),
    !,
    option(encoding(Enc), Options, utf8),
    xml_encoding(Enc, Encoding),
    format(Out, '<?xml version=\'1.0\' encoding=\'~w\'?>~n', [Encoding]),
    format(Out, '<!DOCTYPE rdf:RDF [', []),
    header_namespaces(Options, NSIdList),
    nsmap(NSIdList, NsMap),
    append(Options, [nsmap(NsMap)], OptionsOut),
    % One entity declaration per namespace.
    forall(member(Alias=URI, NsMap),
           (   xml_quote_attribute(URI, Quoted, Enc),
               xml_escape_parameter_entity(Quoted, EntityText),
               format(Out, '~N <!ENTITY ~w \'~w\'>', [Alias, EntityText])
           )),
    format(Out, '~N]>~n~n', []),
    format(Out, '<rdf:RDF', []),
    % One xmlns declaration per namespace, referring to its entity.
    forall(member(Id, NSIdList),
           format(Out, '~N xmlns:~w="&~w;"~n', [Id, Id])),
    (   option(base_uri(Base), Options),
        option(write_xml_base(true), Options, true)
    ->  xml_quote_attribute(Base, BaseText, Enc),
        format(Out, '~N xml:base="~w"~n', [BaseText])
    ;   true
    ),
    (   memberchk(document_language(Lang), Options)
    ->  format(Out, '~N xml:lang="~w"', [Lang])
    ;   true
    ),
    format(Out, '>~n', []).
rdf_save_header(Out, FileRef, OptionsOut) :-    % compatibility
    atom(FileRef),
    rdf_save_header(Out, [graph(FileRef)], OptionsOut).

%   xml_encoding(+Enc, -Encoding)
%
%   Encoding is the XML name of the Prolog encoding Enc.
%
%   @error domain_error(rdf_encoding, Enc) if Enc is not supported.

xml_encoding(Enc, Encoding) :-
    (   xml_encoding_name(Enc, Encoding)
    ->  true
    ;   throw(error(domain_error(rdf_encoding, Enc), _))
    ).

xml_encoding_name(ascii,       'US-ASCII').
xml_encoding_name(iso_latin_1, 'ISO-8859-1').
xml_encoding_name(utf8,        'UTF-8').

%!  nsmap(+NSIds, -Map:list(id=uri)) is det.
%
%   Create a namespace-map that is compatible to xml_write/2
%   for dealing with XML-Literals

nsmap([], []).
nsmap([Id|Ids], [Id=URI|Map]) :-
    ns(Id, URI),
    nsmap(Ids, Map).

%!  xml_escape_parameter_entity(+In, -Out) is det.
%
%   Escape the percent sign as its numeric character reference
%   (ampersand, hash, 37, semicolon) for use in entity declarations.

%   xml_escape_parameter_entity(+In, -Out)
%
%   Out is In with every percent sign replaced by the numeric
%   character reference for code point 37.  Inside an entity value a
%   bare percent sign would start a parameter-entity reference.  If
%   In holds no percent sign it is returned unchanged.

xml_escape_parameter_entity(In, Out) :-
    sub_atom(In, _, _, _, '%'),
    !,
    atom_codes(In, Codes),
    phrase(escape_parent(Codes), OutCodes),
    atom_codes(Out, OutCodes).
xml_escape_parameter_entity(In, In).

%   escape_parent(+Codes)//
%
%   Emit Codes, replacing code 37 (percent) by the five characters
%   of its character reference.  The reference is deliberately
%   written as two adjacent literals so that tools that decode
%   character references in source text cannot silently turn it back
%   into a plain percent sign, which would make this a no-op.

escape_parent([]) --> [].
escape_parent([H|T]) -->
    (   { H == 37 }
    ->  "&", "#37;"
    ;   [H]
    ),
    escape_parent(T).


%!  header_namespaces(Options, -List)
%
%   Get namespaces we will define as entities.  If Options holds
%   namespaces(NSL), List is NSL with =rdf= and =rdfs= added, sorted
%   and without duplicates.  Otherwise the namespaces are computed
%   from the data using used_namespace_entities/2.

header_namespaces(Options, List) :-
    memberchk(namespaces(NSL0), Options),
    !,
    sort([rdf,rdfs|NSL0], List).
header_namespaces(Options, List) :-
    graph(Options, DB),
    used_namespace_entities(List, DB).

%!  rdf_graph_prefixes(?Graph, -List:ord_set) is det.
%!  rdf_graph_prefixes(?Graph, -List:ord_set, :Options) is det.
%
%   List is a sorted list of prefixes (namespaces) in Graph. Options
%   defined are:
%
%       * filter(:Filter)
%       optional Filter argument is used to filter the results. It
%       is called with 3 additional arguments:
%
%           ==
%           call(Filter, Where, Prefix, URI)
%           ==
%
%       The Where argument gives the location of the prefix and is
%       one of =subject=, =predicate=, =object= or =type=. The
%       Prefix argument is the potentially new prefix and URI is
%       the full URI that is being processed.
%
%       * expand(:Goal)
%       Hook to generate the graph.  Called using
%
%           ==
%           call(Goal,S,P,O,Graph)
%           ==
%
%       * min_count(+Count)
%       Only include prefixes that appear at least N times.  Default
%       is 1. Declared prefixes are always returned if found at
%       least one time.
%
%       * get_prefix(:GetPrefix)
%       Predicate to extract the candidate prefix from an IRI.  Default
%       is iri_xml_namespace/2.


:- thread_local
    graph_prefix/3.
:- meta_predicate
    rdf_graph_prefixes(?, -, :).

rdf_graph_prefixes(Graph, List) :-
    rdf_graph_prefixes(Graph, List, []).

rdf_graph_prefixes(Graph, List, M:QOptions) :-
    is_list(QOptions),
    !,
    meta_options(is_meta, M:QOptions, Options),
    option(filter(Filter), Options, true),
    option(expand(Expand), Options, rdf_db),
    option(min_count(MinCount), Options, 1),
    option(get_prefix(GetPrefix), Options, iri_xml_namespace),
    % graph_prefix/3 is scratch state; always wipe it afterwards.
    call_cleanup(prefixes(Expand, Graph, Prefixes, Filter, MinCount, GetPrefix),
                 retractall(graph_prefix(_,_,_))),
    sort(Prefixes, List).
rdf_graph_prefixes(Graph, List, M:Filter) :-
    % Old calling convention: third argument is the filter closure.
    rdf_graph_prefixes(Graph, List, M:[filter(Filter)]).

%   is_meta(?OptionName)
%
%   Options of rdf_graph_prefixes/3 whose value is a closure.

is_meta(filter).
is_meta(expand).
is_meta(get_prefix).


%   prefixes(:Expand, ?Graph, -Prefixes, :Filter, +MinCount, :GetPrefix)
%
%   Enumerate the triples produced by Expand, recording candidate
%   prefixes in graph_prefix/3, and collect those stored with count
%   MinCount (facts with an unbound count also match).

prefixes(Expand, Graph, Prefixes, Filter, MinCount, GetPrefix) :-
    (   call(Expand, S, P, O, Graph),
        add_ns(subject, GetPrefix, Filter, S, MinCount, s(S)),
        add_ns(predicate, GetPrefix, Filter, P, MinCount, sp(S,P)),
        add_ns_obj(GetPrefix, Filter, O, MinCount, spo(S,P,O)),
        fail
    ;   true
    ),
    findall(Prefix, graph_prefix(Prefix, MinCount, _), Prefixes).

%   add_ns(+Where, :GetPrefix, :Filter, +Resource, +MinCount, +Context)
%
%   Record the prefix of Resource unless Resource is a blank node,
%   GetPrefix yields no (or the empty) prefix, the prefix already
%   reached MinCount, or Filter rejects it.  Always succeeds.

add_ns(Where, GetPrefix, Filter, Resource, MinCount, Context) :-
    \+ rdf_is_bnode(Resource),
    call(GetPrefix, Resource, Prefix),
    Prefix \== '',
    !,
    (   graph_prefix(Prefix, MinCount, _)
    ->  true
    ;   Filter == true
    ->  add_ns(Prefix, Context)
    ;   call(Filter, Where, Prefix, Resource)
    ->  add_ns(Prefix, Context)
    ;   true
    ).
add_ns(_, _, _, _, _, _).

%   add_ns(+Prefix, +Context)
%
%   Update graph_prefix(Prefix, Count, Contexts).  A Context that was
%   already seen is not counted again.  A prefix that is declared
%   through ns/2 is stored with unbound count and contexts.

add_ns(Prefix, Context) :-
    graph_prefix(Prefix, _, Seen),
    memberchk(Context, Seen),
    !.
add_ns(Prefix, Context) :-
    retract(graph_prefix(Prefix, Count0, Seen)),
    !,
    Count is Count0+1,
    asserta(graph_prefix(Prefix, Count, [Context|Seen])).
add_ns(Prefix, _) :-
    ns(_, Prefix),
    !,
    asserta(graph_prefix(Prefix, _, _)).
add_ns(Prefix, Context) :-
    asserta(graph_prefix(Prefix, 1, [Context])).


%   add_ns_obj(:GetPrefix, :Filter, +Object, +MinCount, +Context)
%
%   Record the prefix of an object.  Resources (atoms) are recorded
%   as =object=; for typed literals with an atomic type, the prefix
%   of the type is recorded as =type=.  Anything else is ignored.

add_ns_obj(GetPrefix, Filter, Object, MinCount, Context) :-
    atom(Object),
    !,
    add_ns(object, GetPrefix, Filter, Object, MinCount, Context).
add_ns_obj(GetPrefix, Filter, literal(type(Type, _)), MinCount, _) :-
    atom(Type),
    !,
    add_ns(type, GetPrefix, Filter, Type, MinCount, t(Type)).
add_ns_obj(_, _, _, _, _).


%!  used_namespace_entities(-List, ?Graph) is det.
%
%   Return the namespace aliases that are actually used in Graph. In
%   addition, this predicate creates ns<N> aliases for namespaces
%   used in predicates because RDF/XML cannot write predicates other
%   than as an XML name.

used_namespace_entities(List, Graph) :-
    decl_used_predicate_ns(Graph),
    used_namespaces(List, Graph).

%   used_namespaces(-List, ?DB)
%
%   List is the sorted list of aliases of the prefixes found in DB,
%   always including =rdf=.

used_namespaces(List, DB) :-
    rdf_graph_prefixes(DB, Prefixes),
    ns_abbreviations(Prefixes, Aliases),
    sort([rdf|Aliases], List).

%   ns_abbreviations(+Prefixes, -Aliases)
%
%   Map each prefix to its (first) alias in ns/2; prefixes without
%   an alias are dropped.

ns_abbreviations([], []).
ns_abbreviations([Prefix|Prefixes], [Alias|Aliases]) :-
    ns(Alias, Prefix),
    !,
    ns_abbreviations(Prefixes, Aliases).
ns_abbreviations([_|Prefixes], Aliases) :-
    ns_abbreviations(Prefixes, Aliases).


/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
For every URL used as a predicate we *MUST* define a namespace as we
cannot use names holding /, :, etc. as XML identifiers.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

:- thread_local
    predicate_ns/2.

%   decl_used_predicate_ns(?DB)
%
%   Reset predicate_ns/2 and (re)declare a namespace for every
%   predicate that rdf_current_predicate/2 reports for DB.

decl_used_predicate_ns(DB) :-
    retractall(predicate_ns(_,_)),
    (   rdf_current_predicate(Pred, DB),
        decl_predicate_ns(Pred),
        fail
    ;   true
    ).

decl_predicate_ns(Pred) :-              % already declared
    predicate_ns(Pred, _),
    !.
decl_predicate_ns(Pred) :-              % known alias, valid local name
    rdf_global_id(NS:Local, Pred),
    xml_name(Local),
    !,
    assert(predicate_ns(Pred, NS)).
%   Last resort for decl_predicate_ns/1: split the predicate URI
%   into the shortest leading part (the namespace) for which the
%   remainder consists only of characters accepted by xml_code/1.
%   If the namespace is not yet registered, register it under the
%   first free alias ns1, ns2, ...  The predicate_ns/2 fact is
%   asserted exactly once; an earlier version asserted it twice when
%   the namespace was already known.  A predicate whose entire name
%   is accepted is recorded with namespace `-`.

decl_predicate_ns(Pred) :-
    atom_codes(Pred, Codes),
    append(NSCodes, LocalCodes, Codes),
    xml_codes(LocalCodes),
    !,
    (   NSCodes \== []
    ->  atom_codes(NS, NSCodes),
        (   ns(Id, NS)
        ->  true
        ;   between(1, infinite, N),
            atom_concat(ns, N, Id),
            \+ ns(Id, _)
        ->  rdf_register_ns(Id, NS),
            print_message(informational,
                          rdf(using_namespace(Id, NS)))
        ),
        assert(predicate_ns(Pred, Id))
    ;   assert(predicate_ns(Pred, -)) % no namespace used
    ).

%   xml_codes(+Codes)
%
%   True if all Codes are accepted by xml_code/1.

xml_codes([]).
xml_codes([H|T]) :-
    xml_code(H),
    xml_codes(T).

%   xml_code(+Code)
%
%   True if Code is a csym character (see code_type/2) or a hyphen.

xml_code(X) :-
    code_type(X, csym),
    !.
xml_code(0'-).                          % Match 0'-


%!  rdf_save_footer(Out:stream) is det.
%
%   Finish XML generation and write the document footer.  Also
%   clears the thread-local named_anon/2 and inlined/1 save state.
%
%   @see rdf_save_header/2, rdf_save_subject/3.

rdf_save_footer(Out) :-
    retractall(named_anon(_, _)),
    retractall(inlined(_)),
    format(Out, '</rdf:RDF>~n', []).

%!  rdf_save_non_anon_subject(+Out, +Subject, +Options)
%
%   Save an object.  A blank node is not saved here if anon(false)
%   is present in the Options list, or if it appears as the object
%   of some triple in the graph that is being saved.

rdf_save_non_anon_subject(_Out, Subject, Options) :-
    rdf_is_bnode(Subject),
    (   memberchk(anon(false), Options)
    ;   graph(Options, DB),
        rdf_db(_, _, Subject, DB)
    ),
    !.
rdf_save_non_anon_subject(Out, Subject, Options) :-
    rdf_save_subject(Out, Subject, Options),
    flag(rdf_db_saved_subjects, X, X+1).


%!  rdf_save_subject(+Out, +Subject:resource, +Options) is det.
%
%   Save the triples associated to Subject to Out. Options:
%
%     * graph(+Graph)
%     Only save properties from Graph.
%     * base_uri(+URI)
%     * convert_typed_literal(:Goal)
%     * document_language(+XMLLang)
%
%   @see rdf_save/2 for a description of these options.

rdf_save_subject(Out, Subject, Options) :-
    is_list(Options),
    !,
    option(base_uri(BaseURI), Options, '-'),
    (   rdf_save_subject(Out, Subject, BaseURI, 0, Options)
    ->  format(Out, '~n', [])
    ;   throw(error(rdf_save_failed(Subject), 'Internal error'))
    ).
rdf_save_subject(Out, Subject, DB) :-
    % Compatibility: third argument is a graph name or unbound.
    (   var(DB)
    ->  rdf_save_subject(Out, Subject, [])
    ;   rdf_save_subject(Out, Subject, [graph(DB)])
    ).


%!  rdf_save_subject(+Out:stream, +Subject:resource, +BaseURI,
%!                   +Indent:int, +Options) is det.
%
%   Save properties of Subject.  Nothing is written if Subject has
%   already been written inline (see inlined/1).
%
%   @param Indent   Current indentation

rdf_save_subject(Out, Subject, BaseURI, Indent, Options) :-
    (   inlined(Subject)
    ->  true
    ;   do_save_subject(Out, Subject, BaseURI, Indent, Options)
    ).

%   do_save_subject(+Out, +Subject, +BaseURI, +Indent, +Options)
%
%   Collect the Pred=Object pairs of Subject from the selected graph,
%   remove duplicates (reporting how many were dropped), write them
%   and add the number of written pairs to the saved-triples counter.

do_save_subject(Out, Subject, BaseURI, Indent, Options) :-
    graph(Options, DB),
    findall(Pred=Object, rdf_db(Subject, Pred, Object, DB), Pairs0),
    sort(Pairs0, Pairs),                % remove duplicates
    length(Pairs, Count),
    (   length(Pairs0, Count0),
        Dropped is Count0-Count,
        Dropped > 0
    ->  print_message(informational,
                      rdf(save_removed_duplicates(Dropped, Subject)))
    ;   true
    ),
    rdf_save_subject(Out, Subject, BaseURI, Pairs, Indent, Options),
    flag(rdf_db_saved_triples, X, X+Count).

%   rdf_db(?Subject, ?Pred, ?Object, ?DB)
%
%   As rdf/3 if DB is unbound; otherwise restricted to triples whose
%   source is DB:_.

rdf_db(Subject, Pred, Object, DB) :-
    var(DB),
    !,
    rdf(Subject, Pred, Object).
rdf_db(Subject, Pred, Object, DB) :-
    rdf(Subject, Pred, Object, DB:_).

%!  rdf_save_subject(+Out:stream, +Subject:resource, +BaseURI,
%!                   +Atts:list(Pred=Obj), +Indent:int, +Options) is det.
%
%   Save triples defined by Atts on Subject.

rdf_save_subject(Out, Subject, BaseURI, Atts, Indent, Options) :-
    % Typed-node form: use an rdf:type value as element name, if one
    % of the types is not a blank node and maps to a valid XML name.
    rdf_equal(rdf:type, RdfType),
    select(RdfType=Type, Atts, OtherAtts),
    \+ rdf_is_bnode(Type),
    rdf_id(Type, BaseURI, TypeId),
    xml_is_name(TypeId),
    !,
    format(Out, '~*|<', [Indent]),
    rdf_write_id(Out, TypeId),
    save_about(Out, BaseURI, Subject, Options),
    save_attributes(OtherAtts, BaseURI, Out, TypeId, Indent, Options).
rdf_save_subject(Out, Subject, BaseURI, Atts, Indent, Options) :-
    % Generic form.
    format(Out, '~*|<rdf:Description', [Indent]),
    save_about(Out, BaseURI, Subject, Options),
    save_attributes(Atts, BaseURI, Out, rdf:'Description', Indent, Options).

%   xml_is_name(+Id)
%
%   True if Id, or the local part of NS:Local, is an XML name
%   according to xml_name/1.

xml_is_name(_NS:Local) :-
    !,
    xml_name(Local).
xml_is_name(Name) :-
    xml_name(Name).

%!  save_about(+Out, +BaseURI, +Subject, +Options) is det.
%
%   Save the rdf:about. If Subject is a blank node, save the nodeID
%   if any.

save_about(Out, _BaseURI, Subject, _Options) :-
    rdf_is_bnode(Subject),
    !,
    (   named_anon(Subject, NodeID)
    ->  format(Out, ' rdf:nodeID="~w"', [NodeID])
    ;   true
    ).
save_about(Out, BaseURI, Subject, Options) :-
    option(encoding(Encoding), Options, utf8),
    rdf_value(Subject, BaseURI, Quoted, Encoding),
    format(Out, ' rdf:about="~w"', [Quoted]).

%!  save_attributes(+List, +BaseURI, +Stream, +Element, +Indent, +Options)
%
%   Save the attributes.  Short literal attributes are saved in the
%   tag.  Others as the content of the description element.  The
%   begin tag has already been filled.

save_attributes(Atts, BaseURI, Out, Element, Indent, Options) :-
    split_attributes(Atts, InTag, InBody, Options),
    SubIndent is Indent + 2,
    save_attributes2(InTag, BaseURI, tag, Out, SubIndent, Options),
    (   InBody == []
    ->  format(Out, '/>~n', [])         % empty element
    ;   format(Out, '>~n', []),
        save_attributes2(InBody, BaseURI, body, Out, SubIndent, Options),
        format(Out, '~N~*|</', [Indent]),
        rdf_write_id(Out, Element),
        format(Out, '>~n', [])
    ).

%!  split_attributes(+Attributes, -HeadAttrs, -BodyAttr, Options)
%
%   Split attribute (Name=Value) list into attributes for the head
%   and body. Attributes can only be in the head if they are literal
%   and appear only one time in the attribute list.  With the option
%   xml_attributes(false), all attributes go into the body.

split_attributes(Atts, [], Atts, Options) :-
    option(xml_attributes(false), Options),
    !.
split_attributes(Atts, HeadAttr, BodyAttr, _) :-
    duplicate_attributes(Atts, Repeated, Unique),
    simple_literal_attributes(Unique, HeadAttr, Rest),
    append(Repeated, Rest, BodyAttr).

%!  duplicate_attributes(+Attrs, -Duplicates, -Singles)
%
%   Extract attributes that appear more than once as we cannot
%   duplicate an attribute in the head according to the XML rules.

duplicate_attributes([], [], []).
duplicate_attributes([Att|Atts], Dupls, Singles) :-
    Att = (Name=_),
    named_attributes(Name, Atts, Same, Others),
    Same \== [],
    append([Att|Same], DuplsTail, Dupls),
    !,
    duplicate_attributes(Others, DuplsTail, Singles).
duplicate_attributes([Att|Atts], Dupls, [Att|Singles]) :-
    duplicate_attributes(Atts, Dupls, Singles).

%   named_attributes(+Name, +Atts, -Same, -Others)
%
%   Partition Atts into those with the given Name and the rest,
%   preserving order.

named_attributes(_, [], [], []) :- !.
named_attributes(Name, [Att|Atts], Same, Others) :-
    (   Att = (Name=_)
    ->  Same = [Att|SameTail],
        named_attributes(Name, Atts, SameTail, Others)
    ;   Others = [Att|OthersTail],
        named_attributes(Name, Atts, Same, OthersTail)
    ).

%!  simple_literal_attributes(+Attributes, -Inline, -Body)
%
%   Split attributes for (literal) attributes to be used in the
%   begin-tag and ones that have to go into the body of the description.

simple_literal_attributes([], [], []).
simple_literal_attributes([Att|Atts], [Att|Inline], Body) :-
    in_tag_attribute(Att),
    !,
    simple_literal_attributes(Atts, Inline, Body).
simple_literal_attributes([Att|Atts], Inline, [Att|Body]) :-
    simple_literal_attributes(Atts, Inline, Body).

%   in_tag_attribute(+Att)
%
%   True if Att is a plain literal whose text is an atom of fewer
%   than 60 characters.

in_tag_attribute(_=literal(Text)) :-
    atom(Text),                     % may not have lang qualifier
    atom_length(Text, Len),
    Len < 60.

%!  save_attributes2(+List, +BaseURI, +TagOrBody, +Stream, +Indent, +Options)
%
%   Save a list of attributes.

save_attributes2([], _, _, _, _, _).
save_attributes2([Att|Atts], BaseURI, Where, Out, Indent, Options) :-
    save_attribute(Where, Att, BaseURI, Out, Indent, Options),
    save_attributes2(Atts, BaseURI, Where, Out, Indent, Options).

%   save_attribute(+Where, +Name=Value, +BaseURI, +Out, +Indent, +Options)
%
%   Write one property, either as an XML attribute inside the begin
%   tag (Where = tag) or as a child element (Where = body).

save_attribute(tag, Name=literal(Value), BaseURI, Out, Indent, Options) :-
    AttIndent is Indent + 2,
    rdf_id(Name, BaseURI, NameText),
    option(encoding(Encoding), Options, utf8),
    xml_quote_attribute(Value, Quoted, Encoding),
    format(Out, '~N~*|', [AttIndent]),
    rdf_write_id(Out, NameText),
    format(Out, '="~w"', [Quoted]).
save_attribute(body, Name=literal(Literal0), BaseURI, Out, Indent, Options) :-
    !,
    rdf_id(Name, BaseURI, NameText),
    % Give the user hook a chance to turn the stored object back
    % into a type(Type, Content) literal.
    (   memberchk(convert_typed_literal(Converter), Options),
        call(Converter, Type, Content, Literal0)
    ->  Literal = type(Type, Content)
    ;   Literal = Literal0
    ),
    save_body_literal(Literal, NameText, BaseURI, Out, Indent, Options).
save_attribute(body, Name=Value, BaseURI, Out, Indent, Options) :-
    % Value is a blank node: either refer to it by rdf:nodeID or
    % write its description nested inside the property element.
    rdf_is_bnode(Value),
    !,
    rdf_id(Name, BaseURI, NameText),
    format(Out, '~N~*|<', [Indent]),
    rdf_write_id(Out, NameText),
    (   named_anon(Value, NodeID)
    ->  format(Out, ' rdf:nodeID="~w"/>', [NodeID])
    ;   % If the blank node is the object of more than one triple,
        % give it a node id so later references can point to it.
        (   rdf(Subj1, Name, Value),
            rdf(Subj2, Pred2, Value),
            (Subj1 \== Subj2 ; Name \== Pred2)
        ->  predicate_property(named_anon(_,_), number_of_clauses(N)),
            atom_concat('bn', N, NodeID),
            assertz(named_anon(Value, NodeID))
        ;   true
        ),
        SubIndent is Indent + 2,
        (   rdf_collection(Value)
        ->  save_about(Out, BaseURI, Value, Options),
            format(Out, ' rdf:parseType="Collection">~n', []),
            rdf_save_list(Out, Value, BaseURI, SubIndent, Options)
        ;   format(Out, '>~n', []),
            rdf_save_subject(Out, Value, BaseURI, SubIndent, Options)
        ),
        format(Out, '~N~*|</', [Indent]),
        rdf_write_id(Out, NameText),
        format(Out, '>~n', [])
    ).
save_attribute(body, Name=Value, BaseURI, Out, Indent, Options) :-
    % inline(true): nest the description of a resource the first
    % time it is met, provided it has properties of its own.
    option(inline(true), Options),
    has_attributes(Value, Options),
    \+ inlined(Value),
    !,
    assertz(inlined(Value)),
    rdf_id(Name, BaseURI, NameText),
    format(Out, '~N~*|<', [Indent]),
    rdf_write_id(Out, NameText),
    SubIndent is Indent + 2,
    (   rdf_collection(Value)
    ->  save_about(Out, BaseURI, Value, Options),
        format(Out, ' rdf:parseType="Collection">~n', []),
        rdf_save_list(Out, Value, BaseURI, SubIndent, Options)
    ;   format(Out, '>~n', []),
        do_save_subject(Out, Value, BaseURI, SubIndent, Options)
    ),
    format(Out, '~N~*|</', [Indent]),
    rdf_write_id(Out, NameText),
    format(Out, '>~n', []).
save_attribute(body, Name=Value, BaseURI, Out, Indent, Options) :-
    % Default: refer to the resource using rdf:resource.
    option(encoding(Encoding), Options, utf8),
    rdf_value(Value, BaseURI, Quoted, Encoding),
    rdf_id(Name, BaseURI, NameText),
    format(Out, '~N~*|<', [Indent]),
    rdf_write_id(Out, NameText),
    format(Out, ' rdf:resource="~w"/>', [Quoted]).

%   has_attributes(+URI, +Options)
%
%   True if URI is the subject of at least one triple in the graph
%   selected by Options.

has_attributes(URI, Options) :-
    graph(Options, DB),
    rdf_db(URI, _, _, DB),
    !.

%!  save_body_literal(+Literal, +NameText, +BaseURI,
%!                    +Out, +Indent, +Options).
%
%   Write a literal value as the content of the property element
%   NameText.  Handles language-tagged literals, XML literals and
%   typed literals.

save_body_literal(lang(Lang, Value),
                  NameText, BaseURI, Out, Indent, Options) :-
    !,
    format(Out, '~N~*|<', [Indent]),
    rdf_write_id(Out, NameText),
    % xml:lang is omitted if it equals the document language.
    (   memberchk(document_language(Lang), Options)
    ->  write(Out, '>')
    ;   rdf_id(Lang, BaseURI, LangText),
        format(Out, ' xml:lang="~w">', [LangText])
    ),
    save_attribute_value(Value, Out, Options),
    write(Out, '</'),
    rdf_write_id(Out, NameText),
    write(Out, '>').
save_body_literal(type(Type, DOM),
                  NameText, _BaseURI, Out, Indent, Options) :-
    rdf_equal(Type, rdf:'XMLLiteral'),
    !,
    (   atom(DOM)                       % already serialized XML
    ->  format(Out, '~N~*|<', [Indent]),
        rdf_write_id(Out, NameText),
        format(Out, ' rdf:parseType="Literal">~w</', [DOM]),
        rdf_write_id(Out, NameText),
        write(Out, '>')
    ;   save_xml_literal(DOM, NameText, Out, Indent, Options)
    ).
save_body_literal(type(Type, Value),
                  NameText, BaseURI, Out, Indent, Options) :-
    !,
    format(Out, '~N~*|<', [Indent]),
    rdf_write_id(Out, NameText),
    option(encoding(Encoding), Options, utf8),
    rdf_value(Type, BaseURI, QuotedType, Encoding),
    format(Out, ' rdf:datatype="~w">', [QuotedType]),
    save_attribute_value(Value, Out, Options),
    write(Out, '</'),
    rdf_write_id(Out, NameText),
    write(Out, '>').
% Plain literal (no language, no datatype): any atomic value.
save_body_literal(Literal,
                  Tag, _, Out, Indent, Options) :-
    atomic(Literal),
    !,
    format(Out, '~N~*|<', [Indent]),
    rdf_write_id(Out, Tag),
    write(Out, '>'),
    save_attribute_value(Literal, Out, Options),
    write(Out, '</'), rdf_write_id(Out, Tag), write(Out, '>').
% Everything else is treated as the content of an rdf:XMLLiteral.
save_body_literal(DOM,
                  Tag, BaseURI, Out, Indent, Options) :-
    rdf_equal(XMLLiteral, rdf:'XMLLiteral'),
    save_body_literal(type(XMLLiteral, DOM),
                      Tag, BaseURI, Out, Indent, Options).

%   save_attribute_value(+Value, +Out, +Options)
%
%   Write Value as element content.  Atoms and strings are quoted with
%   xml_quote_cdata/3 using the encoding(Encoding) option (default
%   utf8); numbers are written with writeq/2.
%
%   @throws error(save_attribute_value(Value), _) for any other value.

save_attribute_value(Text, Out, Options) :-          % strings
    (   atom(Text)
    ->  true
    ;   string(Text)
    ),
    !,
    option(encoding(Enc), Options, utf8),
    xml_quote_cdata(Text, Quoted, Enc),
    write(Out, Quoted).
save_attribute_value(Number, Out, _Options) :-       % numbers
    number(Number),
    !,
    writeq(Out, Number).                             % quoted: preserve floats
save_attribute_value(Other, _Out, _Options) :-
    throw(error(save_attribute_value(Other), _)).

%!  save_xml_literal(+DOM, +Attr, +Out, +Indent, +Options) is det.
%
%   Save an XMLLiteral value.  The complete property element, named
%   after Attr and carrying rdf:parseType="Literal", is written with
%   xml_write/3 with DOM as its content.  The namespaces of the RDF
%   document are taken from the nsmap(NsMap) option, which must be
%   present in Options, and are passed on to xml_write/3.
%
%   @error type_error(xml_dom, DOM) if DOM is not a valid XML DOM.

save_xml_literal(DOM, Attr, Out, Indent, Options) :-
    xml_is_dom(DOM),
    !,
    memberchk(nsmap(NsMap), Options),
    id_to_atom(Attr, ElemName),
    Element = element(ElemName, ['rdf:parseType'='Literal'], DOM),
    xml_write(Out, Element,
              [ header(false),
                indent(Indent),
                nsmap(NsMap)
              ]).
save_xml_literal(NoDOM, _, _, _, _) :-
    must_be(xml_dom, NoDOM).

%   id_to_atom(+Id, -Atom)
%
%   Turn an NS:Local identifier into the atom 'NS:Local'.  Any other
%   identifier is returned unchanged.

id_to_atom(NS:Local, Atom) :-
    !,
    atomic_list_concat([NS,Local], ':', Atom).
id_to_atom(Id, Id).


%!  rdf_collection(+URI) is semidet.
%
%   True if URI represents an RDF list that fits the RDF
%   parseType=collection syntax. This means it is a linked list of
%   bnode-cells with a rdf:first that is a resource, optionally a
%   rdf:type that is an rdf:List and the list ends in an rdf:nil.
%   Each cell must have exactly one rdf:first and exactly one rdf:rest
%   and no properties other than those accepted by collection_p/2.

:- rdf_meta
    rdf_collection(r),
    collection_p(r,r).

rdf_collection(rdf:nil) :- !.
rdf_collection(Node) :-
    rdf_is_bnode(Node),
    findall(Elem, rdf(Node, rdf:first, Elem), [_]),
    findall(Next, rdf(Node, rdf:rest, Next), [Tail]),
    forall(rdf(Node, Prop, Obj),
           collection_p(Prop, Obj)),
    rdf_collection(Tail).

% collection_p(+Predicate, +Object): properties a list cell may have.
collection_p(rdf:first, Member) :- atom(Member).
collection_p(rdf:rest, _).
collection_p(rdf:type, rdf:'List').


%!  rdf_save_list(+Out, +List, +BaseURI, +Indent, +Options)
%
%   Write the members of the RDF list List.  A blank-node member is
%   written through rdf_save_subject/5; any other member becomes an
%   empty rdf:Description element with an rdf:about attribute.  The
%   flag rdf_db_saved_triples is incremented by 3 for every cell.

rdf_save_list(_, List, _, _, _) :-
    rdf_equal(List, rdf:nil),
    !.
rdf_save_list(Out, List, BaseURI, Indent, Options) :-
    rdf_has(List, rdf:first, Member),
    (   rdf_is_bnode(Member)
    ->  nl(Out),
        rdf_save_subject(Out, Member, BaseURI, Indent, Options)
    ;   option(encoding(Enc), Options, utf8),
        rdf_value(Member, BaseURI, About, Enc),
        format(Out, '~N~*|<rdf:Description rdf:about="~w"/>',
               [Indent, About])
    ),
    flag(rdf_db_saved_triples, Saved, Saved+3),
    (   rdf_has(List, rdf:rest, Tail),
        \+ rdf_equal(Tail, rdf:nil)
    ->  rdf_save_list(Out, Tail, BaseURI, Indent, Options)
    ;   true
    ).


%!  rdf_id(+Resource, +BaseURI, -NSLocal)
%
%   Generate a NS:Local name for Resource given the indicated
%   default namespace. This call is used for elements.

% Resource is BaseURI followed by a fragment that starts with '#':
% the result is that fragment, including the '#'.
rdf_id(Resource, BaseURI, Fragment) :-
    assertion(atom(BaseURI)),
    atom_concat(BaseURI, Fragment, Resource),
    sub_atom(Fragment, 0, 1, _, '#'),
    !.
% The XML namespace part of Resource is a namespace known to ns/2.
rdf_id(Resource, _, NS:Local) :-
    iri_xml_namespace(Resource, NsURI, Local),
    ns(NS, NsURI),
    !.
% Some non-empty namespace known to ns/2 is a prefix of Resource.
rdf_id(Resource, _, NS:Local) :-
    ns(NS, NsURI),
    NsURI \== '',
    atom_concat(NsURI, Local, Resource),
    !.
% No namespace applies: use Resource itself.
rdf_id(Resource, _, Resource).


%!  rdf_write_id(+Out, +NSLocal) is det.
%
%   Write an identifier. We cannot use native write on it as both NS
%   and Local can be operators.

rdf_write_id(Out, NS:Local) :-
    !,
    format(Out, '~w:~w', [NS, Local]).
rdf_write_id(Out, Plain) :-
    write(Out, Plain).

%!  rdf_value(+Resource, +BaseURI, -Text, +Encoding)
%
%   According to "6.4 RDF URI References" of the RDF Syntax
%   specification, a URI reference is a UNICODE string not containing
%   control sequences, represented as UTF-8 and then as escaped
%   US-ASCII.
%
%   Text is the attribute-quoted form of Resource.  The clauses are
%   tried in this order:
%
%     - Resource equals BaseURI: Text is the empty atom.
%     - Resource is BaseURI plus a fragment starting with '#': Text
%       is the quoted fragment.
%     - Resource is a namespace known to ns/2 followed by a valid XML
%       name: Text is `&NS;Local`.
%     - Otherwise Text is the quoted Resource.

rdf_value(Base, Base, '', _) :- !.
rdf_value(Resource, Base, Text, Encoding) :-
    atom_concat(Base, Fragment, Resource),
    sub_atom(Fragment, 0, _, _, '#'),
    !,
    xml_quote_attribute(Fragment, Text, Encoding).
rdf_value(Resource, _, Text, Encoding) :-
    ns(NS, NsURI),
    atom_concat(NsURI, Local, Resource),
    xml_is_name(Local),
    !,
    xml_quote_attribute(Local, QuotedLocal, Encoding),
    atomic_list_concat(['&', NS, ';', QuotedLocal], Text).
rdf_value(Resource, _, Quoted, Encoding) :-
    xml_quote_attribute(Resource, Quoted, Encoding).


                 /*******************************
                 *       MATCH AND COMPARE      *
                 *******************************/

%!  rdf_compare(-Dif, +Object1, +Object2) is det.
%
%   Compare two object terms. Where SPARQL defines a partial
%   ordering, we define a complete ordering of terms. The ordering
%   is defined as:
%
%     - Blank nodes < IRIs < Literals
%     - Numeric literals < other literals
%     - Numeric literals are compared by value and then by type,
%       where Integer < Decimal < Double
%     - Other literals are compared lexically, case insensitive.
%       If equal, uppercase precedes lowercase. If still equal,
%       the types are compared lexically.

%!  rdf_match_label(+How, +Pattern, +Label) is semidet.
%
%   True if Label matches Pattern according to How. How is one of
%   `icase`, `substring`, `word`, `prefix` or `like`. For backward
%   compatibility, `exact` is a synonym for `icase`.


                 /*******************************
                 *      DEPRECATED MATERIAL     *
                 *******************************/

%!  rdf_split_url(+Prefix, +Local, -URL) is det.
%!  rdf_split_url(-Prefix, -Local, +URL) is det.
%
%   Split/join a URL. This functionality is moved to library(sgml).
%   An atomic URL is split with iri_xml_namespace/3; otherwise URL is
%   the concatenation of Prefix and Local.
%
%   @deprecated Use iri_xml_namespace/3. Note that the argument
%   order is iri_xml_namespace(+IRI, -Namespace, -Localname).

rdf_split_url(Prefix, Local, URL) :-
    (   atomic(URL)
    ->  iri_xml_namespace(URL, Prefix, Local)
    ;   atom_concat(Prefix, Local, URL)
    ).

%!  rdf_url_namespace(+URL, -Namespace)
%
%   Namespace is the namespace of URL.
%
%   @deprecated Use iri_xml_namespace/2

rdf_url_namespace(URL, Namespace) :-
    iri_xml_namespace(URL, Namespace).


                 /*******************************
                 *            LITERALS          *
                 *******************************/

%!  rdf_new_literal_map(-Map) is det.
%
%   Create a new literal map, returning an opaque handle.

%!  rdf_destroy_literal_map(+Map) is det.
%
%   Destroy a literal map. After this call, further use of the Map
%   handle is illegal. Additional synchronisation is needed if maps
%   that are shared between threads are destroyed to guarantee the
%   handle is no longer used. In some scenarios
%   rdf_reset_literal_map/1 provides a safe alternative.

%!  rdf_reset_literal_map(+Map) is det.
%
%   Delete all content from the literal map.

%!  rdf_insert_literal_map(+Map, +Key, +Value) is det.
%
%   Add a relation between Key and Value to the map. If this
%   relation already exists no action is performed.

%!  rdf_insert_literal_map(+Map, +Key, +Value, -KeyCount) is det.
%
%   As rdf_insert_literal_map/3. In addition, if Key is a new key in
%   Map, unify KeyCount with the number of keys in Map. This serves
%   two purposes. Derived maps, such as the stem and metaphone maps
%   need to know about new keys and it avoids additional foreign
%   calls for doing the progress in rdf_litindex.pl.

%!  rdf_delete_literal_map(+Map, +Key) is det.
%
%   Delete Key and all associated values from the map.

%!  rdf_delete_literal_map(+Map, +Key, +Value) is det.
%
%   Delete the association between Key and Value from the map.

%!  rdf_find_literal_map(+Map, +KeyList, -ValueList) is det.
%
%   Unify ValueList with an ordered set of values associated to all
%   keys from KeyList. Each key in KeyList is either an atom, an
%   integer or a term not(Key). If not-terms are provided, there
%   must be at least one positive keyword. The negations are tested
%   after establishing the positive matches.

%!  rdf_keys_in_literal_map(+Map, +Spec, -Answer) is det.
%
%   Realises various queries on the key-set:
%
%     * all
%     Unify Answer with an ordered list of all keys.
%
%     * key(+Key)
%     Succeeds if Key is a key in the map and unify Answer with the
%     number of values associated with the key. This provides a fast
%     test of existence without fetching the possibly large
%     associated value set as with rdf_find_literal_map/3.
%
%     * prefix(+Prefix)
%     Unify Answer with an ordered set of all keys that have the
%     given prefix. See section 3.1 for details on prefix matching.
%     Prefix must be an atom. This call is intended for
%     auto-completion in user interfaces.
%
%     * ge(+Min)
%     Unify Answer with all keys that are larger or equal to the
%     integer Min.
%
%     * le(+Max)
%     Unify Answer with all keys that are smaller or equal to the
%     integer Max.
%
%     * between(+Min, +Max)
%     Unify Answer with all keys between Min and Max (inclusive).

%!  rdf_statistics_literal_map(+Map, -KeyValue)
%
%   Query some statistics of the map. Provided KeyValue terms are:
%
%     * size(-Keys, -Relations)
%     Unify Keys with the total key-count of the index and Relations
%     with the total Key-Value count.



                 /*******************************
                 *             MISC             *
                 *******************************/

%!  rdf_version(-Version) is det.
%
%   True when Version is the numerical version-id of this library.
%   The version is computed as
%
%           Major*10000 + Minor*100 + Patch.

%!  rdf_set(+Term) is det.
%
%   Set properties of the RDF store. Currently defines:
%
%     * hash(+Hash, +Parameter, +Value)
%     Set properties for a triple index. Hash is one of =s=,
%     =p=, =sp=, =o=, =po=, =spo=, =g=, =sg= or =pg=. Parameter
%     is one of:
%
%       - size
%       Value defines the number of entries in the hash-table.
%       Value is rounded _down_ to a power of 2. After setting
%       the size explicitly, auto-sizing for this table is
%       disabled. Setting the size smaller than the current
%       size results in a =permission_error= exception.
%
%       - average_chain_len
%       Set maximum average collision number for the hash.
%
%       - optimize_threshold
%       Related to resizing hash-tables. If 0, all triples are
%       moved to the new size by the garbage collector. If more
%       than zero, those of the last Value resize steps remain at
%       their current location. Leaving cells at their current
%       location reduces memory fragmentation and slows down
%       access.

%!  rdf_md5(+Graph, -MD5) is det.
%
%   True when MD5 is the MD5 hash for all triples in graph.
The MD5 3350% digest itself is represented as an atom holding a 32-character 3351% hexadecimal string. The library maintains the digest 3352% incrementally on rdf_load/[1,2], rdf_load_db/1, rdf_assert/[3,4] 3353% and rdf_retractall/[3,4]. Checking whether the digest has 3354% changed since the last rdf_load/[1,2] call provides a practical 3355% means for checking whether the file needs to be saved. 3356% 3357% @deprecated New code should use rdf_graph_property(Graph, 3358% hash(Hash)). 3359 3360%! rdf_generation(-Generation) is det. 3361% 3362% True when Generation is the current generation of the database. 3363% Each modification to the database increments the generation. It 3364% can be used to check the validity of cached results deduced from 3365% the database. Committing a non-empty transaction increments the 3366% generation by one. 3367% 3368% When inside a transaction, Generation is unified to a term 3369% _TransactionStartGen_ + _InsideTransactionGen_. E.g., 4+3 means 3370% that the transaction was started at generation 4 of the global 3371% database and we have created 3 new generations inside the 3372% transaction. Note that this choice of representation allows for 3373% comparing generations using Prolog arithmetic. Comparing a 3374% generation in one transaction with a generation in another 3375% transaction is meaningless. 3376 3377%! rdf_estimate_complexity(?Subject, ?Predicate, ?Object, -Complexity) 3378% 3379% Return the number of alternatives as indicated by the database 3380% internal hashed indexing. This is a rough measure for the number 3381% of alternatives we can expect for an rdf_has/3 call using the 3382% given three arguments. When called with three variables, the 3383% total number of triples is returned. This estimate is used in 3384% query optimisation. See also rdf_predicate_property/2 and 3385% rdf_statistics/1 for additional information to help optimizers. 3386 3387%! rdf_debug(+Level) is det. 3388% 3389% Set debugging to Level. 
%   Level is an integer 0..9.  Default is 0 (no debugging).

%!  rdf_atom_md5(+Text, +Times, -MD5) is det.
%
%   Computes the MD5 hash from Text, which is an atom, string or list of
%   character codes. Times is an integer >= 1. When > 0, the MD5
%   algorithm is repeated Times times on the generated hash. This can be
%   used for password encryption algorithms to make generate-and-test
%   loops slow.
%
%   @deprecated Obviously, password hash primitives do not belong in
%   this library. The library(crypto) from the \const{ssl} package
%   provides extensive support for hashes. The \const{clib} package
%   provides library(crypt) to access the OS (Unix) password hash
%   implementation as well as lightweight implementations of several
%   popular hashes.


                 /*******************************
                 *            MESSAGES          *
                 *******************************/

:- multifile
    prolog:message//1.

% Hook into print_message/2: messages of the form rdf(Term) are
% translated by message//1 below.  The head must be the module-qualified
% prolog:message//1 declared multifile above; a local, unqualified
% non-terminal is never consulted by the message system.
prolog:message(rdf(Term)) -->
    message(Term).

%   message(+Term)//
%
%   Translate the argument of an rdf(Term) message into a list of
%   print_message_lines/3 format specifications.

message(loaded(How, What, BaseURI, Triples, Time)) -->
    how(How),
    source(What),
    into(What, BaseURI),
    in_time(Triples, Time).
message(save_removed_duplicates(N, Subject)) -->
    [ 'Removed ~d duplicate triples about "~p"'-[N,Subject] ].
message(saved(File, SavedSubjects, SavedTriples)) -->
    [ 'Saved ~D triples about ~D subjects into ~p'-
      [SavedTriples, SavedSubjects, File]
    ].
message(using_namespace(Id, NS)) -->
    [ 'Using namespace id ~w for ~w'-[Id, NS] ].
message(inconsistent_cache(DB, Graphs)) -->
    [ 'RDF cache file for ~w contains the following graphs'-[DB], nl,
      '~t~8|~p'-[Graphs]
    ].
message(guess_format(Ext)) -->
    [ 'Unknown file-extension: ~w. Assuming RDF/XML'-[Ext] ].
message(meta(not_expanded(G))) -->
    [ 'rdf_meta/1: ~p is not expanded'-[G] ].
message(deprecated(rdf_unload(Graph))) -->
    [ 'rdf_unload/1: Use ~q'-[rdf_unload_graph(Graph)] ].
% how(+How)//: leading word of the "loaded" message.
how(load)   --> [ 'Loaded' ].
how(parsed) --> [ 'Parsed' ].

% source(+SourceURL)//: show the base name of the file if SourceURL
% maps to a file name, and SourceURL itself otherwise.
source(SourceURL) -->
    { (   uri_file_name(SourceURL, File)
      ->  file_base_name(File, Shown)   % TBD: relative file?
      ;   Shown = SourceURL
      )
    },
    [ ' "~w"'-[Shown] ].

into(_, _) --> [].                      % TBD

in_time(Triples, ParseTime) -->
    [ ' in ~2f sec; ~D triples'-[ParseTime, Triples]
    ]