. (utf8) 2:- module( 3 rdf_clean, 4 [ 5 rdf_clean_quad/3, % +BaseIri, +SwiQuad, -Quad 6 rdf_clean_triple/3, % +BaseIri, +SwiTriple, -Triple 7 rdf_clean_tuple/3 % +BaseIri, +SwiTuple, -Tuple 8 ] 9).
15:- use_module(library(semweb/rdf11), []). 16 17:- use_module(library(hash_ext)). 18:- use_module(library(rdf_prefix)). 19:- use_module(library(rdf_term)). 20:- use_module(library(uri_ext)). 21 22:- rdf_meta 23 rdf_clean_quad(+, t, -), rdf_clean_triple(+, t, -), rdf_clean_tuple(+, t, -), rdf_clean_lexical_form(r, +, -).
BNodePrefix must uniquely denote the document scope in which the blank node occurs. For this we use the BaseIri argument.
%! rdf_clean_bnode(+BaseIri, +BNode, -Iri)
%
% Replaces the blank node BNode by an IRI.  The MD5 hash of the pair
% BaseIri-BNode is computed with md5/2 and handed, as a one-element
% list, to well_known_iri/2, which yields Iri.

rdf_clean_bnode(BaseIri, BNode, Iri) :-
  % Blank node labels produced by the SWI-Prolog RDF parsers can be
  % very long and need not conform to serialization grammars (e.g.,
  % ‘_:http://www.gutenberg.org/feeds/catalog.rdf.bz2#_:Description2’).
  % Hashing with MD5 (1) bounds the length of the label, (2) keeps the
  % label within what serialization grammars allow, and (3) still
  % maps the same blank node of the same source document onto the
  % same Skolemized well-known IRI.
  md5(BaseIri-BNode, Digest),
  well_known_iri([Digest], Iri).
%! rdf_clean_graph(+G1, -G3)
%
% Cleans a graph name.  G1 is first related to an intermediate graph
% term through rdf11:post_graph/2.  That term is then mapped onto G3:
%
%   - if it is the atom `user`, G3 is the default graph as given by
%     rdf11:rdf_default_graph/1;
%   - if it already is the default graph, it is returned unchanged;
%   - otherwise it is cleaned as an IRI with rdf_clean_iri/2.

rdf_clean_graph(G1, G3) :-
  rdf11:post_graph(Graph, G1),
  (   Graph == user
  ->  rdf11:rdf_default_graph(G3)
  ;   (   rdf11:rdf_default_graph(Graph)
      ->  G3 = Graph
      ;   rdf_clean_iri(Graph, G3)
      )
  ).
rdf_base_iri(BaseIri), uri_resolve(Iri1, BaseIri, Iri2).
%! rdf_clean_iri(+Iri1, -Iri2)
%
% Passes an IRI through unchanged: succeeds when both arguments unify
% and the resulting term is an atom.

rdf_clean_iri(Iri1, Iri2) :-
  % Unify first, so that the atom check sees the same term regardless
  % of which argument was instantiated by the caller.
  Iri1 = Iri2,
  atom(Iri1).
%! rdf_clean_lexical_form(+Datatype, +Lex1, -Lex2)
%
% Maps the lexical form Lex1 of a typed literal onto the lexical form
% Lex2 by going through rdf_lexical_value/3 twice: once from Lex1 to
% a value, and once from that value back to a lexical form.  A
% warning is printed when Lex1 and Lex2 are not identical.
%
% @throws error(rdf_error(missing_language_tag,Lex),_) when the
%         datatype is rdf:langString, since this predicate receives
%         no language tag.

% language-tagged string
rdf_clean_lexical_form(rdf:langString, Lex, _) :- !,
  throw(error(rdf_error(missing_language_tag,Lex),rdf_clean_lexical_form/3)).
% typed literal
rdf_clean_lexical_form(Datatype, Lex1, Lex2) :-
  rdf_lexical_value(Datatype, Lex1, Value),
  rdf_lexical_value(Datatype, Lex2, Value),
  % Nothing to report when the given lexical form is already the one
  % that is generated back from the value; otherwise emit a warning.
  (   Lex1 == Lex2
  ->  true
  ;   print_message(
        warning,
        error(
          rdf_error(non_canonical_lexical_form,Datatype,Lex1,Lex2),
          rdf_clean_lexical_form/3
        )
      )
  ).
%! rdf_clean_literal(+Literal1, -Literal2)
%
% Cleans a literal.  Three input shapes are handled:
%
%   - literal(lang(LTag,Lex)): the language tag is lowercased with
%     downcase_atom/2; a warning is printed if that changed the tag.
%   - literal(type(D,Lex)): the datatype is cleaned with
%     rdf_clean_iri/2 and the lexical form with
%     rdf_clean_lexical_form/3.
%   - literal(Lex): the literal is turned into a typed literal whose
%     datatype is xsd:string.

% language-tagged string (rdf:langString)
rdf_clean_literal(literal(lang(Tag,Lex)), literal(lang(LowerTag,Lex))) :- !,
  downcase_atom(Tag, LowerTag),
  % Only warn when lowercasing actually changed the language tag.
  (   Tag == LowerTag
  ->  true
  ;   print_message(
        warning,
        error(
          rdf_error(non_canonical_language_tag,Tag),
          rdf_clean_literal/2
        )
      )
  ).
% typed literal
rdf_clean_literal(literal(type(Datatype1,Lex1)), literal(type(Datatype2,Lex2))) :- !,
  rdf_clean_iri(Datatype1, Datatype2),
  rdf_clean_lexical_form(Datatype2, Lex1, Lex2).
% simple literal (RDF 1.0): quickly clean this to a typed literal (RDF 1.1).
rdf_clean_literal(literal(Lex), literal(type(Datatype,Lex))) :-
  rdf_equal(xsd:string, Datatype).
%! rdf_clean_node(+BaseIri, +Node1, -Node2)
%
% Cleans a node.  The node is first tried as a non-literal with
% rdf_clean_nonliteral/3; the first solution is committed to.  Only
% when that fails is the node cleaned as a literal with
% rdf_clean_literal/2.

rdf_clean_node(BaseIri, Node1, Node2) :-
  (   rdf_clean_nonliteral(BaseIri, Node1, Node2)
  ->  true
  ;   rdf_clean_literal(Node1, Node2)
  ).
%! rdf_clean_nonliteral(+BaseIri, +Term1, -Term2)
%
% Cleans a term that is either a blank node or an IRI.  Blank nodes
% are handed to rdf_clean_bnode/3 together with BaseIri; IRIs are
% handed to rdf_clean_iri/2.  Fails for any other term.

rdf_clean_nonliteral(BaseIri, Term1, Term2) :-
  (   % blank node
      rdf_is_bnode(Term1)
  ->  rdf_clean_bnode(BaseIri, Term1, Term2)
  ;   % IRI
      rdf_is_iri(Term1)
  ->  rdf_clean_iri(Term1, Term2)
  ).
%! rdf_clean_quad(+BaseIri, +SwiQuad, -Quad)
%
% Cleans a quadruple rdf(S,P,O,G) into a term tp(S,P,O,G).  The
% subject, predicate and object are cleaned first, by
% rdf_clean_triple/3; the graph name is cleaned afterwards, by
% rdf_clean_graph/2.

rdf_clean_quad(
  BaseIri,
  rdf(Subject1,Predicate1,Object1,Graph1),
  tp(Subject2,Predicate2,Object2,Graph2)
) :-
  rdf_clean_triple(
    BaseIri,
    rdf(Subject1,Predicate1,Object1),
    tp(Subject2,Predicate2,Object2)
  ),
  rdf_clean_graph(Graph1, Graph2).
%! rdf_clean_triple(+BaseIri, +SwiTriple, -Triple)
%
% Cleans a triple rdf(S,P,O) into a term tp(S,P,O), term by term and
% in this order: the subject with rdf_clean_nonliteral/3, the
% predicate with rdf_clean_iri/2, and the object with
% rdf_clean_node/3.

rdf_clean_triple(
  BaseIri,
  rdf(Subject1,Predicate1,Object1),
  tp(Subject2,Predicate2,Object2)
) :-
  rdf_clean_nonliteral(BaseIri, Subject1, Subject2),
  rdf_clean_iri(Predicate1, Predicate2),
  rdf_clean_node(BaseIri, Object1, Object2).
%! rdf_clean_tuple(+BaseIri, +SwiTuple, -Tuple)
%
% Cleans a tuple.  A term rdf(S,P,O) is committed to as a triple and
% cleaned with rdf_clean_triple/3; every other term is handed to
% rdf_clean_quad/3.

% triple
rdf_clean_tuple(BaseIri, rdf(S,P,O), Triple) :- !,
  rdf_clean_triple(BaseIri, rdf(S,P,O), Triple).
% quadruple
rdf_clean_tuple(BaseIri, Quad, CleanQuad) :-
  % The clause must be closed with a full stop; without it the clause
  % runs into whatever text follows and the file no longer loads.
  rdf_clean_quad(BaseIri, Quad, CleanQuad).
RDF cleaning
*/