1:- module(sindice,
2 [ sindice_url/3
3 , si_with_graph/4
4 , si_with_result/5
5 , si_facet/2
6 ]).
119:- meta_predicate si_with_graph(+,+,-,0). 120:- meta_predicate si_with_result(+,+,-,-,0). 121:- meta_predicate rdf_call_with_graph(+,+,-,0). 122
123:- rdf_meta rdf_number(r,r,?).
124:- rdf_meta rdf_number(r,r,?,?).
125
126
127:- use_module(library('semweb/rdf_db')). 128:- use_module(library('semweb/rdf_http_plugin')). 129:- use_module(library(aggregate)). 130:- use_module(library(dcg_core)). 131
132
133:- rdf_register_prefix(sindice,'http://sindice.com/vocab/search#'). 134:- rdf_register_prefix(si_field,'http://sindice.com/vocab/fields#'). 135:- rdf_register_prefix(si_search,'http://api.sindice.com/v3/search?').
154sindice_url(Req,Opts,URL) :-
155 phrase( request_params(Req) >> seqmap(option_params,Opts), Params,[]),
156 parse_url(URL,
157 [ protocol(http)
158 , host('api.sindice.com')
159 , path('/v3/search')
160 , search(Params)]).
168si_with_graph(Req,Opts,Graph,Goal) :-
169 sindice_url(Req,Opts,URL),
170 rdf_call_with_graph(URL,[],Graph,Goal).
171
173request_params(keyword(K)) --> {must_be(atomic,K)}, [q=K].
174request_params(keywords(KS)) --> {atomic_list_concat(KS,' ',K)}, [q=K].
175request_params(uri(URI)) --> [q=Query],
176 { URI=_:_ -> rdf_global_id(URI,Query)
177 ; must_be(atomic,URI), Query=URI
178 }.
179
181option_params(fields(Fields)) --> seqmap(field,Fields).
182option_params(sort_by_data(B)) --> [sortbydata(B)].
183option_params(page(P)) --> [page(P)].
184option_params(count(N)) --> [count(N)].
185option_params(from(I,N)) --> [start(I),count(N)].
186
187field(F) --> [field(F)].
188
190sindice_opt(keyword,text,q).
191sindice_opt(ntriple,_,nq).
192sindice_opt(filter,_,fq).
203si_with_result(Req,Opts,I/N,R,Goal) :-
204 205 ( select_option(fields(Fs),Opts,Opts1)
206 -> union([rank],Fs,Fs1),
207 Opts2=[fields(Fs1)|Opts1]
208 ; Opts2=Opts 209 ),
210
211 ( var(I) 212 -> catch( autopaged_result(Req,Opts2,1,I/N,R,Goal),no_more, fail)
213 ; succ(I0,I), 214 si_with_graph(Req,[from(I0,1)|Opts2],G,
215 ( rdf_number(_,sindice:totalResults,N,G),
216 rdf_number(R,sindice:rank,I,G),
217 call(Goal)))
218 ).
219
221autopaged_result(Req,Opts,P,Progress,R,Goal) :-
222 ( si_with_graph(Req,[page(P)|Opts],G,page_result(Progress,R,G,Goal))
223 ; succ(P,P1), autopaged_result(Req,Opts,P1,Progress,R,Goal)
224 ).
225
231page_result(I/N,R,G,Goal) :-
232 rdf_number(_,sindice:totalResults,N,G),
233 aggregate(set(I-R)-max(I),rdf_number(R,sindice:rank,I,G),Results-Last),
234 ( member(I-R,Results), call(Goal)
235 ; Last>=N -> throw(no_more)
236 ).
256si_facet(R, link(L)) :- rdf(R,sindice:link,L).
257si_facet(R, cache(C)) :- rdf(R,sindice:cache,literal(C)).
258si_facet(R, rank(I)) :- rdf_number(R,sindice:rank,I).
259si_facet(R, title(T)) :- rdf(R,dc:title,literal(T)).
260si_facet(R, class(C)) :- rdf(R,si_field:class,literal(C)).
261si_facet(R, predicate(P)) :- rdf(R,si_field:predicate,literal(P)).
262si_facet(R, formats(Fs)) :- setof(F,rdf(R,si_field:format,literal(F)),Fs).
263si_facet(R, explicit_content_size(I)) :- rdf_number(R,sindice:explicit_content_size,I).
264si_facet(R, explicit_content_length(I)) :- rdf_number(R,sindice:explicit_content_length,I).
265
266
268
269rdf_number(S,P,Num) :-
270 ( var(Num)
271 -> rdf(S,P,literal(Atom)), atom_number(Atom,Num)
272 ; atom_number(Atom,Num), rdf(S,P,literal(Atom))
273 ).
274rdf_number(S,P,Num,G) :-
275 ( var(Num)
276 -> rdf(S,P,literal(Atom),G), atom_number(Atom,Num)
277 ; atom_number(Atom,Num), rdf(S,P,literal(Atom),G)
278 ).
279
280rdf_call_with_graph(URL,Opts,Graph,Goal) :-
281 setup_call_cleanup( rdf_load(URL,[graph(Graph)|Opts]), call(Goal),
282 rdf_unload_graph(Graph))
Inteface to Sindice semantic web search engine
This module provides the ability to formulate queries to the Sindice semantic web search engine, and to analyse the results obtained. It is based on an original module by Yves Raimond, but mostly rewritten by Samer Abdallah.
Sindice queries have serveral components:
Other parameters determine what and how much information is returned:
Results
Results are retreived as a named RDF graph. To interpret this, it is necessary to understand the Sindice ontology. The results consist of a set of resources of the class sindice:Result. Each item has the following properties:
As well as information about each item, the results also contain data about the search itself, which is represented as a resource of class sindice:Query, and data about the returned page, represented as a resource of class sindice:Page. The sindice:Query has the following properties
literal(integer)
literal(integer)
The sindice:Page has the following properties:
Running queries
The core predicate for running a Sindice query is si_with_graph/4, which formulates a query from a term of type
si_request
and a list of options, and then loads into the RDF store, temporarily, a named graph containing the results. The last argument to si_with_graph/4 is a goal which is called with the results graph in context. The graph is only available to this goal, and is unloaded after si_with_graph/4 finished. You may use any RDF-related predicates to interrogate the graph.On top of this is built a high-abstraction: si_with_result/5, which hides the details of large, multi-page result sets and calls a supplied goal once (disjunctively) for each result, automatically issuing multiple Sindice requests to iterate through multiple pages. You may interrogate the properties of each result only within the supplied goal. For convenience, the si_facet/2 allows a number of properties to be extracted from the RDF graph with type conversions from RDF literals to Prolog values where appropriate.
Building queries
The three main parts of a Sindice query are represented by a term of type
si_request
, which has several forms. Currently, these areA resource can be an atomic URI or a Prefix:Suffix term as understood by rdf_global_id/2. Eventually, Sindice's full query syntax, including ntriple queries and Boolean operators, will be implemented.
@seealso http://sindice.com/ http://sindice.com/developers/queryLanguage#QueryLanguage
Samer Abdallah, UCL, University of London; Yves Raimond, C4DM, Queen Mary, University of London /