1:- module( pubmed, [ pubmed_cited_by/2, pubmed_cited_by/3,
2 pubmed_cites/2, pubmed_cites/3,
3 pubmed_summary_info/3,
4 pubmed_search/2, pubmed_search/3,
5 pubmed_summary_display/1, pubmed_summary_display/2, pubmed_summary_display/3,
6 pubmed_version/2
7 ]
8 ).
31:- ensure_loaded( library(sgml) ). 32 33% Section: defaults, shortcuts. 34 35eutils( 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/' ). 36 37default_names( Names ) :- 38 Names = ['Author','Title','Source','Pages','PubDate', 39 'Volume','Issue','ISSN','PmcRefCount', 40 'PubType','FullJournalName']. 41 42pubmed_search_defaults( [verbose(false),retmax(100),tmp_keep(false)] ). 43 44pubmed_summary_display_defaults( [display(['Title','Author']),names(Names)] ) :- 45 default_names( Names ). 46 47% Section: interface predicates
53pubmed_version( 0:0:4, date(2013,11,2) ).
pubmed_search( +STerm, -Ids, [] )
.
*/
60pubmed_search( STerm, Ids ) :-
61 pubmed_search( STerm, Ids, [] ).
STerm
.
In this, conjunction is marked by , (comma) and
disjunction by ; (semi-column). '-' pair terms are considered as
Key-Value and interpreted as Value[Key] in the query.
List are thought to be flat conjoint search terms with no pair values in them which are
interpreted by pubmed also as OR operations.
(See example below.)
Known keys are : journal
, pdat
. au
, All Fields
The predicate constructs a query that is posted via the http API provided
by NCBI (http://www.ncbi.nlm.nih.gov/books/NBK25500/).
Options
can be a single term or list of terms from :
retmax(RetMax)
the maximum number of records that will be returned def: 100verbose(Verbose)
if Verbose == true
then the predicate verbose
tmp_file(Tmp)
file to use, or when Tmp
is variable the file that was used
to receive the results from pubmed.tmp_keep(Keep)
keep the file with the xml result iff Keep==true
qtranslation(QTrans)
return in QTrans
the actual query ran on the
the pubmed server.
For instance, taking an example from the url we show how to find
all breast cancer articles that were published in Science in 2008.?- St = (journal=science,[breast,cancer],pdat=2008), pubmed_search( St, Ids, [verbose(true),qtranslation(QTrans)] ), length( Ids, Len ), write( number_of:Len ), nl. http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&retmax=100&term=science\[journal\]+AND+breast+cancer+AND+2008\[pdat\] process_create(path(curl),[-o,/tmp/pl_13858_1,http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&retmax=100&term=science\[journal\]+AND+breast+cancer+AND+2008\[pdat\]],[]) % Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 100 3008 0 3008 0 0 3585 0 --:--:-- --:--:-- --:--:-- 4641 tmp_file(/tmp/pl_13858_1) number_of:6 St = (journal=science, [breast, cancer], pdat=2008), Ids = ['19008416', '18927361', '18787170', '18487186', '18239126', '18239125'], QTrans = ['("Science"[Journal] OR "Science (80- )"[Journal] OR "J Zhejiang Univ Sci"[Journal]) AND ("breast neoplasms"[MeSH Terms] OR ("breast"[All Fields] AND "neoplasms"[All Fields]) OR "breast neoplasms"[All Fields] OR ("breast"[All Fields] AND "cancer"[All Fields]) OR "breast cancer"[All Fields]) AND 2008[pdat]'], Len = 6. ?- date(Date), pubmed_search( prolog, Ids ), length( Ids, Len ), write( number_of:Len ), nl. number_of:100 Date = date(2012, 7, 10), Ids = ['22586414', '22462194', '22215819', '21980276', '21499053', '21353661', '20123506', '20123505', '19408879'|...], Len = 100. ?- date(Date), pubmed_search( prolog, Ids, retmax(200) ), length( Ids, Len ), write( number_of:Len ), nl. number_of:120 Date = date(2012, 7, 10), Ids = ['22586414', '22462194', '22215819', '21980276', '21499053', '21353661', '20123506', '20123505', '19408879'|...], Len = 120.
*/
130pubmed_search( Sterm, Ids, OptsIn ) :-
131 to_list( OptsIn, Opts ),
132 pubmed_search_defaults( Defs ),
133 append( Opts, Defs, All ),
134 eutils( Eutils ),
135 ( ground(Sterm) -> true; type_error(ground,Sterm) ),
136 search_term_to_query( Sterm, Query ),
137 memberchk( retmax(Ret), All ),
138 atomic_list_concat( [Eutils,'esearch.fcgi?db=pubmed&retmax=',Ret,'&term=',Query], Url ),
139 memberchk_optional( tmp_file(Tmp), All ),
140 memberchk( verbose(Verb), All ),
141 true_writes( Verb, Url ),
142 get_url_in_tmp( Url, Verb, Tmp ),
143 true_writes( Verb, tmp_file(Tmp) ),
144 load_xml_file( Tmp, Xml ),
145 ( (memberchk(qtranslation(QTrans),All),
146 QT = 'QueryTranslation',
147 search_element_in_list(Xml,QT,[],element(_,_,QTrans))) -> true; true
148 ),
149 all_subs_in_xml_single( Xml, 'IdList', 'Id', NastyIds ),
150 flatten( NastyIds, Ids ),
151 memberchk_optional( tmp_keep(Keep), All),
152 true_atom_keeps_file( Keep, Tmp ).
pubmed_summary_display( Ids, _Summary, [] ).
*/
159pubmed_summary_display( Ids ) :-
160 pubmed_summary_display( Ids, _Summary, [] ).
pubmed_summary_display( Ids, Summary, [] ).
*/
166pubmed_summary_display( Ids, Summary ) :-
167 pubmed_summary_display( Ids, Summary, [] ).
display(Disp)
A list of article information keys that will displayed one on a line for each Id in Ids
.?- date(Date), pubmed_search((programming,'Prolog'), Ids), Ids = [A,B,C|_], pubmed_summary_display( [A,B,C] ). ---- 0:22215819 [Evaluating bacterial gene-finding HMM structures as probabilistic logic programs.] [Mørk S,Holmes I] ---- 1:21980276 [War of ontology worlds: mathematics, computer code, or Esperanto?] [Rzhetsky A,Evans JA] ---- 2:15360781 [Medical expert systems developed in j.MD, a Java based expert system shell: application in clinical laboratories.] [Van Hoof V,Wormek A,Schleutermann S,Schumacher T,Lothaire O,Trendelenburg C] ---- Date = date(2012, 7, 10), Ids = ['22215819', '21980276', '15360781', '11809317', '9783213', '9293715', '9390313', '8996790', '15048396'|...], A = '22215819', B = '21980276', C = '15360781'.
?- pubmed_cited_by( 20195494, These ), pubmed_summary_display( These, _, [display(['Title','Author','PubDate'])] ).
*/
209pubmed_summary_display( Ids, Summary, OptsIn ) :- 210 to_list( OptsIn, Opts ), 211 pubmed_summary_display_defaults( Defs ), 212 append( Opts, Defs, All ), 213 memberchk( display(Disp), All ), 214 pubmed_summary_info( Ids, Summary, Opts ), 215 pubmed_summary_display_info( Summary, Disp ). 216 217pubmed_summary_display_info( Summary, Disp ) :- 218 write( '----' ), nl, 219 nth0( N, Summary, Id-Rec ), 220 write( N:Id ), nl, 221 findall( _, (member(D,Disp),member(D-Val,Rec),write(Val),nl), _ ), 222 write( '----' ), nl, fail. 223pubmed_summary_display_info( _Summary, _Disp ).
pubmed_cited_by( Id, Ids, [] )
.
230pubmed_cited_by( Id, Ids) :-
231 pubmed_cited_by( Id, Ids, [] ).
verbose(Verb)
be verbose?- date(D), pubmed_cited_by( 12075665, By ). D = date(2012, 7, 9), By = ['19497389'].
*/
248pubmed_cited_by( Id, Ids, OptsIn ) :-
249 to_list( OptsIn, Opts ),
250 ( memberchk(verbose(Verb),Opts) -> true; Verb = false ),
251 eutils( Eutils ),
252 Query = 'elink.fcgi?report=xml&mode=text&tool=curl&db=PubMed&cmd=neighbor&linkname=pubmed_pubmed_citedin&id=',
253 atomic_list_concat( [Eutils,Query,Id], Url ),
254 get_url_in_tmp( Url, Verb, Tmp ),
255 load_xml_file( Tmp, Xml ),
256 once( search_element_in_list( Xml, 'LinkSetDb', [], element(_,_,LXml) ) ),
257 findall( CId, search_element_in_list(LXml,'Id',[],element(_,_,[CId])),Ids ),
258 delete_file( Tmp ).
pubmed_cites( Id, Ids, [] )
.
264pubmed_cites( Id, Ids ) :-
265 pubmed_cites( Id, Ids, [] ).
verbose(Verb)
be verbose?- date(D), pubmed_cites( 20195494, Ids ), length( Ids, Len ), write( D:Len ), nl, fail. date(2012,8,15):35
*/
284pubmed_cites( Id, Ids, OptsIn ) :-
285 to_list( OptsIn, Opts ),
286 ( memberchk(verbose(Verb),Opts) -> true; Verb = false ),
287 eutils( Eutils ),
288 % Query = 'elink.fcgi?report=xml&mode=text&tool=curl&db=pmc&DbFrom=pubmed&Cmd=link&linkname=pubmed_pmc_refs&id=',
289 Query = 'elink.fcgi?report=xml&mode=text&tool=curl&db=PubMed&Cmd=neighbor&linkname=pubmed_pubmed_refs&id=',
290 atomic_list_concat( [Eutils,Query,Id], Url ),
291 get_url_in_tmp( Url, Verb, Tmp ),
292 load_xml_file( Tmp, Xml ),
293 once( search_element_in_list( Xml, 'LinkSetDb', [], element(_,_,LXml) ) ),
294 findall( CId, search_element_in_list(LXml,'Id',[],element(_,_,[CId])),Ids ),
295 delete_file( Tmp ).
Results
. Id can also be a list of Ids in which case
the result is a list of Id-Results pairs.
Options is a single term, or list of the following terms:
names(Names)
list of names to be found in the xml file.retmax(Retmax)
the maximum number of records that will be returned def: 100tmp_file(Tmp)
temporary file to be used for saving xml files. If Tmp is a variable, or option is missing, a temporary file is created with tmp_file_stream/3.tmp_keep(Keep)
if true, keep the temporary xml file, otherwise, and by default, delete it.verbose(Verb)
When true
be verbose.?- date(Date), Opts = names(['Author','PmcRefCount','Title']), pubmed_summary_info( 12075665, Results, Opts ), write( date:Date ), nl, member( R, Results ), write( R ), nl, fail. date:date(2012,7,9) Author-[Kemp GJ,Angelopoulos N,Gray PM] Title-[Architecture of a mediator for a bioinformatics database federation.] Source-[IEEE Trans Inf Technol Biomed] Pages-[116-22] PubDate-[2002 Jun] Volume-[6] Issue-[2] ISSN-[1089-7771] PmcRefCount-[1] PubType-[Journal Article] FullJournalName-[IEEE transactions on information technology in biomedicine : a publication of the IEEE Engineering in Medicine and Biology Society] false.
*/
339pubmed_summary_info( Ids, Results, Opts ) :- 340 % fixme use _defaults 341 ( memberchk(names(Names),Opts) -> true; default_names(Names) ), 342 ( memberchk(tmp_file(Tmp),Opts) -> true; true ), 343 ( is_list(Ids) -> 344 findall( Id-Res, (member(Id,Ids),summary_info(Tmp,Id,Names,Res,Opts)), Results ) 345 ; 346 summary_info( Tmp, Ids, Names, Results, Opts ) 347 ). 348 349% Section: non-interface predicates... 350summary_info( Tmp, Id, WhichIn, Results, Opts ) :- 351 to_list( WhichIn, Which ), 352 eutils( Eutils ), 353 ( memberchk(retmax(RMax),Opts) -> true; RMax = 100 ), 354 atomic_list_concat( ['esummary.fcgi?report=xml&mode=text&tool=wget&retmax=',RMax,'&db=PubMed&id='], Query ), 355 atomic_list_concat( [Eutils,Query,Id], Url ), 356 get_url_in_tmp( Url, false, Tmp ), % fixme 357 load_xml_file( Tmp, Xml ), 358 Elem = element(_,_,[Entry]), 359 findall( Name-Info, ( member(Name,Which), 360 findall( Entry, 361 search_element_in_list(Xml, 'Item', ['Name'=Name], Elem ), 362 Info ) 363 ), Results ), 364 ( memberchk(tmp_keep(true),Opts) -> true; delete_file(Tmp) ). 365 366% Section: auxiliaries
372search_term_to_query( (A,B), Query ) :- 373 !, 374 search_term_to_query( A, Aq ), 375 search_term_to_query( B, Bq ), 376 atomic_list_concat( [Aq,'+AND+',Bq], Query ). 377search_term_to_query( (A;B), Query ) :- 378 !, 379 search_term_to_query( A, Aq ), 380 search_term_to_query( B, Bq ), 381 atomic_list_concat( [Aq,'OR',Bq], '+', Query ). 382search_term_to_query( (A=B), Query ) :- 383 !, 384 atomic_list_concat( [B,'\\[',A,'\\]'], Query ). 385search_term_to_query( C, Query ) :- 386 to_list( C, Clist ), 387 atomic_list_concat( Clist, '+', Query ).
394memberchk_optional( Elem, List ) :- 395 memberchk( Elem, List ), 396 !. 397memberchk_optional( _Elem, _List ). 398 399true_atom_keeps_file( Keep, _File ) :- 400 Keep == true, 401 !. 402true_atom_keeps_file( _Keep, File ) :- 403 delete_file( File ). 404 405to_list( Either, List ) :- 406 ( (var(Either);(Either\=[_H|_T],Either\==[]) ) -> 407 List = [Either] 408 ; 409 List = Either 410 ). 411 412true_writes( true, Report ) :- 413 !, 414 write( Report ), nl. 415true_writes( _Opts, _Report ).
421get_url_in_tmp(URL, Verb, File) :- 422 ( var(File) -> 423 tmp_file_stream(text, File, Stream), 424 close(Stream) 425 ; 426 true 427 ), 428 ( Verb==true -> Args = ['-o',File,URL] ; Args = ['-s','-o',File,URL] ), 429 true_writes( Verb, process_create(path(curl),Args,[]) ), 430 process_create( path(curl), Args, [] ), 431 exists_file( File ). 432 433/* 434http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=science[journal]+AND+breast+cancer+AND+2008[pdat] 435 436 */ 437 438all_subs_in_xml_single( Xml, Single, SubSel, Subs ) :- 439 once( search_element_in_list( Xml, Single, [], element(_,_,Nest) ) ), 440 findall( Sub, 441 search_element_in_list(Nest,SubSel,[],element(_,_,Sub)), 442 Subs ).
hostip.pl
.449search_element_in_list([Content|MoreContent], Name, ListAttributes, Element) :- 450 ( search_element(Content, Name, ListAttributes, Element) 451 ; search_element_in_list(MoreContent, Name, ListAttributes, Element) 452 ). 453 454search_element(HTML, Name, ListAttributes, HTML) :- 455 arg(1, HTML, Name), 456 arg(2, HTML, HTML_Attributi), 457 forall(member(Attribute, ListAttributes), 458 memberchk(Attribute, HTML_Attributi)). 459search_element(HTML, Name, ListAttributes, Element) :- 460 arg(3, HTML, Contents), 461 search_element_in_list(Contents, Name, ListAttributes, Element)
connect to publication services at pubmed
A simple library for communicating with pubmed publications. Currently allows
id(s)
It requires the curl executable to be in the path. Only tested on Linux. It is being developed on SWI-Prolog 6.1.8 but should also work on Yap Prolog.