1%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    2%    Authors:       Nicos Angelopoulos
    3%    E-mail:        Nicos Angelopoulos http://stoics.org.uk/~nicos/sware/contact.html
    4%    Copyright (C): Nicos Angelopoulos, 2015-2023
    5%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    6/*
    7   This program is free software; you can redistribute it and/or
    8    modify it under the terms of the MIT license
    9
   10    This program is distributed in the hope that it will be useful,
   11    but WITHOUT ANY WARRANTY; without even the implied warranty of
   12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
   13
   14*/
   15:- module( bio_db, [
   16                % This interface has now being split according to 
   17                % biological organisms, see files in cell/
   18                % 1. housekeeping:
   19                % bio_db/0,
   20                bio_db_close/1,
   21                bio_db_db_predicate/1,
   22                bio_db_data_predicate/4,
   23                bio_db_info/2,
   24                bio_db_info/3,
   25                bio_db_info/4,
   26                bio_db_interface/1,
   27                bio_db_interface/2,
   28                bio_db_install/2, bio_db_install/3,
   29                bio_db_organism/1, bio_db_organism/2, bio_db_organism/3,
   30                bio_db_organism_alias/2,
   31                bio_db_paths/0,
   32                bio_db_source/2,
   33                bio_db_version/2,
   34                bio_db_citation/2,
   35                bio_db_close_connections/0,
   36                % 2 derived
   37                % A.symbols
   38                is_symbol/2,
   39                ncbi_symb/3,
   40                % B. gene ontology
   41                go_id/2,          % +/-Go, -/+Int
   42                go_id/3,          % +GoOrInt, -Go, -Int
   43                % C. string edges
   44                org_edge_strg_symb/4  % ?Org, ?Symb1, ?Symb2, -W
   45             ] ).   46
   47:- dynamic( bio_db_handle/5 ).   48
   49:- dynamic( '$bio_db_handle'/2 ). % this is needed for the asserted server preds 
   50
   51
   52% auto-load (& other system) libraries
   53:- use_module(library(lists)).   54:- use_module(library(apply)).   55:- use_module(library(debug)).     % /1,3.
   56:- use_module(library(archive)).   % archive_extract/3.
   57:- use_module(library(filesex)).   58
   59:- use_module(library(lib)).   60
   61:- ensure_loaded('../src/bio_db_data_predicate').   62
   63:- lib(source(bio_db), homonyms(true)).   64
   65:- lib(stoics_lib:date_two_digit_dotted/1).   66:- lib(go_id/2).   67:- lib(is_symbol/2).   68:- lib(ncbi_symb/3).   69:- lib(org_edge_strg_symb/4).   70
   71:- lib(end(bio_db)).   72
   73% :- initialization( lib(& bio_db, load_main(false)), after_load ).
   74:- initialization( lib(@(bio_db)), after_load ).
 bio_db_organism(?Org)
Colloquial name for organisms supported by bio_db.

Human is considered the default organism and returned first.

?- bio_db_organism(Org).
Org = human ;
Org = chicken ;
Org = mouse ;
Org = multi ;
Org = pig.
author
- nicos angelopoulos
version
- 0:2 2019/4/8
- 0:3 2022/12/29, changed to colloquials and added chicken, were hs and mouse.
- 0:4 2023/6/3, added pig

*/

   98bio_db_organism(human).      % defaulty
   99bio_db_organism(chicken).    % 2022/12/21
  100bio_db_organism(mouse).
  101bio_db_organism(multi).      % 2023/9/15
  102bio_db_organism(pig).        % 2023/6/2
 bio_db_organism(?KnownAs, ?Canon)
 bio_db_organism(?Known, ?Token, ?Canon)
Canon is the canonical, colloquial, representation of Known which is either a known bio_db_organism/1, an alias to one or a organism token. Token is the token used in bio_db predicate, file and directory names for this organism.
?- bio_db_organism(Known,Org), write(Known:Org), nl, fail.
hs:human
gallus:chicken
gallus_gallus:chicken
gg6a:chicken
human:human
chicken:chicken
mouse:mouse
galg:chicken
homs:human
musm:mouse
suss:pig
mult:multi

?- bio_db_organism(human, Org).
Org = hs.

?- bio_db_organism(KnownAs, hs).
KnownAs = human ;
KnownAs = hs ;
false.
author
- nicos angelopoulos
version
- 0.2 2019/5/2
- 0.3 2022/12/25, added /3 version, and added many aliases

*/

  140bio_db_organism( Alias, Org ) :-
  141     bio_db_organism( Alias, _Token, Org ).
  142
  143bio_db_organism( Alias, Token, Org ) :-
  144    ( ground(Alias) -> Backtrack = false; Backtrack = true ),
  145    bio_db_organism_alias( Alias, Org ),
  146    ( Backtrack == false -> !; true ),
  147    bio_db_organism_token( Org, Token ).
  148bio_db_organism( Org, Token, Canon ) :-
  149    ( ground(Org) -> Backtrack = false; Backtrack = true ),
  150    bio_db_organism( Org ),
  151    bio_db_organism_token( Org, Token ),
  152    ( Backtrack == false -> !; true ),
  153    Canon = Org.
  154bio_db_organism( TokenIs, Token, Canon ) :-
  155    ( ground(TokenIs) -> Backtrack = false; Backtrack = true ),
  156    bio_db_organism_token( Canon, TokenIs ),
  157    ( Backtrack == false -> !; true ),
  158    Token = TokenIs.
  159
  160bio_db_organism_known( A, T, O ) :-
  161     bio_db_organism( A, T, O ),
  162     !.
  163bio_db_organism_known( A, T, O ) :-
  164     throw( un_known(bio_db_organism(A,T,O)) ).
  165     
  166
  167bio_db_organism_token(chicken, galg).
  168bio_db_organism_token(  human, homs).
  169bio_db_organism_token(  mouse, musm).
  170bio_db_organism_token(  multi, mult).
  171bio_db_organism_token(    pig, suss).
 bio_db_organism_alias(?Alias, -Org)
Alias is a known and supported alternative name for the canonical Org name for an organism.
?- bio_db_organism_alias( human, hs ).
true.

Note this used to be bio_db_organism/2 which has now (19.05.02) changed.

author
- nicos angelopoulos
version
- 0:1 2019/5/2
- 0:2 2022/12/20, gallus also known as chicken and gallus_gallus */
  189bio_db_organism_alias( hs, human ).
  190bio_db_organism_alias( gallus, chicken ).
  191bio_db_organism_alias( gallus_gallus, chicken ).
  192bio_db_organism_alias( gg6a, chicken ).
  193
  194% this search path can be added to requires
  195% bio_db_map/2,
  196% ncbi_homs_ensp_unip/2,
  197% ncbi_homs_ensp_ensg/2,
  198
  199/* was:
  200bio_db_interface_atom( prolog ).
  201bio_db_interface_atom( prosqlite ).
  202bio_db_interface_atom( berkeley ).
  203*/
  204bio_db_interface_atom( Iface ) :-
  205    bio_db_interface_extensions( Iface, _ ).
  206
  207bio_db_interface_initialisation( null ). % so it exists, fixme: should nt this be prolog ?
  208bio_db_interface_initialisation( prosqlite ) :-
  209    use_module( library(prosqlite) ).
  210bio_db_interface_initialisation( berkeley ) :-
  211    use_module( library(bdb) ).
  212bio_db_interface_initialisation( rocks ) :-
  213    use_module( library(rocksdb) ).
  214
  215bio_db_default_interface( prolog ).
  216
  217:- Opts = [access(read_write),type(atom),keep(true)],
  218   bio_db_default_interface( Def ),
  219   create_prolog_flag( bio_db_interface, Def, Opts ).  220
  221:- Opts = [access(read_write),type(atom),keep(true)],
  222   create_prolog_flag( bio_db_pl_from_zip, user, Opts ).  % true/false/user
  223
  224:- Opts = [access(read_write),type(atom),keep(true)],
  225   create_prolog_flag( bio_db_del_zip, user, Opts ).  % true/false/user, only asked for pl files
  226
  227:- use_module( library(lib) ).  228:- lib( source(bio_db), homonyms(true) ).  229
  230:- lib(options).  231:- lib(pack_errors).  232
  233:- lib(stoics_lib:at_con/3).  234:- lib(stoics_lib:portray_clauses/2).  235:- lib(stoics_lib:url_file/2).  236:- lib(stoics_lib:message_report/3).  237
  238:- lib(ui_yes_no/5).  239:- lib(bio_db_map/2).  240:- ensure_loaded( '../auxil/build_repo/lib/bio_db_pl_info' ).   % /2.
  241:- lib( end(bio_db) ).  242
  243stoics( 'http://stoics.org.uk/~nicos/sware/packs/bio_db_repo/data' ).

Access, use and manage big, biological datasets.

Bio_db gives access to pre-packed biological databases and simplifies management and translation of biological data to Prolog friendly formats.

There are currently 2 major types of data supported: maps, and graphs. Maps define product mappings, translations and memberships, while graphs define interactions which can be visualised as weighed graphs (see bio_db_data_predicate/4 for a full list of statically generated list of bio_db data predicates).

There are 2 prolog flags (see current_prolog_flag/2) that can control the behaviour of the library: bio_db_qcompile (def: true) and bio_db_interface (def: prolog). When the first one is set to false, it can disable the compilation to

Bio_db itself does include any of the datasets. You can either download the separate pack(bio_db_repo) which contains all of the Prolog datasets. bio_db_repo will install all the Prolog database files. The single tar and gzipped file is 246 Mb in size and the fully expanded version of a Prolog installation can take up to 3.1Gb. The precise size depends on how many tables are accessed at least once (each producing an expanded .pl and a .qlf file). As of version 4.0 there are 91 associated data predicates serving 55444729 records. This pack can be installed as per usual via

?- pack(bio_db_repo).

If you do not install all datasets, each data table will be auto-downloaded the first time you try to access some of its data. Auto-downloading works transparently to the user, where a data set is downloaded by simply calling the predicate.

For example

?- hgnc_homs_symb_hgnc( 'LMTK3', Hgnc ).
% prolog DB:table hgnc:hgnc_homs_symb_hgnc/2 is not installed, do you want to download (Y/n) ?
% Trying to get: url_file(http://www.stoics.org.uk/bio_db_repo/data/maps/hgnc/hgnc_homs_symb_hgnc.pl,/usr/local/users/nicos/local/git/test_bio_db/data/maps/hgnc/hgnc_homs_symb_hgnc.pl)
% Loading prolog db: /usr/local/users/nicos/local/git/test_bio_db/data/maps/hgnc/hgnc_homs_symb_hgnc.pl
Hgnc = 19295.

?- bio_db_interface( prosqlite ).
% Setting bio_db_interface prolog_flag, to: prosqlite
true.

?- hgnc_homs_prev_symb( Prv, Symb ).
% prosqlite DB:table hgnc:hgnc_homs_prev_symb/2 is not installed, do you want to download (Y/n) ?
% Trying to get: url_file(http://www.stoics.org.uk/bio_db_repo/data/maps/hgnc/hgnc_homs_prev_symb.sqlite,/usr/local/users/nicos/local/git/test_bio_db/data/maps/hgnc/hgnc_homs_prev_symb.sqlite)
false.

?- hgnc_homs_prev_symb( Prv, Symb ).
% prosqlite DB:table hgnc:hgnc_homs_prev_symb/2 is not installed, do you want to download (Y/n) ?
% Trying to get: url_file(http://www.stoics.org.uk/bio_db_repo/data/maps/hgnc/hgnc_homs_prev_symb.sqlite,/usr/local/users/nicos/local/git/test_bio_db/data/maps/hgnc/hgnc_homs_prev_symb.sqlite)
% Loading prosqlite db: /usr/local/users/nicos/local/git/test_bio_db/data/maps/hgnc/hgnc_homs_prev_symb.sqlite
Prv = 'A1BG-AS',
Symb = 'A1BG-AS1' .

See bio_db_data_predicate/4 for a way to enumerate all data predicates. The source of which is in src/bio_db_data_predicate.pl which also includes in the comments the cell structure.

As of version 2.0 bio_db is formed of a number of hierarchically organised cells that can be loaded independently. This is because there now too many predicates and is also a devise for better supporting organism specific data. There are currently two main cells, hs (human) and mouse. Each sub-celled by data source of origin.

?- use_module(library(bio_db)).

Loads the whole interface (all cells), without the user needing to be aware of anything. The only difference is that the user will not be able to see all the module predicates at the first line of file pack(bio_db/prolog/bio_db.pl)).

?- lib(bio_db).

Also loads everything.

?- lib(& bio_db).

Loads the skeleton of the module (cells usually laod the module dependencies like this).

?- lib(& bio_db(hs)).

Loads hs cell (and skeleton). hs comprises of a number of sub-cells.

?- lib(& bio_db(hs(hgnc))).

Loads the hs/hgnc primary cell (and the skeleton).

In both the above loads, the following becomes available, however, the former load also loads additional predicates for human, but non hgnc based.

?- hgnc_homs_hgnc_symb( Hgnc, 'LMTK3' ).
Hgnc = 19295.

The following

?- use_module( pack('bio_db/cell/hs/hgnc') ).

also loads just the HGNC part of the human section of bio_db, but it is not a recommended way to do so.

Organisms

galg
Gallus gallus (red junglefowl), colloquial: chicken
homs
Homo sapiens, colloquial: human
mult
covers multiple organisms, longer form: multi
musm
Mus musculus, colloquial: mouse
suss
Sus scrofa (wild boar or Eurasian boar) colloquial: pig

Databases

Ensembl=ense
Homo sapiens genes and proteins. Genes and trascripts mappings along with mapping to genomic location (latter not included in release yet)
HGNC=hgnc
Hugo Gene Nomenclature Committee, http://www.genenames.org/
NCBI=ncbi
NCBI
Uniprot=unip
Protein database.
String=strg
Protein-Protein interactions data base
MGI=mgim
Mouse Genome Informatics, mouse specific datasets (last M for marker, their identifier)

For each database, a relation token with the same name, maps the field is the unique identifier of that database.

Other relation tokens

symb
HGNC gene symbol (short, unique name for genes)
name
(HGNC) gene name (long, less standarised version of gene name)
prev
HGNC previous gene symbol
syno
HGNC gene symbol synonym
ensg
ensembl gene
enst
ensembl transcript
ensp
ensembl protein
gonm
GO name of a term
pros
Prosite protein family information
rnuc
RNA nucleic sequence ID to HGNC symbol.
unig
uniprotein gene id
sprt
Swiss-Prot part of Uniprot (high quality, curated)
trem
TrEMBL part of Uniprot (non curated)
mgim
MGI Marker (identifier for Mouse Genome Informatics Markers)
cgnc
Chicken gene nomenclature committee
taxo
taxonomy id (NCBI)
scnm
scientific names for species (NCBI)
gbnm
genbank common name (NCBI)

The name convention for map predicates is

   ?- hgnc_homs_hgnc_symb( Hgnc, Symb ).
   Hgnc = 1,
   Symb = 'A12M1~withdrawn' ;
   Hgnc = 2,
   Symb = 'A12M2~withdrawn' .

   ?- hgnc_homs_hgnc_symb( 19295, Symb ).
   Symb = 'LMTK3'.

   ?- hgnc_homs_symb_hgnc( 'LMTK3', Hgnc ).
  Hgnc = 19295.

Where the first hgnc corresponds to the source database, the second token, homs, identifies the organism, the third and fourth tokens are the fields of the map. Above, the second hgnc

The last part of the predicate name corresponds to the second (or all other) argument(s), which here is the unique Symbol assigned to a gene by HGNC. In the current version of bio_db, all tokens in map filenames are 4 characters long. Map data for predicate Pname from database DB are looked for in DB(Pname.Ext) (see bio_db_paths/0). Extension, Ext, depends on the current bio_db database interface (see bio_db_interface/1), and it is sqlite if the interface is prosqlite and pl otherwise.

The name convention for graphs is

  ?- strg_homs_edge_symb( Symb1, Symb2, W ).
  S1 = 'A1BG',
  S2 = 'ABAT',
  W = 360 ;
  S1 = 'A1BG',
  S2 = 'ABCC6',
  W = 158 .

The first part indicates the database and the second one the organism/species. Graph data for predicate Pname from database DB are looked for in bio_db_data(graphs/DB/Pname.Ext) (see bio_db_paths/1). Extension, Ext, depends on the current bio_db database interface (see bio_db_interface/1), and it is sqlite if the interface is prosqlite and pl otherwise.

Bio_db supports four db interfaces: prolog, prosqlite, berkeley and rocks. The first one is via Prolog fact bases, which is the default. The second is an interface to SQLite via pack(prosqlite) while the third and fourth work with the SWI-Prolog packs bdb and rocksdb. The underlying mechanisms are entirely transparent to the user. In order to use the sqlite data sources pack(prosqlite) needs to be installed via the pack manager

 ?- pack_install( prosqlite ).

The user can control which interface is in use with the bio_db_interface/1 predicate.

 ?- bio_db_interface( Curr ).
 Curr = prolog.

 ?- bio_db_interface( prosqlite ).

 ?- bio_db_interface( Curr ).
 Curr = prosqlite.

The type of the interface of a bio_db data predicate is determined by the interface at the time of first call.

Once the user has initiated the serving of a predicate via calling a goal to it, it is then possible to have access to information about the dataset such as download date and sourle url.

?- hgnc_homs_hgnc_symb( Hgnc, Symb ).
Hgnc = 1,
Symb = 'A12M1~withdrawn' .

?- bio_db_info( hgnc_homs_hgnc_symb/2, Key, Value ), write( Key-Value ), nl, fail.
interface-prolog
source_url-ftp://ftp.ebi.ac.uk/pub/databases/genenames/hgnc_complete_set.txt.gz
datetime-datetime(2018,11,27,12,32,11)
data_types-data_types(integer,atom)
unique_lengths-unique_lengths(46023,46023,46023)
relation_type-relation_type(1,1)
header-row(HGNC ID,Approved Symbol)
false

As of version 2.0 there are two flags that can automate some of the interactions.

:- set_prolog_flag(bio_db_pl_from_zip, user).
:- set_prolog_flag(bio_db_del_zip, user).

In both cases the recognised values for the flags are: [user,true,false]. User is for prompting the user and true is progressing with an implicit yes answer. The first flag automates conversion from .pl.zip to .pl (which will be the case for the first time you access any dataset if you have installed bio_db_repo), and the second controls the deletion of the zip file once the .pl file has been created.

As of version 4.0 there are 91 associated data predicates serving 55444729 records.

Thanks to Jan Wielemaker for a retractall fix and for code for fast loading of precompiled fact bases (and indeed for the changes in SWI that made this possible).

author
- nicos angelopoulos
version
- 0.5 2016/09/11
- 0.7 2016/10/21, experimenting with distros in github
- 0.9 2017/03/10, small changes for pack(requires) -> pack(lib) v1.1
- 1.0 2017/10/09, to coincide with ppdp paper presentation
- 2.1 2018/11/27, introduces cells and mouse data (and fixed dependency of 2.0)
- 2.4 2019/04/02, test: bio_db_stats, new mouse db predicates, iface: bio_db_data_predicate/4
- 2.5 2019/04/22, edge_strg_symb/4; bio_db_organism/1,2; go_id/2,3
- 2.6 2019/05/08, changed to organism alias interface; evidence in gont maps
- 2.7 2019/05/12, edge_strg_symb/4 -> org_edge_strg_symb/4
- 3.0 2019/05/15, paper submission
- 3.1 2020/03/09, fixed lib; no unigene
- 3.2 2020/09/18, include mouse ense + fixes/updates on building scripts
- 3:4 2021/05/10, removed edge_gont_includes/2 (reciprocal of is_a), and edge_gont_consists_of/2 (reciprocal of part_of/2)
- 3:6 2021/12/04, fixed pack_errors and map_ense_mouse_enst_chrl/5; bio_db_stats.pl version 0.2
- 4:1 2022/12/29, huge re-config of data predicate names + reac-tome (maps) + chicken
- 4:2 2023/06/06, support for pig
- 4:3 2023/10/05, mult for multi organisms; vgnc database; ncbi taxonomy db; build-reorganisation
See also
- doc/Releases.txt for version details
- bio_db_data_predicate/4 for a way to enumerate all data predicates
- cell/ for the definitions of the data predicates

*/

 bio_db_paths
Initialisation call- setting up path aliases.

There are two main directory repositories the predicate deals with: (a) the bio_db installed databases root (alias bio_db_data), and (b) the root of downloaded databases (alias bio_db_downloads). Optionally a top directory of which both (a) and (b) are subdirs can be defined (alias bio_db). The default value for alias bio_db is a made-up pack directory pack(bio_db_repo). The default for bio_db_data is sub directory data of alias bio_db, while bio_db_downloads defaults to sub directory downloads of the alias bio_db. The canonical subdirectory name for (a) is data and for (b) is downloads.

pack(bio_db_repo) can also be installed as a complete package from SWI's manager.

?- pack_install( bio_db_repo ).

This will install all the Prolog database files. The single tar and gzipped file is 246 Mb in size and the fully expanded version of a Prolog installation can take up to 3.1Gb. The precise size depends on how many tables are accessed at least once (each producing an expanded .pl and a .qlf file).

Directory locations for (a) and (b) above can be given as either prolog flags with key bio_db_root and bio_dn_root respectively or via environment variables BioDbRoot and BioDnRoot.

Installed root alias(bio_db_data) contains sub-dirs

graphs
for graphs; string and reactome
maps
for all the supported maps

The above are mapped to aliases bio_graphs and bio_maps respectively. Within each of these sub-directories there is further structure based on the database the set was originated.

Downloaded root alias(bio_db_downloads) may contain sub-dirs

hgnc
data from HGNC database
ncbi
data from NCBI database
reactome
data from Reactome database
string
data from string database
uniprot
protein data from EBI
ense
ensembl database

Alias bio_db_downloads is only useful if you are downloading data files directly from the supported databases.

See

?- absolute_file_name( packs(bio_db(auxil)), Auxil ), ls( Auxil ).

for examples of how these can be used.

For most users these aliases are not needed as the library manages them automatically.

To be done
- transfer datasets and downloads to new pack location when running on newly installed SWI version upgrade.

*/

  643bio_db_paths :-
  644    bio_db_paths_root,
  645    bio_db_paths_installed,
  646    bio_db_paths_installed_sub,
  647    bio_db_paths_downloaded.
  648
  649bio_db_paths_root :-
  650    bio_db_setting( bio_db_root, Root ),
  651    !,
  652    bio_db_path_new( bio_db, Root ).
  653bio_db_paths_root.
  654
  655bio_db_paths_installed :-
  656    bio_db_setting( bio_db_data_root, DbRoot ),
  657    !,
  658    bio_db_path_new( bio_db_data, DbRoot ).
  659bio_db_paths_installed :-
  660    user:file_search_path( bio_db, BioDb ),
  661    os_path_1( BioDb, data, BioDbData ),
  662    % exists_directory( DbRoot ),
  663    !,
  664    bio_db_path_new( bio_db_data, BioDbData ).
  665bio_db_paths_installed :-
  666    throw( missing_setting(bio_db_data_root) ).
  667
  668bio_db_paths_downloaded :-
  669    bio_db_setting( bio_db_downloads_root, DnRoot ),
  670    !,
  671    bio_db_path_new( bio_db_downloads, DnRoot ).
  672bio_db_paths_downloaded :-
  673    user:file_search_path( bio_db_downloads_root, BioRoot ),
  674    os_path_1( BioRoot, downloads, DnRoot ),
  675    exists_directory( DnRoot ),
  676    !,
  677    bio_db_path_new( bio_db_downloads, DnRoot ).
  678
  679bio_db_paths_installed_sub :-
  680    user:file_search_path( bio_db_data, DbRoot ),
  681    findall( Sub, bio_db_sub(Sub), Subs ),
  682    maplist( bio_db_paths_installed_sub(DbRoot), Subs ),
  683    !.
  684
  685bio_db_paths_installed_sub( DbRoot, Sub ) :-
  686    os_path_1( DbRoot, Sub, AbsSub ),
  687    % exists_directory( Abs ),
  688    % directory_files( Abs, DbSubs ), % os_dirs
  689    ( atom_concat(SubSingular,'s',Sub) -> true; SubSingular = Sub ),
  690    atom_concat( bio_, SubSingular, BioDbSub ),
  691    bio_db_path_new( BioDbSub, AbsSub ),
  692    % os_path_1( AbsSub, Db, AbsDb )
  693    % bio_db_path_new( Db, AbsDb ),
  694    % bio_db_source( Sub, Db ),
  695    findall( DbSub, bio_db_source(Sub,DbSub), DbSubs ),
  696    maplist( bio_db_paths_installed_sub_dbs(AbsSub,Sub), DbSubs ),
  697    !.
  698bio_db_paths_installed_sub( _DbRoot, _Sub ).
  699
  700bio_db_paths_installed_sub_dbs( Abs, Sub, Db ) :-
  701    bio_db_source( Sub, Db ),
  702    os_path_1( Abs, Db, Full ),
  703    bio_db_path_new( Db, Full ).
  704% bio_db_paths_installed_sub_dbs( _Abs, _Sub ).
  705
  706bio_db_setting( PlSet, Value ) :-
  707    current_prolog_flag( PlSet, Value ),
  708    debug( bio_db, 'bio_db setting via flag: ~w, set to: ~w', [PlSet,Value] ),
  709    !.
  710bio_db_setting( PlSet, Value ) :-
  711    atomic_list_concat( Parts, '_', PlSet ),
  712    maplist( upcase_first, Parts, Arts ),
  713    atomic_list_concat( Arts, EnvVar ),
  714    getenv( EnvVar, Value ),
  715    debug( bio_db, 'bio_db setting via env: ~w, setting: ~w, set to: ~w', [EnvVar,PlSet,Value] ),
  716    !.
  717bio_db_setting( PlSet, Value ) :-
  718    bio_db_setting_default( PlSet, Value ).
  719
  720bio_db_path_new( Alias, New ) :-
  721    user:file_search_path( Alias, Old ),
  722    bio_db_path_new_exists( Alias, Old, New ),
  723    !.
  724bio_db_path_new( Alias, Path ) :-
  725    debug( bio_db, 'Asserting search alias: ~w, to ~p', [Alias,Path] ),
  726    assert( user:file_search_path(Alias,Path) ).
  727
  728bio_db_path_new_exists( _Alias, Old, Old ) :-
  729    !.
  730bio_db_path_new_exists( Alias, Old, New ) :-
  731    throw( fixme(alias_exists(Alias,Old,New)) ).
  732
  733bio_db_path_exists( Alias ) :- % fixme: is this called from anywhere ?
  734    throw( fixme(bio_db_paths_installed/0,search_path_exists(Alias)) ).
  735
  736upcase_first( Atom, Upped ) :-
  737    sub_atom( Atom, 0, 1, _, Flw ), 
  738    upcase_atom( Flw, Fup ),
  739    atom_length( Atom, Len ),
  740    Ken is Len - 1,
  741    sub_atom( atom, 1, Ken, 0, Tail ),
  742    atom_concat( Fup, Tail, Upped ).
 bio_db_version(-Vers, -Date)
Version Mj:Mn:Fx, and release date date(Y,M,D).
?- bio_db_version( V, D ).
V = 4:3:0,
D = date(2023, 10, 5).
author
- Nicos Angelopoulos
version
- 4:3 2023/10/5, imported first vgnc predicates, multi-organism preds support, & restructure of build preds
See also
- bio_db_data_predicate/4 (which should be generated for each new version)
- doc/Releases.txt for more detail on change log
- module documentation for brief comments on versioning history of this pack

*/

  761bio_db_version( 4:3:0, date(2023,10,5) ).   
 bio_db_citation(-Atom, -Bibterm)
This predicate succeeds once for each publication related to this library. Atom is the atom representation suitable for printing while Bibterm is a bibtex(Type,Key,Pairs) term of the same publication. Produces all related publications on backtracking.
  771bio_db_citation( Atom, bibtex(Type,Key,Pairs) ) :-
  772  Atom = 'Accessing biological data as Prolog facts.\nNicos Angelopoulos and Jan Wielemaker. In Proceedings of 19th International Symposium on Principles and Practice of Declarative Programming, Namur, Belgium, October, 2017 (PPDP\'17), 10 pages.',
  773  Type = inproceedings,
  774  Key  = 'AngelopoulosN_GiamasG_2015',
  775  Pairs = [
  776               title  = 'Accessing biological data as Prolog facts',
  777               author = 'Nicos Angelopoulos and Jan Wielemaker',
  778               booktitle= '19th International Symposium on Principles and Practice of Declarative Programming',
  779               year = 2017,
  780               month = 'October',
  781               address= 'Namur, Belgium'
  782               % url     = 'http://ceur-ws.org/Vol-1433/tc_74.pdf'
  783     ].
  784
  785bio_db_citation( Atom, bibtex(Type,Key,Pairs) ) :-
  786  Atom = 'A logical approach to working with biological databases.\nNicos Angelopoulos and Georgios Giamas.\nProceedings of the 31st International Conference on Logic Programming (ICLP 2015) Accepted as a technical communication.\nCork, Ireland. September 2015.',
  787  Type = inproceedings,
  788  Key  = 'AngelopoulosN_GiamasG_2015',
  789  Pairs = [
  790               author = 'Nicos Angelopoulos and Georgios Giamas',
  791               title  = 'A logical approach to working with biological databases',
  792               booktitle= 'Technical Communication in Proceedings of the 31st International Conference on Logic Programming (ICLP 2015)',
  793               year = 2015,
  794               month = 'September',
  795            address= 'Cork, Ireland',
  796               url     = 'http://ceur-ws.org/Vol-1433/tc_74.pdf'
  797     ].
  798
  799bio_db_citation( Atom, bibtex(Type,Key,Pairs) ) :-
  800    Atom = 'Working with biological databases.\nNicos Angelopoulos and Georgios Giamas.\n1th Workshop on Constraint Based Methods for Bioinformatics (2015)\nCork, Ireland. September 2015',
  801    Type    = inproceedings,
  802    Key  = 'AngelopoulosN_GiamasG_2015a',
  803    Pairs = [
  804               author = 'Nicos Angelopoulos and Georgios Giamas',
  805            title  = 'Working with biological databases',
  806            booktitle = '11th Workshop on Constraint Based Methods for Bioinformatics (2015)',
  807            year = 2015,
  808            month = 'September',
  809            address = 'Cork, Ireland',
  810            url = 'http://clp.dimi.uniud.it/wp/wp-content/uploads/2015/08/WCB_2015_paper_1.pdf'
  811    ].
 bio_db_source(?Type, ?Db)
True if Db is a source database for bio_db serving predicate of type Type. Type is either maps or graphs.

The databases are

To be done
- fixme: this gets out of synch for new dbs, maybe have it in other location or throw a message if it fails ? */
  827bio_db_source(maps, hgnc).
  828bio_db_source(maps, gont).
  829bio_db_source(maps, ncbi).
  830bio_db_source(maps, unip).
  831bio_db_source(maps, vgnc).
  832bio_db_source(graphs, string ).
  833% bio_db_source( graphs, gont ). % these are used for aliases, so gont already exists
  834bio_db_source( graphs, reactome ).
  835
  836bio_db_sub(graphs).
  837bio_db_sub(maps).
  838
  839bio_db_setting_default( 'bio_db_root', BioDbRoot ) :-
  840    absolute_file_name( pack(bio_db), BioDb ),
  841    directory_file_path( Dir, bio_db, BioDb ),
  842    directory_file_path( Dir, bio_db_repo, BioDbRoot ).
  843bio_db_setting_default( 'bio_db_data_root', BioDbData ) :-
  844    absolute_file_name( bio_db(data), BioDbData ).
  845bio_db_setting_default( 'bio_db_downloads_root', BioDbDnloads ) :-
  846    absolute_file_name( bio_db(downloads), BioDbDnloads ).
 bio_db_interface(?Iface, -Status)
Interrogate the installation status (true or false) of bio_db's known interfaces. true if the interface dependencies are installed and the interface can be used, and =|false=| otherwise.

Can be used to enumerate all known or installed interfaces.

 ?- findall( Iface, bio_db_interface(Iface,_), Ifaces ).
 Ifaces = [prolog, berkeley, prosqlite, rocks].
  861bio_db_interface( prolog, true ).
  862bio_db_interface( berkeley, Bool ) :-
  863    ( catch( use_module( library(bdb) ), _, fail ) -> Bool = true; Bool = false ).
  864bio_db_interface( prosqlite, Bool ) :-
  865    ( catch( use_module( library(prosqlite) ), _, fail ) -> Bool = true; Bool = false ).
  866bio_db_interface( rocks, Bool ) :-
  867    ( catch( use_module( library(rocksdb) ), _, fail ) -> Bool = true; Bool = false ).
 bio_db_interface(?Iface)
Interrogate or set the current interface for bio_db database predicates. By default Iface = prolog. Also supported: prosqlite (needs pack proSQLite), berkley (needs SWI's own library(bdb) and rocks (needs pack(rocskdb).
?- bio_db_interface( Iface ).
Iface = prolog.

?- debug( bio_db ).
true.

?- bio_db_interface( wrong ).
% Could not set bio_db_interface prolog_flag, to: wrong, which in not one of: [prolog,prosqlite,berkeley,rocks]
false.

?- bio_db_interface( Iface ).
Iface = prolog.

?- hgnc_homs_symb_hgnc( 'LMTK3', Hgnc ).
% Loading prolog db: /usr/local/users/nicos/local/git/lib/swipl-7.1.32/pack/bio_db_repo/data/maps/hgnc/hgnc_homs_symb_hgnc.pl
Hgnc = 19295.

?- bio_db_interface( prosqlite ).
% Setting bio_db_interface prolog_flag, to: prosqlite
true.

?- hgnc_homs_prev_symb( Prev, Symb ).
% prosqlite DB:table hgnc:hgnc_homs_prev_symb/2 is not installed, do you want to download (Y/n) ?
% Execution Aborted
?- hgnc_homs_prev_symb( Prev, Symb ).
% Loading prosqlite db: /usr/local/users/nicos/local/git/lib/swipl-7.1.32/pack/bio_db_repo/data/maps/hgnc/hgnc_homs_prev_symb.sqlite
Prev = 'A1BG-AS',
Symb = 'A1BG-AS1' ;

In which case Iface is prosqlite.

  909bio_db_interface( Iface ) :-
  910    var( Iface ),
  911    !,
  912    current_prolog_flag( bio_db_interface, IfacePrv ),
  913    bio_db_interface_known( IfacePrv, Iface ).
  914bio_db_interface( Iface ) :-
  915    ground( Iface ),
  916    bio_db_interface_set( Iface ).
  917
  918bio_db_info( Iface, Pid, Key, Value ) :-
  919    var( Iface ),
  920    !,
  921    bio_db_info_gen( Iface, Pid, Key, Value ).
  922bio_db_info( Iface, Pid, Key, Value ) :-
  923    atom( Iface ),
  924    bio_db_info_source( Iface, Pid, Key, Value ).
  925
  926bio_db_info_gen( Iface, Pid, Key, Value ) :-
  927    bio_db_interface_extensions( Iface, _ ),
  928    bio_db_info( Iface, Pid, Key, Value ).
  929
  930bio_db_install_defaults( [org(hs),interactive(true)] ).
 bio_db_install(+PidOrPname, +Iface)
 bio_db_install(+PidOrPname, +Iface, +Opts)
Install the interface (Iface) for bio_db database that corresponds to predicate identifier (Pid) or a predicate name (Pname). Note that this is not necessary to do in advance as the library will auto load missing Iface and Pid combinations when first interrogated.

Opts

interactive(Ictive=true)
set false to accept default interactions
org(Org=hs)
organism

*/

  946bio_db_install( PorP, Iface ) :-
  947    bio_db_install( PorP, Iface, [] ).
  948bio_db_install( PorP, Iface, OptS ) :-
  949    options_append( bio_db_install, OptS, Opts ),
  950    options( interactive(Ictive), Opts ),
  951    options( org(Org), Opts ),
  952    bio_db_porp_call( PorP, bio_db_install/2, Call ),
  953    bio_db_map_call_db_pname( Call, Db, Pname, Arity ),
  954    ( bio_db_info(Iface,PorP,_,_) -> 
  955        Mess = '~a DB:table ~w is already installed. It will be overwritten. Continue',
  956        Args = [Iface,PorP],
  957        ui_yes_no( Ictive, Mess, Args, y, Reply ), 
  958        ( Reply == true ->
  959            bio_db_interface_extensions( Iface, [Ext|_] ),
  960            ( bio_db_pname_source(Org,Db,Pname,read,Ext,File) ->
  961                delete_installed( Ext, File )
  962                ;
  963                true
  964            ),
  965            bio_db_serve_pname( false, false, Org, Db, Pname, Arity, Iface, Call )
  966            ;
  967            % ensure qlf is also installed, before failing
  968            ( Iface == prolog ->
  969                ( bio_db_pname_source(Org,Db,Pname,read,qlf,_ExistFile) ->
  970                    Mess1 = 'Qlf is also istalled.',
  971                    phrase('$messages':translate_message(debug(Mess1,[])), Lines1),
  972                    print_message_lines(current_output, kind(informational), Lines1)
  973                    ;
  974                    bio_db_pname_source( Org, Db, Pname, read, pl, File ),
  975                    bio_db_load_call( false, Pname, Arity, Iface, File, true )
  976                )
  977                ;
  978                true
  979            )
  980        )
  981        ;
  982        bio_db_serve_pname( false, false, Db, Pname, Arity, Iface, Call )
  983    ).
  984
  985delete_installed( rocks, Dir ) :-
  986    delete_installed_db_dir_and_info( Dir ).
  987delete_installed( db, File ) :-
  988    delete_installed_db_file_and_info( File ).
  989delete_installed( sqlite, File ) :-
  990    delete_installed_db_file_and_info( File ).
  991delete_installed( pl, File ) :-
  992    delete_installed_db_file_and_info( File ),
  993    file_name_extension( Stem, _Ext, File ),
  994    file_name_extension( Stem,  qlf, Qile ),
  995    ( exists_file(Qile) ->
  996        debug( bio_db, 'Deleting file: ~p', Qile ),
  997        delete_file(Qile)
  998        ;
  999        true
 1000    ).
 1001
 1002delete_installed_db_file_and_info( File ) :-
 1003    ( exists_file(File) ->
 1004        debug( bio_db, 'Deleting file: ~p', File ),
 1005        delete_file( File )
 1006        ;
 1007        true
 1008    ),
 1009    file_name_extension( Stem, Ext, File ),
 1010    atom_concat( Stem, '_info', InfoStem ),
 1011    file_name_extension( InfoStem, Ext, InfoFile ),
 1012    ( exists_file(InfoFile) ->
 1013        debug( bio_db, 'Deleting file: ~p', InfoFile ),
 1014        delete_file( InfoFile )
 1015        ;
 1016        true
 1017    ).
 1018
 1019delete_installed_db_dir_and_info( Dir ) :-
 1020    ( exists_directory(Dir) ->
 1021        debug( bio_db, 'Deleting directory: ~p', Dir ),
 1022        delete_directory_contents( Dir )
 1023        ;
 1024        true
 1025    ),
 1026    file_name_extension( Stem, Ext, Dir ),
 1027    atom_concat( Stem, '_info', InfoStem ),
 1028    file_name_extension( InfoStem, Ext, InfoDir ),
 1029    ( exists_directory(InfoDir) ->
 1030        debug( bio_db, 'Deleting directory: ~p', InfoDir ),
 1031        delete_directory_contents( InfoDir )
 1032        ;
 1033        true
 1034    ).
 bio_db_porp_call(PorP, CallerId, Call)
Constract a generic call from predicate id or predicate name (Porp).

*/

 1041bio_db_porp_call( Porp, Cid, Call ) :-
 1042    ground( Porp ),
 1043    bio_db_porp_call_ground( Porp, Cid, Call ).
 1044
 1045bio_db_porp_call_ground( Pname/Arity, _Cid, Call ) :- !,
 1046    functor( Call, Pname, Arity ).
 1047bio_db_porp_call_ground( Pname, Cid, Call ) :-
 1048    % find the name from the module def of bio_db. A bit hackish.
 1049    atom( Pname ),
 1050    absolute_file_name( pack('bio_db/prolog/bio_db.pl'), BioDbF, [access(exist)] ),
 1051    open( BioDbF, read, In ),
 1052    read( In, ModuleDef ), 
 1053    close( In ),
 1054    ModuleDef = (:- module( bio_db, Pids ) ),
 1055    ( memberchk(Pname/Arity,Pids) ->
 1056        true
 1057        ;
 1058        throw( not_a_db_pred(Pname), [pack(bio_db),pred(Cid)] )
 1059    ),
 1060    functor( Call, Pname, Arity ).
 bio_db_predicate_name(+PidOrPname, -Pname)
Auxiliary predicate that strips the Pname of Pid or assumes atomic PidOrPname to be a Pname.
To be done
- check it looks like a db name. this is only useful for db_preds.

*/

 1070bio_db_predicate_name( Pid, Pname ) :-
 1071    ground( Pid ),
 1072    bio_db_predicate_name_ground( Pid, Pname ).
 1073
 1074bio_db_predicate_name_ground( Pname/_, Pname ) :- !.
 1075bio_db_predicate_name_ground( Pname, Pname ) :-
 1076    atom( Pname ).
 1077
 1078bio_db_predicate_db( Pid, Db ) :-
 1079    bio_db_predicate_name( Pid, Pname ),
 1080    atomic_list_concat( [_,Db|_], '_', Pname ).
 1081
 1082bio_db_info_source( Iface, Pid, Key, Value ) :-
 1083    bio_db_predicate_name( Pid, Pname ),
 1084    bio_db_predicate_db( Pname, Db ),
 1085    bio_db_pname_source( Db, Pname, read, Iface, DbF ),
 1086    ( bio_db_interface_initialisation(Iface) -> true; true ),
 1087    bio_db_info_db_file( Iface, Pid, DbF, Key, Value ).
 1088
 1089bio_db_info_db_file( prolog, _Pid, DbF, Key, Value ) :-
 1090    bio_db_pl_info( DbF, Infos ),
 1091    member( Info, Infos ),
 1092    arg( 1, Info, Key ),
 1093    arg( 2, Info, Value ).
 1094bio_db_info_db_file( prosqlite, Pid, DbF, Key, Value ) :-
 1095    bio_db_predicate_info( Pid, Info ),
 1096    % bio_db_source_info( DbF, InfoF ),
 1097    sqlite_connect( DbF, Info ),
 1098    atom_concat( 'Select * from ', Info, Query ),
 1099    findall( Row, sqlite_query(Info,Query,Row), Rows ),
 1100    sqlite_disconnect( Info ),
 1101    member( row(Key,ValueAtom), Rows ),
 1102    ( catch(atom_to_term(ValueAtom,Value,_),_,fail) ->
 1103        true
 1104        ;
 1105        Value = ValueAtom
 1106    ).
 1107bio_db_info_db_file( berkeley, Pid, DbF, Key, Value ) :-
 1108    bio_db_info_interface_infos( berkeley, Pid, DbF, _,  KVs ),
 1109    member( Key-Value, KVs ).
 1110bio_db_info_db_file( rocks, Pid, DbF, Key, Value ) :-
 1111    bio_db_info_interface_infos( rocks, Pid, DbF, _,  KVs ),
 1112    member( Key-Value, KVs ).
 bio_db_info(+Pid, ?Iface)
 bio_db_info(+Pid, ?Key, -Value)
 bio_db_info(+Iface, +Pid, ?Key, -Value)
Retrieve information about bio_db database predicates.

When Iface is not given, Key and Value are those of the interface under which Pid is currently open for access. The predicate errors if Pid is not open for serving yet.

The bio_db_info/2 version succeeds for all interfaces Pid is installed- it is simply a shortcut to: bio_db_info( Iface, Pid, _, _ ).

The Key-Value information returned are about the particular data predicate as saved in the specific backend.

Key

source_url
an atomic value of the URL
datetime
datetime/6 term
data_types
data_types/n given the primary type for each argyument in the data table
header
row/n term, where n is the number of columns in the data table
unique_lengths
unique_lengths/3 term, lengths for the ordered sets of: Ks, Vs and KVs
relation_type(From, TO)
where From and To take values in 1 and m
?- bio_db_info( Iface, hgnc_homs_hgnc_symb/2, Key, Value), write( Iface:Key:Value ), nl, fail.
prolog:source_url:ftp://ftp.ebi.ac.uk/pub/databases/genenames/hgnc_complete_set.txt.gz
prolog:datetime:datetime(2016,9,10,0,2,14)
prolog:data_types:data_types(integer,atom)
prolog:unique_lengths:unique_lengths(44266,44266,44266)
prolog:relation_type:relation_type(1,1)
prolog:header:row(HGNC ID,Approved Symbol)
prosqlite:source_url:ftp://ftp.ebi.ac.uk/pub/databases/genenames/hgnc_complete_set.txt.gz
prosqlite:datetime:datetime(2016,9,10,0,2,14)
prosqlite:data_types:data_types(integer,atom)
prosqlite:unique_lengths:unique_lengths(44266,44266,44266)
prosqlite:relation_type:relation_type(1,1)
prosqlite:header:row(HGNC ID,Approved Symbol)

*/

 1166bio_db_info( PorP, Iface ) :-
 1167    bio_db_info( Iface, PorP, _, _ ),
 1168    !.
 1169
 1170bio_db_info( Pid, Key, Value ) :-
 1171    bio_db_db_predicate( Pid ),
 1172    !,
 1173    bio_db_info_pred( Pid, Key, Value ).
 1174bio_db_info( Pid, _Key, _Value ) :-
 1175    throw( not_a_db_pred(Pid), [pack(bio_db),pred(bio_db_info/3)] ).
 1176
 1177bio_db_info_pred( Pid, Key, Value ) :-
 1178    bio_db_handle( Pid, Iface, File, Handle, _Mod ),
 1179    !,
 1180    bio_db_info_interface( Iface, Pid, File, Handle, Key, Value ).
 1181
 1182bio_db_info_pred( Pid, _Key, _Value ) :-
 1183    throw( close_to_info(Pid), [pack(bio_db),pred(bio_db_info/3)] ).
 1184
 1185bio_db_info_interface_kvs( Iface, Pid, File, Handle, KVs ) :-
 1186    bio_db_info_interface_infos( Iface, Pid, File, Handle, Pairs ),
 1187    \+ var( KVs ),
 1188    bio_db_info_interface_kvs( KVs, Pairs ).
 1189    
 1190bio_db_info_interface_kvs( [], _ ).
 1191bio_db_info_interface_kvs( [K-V|T], Pairs ) :-
 1192    memberchk( K-V, Pairs ),
 1193    bio_db_info_interface_kvs( T, Pairs ).
 1194    
 1195bio_db_info_interface( Iface, Pid, File, Handle, Key, Value ) :-
 1196    bio_db_info_interface_infos( Iface, Pid, File, Handle, KVs ),
 1197    member( Key-Value, KVs ).
 1198    
 1199bio_db_info_interface_infos( Callable, Pid, _File, _Handle, Pairs ) :-
 1200    memberchk( Callable, [prolog,prosqlite] ),
 1201    !,
 1202    bio_db_predicate_info( Pid, InfoName ),
 1203    Goal =.. [InfoName,Key,Value],
 1204    findall( Key-Value, ( (Key = interface, Value = Callable) ;  bio_db:Goal ), Pairs ).
 1205bio_db_info_interface_infos( berkeley, _Pid, File, _Handle, KVs ) :-
 1206    % fixme add key = Berkley interface
 1207    % ( ((Key=interface, Value=berkeley); bdb_enum( Handle, info+Key , Value)) ).
 1208    bio_db_source_info( File, InfoF ),
 1209
 1210    bdb_open( InfoF, read, InfoHandle, [key(atom),value(term)] ),
 1211    findall( AKey-AValue, bdb_enum(InfoHandle,AKey,AValue), Pairs ),
 1212    bdb_close( InfoHandle ),
 1213    KVs = [interface-berkeley|Pairs].
 1214bio_db_info_interface_infos( rocks, _Pid, File, _Handle, KVs ) :-
 1215    % fixme add key = Berkley interface
 1216    file_name_extension( Stem, Ext, File ),
 1217    atom_concat( Stem, '_info', InfoStem ),
 1218    file_name_extension( InfoStem, Ext, InfoFile ),
 1219    rocks_open( InfoFile, InfoHandle, [key(atom),value(term)] ),
 1220    findall( AKey-AValue, rocks_enum(InfoHandle,AKey,AValue), Pairs ),
 1221    rocks_close( InfoHandle ),
 1222    KVs = [interface-rocks|Pairs].
 bio_db_close(+Pid)
Close the current serving of predicate Pid. Next time a Pid Goal is called the current interface (bio_db_interface/1) will be used to establish a new server and resolve the query.

Predicate throws an error if the Pid does not correspond to a db_predicate or if it is not currently servered by any of the backends.

?- bio_db_interface(prosqlite).
?- hgnc_homs_hgnc_symb( Hgnc, Symb ).
Hgnc = 506,
Symb = 'ANT3~withdrawn' .

?- bio_db_close( hgnc_homs_hgnc_symb/2 ).
?- bio_db_interface( prolog ).
?- hgnc_homs_hgnc_symb( Hgnc, Symb ).
Hgnc = 1,
Symb = 'A12M1~withdrawn' .
?- bio_db_close(hgnc_homs_hgnc_symb/2).

*/

 1248bio_db_close( Pid ) :-
 1249    bio_db_db_predicate( Pid ),
 1250    !,
 1251    bio_db_close_pred( Pid ).
 1252bio_db_close( Pid ) :-
 1253    throw( not_a_db_pred(Pid), [pack(bio_db),pred(bio_db_close/1)] ).
 1254
 1255bio_db_close_pred( Pid ) :-
 1256    bio_db_handle( Pid, Iface, File, Handle, Mod ),
 1257    !,
 1258    bio_db_close_connection( Iface, Handle ),
 1259    Pid = Pname/Arity,
 1260    functor( Head, Pname, Arity ),
 1261    retractall( Head ),
 1262    atom_concat( Pname, '_info', InfoPname ),
 1263    functor( InfoHead, InfoPname, 2 ),
 1264    retractall( InfoHead ),
 1265    retractall( bio_db_handle(Pid,Iface,File,Handle,Mod) ),
 1266    assert( (Head :- bio_db_serve(Head)) ).
 1267bio_db_close_pred( Pid ) :-
 1268    throw( not_served(Pid), [pack(bio_db),pred(db_close/1)] ),
 1269    fail.
 1270
 1271bio_db_close_connection( prosqlite, Handle ) :-
 1272    sqlite_disconnect( Handle ).
 1273bio_db_close_connection( prolog, _Handle ).
 1274bio_db_close_connection( berkeley, Handle ) :- 
 1275    bdb_close( Handle ).
 1276bio_db_close_connection( rocks, Handle ) :- 
 1277    rocks_close( Handle ).
 bio_db_close_connections
Close all currently open bio_db backend connections.

This is called by bio_db at halt.

*/

 1286bio_db_close_connections:-
 1287    findall( Pid, bio_db:bio_db_handle(Pid,_B,_C,_D,_Mod), Pids ),
 1288    member( Pid, Pids ),
 1289    bio_db_close( Pid ),
 1290    fail.
 1291bio_db_close_connections.
 bio_db_db_predicate(?Pid)
True if Pid is a predicate identifier which is defined in current bio_db session, and contains 4 _ sep tokens, each of length 4. When Pid is a free variable all such predicate identifiers are returned on backtracking.

For a statically produced list of all data predicates in bio_db see, bio_db_data_predicate/4.

  ?- bio_db_db_predicate( hgnc_homs_hgnc_symb/2 ).
  true.

  ?- bio_db_db_predicate( X ).
  X = hgnc_homs_symb_ncbi/2 ;
  X = ense_homs_enst_ensg/2 ;
  ...

*/

 1313bio_db_db_predicate( Pname/Arity) :-
 1314    ground(Pname/Arity), !,
 1315    functor(Head,Pname,Arity),
 1316    bio_db_data_predicate_name(Pname),
 1317    % predicate_property(bio_db:Head, exported), !.
 1318    predicate_property(bio_db:Head, defined), !.  
 1319    % fixme: when called from closing,  maybe do a bit of checking ? \+ (rule=:=1,clauses=:=1)
 1320bio_db_db_predicate( Pname/Arity) :-
 1321    % module_property(bio_db, exports(List)),
 1322    % member(Pname/Arity, List),
 1323    current_predicate( bio_db:Pname/Arity ),
 1324    bio_db_data_predicate_name(Pname).
 1325
 1326bio_db_data_predicate_name( Pname ) :-
 1327     atomic_list_concat( Parts, '_', Pname ),
 1328     maplist( atom_length, Parts, [4,4,4,4] ),
 1329     !.
 1330bio_db_data_predicate_name( _Db, _Parts, Pname, Arity ) :-
 1331    throw( not_a_db_pred(Pname/Arity), [pack(bio_db),pred(bio_db_close/1)] ).
 1332
 1333% map stubs, 
 1334% these are in memory iff the map is to be loaded as prolog 
 1335% and this is the first call to the pred, they get replaced
 1336% by the map data after that.
 1337% 
 1338bio_db_serve( Call ) :-
 1339    functor( Call, Pn, _ ),
 1340    ( atomic_list_concat([_,OrgPredTkn,_,_],'_',Pn) ->
 1341          ( bio_db_organism(OrgPredTkn,OrgTkn,_Org) ->
 1342               true
 1343               ; 
 1344               ( bio_db_organism(_,OrgPredTkn,_) ->
 1345                    OrgTkn = OrgPredTkn
 1346                    ;
 1347                    throw( cannot_get_org_token_for_bio_db_served(Call) )
 1348               )
 1349          )
 1350    ),
 1351    bio_db_serve( OrgTkn, Call, true ).
 1352
 1353bio_db_serve( Org, Call ) :-
 1354    bio_db_serve( Org, Call, true ).
 1355
 1356bio_db_serve( Org, Call, Load ) :-
 1357    bio_db_interface( Iface ),
 1358    bio_db_map_call_db_pname( Call, Db, Pname, Arity ),
 1359    bio_db_serve_pname( Load, true, Org, Db, Pname, Arity, Iface, Call ).
 1360
 1361bio_db_interface_set( Iface ) :-
 1362    bio_db_interface_atom( Iface ),
 1363    !,
 1364    M = 'Setting bio_db_interface prolog_flag, to: ~a',
 1365    debug( bio_db, M, Iface ),
 1366    ( bio_db_interface_initialisation(Iface) -> true; true ),
 1367    set_prolog_flag( bio_db_interface, Iface ).
 1368bio_db_interface_set( Iface ) :-
 1369    findall( Aface, bio_db_interface_atom(Aface), AllFaces ),
 1370    throw( arg_enumerate(1,AllFaces,Iface), [pack(bio_db),pred(bio_db_interface/2)] ).
 1371    
 1372bio_db_interface_extensions( prolog, [pl,''] ).
 1373bio_db_interface_extensions( prosqlite, [sqlite,''] ).
 1374bio_db_interface_extensions( berkeley, [db,''] ).
 1375bio_db_interface_extensions( rocks, [rocks,''] ).
 1376
 1377bio_db_interface_known( Prov, Iface ) :-
 1378    atomic( Prov ),
 1379    bio_db_interface_atom( Prov ),
 1380    !,
 1381    Iface = Prov.
 1382bio_db_interface_known( Prov, Def ) :-
 1383    bio_db_default_interface( Def ),
 1384    M = 'Resetting bogus bio_db_interface prolog_flag, from: ~w, to default: ~a',
 1385    debug( bio_db, M, [Prov,Def] ), % fixme: this is informational rather than debug
 1386    set_prolog_flag( bio_db_interface, Def ).
 1387
 1388% prosqlite here
 1389/*
 1390bio_db_serve_pname( load, Db, Pname, Arity, Call ) :-
 1391    current_prolog_flag( bio_db_interface, prosqlite ),
 1392    !,
 1393    Term =.. [Db,Pname],
 1394    absolute_file_name( Term, Src, [access(Mode),file_type(prolog),file_errors(fail)] ).
 1395    sqlite_connect( phones, phones_db, as_predicates(true) )
 1396    */
 bio_db_serve_pname(+LoadFlag, +Ictive, Org, Db, Pname, Arity, Iface, _Call)
LoadFlag can be one of check, true (for loading) and false for ensuring the db is installed but does not actually hot-swap it in. Ictive is a boolean with true for interactively questioning user whereas false accepts the defaults with no interupptions.

*/

 1406bio_db_serve_pname( check, _Ictive, Org, Db, Pname, _Arity, Iface, _Call ) :-
 1407    !,
 1408    % bio_db_interface_extensions( Iface, Exts ),
 1409    bio_db_interface_extensions( Iface, [Ext|_] ),
 1410    % new implementation, untested:
 1411    bio_db_pname_source( Org, Db, Pname, read, Ext, _Abs ).
 1412    % % bio_db_db_pname_source( Db, Pname, exist, Ext, Abs ),
 1413    % Rel =.. [Db|Pname],
 1414    % absolute_file_name( Rel, Abs, [extensions(Exts),access(exist)] ),
 1415    % exists_file( Abs ),
 1416
 1417bio_db_serve_pname( Load, _Ictive, Org, Db, Pname, Arity, Iface, Call ) :-
 1418    bio_db_interface_extensions( Iface, [Ext|_] ),
 1419    bio_db_pname_source( Org, Db, Pname, read, Ext, File ),
 1420    % bio_db_db_pname_source( Db, Pname, exist, Ext, Load ),
 1421    % user:file_search_path( Db, _DbPath ),
 1422    !,
 1423    bio_db_load_call( Load, Pname, Arity, Iface, File, Call ).
 1424bio_db_serve_pname( Load, Ictive, Org, Db, Pname, Arity, Iface, Call ) :-
 1425    Iface \== prolog,
 1426    bio_db_interface_extensions( prolog, [Ext|_] ),
 1427    bio_db_pname_source( Org, Db, Pname, read, Ext, File ),
 1428    Mess = '~a DB:table ~w:~w is not installed, but the Prolog db exists. Shall it be created from Prolog',
 1429    Args = [Iface,Db,Pname/Arity],
 1430    ui_yes_no( Ictive, Mess, Args, y, Reply ),
 1431    Reply == true,
 1432    % bio_db_serve_pname_from_local( Reply, Db, Pname, Arity, Iface, Load, Call ),
 1433    bio_db_pl_nonpl_interface( Iface, File, NonPlLoad ),
 1434    !,
 1435    % fixme: add logic for deleting prolog interface of downloaded db
 1436    bio_db_load_call( Load, Pname, Arity, Iface, NonPlLoad, Call ).
 1437bio_db_serve_pname( Load, Ictive, Org, Db, Pname, Arity, Iface, Call ) :-
 1438    % bio_db_pname_source( Db, Pname, read, prolog+zip, ZLoad ),
 1439    % bio_db_pname_source( Db, Pname, read, 'pl.zip', ZLoad ),
 1440    bio_db_pname_source( Org, Db, Pname, read, prolog+zip, ZLoad ),
 1441    !,
 1442    file_name_extension( PlLoad, zip, ZLoad ),
 1443    current_prolog_flag( bio_db_pl_from_zip, PlFromZipFlag ),
 1444    ( PlFromZipFlag == user ->
 1445        Mess = '~a DB:table ~w:~w is not installed, but the zipped prolog db exists. Shall it be created from this',
 1446        Args = [Iface,Db,Pname/Arity],
 1447        ui_yes_no( Ictive, Mess, Args, y, Reply )
 1448        ;
 1449        MessFg = '~a DB:table ~w:~w is not installed, but the zipped prolog db exists. Flag bio_db_pl_from_zip says: ~w',
 1450        message_report( MessFg, [Iface,Db,Pname/Arity,PlFromZipFlag], informational ),
 1451        Reply = PlFromZipFlag
 1452    ),
 1453    ( Reply == true ->
 1454        file_directory_name( ZLoad, Dir ),
 1455        archive_extract( ZLoad, Dir, [] ),
 1456        ( Iface \== prolog ->
 1457            bio_db_pl_nonpl_interface( Iface, PlLoad, NonPlLoad ),
 1458            bio_db_reply_delete_file( true, PlLoad )
 1459            ;
 1460            current_prolog_flag(bio_db_del_zip,DelZipFlag),
 1461            ( DelZipFlag == user ->
 1462                ZipDelMess = 'Delete the zip file: ~p',
 1463                ui_yes_no( Ictive, ZipDelMess, [ZLoad], n, ZipDelReply )
 1464                ;
 1465                MessDelFg = 'Zip file will be deleted depending on value of flag bio_db_del_zip, which is: ~w',
 1466                message_report( MessDelFg, [DelZipFlag], informational ),
 1467                ZipDelReply = DelZipFlag
 1468            ),
 1469            bio_db_reply_delete_file( ZipDelReply, ZLoad ),
 1470            NonPlLoad = PlLoad
 1471        ),
 1472        !,
 1473        bio_db_load_call( Load, Pname, Arity, Iface, NonPlLoad, Call )
 1474        ;
 1475        % fixme: do fresh download
 1476        debug( bio_db, 'Downloading fresh zip file for: ~w', Pname/Arity ),
 1477        delete_file( ZLoad ),
 1478        file_directory_name( ZLoad, DataDir ),
 1479        directory_files( DataDir, DataFiles ),
 1480        findall( Delable-FullDel, ( member(Delable,DataFiles), 
 1481                                file_name_extension(Pname,_DelExt,Delable), 
 1482                                directory_file_path(DataDir,Delable,FullDel)
 1483                            ),
 1484                                Delables ),
 1485        maplist( bio_db_conflict_file, Delables ),
 1486        bio_db_serve_pname_reply( true, Ictive, Load, Org, Db, Pname, Arity, Iface, Call )
 1487    ).
 1488% here  fixem: 
 1489% add logic that warns if other interfaces will be 
 1490bio_db_serve_pname( Load, Ictive, Org, Db, Pname, Arity, Iface, Call ) :-
 1491    ( Iface == prolog -> 
 1492        Mess = '~a DB:table ~w:~w is not installed, do you want to download it'
 1493        ;
 1494        Mess = '~a DB:table ~w:~w is not installed, do you want to download the prolog db and then generate this interface'
 1495    ),
 1496    Args = [Iface,Db,Pname/Arity],
 1497    ui_yes_no( Ictive, Mess, Args, y, Reply ),
 1498    bio_db_serve_pname_reply( Reply, Ictive, Load, Org, Db, Pname, Arity, Iface, Call ).
 1499
 1500bio_db_serve_pname_reply( false, _Ictive, _Load, _Org, _Db, _Pname, _Arity, _Iface, _Call ) :-
 1501    abort.
 1502bio_db_serve_pname_reply( true, Ictive, Load, Org, Db, Pname, Arity, Iface, Call ) :-
 1503    stoics( Stoics ),
 1504    Mess = 'Downloading dataset from server: ~w',
 1505    phrase('$messages':translate_message(debug(Mess,[Stoics])), Lines),
 1506    print_message_lines(current_output, kind(informational), Lines),
 1507    atomic_list_concat( [PredType|_], '_', Pname ), 
 1508    bio_db_predicate_type_sub_dir( PredType, Sub ),
 1509    atomic_list_concat( [Stoics,Org,Sub,Db,Pname], '/', StoicsStem ),
 1510    atomic_list_concat( [StoicsStem,pl,zip], '.', StoicsFile ),
 1511    bio_db_pname_source( Org, Db, Pname, none, 'pl.zip', Local ),
 1512    debug( bio_db, 'Trying to get: ~w', url_file(StoicsFile,Local) ),
 1513    % directory_file_path( LocDir, _, Local ),
 1514    file_directory_name( Local, LocalDir ),
 1515    % here
 1516    bio_db_repo_skeleton_pack,
 1517    make_directory_path( LocalDir ),
 1518    url_file( StoicsFile, Local ),
 1519    % fixme: delete the .pl file here if it exists before unpacking ?  % although this is inconsistent with calling logic
 1520    archive_extract( Local, LocalDir, [] ),
 1521    % here( 'Unzip the pl, create the Iface and if not Iface==Prolog, suggest deleting the .pl db' ),
 1522    file_name_extension( LocalPlF, zip, Local ),
 1523    directory_files( LocalDir, LocalFiles ),
 1524    bio_db_interface_extensions( Iface, [Ext|_] ),
 1525    findall( Delable-FullDel, ( member(Delable,LocalFiles), 
 1526                            file_name_extension(Pname,DelExt,Delable), 
 1527                            \+ memberchk(DelExt,['pl.zip',pl,Ext]),
 1528                            directory_file_path(LocalDir,Delable,FullDel)
 1529                            ),
 1530                                Delables ),
 1531    debug( bio_db, 'Candidates for deletion: ~w', [Delables] ),
 1532
 1533    ( \+ exists_file(LocalPlF) -> 
 1534        throw( decompression_didnot_produce(LocalPlF) )
 1535        ; 
 1536        % here: ask to delete .zip file
 1537        ZipDelMess = 'Delete the zip file: ~p',
 1538        ui_yes_no( Ictive, ZipDelMess, [Local], n, ZipDelReply ),
 1539        bio_db_reply_delete_file( ZipDelReply, Local )
 1540    ),
 1541    ( Iface == prolog ->
 1542        NonPlLoad = LocalPlF
 1543        ;
 1544        bio_db_pl_nonpl_interface( Iface, LocalPlF, NonPlLoad ),
 1545        PlDelMess = 'Delete the Prolog file: ~p',
 1546        ui_yes_no( Ictive, PlDelMess, [LocalPlF], y, PlDelReply ),
 1547        bio_db_reply_delete_file( PlDelReply, LocalPlF )
 1548    ),
 1549    maplist( bio_db_conflict_file, Delables ),
 1550    % then( 'go back and make sure you deal with existing other interfaces (delete them)' ),
 1551    !,
 1552    bio_db_load_call( Load, Pname, Arity, Iface, NonPlLoad, Call ).
 1553    % we probably (now need something lighter than:
 1554    % bio_db_serve_pname( load, Db, Pname, Arity, Iface, Call ).
 1555
 1556bio_db_repo_skeleton_pack :-
 1557    absolute_file_name( pack(bio_db), BioDbD, [file_type(directory)] ),
 1558    directory_file_path( PackD, _, BioDbD ),
 1559    directory_file_path( PackD, bio_db_repo, RepoD ),
 1560    directory_file_path( RepoD, 'pack.pl', RepoPackPl ),
 1561    ( exists_file(RepoPackPl) ->
 1562        true
 1563        ;
 1564        make_directory_path( RepoD ),
 1565        ensure_loaded( pack('bio_db/auxil/lib/bio_db_repo_info') ),
 1566        findall( InfTerm, bio_db_repo_info(InfTerm), [InfNm,InfTi|Infs] ),
 1567        date_two_digit_dotted( Dotted ),
 1568        atomic_list_concat( [YrA,MnA,DyA], '.', Dotted ),
 1569        % atomic_list_concat( [Dotted,skeleton], '-', PlPackVers ),
 1570        Clauses = [InfNm,InfTi,version(Dotted)|Infs],
 1571        portray_clauses( Clauses, file(RepoPackPl) ),
 1572        atomic_list_concat( [20,YrA], FullYA ),
 1573        maplist( atom_number, [YrA,FullYA,MnA,DyA], [Yr,FullY,Mn,Dy] ), % the day gets a -skeleton suffix
 1574        atomic_list_concat( [DyA,skeleton], '-', DyPsfx ),
 1575        directory_file_path( RepoD, prolog, RepoPlD ),
 1576        make_directory_path( RepoPlD ),
 1577        directory_file_path( RepoPlD, 'bio_db_repo_version.pl', ModVersF ),
 1578        portray_clauses( [bio_db_repo_version(Yr:Mn:DyPsfx,date(FullY,Mn,Dy))], file(ModVersF) ),
 1579        directory_file_path( BioDbD, 'auxil/lib/bio_db_repo.pl', BioDbRepoPlF ),
 1580        directory_file_path( RepoPlD, 'bio_db_repo.pl', DstRepoF ),
 1581        copy_file( BioDbRepoPlF, DstRepoF )
 1582    ).
 1583
 1584bio_db_conflict_file( Delable-Full ) :-
 1585    Mess = 'Current db file might be inconsistent to new zip file. Delete db file: ~p',
 1586    Ictive = false,
 1587    % fixme: should we be passing Ictive from above ?
 1588    ui_yes_no( Ictive, Mess, [Delable], y, Reply ),
 1589    bio_db_reply_delete_file( Reply, Full ).
 1590
 1591/*
 1592bio_db_serve_pname_from_local( false, _Db, _Pname, Arity,Iface, Load, Call ) :-
 1593    ( bio_db_db_pname_source( Db, Pname, read, prolog+zip, ZLoad ) ->
 1594        fail  % .zip will be tried by caller on failure
 1595        ; 
 1596    ).
 1597    fail.
 1598    */
 1599% fixme: this is not called from anywhere? 
 1600bio_db_serve_pname_from_local( true, _Db, Pname, Arity, Iface, Load, Call ) :-
 1601    % fixme: add predicates for interogating and deleting db/interface pairs
 1602    bio_db_pl_nonpl_interface( Iface, Load, NonPlLoad ),
 1603    % fixme: add logic for deleting prolog interface of downloaded db
 1604    !,
 1605    bio_db_load_call( Pname, Arity, Iface, NonPlLoad, Call ).
 1606
 1607bio_db_pl_nonpl_interface( Iface, Load, NonPlLoad ) :-
 1608    debug( bio_db, 'Converting to interface: ~a, from file: ~p', [Iface,Load] ),
 1609    atom_concat( pl_, Iface, Stem ),
 1610    atom_concat( 'bio_db/auxil/backends/', Stem, Backend ),
 1611    ensure_loaded( pack(Backend) ),
 1612    Conv =.. [Stem,Load],
 1613    call( Conv ),
 1614    file_name_extension( LoadStem, _Pl, Load ),
 1615    bio_db_interface_extensions( Iface, [Ext|_] ),
 1616    file_name_extension( LoadStem, Ext, NonPlLoad ).
 1617
 1618bio_db_ensure_loaded( Iface, Pid, Load, Handle, From ) :-
 1619    atom( Iface ),
 1620    bio_db_ensure_loaded_1( Iface, Pid, Load, Handle, From ),
 1621    !.
 1622bio_db_ensure_loaded( Iface, Pid, Load, _Handle, _From ) :-
 1623    % fixme: Goal in error can be supplied ?
 1624    throw( failed_to_load(Iface,Pid,Load), [pack(bio_db),pred(bio_db_ensure_loaded/4)] ).
 1625
 1626bio_db_ensure_loaded_1( prolog, Pid, Load, [], From ) :-
 1627    Pid = Pname/_Arity,
 1628    atomic_list_concat( [Ppfx|_], '_', Pname ),
 1629    bio_db_pl_load( Ppfx, Pid, Load, From ).
 1630bio_db_ensure_loaded_1( prosqlite, Pname/_Arity, Load, Pname, _From ) :-
 1631    sqlite_connect( Load, Pname, [as_predicates(true),at_module(bio_db)] ).
 1632bio_db_ensure_loaded_1( berkeley, Pname/Arity, Load, Berkeley, _From ) :-
 1633    \+ '$bio_db_handle'(Pname,_),
 1634    % fixme: is the option needed ? we are just reading- check
 1635    % bio_db_info_interface( berkeley, _Pid, Load, _Handle, data_types, data_types(Ktype,Vtype) ),
 1636
 1637    Pairs = [data_types-DtTypes,relation_type-RelType],
 1638    bio_db_info_interface_kvs( berkeley, _Pid, Load, _Handle, Pairs ),
 1639    bio_db_info_interface_types( RelType, DtTypes, berkeley, Dup, _DbTypes, KeyType, ValType ),
 1640    % Open = bdb_open( Load, read, Berkeley, [duplicates(Dupl),key(KeyType),value(ValType)] ),
 1641    Open = bdb_open( Load, read, Berkeley, [dup(Dup),key(KeyType),value(ValType)] ),
 1642    debug( bio_db, 'Bdb opening for reading with: ~w' , Open ),
 1643    call( Open ),
 1644    % bdb_open( Load, read, Berkeley, [duplicates(true),key(KeyType),value(ValType)] ),  % 0.5
 1645    % retractall( '$bio_db_handle'(Pname,_) ),  % fixme: we can do some error reporting if something does exist
 1646    % assert( '$bio_db_handle'(Pname,Berkeley) ),
 1647    % atomic_list_concat( [Ppfx|_], '_', Pname ),
 1648    arg( 1, RelType, Krt ),
 1649    arg( 1, RelType, Vrt ),
 1650    ground( Arity ),
 1651    bio_db_berkeley_predicate_assert_arity( Arity, Krt, Vrt, Pname, bdb_get, bdb_enum, Berkeley ).
 1652bio_db_ensure_loaded_1( rocks, Pname/Arity, Load, Handle, _From ) :-
 1653    /*
 1654    bio_db_info_interface( rocks, _Pid, Load, _Handle, data_types, data_types(Ktype,Vtype) ),
 1655    */
 1656    Pairs = [data_types-DtTypes,relation_type-RelType],
 1657    bio_db_info_interface_kvs( rocks, _Pid, Load, _Handle, Pairs ),
 1658    bio_db_info_interface_types( RelType, DtTypes, rocks, Dup, _DbTypes, KeyType, ValType ),
 1659    % maplist( bio_db_info_rocks_singleton_type, [Ktype,Vtype], [Kbype,Vbype] ),
 1660    % ( Dup == false -> KeyType = NoDupKeyType; NoDupKeyType = term ),
 1661    % 2nd take, duplicates are now stored as lists of values
 1662    ( Dup == false -> ValType = DupValType; DupValType = term ),
 1663    Open = rocks_open( Load, Handle, [key(KeyType),value(DupValType)] ),
 1664    debug( bio_db, 'Rocks opening for reading with: ~w' , Open ),
 1665    call( Open ),
 1666
 1667    % atomic_list_concat( [Ppfx|_], '_', Pname ),
 1668    bio_db_rocks_predicate_assert_arity( Arity, Dup, Pname, rocks_get, rocks_enum, Handle ).
 1669    % bio_db_rocks_predicate_assert_arity( Kbype/Vbype, Arity, Pname, rocks_get, rocks_enum, Handle ).
 1670
 1671% bio_db_pl_load( map, Pid, Load, From ).
 1672bio_db_pl_load( _Type, Pid, Load, Mod ) :-
 1673    dynamic( Mod:Pid ),  % fixme: we should be able to remove this? 
 1674    % ensure_loaded( Load ).  % following is an elaboration of code by JW: 16.11.13:
 1675    (   (file_name_extension(Base,pl,Load), \+ current_prolog_flag(bio_db_qcompile,false))
 1676    ->  Mod:load_files( Base, [qcompile(auto),if(not_loaded)] )
 1677    ;   ensure_loaded( Mod:Load )  % fixme: use load_files/2 ?
 1678    ).
 1679
 1680% bio_db_pl_load( edge, Pname/_Arity, Load ) :-
 1681/*
 1682bio_db_pl_load( edge, Pid, Load ) :-
 1683    % os_postfix ... :(
 1684    % % file_name_extension( Base, Ext, Load ),
 1685    % % atomic_list_concat( [Base,ord], '_', OrdBase ),
 1686    % % file_name_extension( OrdBase, Ext, OrdLoad ),
 1687    % % ensure_loaded( OrdLoad ),
 1688    ensure_loaded( Load ),
 1689    % % atomic_list_concat( [Pname,ord], '_', Pord ),
 1690    % % Head =.. [Pname,X,Y,W],
 1691    % % GoalF =.. [Pord,X,Y,W],
 1692    % % GoalB =.. [Pord,Y,X,W],
 1693    % % consult_clause( (Head:-(GoalF;GoalB)) ).
 1694    true.
 1695    */
 1696
 1697    /*
 1698bio_db_kv_db_predicate_assert( _, Pname, Krt, Vrt, Arity, Get, Enum, Handle ) :-
 1699    ground( Arity ),
 1700    bio_db_kv_db_predicate_assert_arity( Arity, Krt, Vrt, Pname, Get, Enum, Handle ).
 1701bio_db_kv_db_predicate_assert( edge, Pname, Arity, Get, Enum, Handle ) :-
 1702    bio_db_kv_db_predicate_assert_edge( Arity, Pname, Get, Enum, Handle ).
 1703    */
 1704
 1705bio_db_berkeley_predicate_assert_arity( 2, 1, 1,  Pname, Get, Enum, Handle ) :-
 1706    !, % maybe this relevat to other modes too  (here mode is 2,1,1
 1707    Head =.. [Pname,Key,Value],
 1708    GetG  =.. [  Get, Handle, Key, Value ],
 1709    EnumG =.. [ Enum, Handle, Key, Value ],
 1710    Conditional =  ( ( ground(Key) -> 
 1711                    GetG
 1712                    ;
 1713                    EnumG
 1714                  )
 1715            ),
 1716    consult_clause( (Head:-(Conditional)) ).
 1717
 1718bio_db_berkeley_predicate_assert_arity( N, _, _,  Pname, Get, Enum, Handle ) :-
 1719    functor( Head, Pname, N ),
 1720    Head =.. [Pname,Key|Args],
 1721    GetG  =.. [  Get, Handle, Key, Value ],
 1722    EnumG =.. [ Enum, Handle, Key, Value ],
 1723    Conditional =  ( ( ground(Key) -> 
 1724                    GetG
 1725                    ;
 1726                    EnumG
 1727                  )
 1728            ),
 1729    Unravel = bio_db_kv_db_value( Args, Value ),
 1730    consult_clause( (Head:-(Conditional,Unravel)) ).
 1731
 1732bio_db_rocks_predicate_assert_arity( 2, false, Pname, Get, Enum, Handle ) :-
 1733    !, % maybe this relevat to other modes too  (here mode is 2, false (=no duplicates)
 1734    Head =.. [Pname,Key,Value],
 1735    GetG  =.. [  Get, Handle, Key, Value ],
 1736    EnumG =.. [ Enum, Handle, Key, Value ],
 1737    Conditional =  ( ( ground(Key) -> 
 1738                    GetG
 1739                    ;
 1740                    EnumG
 1741                  )
 1742            ),
 1743    consult_clause( (Head:-(Conditional)) ).
 1744bio_db_rocks_predicate_assert_arity( N, false, Pname, Get, Enum, Handle ) :-
 1745    N > 2,
 1746    functor( Head, Pname, N ),
 1747    Head =.. [Pname,Key|Args],
 1748    GetG  =.. [  Get, Handle, Key, Value ],
 1749    EnumG =.. [ Enum, Handle, Key, Value ],
 1750    Conditional =  ( ( ground(Key) -> 
 1751                    GetG
 1752                    ;
 1753                    EnumG
 1754                  )
 1755            ),
 1756    Unravel = bio_db_kv_db_value( Args, Value ),
 1757    consult_clause( (Head:-(Conditional,Unravel)) ).
 1758bio_db_rocks_predicate_assert_arity( 2, true, Pname, Get, Enum, Handle ) :-
 1759    !, % maybe this relevat to other modes too  (here mode is 2, false (=no duplicates)
 1760    Head =.. [Pname,Key,Value],
 1761    GetG  =.. [  Get, Handle, Key, Values ],
 1762    EnumG =.. [ Enum, Handle, Key, Values ],
 1763    Conditional =  ( ( ground(Key) -> 
 1764                    (GetG, bio_db_rocks_multi_key_value(Values,Value) )
 1765                    ;
 1766                    (EnumG, bio_db_rocks_multi_key_value(Values,Value) )
 1767                  )
 1768            ),
 1769    consult_clause( (Head:-(Conditional)) ).
 1770bio_db_rocks_predicate_assert_arity( Arity, true, Pname, Get, Enum, Handle ) :-
 1771    Arity > 2,
 1772    functor( Head, Pname, Arity ),
 1773    Head =.. [Pname,Key|Args],
 1774    GetG  =.. [  Get, Handle, Key, ValueTerm ],
 1775    EnumG =.. [ Enum, Handle, Key, ValueTerm ],
 1776    % EnuTG =.. [ Enum, Handle, Key:_X, Value ],
 1777    Conditional =  ( ( ground(Key) -> 
 1778                    ( GetG, bio_db_rocks_multi_key_value(ValueTerm,Value) )
 1779                    ;
 1780                    ( EnumG, bio_db_rocks_multi_key_value(ValueTerm,Value) )
 1781                    % ( EnumG , ( (atomic(ProvKey),ProvKey=Key);ProvKey=Key:_) )
 1782                  )
 1783                ),
 1784    Unravel = bio_db_kv_db_value( Args, Value ),
 1785    consult_clause( (Head:-(Conditional,Unravel)) ).
 1786    
 1787
 1788bio_db_rocks_multi_key_value( [H|T], Value ) :-
 1789    !,
 1790    ( Value = H; member( Value, T ) ).
 1791bio_db_rocks_multi_key_value( Value, Value ).
 1792
 1793bio_db_kv_db_value( [H], Value ) :- !, Value = H.
 1794bio_db_kv_db_value( [H|T], H+Value ) :-
 1795    bio_db_kv_db_value( T, Value ).
 1796consult_clause( Clause ) :-
 1797    assert( Clause ).
 1798
 1799/*
 1800consult_clause( Clause ) :-
 1801    tmp_file_stream(text, File, Stream),
 1802    portray_clause( Stream, Clause ),
 1803    close( Stream ),
 1804    debug( bio_db, 'Consulting from: ~p', File ),
 1805    consult( File ),
 1806    true.
 1807*/
 1808
 1809bio_db_interfaces_ext( A+B, Ext ) :-
 1810    !,
 1811    bio_db_interfaces_ext( A, AExt ),
 1812    bio_db_interfaces_ext( B, BExt ),
 1813    atomic_list_concat( [AExt,BExt], '.', Ext ).
 1814bio_db_interfaces_ext( Iface, Ext ) :-
 1815    bio_db_interface_extensions( Iface, [Ext|_] ),
 1816    !.
 1817bio_db_interfaces_ext( Ext, Ext ).
 1818
 1819bio_db_pname_source( _Org, Db, Pname, Mode, DbFaces, Src ) :-
 1820    % fixme: make it play with Org ?
 1821    bio_db_interfaces_ext( DbFaces, Ext ),
 1822    Term =.. [Db,Pname],
 1823    debug( bio_db, 'Trying DB location: ~p, mode: ~w', [Term,Mode] ),
 1824    ( absolute_file_name( Term, Src, [access(Mode),extensions([Ext]),file_errors(fail)] )
 1825      ;
 1826      (  DbFaces==rocks,
 1827         file_name_extension(Pname,rocks,Rname),
 1828        Rerm =.. [Db,Rname],
 1829        absolute_file_name(Rerm,Src,[access(Mode),file_errors(fail),file_type(directory)])
 1830      )
 1831    ),
 1832    !.
 1833% The above is a short-cut this is the long way.
 1834% Works when single db provides both maps and graphs
 1835%
 1836bio_db_pname_source( Org, Db, Pname, Mode, DbFaces, Src ) :-
 1837    bio_db_interfaces_ext( DbFaces, Ext ),
 1838    % Term =.. [Db,Pname],
 1839    bio_db_pred_name_type( Pname, Type ),
 1840    directory_file_path( Org, Type, Rel ),
 1841    % Term =.. [bio_db_data,Type], % pre Org times
 1842    Term =.. [bio_db_data,Rel],
 1843    absolute_file_name( Term, Dir ),
 1844    file_name_extension( Pname, Ext, Bname ),
 1845    directory_file_path( Dir, Db, DbDir ),
 1846    directory_file_path( DbDir, Bname, Src ),
 1847    debug( bio_db, 'Trying DB location: ~p, mode: ~w', [Src,Mode] ), % fixme: debug_call, with success/failure
 1848    ( absolute_file_name( Src, _, [access(Mode),file_errors(fail)])
 1849       ;
 1850      (  DbFaces==rocks,
 1851        absolute_file_name( Src, _, [access(Mode),file_errors(fail),file_type(directory)] )
 1852      )
 1853    ),
 1854    !.
 1855    % absolute_file_name( Pname, Src, [access(Mode),extensions([Ext]),file_errors(fail)] ).
 bio_db_source_info(+File, -InfoF)
Generate Info filename corresponding to the database filename at File.

*/

 1862bio_db_source_info( File, InfoF ) :-
 1863    file_name_extension( Stem, Ext, File ),
 1864    atom_concat( Stem, '_info', InfoStem ),
 1865    file_name_extension( InfoStem, Ext, InfoF ).
 bio_db_predicate_info(+PidOrPname, -InfoName)
Generate the information predicate name of a Pid or of Db predicate name.

*/

 1872bio_db_predicate_info( Pname/_Arity, InfoName ) :-
 1873    !,
 1874    atom_concat( Pname, '_info', InfoName ).
 1875bio_db_predicate_info( Pname, InfoName ) :-
 1876    atom( Pname ),
 1877    atom_concat( Pname, '_info', InfoName ).
 1878
 1879bio_db_pred_name_type( Pname, Type ) :-
 1880    atomic_list_concat( [_,_,Trd|_], '_', Pname ),
 1881    bio_db_pred_name_prefix_type( Trd, Type ).
 1882
 1883bio_db_pred_name_prefix_type( edge, graphs ) :- !.
 1884bio_db_pred_name_prefix_type( _, maps ).
 1885
 1886bio_db_load_call( false, Pname, Arity, Iface, File, _Call ) :-
 1887    ( Iface == prolog -> 
 1888        % ensure .qlf is created
 1889        file_name_extension( Stem, pl, File ),
 1890        Mess = 'Ensuring .qlf is also installed: ~w',
 1891        phrase('$messages':translate_message(debug(Mess,[Pname/Arity])), Lines),
 1892        print_message_lines(current_output, kind(informational), Lines),
 1893        load_files( scratch:Stem, [qcompile(auto),if(true)] ),
 1894        abolish( scratch:Pname/Arity )
 1895        ;
 1896        true
 1897    ).
 1898bio_db_load_call( true, Pname, Arity, Iface, File, Call ) :-
 1899    debug( bio_db, 'Loading pred: ~w, interface: ~a, file: ~w', [Pname/Arity,Iface,File] ),
 1900    ground( Iface ),
 1901    functor( Phead, Pname, Arity ),
 1902    ( predicate_property(Phead,imported_from(From) ) -> true; From = bio_db ),
 1903    abolish( From:Pname/Arity ),    % fixme: retractall/1 if we have problem with regenerations ?
 1904    % retractall(Phead),
 1905    atom_concat( Pname, '_info', InfoPname ),
 1906    dynamic( From:InfoPname/2 ),
 1907    % functor( Ihead, InfoPname, 2 ),
 1908    ( (From \== bio_db,\+ current_predicate(bio_db:InfoPname/2)) -> 
 1909            % fixme: test again:
 1910            From:export(InfoPname/2),
 1911            bio_db:import(From:InfoPname/2)
 1912            ;
 1913            true
 1914    ),
 1915    functor( InfoHead, InfoPname, 2),
 1916    retractall( From:InfoHead ),
 1917    bio_db_ensure_loaded( Iface, Pname/Arity, File, Handle, From ),
 1918    assert( bio_db_handle(Pname/Arity,Iface,File,Handle,From) ),
 1919    call( Call ).
 1920
 1921bio_db_predicate_type_sub_dir( map, maps ).
 1922bio_db_predicate_type_sub_dir( edge, graphs ).
 1923
 1924bio_db_map_call_db_pname( Call, Db, Pname, Arity ) :-
 1925    functor( Call, Pname, Arity ),
 1926    at_con( [Db|Parts], '_', Pname ),
 1927    bio_db_map_call_db_pname_check( Db, Parts, Pname, Arity ).
 1928    % bio_db_type_arity_check( Type, Arity ).
 1929
 1930bio_db_map_call_db_pname_check( Db, Parts, _Pname, _Arity ) :-
 1931     maplist( atom_length, [Db|Parts], [4,4,4,4] ),
 1932     !.
 1933bio_db_map_call_db_pname_check( _Db, _Parts, Pname, Arity ) :-
 1934    throw( not_a_db_pred(Pname/Arity), [pack(bio_db),pred(bio_db_serve/3)] ).
 1935
 1936% fixme: delete these 2 preds
 1937bio_db_type_arity_check( Type, Arity ) :-
 1938    bio_db_type_arity_known( Type, Arity ),
 1939    !.
 1940bio_db_type_arity_check( Type, Arity ) :-
 1941    throw( unknown_combination_of_type_arity(Type,Arity) ).
 1942
 1943% fixme: this now a bit outdated... maybe add name for special cases ?
 1944bio_db_type_arity_known( map, 2 ).
 1945bio_db_type_arity_known( map, 3 ).
 1946bio_db_type_arity_known( map, 4 ).
 1947bio_db_type_arity_known( map, 5 ).
 1948bio_db_type_arity_known( map, 7 ).
 1949bio_db_type_arity_known( edge, 3 ).
 1950bio_db_type_arity_known( edge, 2 ).
 1951
 1952bio_db_reply_delete_file( true, Local ) :-
 1953    debug( bio_db, 'Deleting file: ~p', Local ),
 1954    delete_file( Local ).
 1955bio_db_reply_delete_file( false, Local ) :-
 1956    debug( bio_db, 'NOT deleting file: ~p', Local ).
 1957
 1958/*
 1959bio_db_info_db_types( berkeley, RelType, DataTypes, Dup, DbTypes, KeyType, ValType ) :-
 1960    bio_db_info_berkeley_types( RelType, DataTypes, Dup, DbTypes, KeyType, ValType ).
 1961bio_db_info_db_types( rocks, RelType, DataTypes, Dup, DbTypes, KeyType, ValType ) :-
 1962    % bio_db_info_rocks_types( RelType, DataTypes, Dup, DbTypes, KeyType, ValType ).
 1963    bio_db_info_rocks_types( RelType, DataTypes, Dup, DbTypes, KeyType, ValType ).
 1964
 1965bio_db_info_rocks_types( relation_type(1,1), DataTypes, Dup, DbTypes, KeyType, ValType ) :- 
 1966    DataTypes =.. [data_types,PlKeyType,PlValsTypes],
 1967    bio_db_info_rocks_type( PlKeyType, KeyType ),
 1968    bio_db_info_rocks_type( PlValsTypes, ValType ),
 1969    DbTypes = [key(KeyType),value(ValType)].
 1970    */
 1971% fixme: change all the calls and remove this
 1972bio_db_info_db_types( Iface, RelType, DataTypes, Dup, DbTypes, KeyType, ValType ) :-
 1973    bio_db_info_interface_types( RelType, DataTypes, Iface, Dup, DbTypes, KeyType, ValType ).
 1974
 1975% bio_db_info_berkeley_types( relation_type(1,MR), data_types(Kt,Vt), Dup, DbTypes, KeyType, ValType ) :-   
 1976bio_db_info_interface_types( relation_type(1,MR), data_types(Kt,Vt), Iface, Dup, DbTypes, KeyType, ValType ) :- 
 1977    ( MR =:= 1 -> Dup = false; Dup = true ),
 1978    !,  % Arity = 2 (from the form of data_types...
 1979    bio_db_info_interface_type( Kt, Iface, KeyType ),
 1980    bio_db_info_interface_type( Vt, Iface, ValType ),
 1981    DbTypes = [key(KeyType),value(ValType)].
 1982bio_db_info_interface_types( relation_type(1,MR), DtTypes, Iface, Dup, DbTypes, KeyType, ValType ) :-   
 1983    ( MR =:= 1 -> Dup = false; Dup = true ),
 1984    !,  % Arity = 2 (from the form of data_types...
 1985    functor( DtTypes, _, Arity ),
 1986    Arity > 2,
 1987    !,
 1988    arg( 1, DtTypes, Kt ),
 1989    bio_db_info_interface_type( Kt, Iface, KeyType ),
 1990    ValType = term,
 1991    DbTypes = [key(KeyType),value(term)].
 1992bio_db_info_interface_types( RelType, DtTypes, Iface, Dup, DbTypes, KeyType, ValType ) :-
 1993    ( RelType = relation_type(1,1) -> Dup = false; Dup = true ),
 1994    arg( 1, DtTypes, Kt ),
 1995    functor( DtTypes, _, Arity ),
 1996    ( Arity > 2 -> ValType = term
 1997                ; 
 1998
 1999                arg( 2, DtTypes, Vt ),
 2000                    bio_db_info_interface_type( Vt, Iface, ValType )
 2001    ),
 2002    bio_db_info_interface_type( Kt, Iface, KeyType ),
 2003    DbTypes = [key(KeyType),value(term)].
 2004
 2005bio_db_info_interface_type( [Singleton], Iface, Type ) :-  !,
 2006    bio_db_info_interface_unit_type( Iface, Singleton, Type ).
 2007bio_db_info_interface_type( [_,_|_], _Iface, term ) :- !.  % a bit of a shortcut
 2008bio_db_info_interface_type( Singleton, Iface, Type ) :-
 2009    bio_db_info_interface_unit_type( Iface, Singleton, Type ).
 2010
 2011bio_db_info_interface_unit_type( berkeley, Unit, Type ) :-
 2012    bio_db_berkeley_type( Unit, Type ).
 2013bio_db_info_interface_unit_type( rocks, Unit, Type ) :-
 2014    bio_db_rocks_type( Unit, Type ).
 2015
 2016bio_db_rocks_type( term, term ).
 2017bio_db_rocks_type( atom, atom ).
 2018bio_db_rocks_type( integer, int64 ). % rocks also has int32
 2019bio_db_rocks_type( number, atom ).  % rocks has doubles and floats
 2020
 2021bio_db_berkeley_type( term, term ).
 2022bio_db_berkeley_type( atom, atom ).
 2023bio_db_berkeley_type( integer, c_long ).
 2024bio_db_berkeley_type( number, atom ).
 2025
 2026% this is a mock implementation see library(os) or library(os_) 
 2027% for the real one
 2028os_path_( Dir, File, Path ) :-
 2029    ground( Dir ),
 2030    ground( File ),
 2031    !,
 2032    directory_file_path( Dir, File, Path ).
 2033os_path_1( Dir, File, Path ) :-
 2034    ground( Path ),
 2035    directory_file_path( DirSl, File, Path ),
 2036    atom_concat( Dir, '/', DirSl ).
 2037    
 2038pack_errors:message( close_to_info(Pid) ) -->
 2039    ['Predicate: ~w, is not currently served, info depend on the opening interface.'-[Pid]].
 2040pack_errors:message( not_a_db_pred(Pid) ) -->
 2041    ['Predicate identifier: ~w, not of a db predicate.'-[Pid]].
 2042pack_errors:message( not_served(Pid) ) -->
 2043    ['Predicate: ~w, is not currently served.'-[Pid]].
 2044pack_errors:message( failed_to_load(Iface,Pid,File) ) -->
 2045    ['Failed to load predicate: ~w, for backend: ~w, from file: ~p.'-[Pid,Iface,File]].
 2046
 2047% add at_halt, close databases particularly berkeley ones
 2048:- at_halt( bio_db_close_connections ). 2049:- initialization( bio_db_paths, after_load ). 2050
 2051:- multifile sandbox:safe_primitive/1. 2052
 2053bio_sandbox_clause(sandbox:safe_primitive(bio_db:Head)) :-
 2054    module_property(bio_db, exports(PIList)),
 2055    member(Name/Arity, PIList),
 2056    (   sub_atom(Name, 0, _, _, edge_)
 2057    ;   sub_atom(Name, 0, _, _, map_)
 2058    ),
 2059    functor(Head, Name, Arity).
 2060
 2061term_expansion(bio_db_interface, Clauses) :-
 2062    findall(Clause, bio_sandbox_clause(Clause), Clauses).
 2063
 2064bio_db_interface.
 2065sandbox:safe_primitive(bio_db:bio_db_info(_,_,_)).
 2066sandbox:safe_primitive(bio_db:bio_db_info(_,_,_,_))