2% nutcracker.pl, by Johan Bos
    3
    4/* ========================================================================
    5   File Search Paths
    6======================================================================== */
    7
    8file_search_path(semlib,     'src/prolog/lib').
    9file_search_path(nutcracker, 'src/prolog/nutcracker').
   10file_search_path(knowledge,  'src/prolog/boxer/knowledge').
   11
   12
   13/* ========================================================================
   14   Dynamic Predicates
   15======================================================================== */
   16
   17:- dynamic axiom/3.   18
   19
   20/* ========================================================================
   21   Load other libraries
   22======================================================================== */
   23
   24:- use_module(library(lists),[member/2,append/3]).   25:- use_module(library(ordsets),[list_to_ord_set/2,ord_intersection/3]).   26:- use_module(library(readutil),[read_line_to_codes/2]).   27
   28:- use_module(semlib(drs2fol),[drs2fol/2]).   29:- use_module(semlib(errors),[error/2,warning/2,inform/2]).   30:- use_module(semlib(options),[option/2,parseOptions/2,setOption/3,
   31                               showOptions/1,setDefaultOptions/1]).   32
   33:- use_module(nutcracker(version),[version/1]).   34:- use_module(nutcracker(input),[openInput/1,inputDRS/2,lenDRS/2,openModel/3]).   35:- use_module(nutcracker(callInference),[callMBbis/7,callTPandMB/8]).   36:- use_module(nutcracker(miniFrameNet),[axiomsFN/2]).   37:- use_module(nutcracker(counting),[countingAxioms/2]).   38:- use_module(nutcracker(miniWordNet),[compConcepts/2,compISA/0,
   39                                       clearMWN/0,cutDownMWN/0,
   40                                       addTopMWN/0,graphMWN/2,sizeMWN/1,
   41                                       outputMWN/2,axiomsWN/1]).   42
   43
   44/* ========================================================================
   45   Main
   46======================================================================== */
   47
   48main:-
   49   option(Option,do), 
   50   member(Option,['--version','--help']), !, 
   51   version,
   52   help.
   53
   54main:-
   55   checkDir([Dir|Dirs]), !,
   56   axioms(Dir),
   57   main([Dir|Dirs]).
   58
   59main:-
   60   setOption(nutcracker,'--help',do), !,
   61   help.
   62
   63
   64/*========================================================================
   65   Main (traverse directories)
   66========================================================================*/
   67
   68main([]).
   69
   70main([X|Dirs]):-
   71   checkFiles(X), !,
   72   tokenise(X,Overlap), 
   73   pipeline(X,Overlap),
   74   main(Dirs).
   75
   76main([X|Dirs]):-
   77   atom_concat(X,'/*',Wild),
   78   subdirs(Wild,SubDirs), \+ SubDirs = [], !,
   79   main(SubDirs),
   80   main(Dirs).
   81
   82main([_|Dirs]):-
   83   main(Dirs).
   84
   85
   86/*------------------------------------------------------------------------
   87   Pipeline
   88------------------------------------------------------------------------*/
   89
   90pipeline(X,Overlap):-
   91   option('--inference',yes),
   92   meta(X), parse(X), wsd(X), box(X), 
   93   mwn(X,Ax1,Ax2,Ax3,Novelty),
   94   nc(X,Ax1,Ax2,Ax3), !,
   95   prediction(X,Novelty,Overlap).
   96
   97pipeline(X,Overlap):-
   98   option('--inference',no),
   99   meta(X), parse(X), wsd(X), box(X), 
  100   mwn(X,_,_,_,Novelty), !,
  101   prediction(X,Novelty,Overlap).
  102
  103pipeline(X,Overlap):-
  104   option('--inference',only),
  105   box(X),
  106   mwn(X,Ax1,Ax2,Ax3,Novelty), 
  107   nc(X,Ax1,Ax2,Ax3), !,
  108   prediction(X,Novelty,Overlap).
  109
  110pipeline(X,Overlap):-
  111   prediction(X,-1,Overlap).
  112
  113
  114/*------------------------------------------------------------------------
  115   Check Dir
  116------------------------------------------------------------------------*/
  117
  118checkDir(Dirs):-
  119   checkDir1(Dir),        % remove slash at end (if there is one)
  120   checkDir2(Dir,Dirs).   % check permissions
  121
  122checkDir1(NewDir):-
  123   option('--dir',Dir),
  124   atom_chars(Dir,Chars),
  125   append(NewChars,['/'],Chars), !,
  126   atom_chars(NewDir,NewChars),
  127   setOption(nutcracker,'--dir',NewDir).
  128
  129checkDir1(Dir):-
  130   option('--dir',Dir).
  131
  132checkDir2(Dir,[Dir]):-
  133   exists_directory(Dir), 
  134   access_file(Dir,write), !.
  135
  136checkDir2(Dir,List):- 
  137   subdirs(Dir,List), !.
  138
  139checkDir2(Dir,[]):-
  140   error('cannot access directory ~p',[Dir]).
  141
  142
  143/*------------------------------------------------------------------------
  144   Sub Dirs
  145------------------------------------------------------------------------*/
  146
  147subdirs(Wild,Dirs):-
  148   expand_file_name(Wild,List),
  149   findall(D,( member(D,List),
  150               exists_directory(D),
  151               access_file(D,write) ),Dirs), !.
  152
  153
  154/*------------------------------------------------------------------------
  155   Check presence of files t and h
  156------------------------------------------------------------------------*/
  157
  158checkFiles(Dir):-
  159   atomic_list_concat([Dir,'/','t'],TFile),
  160   atomic_list_concat([Dir,'/','h'],HFile),   
  161   atomic_list_concat([Dir,'/','gold.txt'],GFile),   
  162   access_file(TFile,read),
  163   access_file(HFile,read),
  164   access_file(GFile,read), !,
  165   printTHG(Dir,TFile,HFile,GFile).
  166
  167checkFiles(Dir):-
  168   atomic_list_concat([Dir,'/','t'],TFile),
  169   atomic_list_concat([Dir,'/','h'],HFile),   
  170   access_file(TFile,read),
  171   access_file(HFile,read), !,
  172   printTHG(Dir,TFile,HFile).
  173
  174checkFiles(Dir):-
  175   warning('directory ~p does not contain files named t and h',[Dir]), 
  176   !, fail.   
  177
  178
  179/*------------------------------------------------------------------------
  180   Print t and h file   
  181------------------------------------------------------------------------*/
  182
  183printTHG(_,_,_,_):-
  184   option('--info',false), !.
  185
  186printTHG(Dir,TFile,HFile,GFile):-
  187   option('--info',true), 
  188   inform('[=====> ~p <=====]',[Dir]),
  189   inform('Text:',[]),
  190   atomic_list_concat(['cat',TFile],' ',Shell1),
  191   shell(Shell1,Return1), Return1 = 0,
  192   inform('Hypothesis:',[]),
  193   atomic_list_concat(['cat',HFile],' ',Shell2),
  194   shell(Shell2,Return2), Return2 = 0, 
  195   inform('Annotation:',[]),
  196   atomic_list_concat(['cat',GFile],' ',Shell3),
  197   shell(Shell3,Return3), Return3 = 0, !.
  198
  199printTHG(_,_,_,_):-
  200   error('failed to access t and h file',[]).
  201
  202printTHG(_,_,_):-
  203   option('--info',false), !.
  204
  205printTHG(Dir,TFile,HFile):-
  206   option('--info',true), 
  207   inform('[=====> ~p <=====]',[Dir]),
  208   inform('Text:',[]),
  209   atomic_list_concat(['cat',TFile],' ',Shell1),
  210   shell(Shell1,Return1), Return1 = 0,
  211   inform('Hypothesis:',[]),
  212   atomic_list_concat(['cat',HFile],' ',Shell2),
  213   shell(Shell2,Return2), Return2 = 0, !.
  214
  215printTHG(_,_,_):-
  216   error('failed to access t and h file',[]).
  217
  218
  219/*------------------------------------------------------------------------
  220   Tokenise (init)
  221------------------------------------------------------------------------*/
  222
  223tokenise(Dir,Overlap):-
  224   atomic_list_concat([Dir,'/','t'],TFile),
  225   atomic_list_concat([Dir,'/','h'],HFile),   
  226   atomic_list_concat([Dir,'/','t.tok'],TFileTOK),
  227   atomic_list_concat([Dir,'/','h.tok'],HFileTOK),
  228   tokeniseFile(TFile,TFileTOK),
  229   tokeniseFile(HFile,HFileTOK),
  230   bagofwords(TFileTOK,TWords),
  231   bagofwords(HFileTOK,HWords),
  232   overlap(TWords,HWords,Overlap).
  233
  234
  235/*------------------------------------------------------------------------
  236   Tokenise
  237------------------------------------------------------------------------*/
  238
  239tokeniseFile(In,Out):-
  240   atomic_list_concat(['bin/tokkie',
  241                '--quotes',delete,
  242                '--input',In,
  243                '--output',Out],' ',Shell),
  244   write(Shell), nl,
  245   shell(Shell,Return), 
  246   Return = 0, !.
  247
  248tokeniseFile(In,_):-
  249   error('problem tokenising ~p',[In]), 
  250   !, fail.
  251
  252
  253/* ------------------------------------------------------------------------
  254   Bag of words overlap: 
  255   the higher the overlap, the more likely an entailment
  256
  257   T : a b c d      a b c    a b c       a b 
  258   H :     c d e      b          c d e       c d
  259   O = 2/3          1/1      1/3         0/2
  260------------------------------------------------------------------------ */
  261
  262bagofwords(File,Bag):-
  263   open(File,read,Stream),
  264   read_line_to_codes(Stream,Codes),
  265   close(Stream),
  266   atom_codes(Atom,Codes),
  267   atomic_list_concat(Bag,' ',Atom).
  268
  269overlap(T,H,Overlap):-
  270   list_to_ord_set(T,TO),
  271   list_to_ord_set(H,HO),
  272   ord_intersection(TO,HO,Intersection),
  273   length(Intersection,CardTandH),
  274   length(HO,CardH), CardH > 0,
  275   Overlap is CardTandH/CardH.
  276
  277
  278/*------------------------------------------------------------------------
  279   File preparation (adding META markup)
  280------------------------------------------------------------------------*/
  281
  282meta(Dir):-
  283   atomic_list_concat([Dir,'/','t.tok'],TFile),
  284   access_file(TFile,read),
  285   atomic_list_concat([Dir,'/','h.tok'],HFile),
  286   access_file(HFile,read), 
  287   atomic_list_concat([Dir,'/','th.tok'],THFile),
  288   atomic_list_concat([cat,TFile,HFile,'>',THFile],' ',Shell), 
  289   write(Shell), nl,
  290   shell(Shell,0), !.
  291
  292meta(Dir):-
  293   error('directory ~p does not contain files named t.tok and h.tok',[Dir]), 
  294   !, fail.   
  295
  296
  297/*------------------------------------------------------------------------
  298   Parse (init)
  299------------------------------------------------------------------------*/
  300
  301parse(Dir):-
  302   atomic_list_concat([Dir,'/', 't.tok'], TFileTOK),
  303   atomic_list_concat([Dir,'/', 'h.tok'], HFileTOK),   
  304   atomic_list_concat([Dir,'/','th.tok'],THFileTOK),   
  305   access_file( TFileTOK,read),
  306   access_file( HFileTOK,read),
  307   access_file(THFileTOK,read), !,
  308   atomic_list_concat([Dir,'/', 't.ccg'], TFileCCG),
  309   atomic_list_concat([Dir,'/', 'h.ccg'], HFileCCG),
  310   atomic_list_concat([Dir,'/','th.ccg'],THFileCCG),
  311   parse( TFileTOK, TFileCCG),
  312   parse( HFileTOK, HFileCCG),
  313   parse(THFileTOK,THFileCCG).
  314
  315parse(Dir):-
  316   error('directory ~p does not contain files named *.tok',[Dir]), 
  317   !, fail.   
  318
  319
  320/*------------------------------------------------------------------------
  321   Parse
  322------------------------------------------------------------------------*/
  323
  324parse(In,Out):-
  325   option('--soap',true),
  326   atomic_list_concat(['bin/soap_client',
  327                '--url http://localhost:9000',
  328                '--input',In,
  329                '--output',Out],' ',Shell),
  330   write(Shell), nl,
  331   shell(Shell,0), !.
  332
  333parse(In,Out):-
  334   option('--soap',false),
  335   atomic_list_concat(['bin/candc',
  336                '--input',In,
  337                '--output',Out,
  338                '--models models/boxer',
  339                '--candc-printer boxer'],' ',Shell),
  340   write(Shell), nl,
  341   shell(Shell,0), !.
  342
  343parse(In,_):-
  344   error('cannot parse ~p',[In]), 
  345   !, fail.   
  346
  347
  348/*------------------------------------------------------------------------
  349   Boxer (init)
  350------------------------------------------------------------------------*/
  351
  352box(Dir):-
  353   ( option('--wsd',true), !, Ext = 'ccg.wsd'; Ext = 'ccg' ),
  354   atomic_list_concat([Dir,'/', 't.',Ext], TFileCCG),
  355   atomic_list_concat([Dir,'/', 'h.',Ext], HFileCCG),
  356   atomic_list_concat([Dir,'/','th.',Ext],THFileCCG),
  357   access_file( TFileCCG,read),
  358   access_file( HFileCCG,read),
  359   access_file(THFileCCG,read), !,
  360   atomic_list_concat([Dir,'/', 't.drs'], TFileDRS),
  361   atomic_list_concat([Dir,'/', 'h.drs'], HFileDRS),
  362   atomic_list_concat([Dir,'/','th.drs'],THFileDRS),
  363   box( TFileCCG,TFileDRS),
  364   box( HFileCCG,HFileDRS),
  365   box(THFileCCG,THFileDRS).
  366
  367box(Dir):-
  368   error('directory ~p does not contain files named t.ccg and h.ccg',[Dir]), 
  369   !, fail.   
  370
  371
  372/*------------------------------------------------------------------------
  373   Boxer 
  374------------------------------------------------------------------------*/
  375
  376box(In,Out):-
  377   option('--plural',PluralOpt), 
  378   option('--modal',ModalOpt), 
  379%  option('--vpe',VpeOpt), 
  380   option('--copula',CopOpt), 
  381   option('--warnings',WarOpt), 
  382   option('--roles',RolesOpt), 
  383   option('--resolve',ResolveOpt), 
  384   option('--nn',NNOpt), 
  385   option('--x',XOpt), 
  386   atomic_list_concat([Out,xml],'.',OutXML),
  387   atomic_list_concat(['bin/boxer',
  388                '--input',In,
  389                '--output',OutXML,
  390                '--plural',PluralOpt,
  391                '--modal',ModalOpt,
  392                '--copula',CopOpt,
  393                '--roles',RolesOpt,
  394		'--format',xml,
  395                '--nn',NNOpt,
  396                '--x',XOpt,
  397                '--elimeq',false,
  398                '--resolve',ResolveOpt,
  399                '--integrate',true,
  400                '--warnings',WarOpt,
  401                '--box'],' ',ShellXML),
  402   shell(ShellXML,_),
  403   atomic_list_concat(['bin/boxer',
  404                '--input',In,
  405                '--output',Out,
  406                '--plural',PluralOpt,
  407                '--modal',ModalOpt,
  408                '--copula',CopOpt,
  409                '--roles',RolesOpt,
  410                '--nn',NNOpt,
  411                '--x',XOpt,
  412                '--elimeq',false,
  413                '--resolve',ResolveOpt,
  414                '--integrate',true,
  415                '--warnings',WarOpt,
  416                '--box'],' ',Shell),
  417   write(Shell), nl,
  418   shell(Shell,Return),
  419   Return = 0, !.
  420
  421box(In,_):-
  422   error('cannot box ~p',[In]), 
  423   !, fail.   
  424
  425
  426/*------------------------------------------------------------------------
  427   WSD (init)
  428------------------------------------------------------------------------*/
  429
  430wsd(Dir):-
  431   option('--wsd',true), 
  432   atomic_list_concat([Dir,'/','t.ccg'],TFileCCG),
  433   atomic_list_concat([Dir,'/','h.ccg'],HFileCCG),
  434   atomic_list_concat([Dir,'/','th.ccg'],THFileCCG),
  435   access_file(TFileCCG,read),
  436   access_file(HFileCCG,read), !,
  437   access_file(THFileCCG,read), !,
  438   atomic_list_concat([Dir,'/','t.ccg.wsd'],TFileWSD),
  439   atomic_list_concat([Dir,'/','h.ccg.wsd'],HFileWSD),
  440   atomic_list_concat([Dir,'/','th.ccg.wsd'],THFileWSD),
  441   wsd(TFileCCG,TFileWSD),
  442   wsd(HFileCCG,HFileWSD),
  443   wsd(THFileCCG,THFileWSD).
  444
  445wsd(Dir):-
  446   option('--wsd',true), 
  447   error('directory ~p does not contain files named t.ccg and h.ccg',[Dir]), 
  448   !, fail.   
  449
  450wsd(_):- 
  451   option('--wsd',false).
  452
  453
  454/*------------------------------------------------------------------------
  455   WSD (external) 
  456------------------------------------------------------------------------*/
  457
  458wsd(CCG,WSD):-
  459   atomic_list_concat(['ext/wsd.pl',
  460                       '--input',CCG,
  461                       '--output',WSD,
  462                       '--slearner','ext/senselearner/'],' ',Shell),
  463   write(Shell), nl,
  464   shell(Shell,Return),
  465   Return = 0, !.
  466
  467wsd(_,In):-
  468   error('cannot wsd ~p',[In]), 
  469   !, fail.   
  470
  471
  472/* =======================================================================
  473   Textual Entailment (logical inference)
  474========================================================================*/
  475
  476nc(Dir,KT,KH,KTH):-
  477   openInput(Dir),
  478
  479   inputDRS(t,TDRS), 
  480   inputDRS(h,HDRS), 
  481   inputDRS(th,THDRS), 
  482
  483   countingAxioms([],Ax),
  484   consistent(  TDRS, Ax, Dir,  t, 1,ModT), domSize(ModT,DomT),
  485   consistent(  HDRS, Ax, Dir,  h, 1,ModH),   
  486   consistent( THDRS, Ax, Dir, th, DomT,ModTH),
  487
  488   informative(TDRS,THDRS, Ax, Dir, tth, 1, _),
  489
  490   countingAxioms(KT,KTAx),
  491   bk(ModT,KTAx,KTBAx),
  492   consistent(  TDRS, KTBAx, Dir,  kt, 1,ModKT), domSize(ModKT,DomKT),
  493
  494   countingAxioms(KH,KHAx),
  495   bk(ModH,KHAx,KHBAx),
  496   consistent(  HDRS, KHBAx, Dir, kh, 1, _), 
  497
  498   countingAxioms(KTH,KTHAx),
  499   bk(ModTH,KTHAx,KTHBAx),
  500   consistent( THDRS,KTHBAx, Dir, kth, DomKT,_),
  501   informative(TDRS,THDRS,KTHBAx, Dir,ktkth, 1, _).
  502
  503
  504/* =======================================================================
  505   Load Axioms
  506========================================================================*/
  507
  508axioms(_):-
  509   option('--axioms',File), 
  510   File = none, !.
  511
  512axioms(Dir):-
  513   option('--axioms',File), 
  514   access_file(File,read),
  515   catch(load_files([File],[autoload(true),encoding(utf8)]),_,fail),
  516   findall(imp(A,B),imp(A,B),Axioms), !,
  517   preprocessAxioms(Axioms,Dir,0).
  518   
  519axioms(_):-
  520   option('--axioms',File), 
  521   error('cannot access axioms ~p',[File]).
  522
  523
  524/* =======================================================================
  525   Process Axioms
  526========================================================================*/
  527
  528preprocessAxioms([],_,N):-
  529   inform('Background knowledge: ~p axioms',[N]).
  530
  531preprocessAxioms([imp(A,B)|L],Dir,M):-
  532   option('--modal',true), N is M+1,
  533   drs2fol(drs([],[nec(drs([],[imp(A,B)]))]),Axiom), 
  534   drs2fol(A,Antecedent), !,
  535   callTPandMB(Dir,[],not(Antecedent),Antecedent,1,10,Model,_Engine),
  536   Model = model(_,F),
  537   findall(Sym,member(f(_,Sym,_),F),Symbols),
  538   assert(axiom(N,Symbols,Axiom)),
  539   preprocessAxioms(L,Dir,N).
  540
  541preprocessAxioms([imp(A,B)|L],Dir,M):-
  542   option('--modal',false), N is M+1,
  543   drs2fol(drs([],[imp(A,B)]),Axiom), !,
  544   callTPandMB(Dir,[],not(Axiom),Axiom,1,10,Model,_Engine),
  545   Model = model(_,F),
  546   findall(Sym,member(f(_,Sym,_),F),Symbols),
  547   assert(axiom(N,Symbols,Axiom)),
  548   preprocessAxioms(L,Dir,N).
  549
  550
  551/* =======================================================================
  552   Include Background Knowledge
  553========================================================================*/
  554
  555bk(model(_,F),In,Out):-
  556   findall(N,axiom(N,_,_),L),
  557   bk(L,F,0,In,Out).
  558
  559bk([],_,N,A,A):-
  560   inform('added a total of ~p axioms',[N]).
  561
  562bk([A|L],F,N1,In,[Axiom|Out]):-
  563   axiom(A,Symbols,Axiom),
  564   member(f(1,X,_),F), 
  565   member(X,Symbols), 
  566   \+ X = n1numeral,
  567   !,
  568   inform('added axiom ~p triggered by ~p',[A,X]),
  569   N2 is N1 + 1,
  570   bk(L,F,N2,In,Out).
  571
  572bk([_|L],F,N,In,Out):-
  573   bk(L,F,N,In,Out).
  574
  575
  576/* =======================================================================
  577   Textual Entailment (WordNet)
  578========================================================================*/
  579
  580mwn(Dir,AxiomsKT,AxiomsKH,AxiomsKTH,Novelty):-
  581   openInput(Dir),
  582   inputDRS(t,TDRS), computeMWN(TDRS,Dir,kt,DomT),  
  583   axiomsWN(WNAxiomsKT), 
  584   axiomsFN(TDRS,FNAxiomsKT), 
  585   append(WNAxiomsKT,FNAxiomsKT,AxiomsKT),
  586   inputDRS(h,HDRS), computeMWN(HDRS,Dir,kh,DomH), 
  587   axiomsWN(WNAxiomsKH), axiomsFN(HDRS,FNAxiomsKH),
  588   append(WNAxiomsKH,FNAxiomsKH,AxiomsKH),
  589   inputDRS(th,THDRS), computeMWN(THDRS,Dir,kth,DomTH), 
  590   axiomsWN(WNAxiomsKTH), axiomsFN(THDRS,FNAxiomsKTH),
  591   append(WNAxiomsKTH,FNAxiomsKTH,AxiomsKTH),
  592   computeNovelty(DomT,DomH,DomTH,Novelty).
  593
  594
  595/* =======================================================================
  596   Inference -- consistency check
  597========================================================================*/
  598
  599consistent(_,_,Dir,Name,DomSize,Model):- 
  600   DomSize = 0, !, 
  601   Model = model([],[]),
  602   outputModel(Model,Name,Dir,DomSize),
  603   inform('previously inconsistent, no inference for ~p',[Name]).
  604
  605consistent(B,BK,Dir,Name,MinDom,Model):-
  606   drs2fol(B,F),
  607   option('--domsize',MaxDom),
  608   callTPandMB(Dir,BK,not(F),F,MinDom,MaxDom,TmpModel,TmpEngine),
  609   ( member(Name,[kt,kh,kth]), !, callMBbis(Dir,BK,F,TmpModel,Model,TmpEngine,Engine)
  610   ; TmpModel = Model, TmpEngine = Engine ),
  611   outputModel(Model,Name,Dir,DomSize),
  612   ( DomSize > 0, !, Result = 'consistent'
  613   ; DomSize = 0, !, Result = 'inconsistent'
  614   ; DomSize < 0,    Result = 'unknown' ),
  615   inform('~p found result for ~p (~p, domain size: ~p)',[Engine,Name,Result,DomSize]).
  616
  617
  618/* =======================================================================
  619   Inference -- informativeness check
  620========================================================================*/
  621
  622informative(B1,B2,BK,Dir,Name,MinDom,Model):-
  623   drs2fol(B1,F1),
  624   drs2fol(B2,F2),
  625   F = imp(F1,F2),
  626   option('--domsize',MaxDom),
  627   callTPandMB(Dir,BK,F,not(F),MinDom,MaxDom,Model,Engine),
  628   outputModel(Model,Name,Dir,DomSize),
  629   ( DomSize > 0, !, Result = 'informative'
  630   ; DomSize = 0, !, Result = 'uninformative'
  631   ; DomSize < 0,    Result = 'unknown' ),
  632   inform('~p found result for ~p (~p, domain size: ~p)',[Engine,Name,Result,DomSize]).
  633
  634
  635/* =======================================================================
  636   Prediction (try inference first, else back off to WordNet)
  637========================================================================*/
  638 
  639prediction(Dir,WNNovelty,Overlap):-
  640
  641   openModel(Dir,t,ModT),     openModel(Dir,h,ModH),
  642   openModel(Dir,th,ModTH),   openModel(Dir,tth,ModTNH),
  643   openModel(Dir,kt,ModKT),   openModel(Dir,kh,ModKH),
  644   openModel(Dir,kth,ModKTH), openModel(Dir,ktkth,ModKTNH),
  645
  646   domSize(ModT,DomT),        domSize(ModH,DomH),
  647   domSize(ModTH,DomTH),      domSize(ModTNH,DomTNH),
  648   domSize(ModKT,DomKT),      domSize(ModKH,DomKH),
  649   domSize(ModKTH,DomKTH),    domSize(ModKTNH,DomKTNH),
  650
  651   relSize(ModKT,RelKT),   
  652   relSize(ModKH,RelKH),
  653   relSize(ModKTH,RelKTH),
  654
  655   SizeKT is DomKT*RelKT,    % modelSize(ModKT,SizeKT),
  656   SizeKH is DomKH*RelKH,    % modelSize(ModKH,SizeKH),
  657   SizeKTH is DomKTH*RelKTH, % modelSize(ModKTH,SizeKTH),
  658
  659%  compareModels(Dir,ModKTH,ModKT),
  660
  661   computeNovelty(DomKT,DomKH,DomKTH,DomNovelty),
  662   computeNovelty(SizeKT,SizeKH,SizeKTH,SizeNovelty),
  663   computeNovelty(RelKT,RelKH,RelKTH,RelNovelty),
  664
  665   makePrediction(DomT,DomH,DomTH,DomTNH,DomKT,DomKH,DomKTH,DomKTNH,
  666                  DomNovelty,RelNovelty,WNNovelty,Overlap,Prediction), !,
  667
  668   outputPrediction(Dir,Prediction,DomKTNH,DomKTH,
  669                    DomNovelty,RelNovelty,WNNovelty,SizeNovelty,Overlap).
  670
  671
  672/* =======================================================================
  673   Make prediction...
  674
  675   makePrediction( +T,  +H,  +TH,  +TnotH,   %%% model sizes without BK
  676              +KT, +KH, +KTH, +KTnotH,   %%% model sizes wit BK
  677              +DomNovelty, 
  678              +RelNovelty, 
  679              +WNNovelty, 
  680              +WordOverlap, 
  681              -Prediction )              %%% prediction description
  682========================================================================*/
  683
  684% PROOF without BK by THEOREM PROVER: INPUT INCONSISTENT
  685%
  686makePrediction(T,H,_,_,_,_,_,_,_,_,_,_,Prediction):-
  687   option('--contradiction',true), 
  688   (T = 0; H = 0), !,
  689   Prediction = 'unknown (simple input contradiction)'.
  690
  691% PROOF without BK by THEOREM PROVER: INCONSISTENT
  692%
  693makePrediction(T,H,TH,_,_,_,_,_,_,_,_,_,Prediction):-
  694   option('--contradiction',true), 
  695   T > 0, H > 0, TH = 0, !,
  696   Prediction = 'informative (simple inconsistency)'.
  697
  698% PROOF without BK by THEOREM PROVER: UNINFORMATIVE (ENTAILMENT)
  699%
  700makePrediction(T,H,TH,TNH,_,_,_,_,_,_,_,_,Prediction):-
  701   T > 0, H > 0, TH > 0, TNH = 0, !,
  702   Prediction = 'entailed (simple proof)'. 
  703
  704% PROOF with BK by THEOREM PROVER: INPUT INCONSISTENT
  705%
  706makePrediction(T,H,TH,_,KT,KH,_,_,_,_,_,_,Prediction):-
  707   option('--contradiction',true), 
  708   T > 0, H > 0, TH > 0, (KT = 0; KH = 0),  !,
  709   Prediction = 'unknown (complex input contradiction)'.
  710
  711% PROOF with BK by THEOREM PROVER: INCONSISTENT
  712%
  713makePrediction(T,H,TH,_,KT,KH,KTH,_,_,_,_,_,Prediction):-
  714   option('--contradiction',true), 
  715   T > 0, H > 0, TH > 0, KT > 0, KH > 0, KTH = 0, !,
  716   Prediction = 'informative (complex inconsistency)'.
  717
  718% PROOF with BK by THEOREM PROVER: UNINFORMATIVE
  719%
  720makePrediction(T,H,TH,TNH,KT,KH,KTH,KTNH,_,_,_,_,Prediction):-
  721    T > 0,  H > 0,  TH > 0,  TNH > 0, 
  722   KT > 0, KH > 0, KTH > 0, KTNH = 0, !,
  723   Prediction = 'entailed (complex proof)'. %%% proof with BK
  724
  725% WORDNET NOVELTY
  726%
  727makePrediction(_,_,_,_,_,_,_,_,DomNovelty,_,WNNovelty,_,Prediction):-
  728   option('--modal',false), WNNovelty >= 0, DomNovelty < 0, !, 
  729   %%% DRS but no model could be computed, back off to WN novelty
  730%  Threshold = 0.416667, %%% RTE-2 dev  (J48, 59.6%, n= 768)
  731%  Threshold = 0.25,     %%% RTE-2 test (J48, 59.1%, n= 766)
  732%  Threshold = 0.4     , %%% RTE-2      (J48, 58.6%, n=1534)
  733%  Threshold = 0.363636, %%% RTE-3 dev  (J48, 58.7%, n= 770)
  734%  Threshold = 0.375,    %%% RTE-3 test (J48, 55.9%, n= 782) 
  735%  Threshold = 0.375,    %%% RTE-3      (J48, 59.6%, n=1552)
  736   Threshold = 0.375,    %%% RTE-2+3    (J48, 59.6%, n=3086)  
  737   ( WNNovelty =< Threshold, !
  738   , Prediction = 'entailed (wordnet novelty)'
  739   ; Prediction = 'informative (wordnet novelty)' ).
  740   
  741% WORD OVERLAP
  742%
  743makePrediction(_,_,_,_,_,_,_,_,_,_,_,Overlap,Prediction):-
  744%  Threshold = 0.576923, %%% RTE-2 dev  (J48, 60.6%)
  745%  Threshold = 0.533333, %%% RTE-2 test (J48, 56.1%)
  746%  Threshold = 0.55,     %%% RTE-2      (J48, 58.8%)
  747%  Threshold = 0.692308, %%% RTE-3 dev  (J48, 61.3%)
  748%  Threshold = 0.4,      %%% RTE-3 test (J48, 57.6%)
  749%  Threshold = 0.428571, %%% RTE-3      (J48, 60.8%)
  750   Threshold = 0.55,     %%% RTE-2+3    (J48, 60.6%)
  751   ( Overlap > Threshold, !
  752   , Prediction = 'entailed (word overlap)'
  753   ; Prediction = 'informative (word overlap)' ).
  754
  755% MODEL NOVELTY
  756%
  757makePrediction(_,_,_,_,_,_,_,_,DomNovelty,_,_,_,Prediction):-
  758%  Threshold = 0.4,      %%% RTE-2 dev  ( 710 instances, J48, 63,6%) model -> 58.1; rel -> 62.9
  759%  Threshold = 0.375,    %%% RTE-2 test ( 692 instances, J48, 58.4%) model -> 52.0; rel -> 60.0
  760%  Threshold = 0.416777, %%% RTE-2      (1412 instances, J48, 60.9%) model -> 57.2; rel -> 60.0
  761%  Threshold = 0.375,    %%% RTE-3 dev  ( 656 instances, J48, 58.4%) model -> 58.8; rel -> 60.8
  762%  Threshold = 0.387,    %%% RTE-3 test ( 686 instances, J48, 58.2%) model -> 58.2; rel -> 59.5
  763%  Threshold = 0.6,      %%% RTE-3      (1342 instances, J48, 59,6%) model -> 57.9; rel -> 59.0
  764   Threshold = 0.416667, %%% RTE-2+3    (2758 instances, J48, 60,3%) model -> 57.9; rel -> 60.8
  765   ( DomNovelty =< Threshold, !
  766   , Prediction = 'entailed (model novelty)'
  767   ; Prediction = 'informative (model novelty)' ).
  768
  769
  770/* =======================================================================
  771   Output Model
  772========================================================================*/
  773
  774outputModel(Model,Name,Dir,Size):-
  775   atomic_list_concat([Dir,'/',Name,'.mod'],File),
  776   open(File,write,Stream),
  777   printModel(Model,Stream), 
  778   write(Stream,'.'), nl(Stream),
  779   close(Stream),
  780   domSize(Model,Size).
  781
  782
  783/* =======================================================================
  784   Print Model
  785========================================================================*/
  786
  787printModel(model(D,[]),Stream):- !, format(Stream,'model(~p, [])',[D]).
  788
  789printModel(model(D,[F]),Stream):- !, format(Stream,'model(~p,~n  [~p])',[D,F]).
  790
  791printModel(model(D,[X,Y|F]),Stream):- !,
  792   setof(M,Sym^Ext^(member(M,[X,Y|F]),\+ M=f(0,Sym,Ext)),[First|Sorted]),
  793   format(Stream,'model(~p,~n  [~p,~n',[D,First]),
  794   printModel(Sorted,Stream).
  795
  796printModel([Last],Stream):- !, format(Stream,'   ~p])',[Last]).
  797
  798printModel([X|L],Stream):- !, 
  799   format(Stream,'   ~p,~n',[X]), 
  800   printModel(L,Stream).
  801
  802printModel(Model,Stream):- write(Stream,Model).
  803
  804
  805
  806/* =======================================================================
  807   Determine Model Size (Domain)
  808
  809modelSize(Model,Size):- 
  810   Model = model(_,F), !,
  811   modelSize(F,0,Size).
  812
  813modelSize(_,-1).
  814
  815modelSize([],S,S).
  816
  817modelSize([f(_,Symbol,[E|Xtension])|L],Old,New):-
  818   idf(Symbol,IDF,_), !,
  819   length([E|Xtension],N), 
  820   Temp is Old+(IDF*N),
  821   modelSize(L,Temp,New).
  822   
  823modelSize([_|L],Old,New):-
  824   modelSize(L,Old,New).
  825========================================================================*/
  826
  827compareModels(Dir,model(_,F1),model(_,F2)):- !, 
  828   atomic_list_concat([Dir,'/','novel.txt'],File),
  829   open(File,write,Stream),
  830   compareExtensions(F1,F2,Stream),
  831   close(Stream).
  832
  833compareModels(_,_,_).
  834
  835
  836compareExtensions([],_,_).
  837
  838compareExtensions([f(Arity,Sym,[_|_])|L],F,Stream):- 
  839   ( member(f(Arity,Sym,[_|_]),F), !
  840   ; write(Stream,Sym), nl(Stream)),
  841   compareExtensions(L,F,Stream).
  842
  843compareExtensions([_|L],F,Stream):- 
  844   compareExtensions(L,F,Stream).
  845
  846
  847/* =======================================================================
  848   Determine Domain Size 
  849========================================================================*/
  850
  851domSize(Model,Size):-
  852   Model = model(Dom,_), !,
  853   length(Dom,Size).
  854
  855domSize(_,-1).
  856
  857
  858/* =======================================================================
  859   Determine Model Size (Relations)
  860========================================================================*/
  861
  862relSize(Model,Size):-
  863   Model = model(_,F), !,
  864%  findall(R,(member(f(2,_,E),F),member(R,E)),Rs),
  865   findall(R,(member(f(_,_,E),F),member(R,E)),Rs),
  866   length(Rs,Size).
  867
  868relSize(_,-1).
  869
  870
  871/* =======================================================================
  872   Output Prediction
  873========================================================================*/
  874
  875outputPrediction(Dir,Prediction,Proof,Contra,DomNovelty,RelNovelty,WNNovelty,SizeNovelty,Overlap):-
  876   atomic_list_concat([Dir,'/','prediction.txt'],File),
  877   open(File,write,Stream),
  878   write(Stream,Prediction), nl(Stream),
  879   close(Stream),
  880   inform('prediction: ~p',[Prediction]),
  881   outputDomSizeDif(Dir,Proof,Contra,DomNovelty,RelNovelty,WNNovelty,SizeNovelty,Overlap).
  882
  883
  884/* =======================================================================
  885   Output Domain Size Difference
  886========================================================================*/
  887
  888outputDomSizeDif(Dir,Proof,Contradiction,Dom,Rel,WordNet,Model,Overlap):-
  889   atomic_list_concat([Dir,'/','modsizedif.txt'],File),
  890   open(File,write,Stream),
  891   ( Contradiction=0, !, Prover=contradiction
  892   ; Proof=0, !, Prover=proof
  893   ; Prover=unknown ),
  894   format(Stream,'~p.   % prover output    ~n',[Prover]),
  895   format(Stream,'~p.   % domain novelty   ~n',[Dom]),
  896   format(Stream,'~p.   % relation novelty ~n',[Rel]),
  897   format(Stream,'~p.   % wordnet novelty  ~n',[WordNet]),
  898   format(Stream,'~p.   % model novelty    ~n',[Model]),
  899   format(Stream,'~p.   % word overlap     ~n',[Overlap]),
  900   close(Stream).
  901
  902
  903/* ========================================================================
  904   Compute Novelty of H given T
  905======================================================================== */
  906
  907computeNovelty(SizeT,SizeH,SizeTH,Novelty):-
  908   SizeT > 0, SizeH > 0, SizeTH > 0, !,
  909   Novelty is 1-((SizeTH-SizeT)/SizeH).
  910
  911computeNovelty(_,_,_,-1).
  912
  913
  914/* ========================================================================
  915   MiniWordNet
  916======================================================================== */
  917
  918computeMWN(DRS,Dir,File,Size):-   
  919   option('--wordnet',true), !,
  920   clearMWN,
  921   compConcepts(DRS,_),
  922   compISA,
  923   addTopMWN,                   %%% this can cause inconsistencies!
  924%  cutDownMWN,
  925   sizeMWN(Size),
  926   outputMWN(Dir,File),
  927   graphMWN(Dir,File).
  928
  929computeMWN(_,_,_,0).
  930
  931
  932/* =======================================================================
  933   Version
  934========================================================================*/
  935
  936version:-
  937   option('--version',do), !,
  938   version(V),
  939   format(user_error,'~p~n',[V]).
  940
  941version.
  942
  943
  944/* =======================================================================
  945   Help
  946========================================================================*/
  947
  948help:-
  949   option('--help',do), !,
  950   format(user_error,'usage: nc [options]~n~n',[]),
  951   showOptions(nutcracker).
  952
  953help:-
  954   option('--help',dont), !.
  955
  956
  957/* =======================================================================
  958   Definition of start
  959========================================================================*/
  960
  961start:-
  962   current_prolog_flag(argv,[_Comm|Args]), 
  963   \+ Args = [],
  964   set_prolog_flag(float_format,'%.20g'),
  965   setDefaultOptions(nutcracker), 
  966   parseOptions(nutcracker,Args),
  967   shell('chmod 755 src/prolog/nutcracker/startTPandMB.pl', Return),
  968   Return = 0,
  969%  catch(load_files(['working/symidf.pl'],[autoload(true),encoding(utf8)]),_,fail),
  970   main, !,
  971   halt.
  972
  973start:- 
  974   setDefaultOptions(nutcracker), 
  975   setOption(nutcracker,'--help',do), !,
  976   help,
  977   halt