2/* ========================================================================
    3   File Search Paths
    4=========================================================================*/
    5
% Register the `candc` path alias: '..' is resolved relative to the file
% currently being loaded and the result is asserted as a search path.
:- prolog_load_context(file,File),
   absolute_file_name('..',X,[relative_to(File),file_type(directory)]),
   asserta(user:file_search_path(candc,X)).

% Read double-quoted text as lists of character codes.
:- set_prolog_flag(double_quotes,codes).

% Path aliases built on top of `candc`; boxer/1 and knowledge/1 are used
% by the use_module/1,2 directives in the Modules section.
user:file_search_path(semlib,     candc(lib)).
user:file_search_path(boxer,      candc(boxer)).
user:file_search_path(knowledge,  boxer(knowledge)).
user:file_search_path(lex,        boxer(lex)).
   16
   17
   18/* ========================================================================
   19   VerbNet
   20=========================================================================*/
   21
% verbnet(CCG,Roles,Count): tally kept by add/2 of how many times a CCG
% category was emitted together with a role list; consulted by planB/0.
:- dynamic verbnet/3.
   24/* ========================================================================
   25   Modules
   26=========================================================================*/
   27
   28:- use_module(library(lists),[member/2,reverse/2,append/3]).   29:- use_module(boxer(slashes)).   30:- use_module(knowledge(roles),[old2new/2]).   31
   32
   33/* ========================================================================
   34   Main
   35=========================================================================*/
   36
   37verbnet2prolog(File):- 
   38   load_xml_file(File,T), 
   39%  pretty_print(T,0),
   40   elements(T,['VNCLASS'],f(X,C)),   
   41   value(X,'ID',ID),
   42   members(C,File,ID,_).
   43
   44
   45/* ----------------------------------------------------------------------
   46   Pretty Printing XML
   47---------------------------------------------------------------------- */ 
   48
   49pretty_print([],_).
   50
   51pretty_print([element(A,B,C)|L],Tab):- !,
   52   tab(Tab), write(A), write(' '), write(B), nl,
   53   NewTab is Tab+3, 
   54   pretty_print(C,NewTab),
   55   pretty_print(L,Tab).
   56
   57pretty_print([E|L],Tab):-  
   58   tab(Tab), write(unknown:E),nl,
   59   pretty_print(L,Tab).
   60
   61
   62/* ----------------------------------------------------------------------
   63   Processing all members of a VerbNet class
   64---------------------------------------------------------------------- */ 
   65
   66members(X,File,XID,Names):-
   67   findall(Sub:YID,(elements(X,['SUBCLASSES','VNSUBCLASS'],f(Y,Sub)),value(Y,'ID',YID)),Subs),
   68   subclasses(Subs,File,Names1),
   69   findall(Frame,(elements(X,['FRAMES','FRAME'],f(_,Frame))),Frames),
   70   findall(Name,(elements(X,['MEMBERS','MEMBER'],f(Member,_)),value(Member,name,Name)),Names2),
   71   append(Names1,Names2,Names),
   72   frames(Frames,Names,XID,File).
   73
   74
   75/* ----------------------------------------------------------------------
   76   Processing all subclasses of a VerbNet class
   77---------------------------------------------------------------------- */ 
   78
   79subclasses([],_,[]).
   80
   81subclasses([X:XID|L],File,Names):-
   82   members(X,File,XID,Names1),
   83   append(Names1,Names2,Names),
   84   subclasses(L,File,Names2).
   85
   86
   87/* ----------------------------------------------------------------------
   88   Process frames
   89---------------------------------------------------------------------- */ 
   90
   91frames([],_,_,_):- !.
   92
   93frames([Frame|L],Names,ID,File):-
   94   elements(Frame,['DESCRIPTION'],f(De,_)),   
   95   value(De,primary,Primary),
   96   example(Frame,Example),
   97   elements(Frame,['SYNTAX'],f(_,Syntax)),  
   98   subcat(Syntax,[],SubCat), 
   99%write(syntax:Syntax),nl,
  100%write(subcat:SubCat),nl,
  101   ccg(SubCat,C^C,CCG,Missing,Roles0),
  102   roles2roles(Roles0,Roles1),
  103   append(Wrong,[Role],Roles1),
  104   reverse(Wrong,Right),
  105   append(Right,[Role],Roles),
  106   atom_chars(ID,IDChars),
  107   formatID(IDChars,[_,_|FID]),
  108   reverse(SubCat,Normal),
  109   format('~n%%% File:    ~p~n%%% Primary: ~p (~p)~n%%% Syntax:  ~p~n',[File,Primary,ID,Normal]),
  110   write('%%% CCG:     '), write(CCG), 
  111   format('~n%%% Roles:   ~p~n',[Roles]),
  112   ( Missing = [], !; format('%%% Missing: ~p~n',[Missing]) ),
  113   format('%%% Example: ~p~n%%%~n',[Example]),
  114   frameMembers(Names,CCG,FID,Roles), !,
  115   addPP(SubCat,FID),
  116   frames(L,Names,ID,File).
  117
  118frames([Frame|L],Names,ID,File):-
  119   format('% frameproblem: ~p~n',[Frame]),
  120   frames(L,Names,ID,File).
  121
  122
  123/* ----------------------------------------------------------------------
  124   Add PP/NP categories with role
  125---------------------------------------------------------------------- */ 
  126
  127addPP([X,lex:as|L],FID):- !, 
  128   addPP([X,prep:as|L],FID).
  129
  130addPP([np:Old,prep:Preps|_],FID):- !,
  131   old2new(Old,New), 
  132   atomic_list_concat(L,' ',Preps),
  133   findall(_,(member(Prep,L),format('verbnet(~p, pp/np, [~q], ~p).~n',[Prep,New,FID]),add(pp/np,[New])),_).
  134
  135addPP([vp_ng:Old,prep:Preps|_],FID):- !, 
  136   old2new(Old,New), 
  137   atomic_list_concat(L,' ',Preps),
  138   findall(_,(member(Prep,L),format('verbnet(~p, pp/(s:ng\\np), [~q], ~p).~n',[Prep,New,FID]),add(pp/np,[New])),_).
  139
  140addPP([vp_to:Old,prep:Preps|_],FID):- !, 
  141   old2new(Old,New), 
  142   atomic_list_concat(L,' ',Preps),
  143   findall(_,(member(Prep,L),format('verbnet(~p, pp/(s:to\\np), [~q], ~p).~n',[Prep,New,FID]),add(pp/np,[New])),_).
  144
  145addPP([s:Old,prep:Preps|_],FID):- !, 
  146   old2new(Old,New), 
  147   atomic_list_concat(L,' ',Preps),
  148   findall(_,(member(Prep,L),format('verbnet(~p, pp/s:_, [~q], ~p).~n',[Prep,New,FID]),add(pp/np,[New])),_).
  149
  150addPP(_,_).
  151
  152
  153/* ----------------------------------------------------------------------
  154   Check if there is an example for a frame
  155---------------------------------------------------------------------- */ 
  156
  157example(Frame,Example):-
  158   elements(Frame,['EXAMPLES','EXAMPLE'],f(_,[Example])), !.
  159
  160example(_,'error (no example)').  
  161
  162
  163/* ----------------------------------------------------------------------
  164   Process all members of a frame
  165---------------------------------------------------------------------- */ 
  166
  167frameMembers([],_,_,_).
  168
  169frameMembers([Name1|L],CCG,FID,Roles):-
  170   reformatName(Name1,Name2),
  171   format('verbnet(~q, ',[Name2]),
  172   write(CCG),
  173   format(', ~q, ~q).~n',[Roles,FID]),
  174   add(CCG,Roles),
  175   frameMembers(L,CCG,FID,Roles).
  176
  177
  178/* ----------------------------------------------------------------------
  179   Reformat Verbnet names (underscores for spaces)
  180---------------------------------------------------------------------- */ 
  181
  182reformatName(N1,N2):-
  183   atom_chars(N1,C1),
  184   reformatString(C1,C2),
  185   atom_chars(N2,C2).
  186
  187reformatString([],[]).
  188reformatString([' '|L1],['_'|L2]):- !, reformatString(L1,L2).
  189reformatString([C|L1],[C|L2]):- reformatString(L1,L2).
  190
  191
  192/* ----------------------------------------------------------------------
  193   Add entries to Prolog database
  194---------------------------------------------------------------------- */ 
  195
  196add(CCG,Roles):-
  197   verbnet(CCG,Roles,Old), !,
  198   New is Old + 1,
  199   retract(verbnet(CCG,Roles,Old)),
  200   assert(verbnet(CCG,Roles,New)).
  201
  202add(CCG,Roles):-
  203   assert(verbnet(CCG,Roles,1)).
  204
  205
  206/* ----------------------------------------------------------------------
  207   Format VerbNet ID
  208---------------------------------------------------------------------- */ 
  209
  210formatID(Chars,[Pre,Sep1|L]):-
  211   Seps = ['-','.'], member(Sep1,Seps),
  212   append(PreChars,[Sep1|RestChars],Chars), 
  213   \+ ( member(Sep2,Seps), member(Sep2,PreChars) ), !,
  214   formatNumber(PreChars,Pre),
  215   formatID(RestChars,L).
  216
  217formatID(Chars,[ID]):-
  218   formatNumber(Chars,ID).
  219
  220formatNumber(Chars,Num):-
  221   Chars = [First|_], 
  222   member(First,['0','1','2','3','4','5','6','7','8','9']), !, 
  223   number_chars(Num,Chars).
  224
  225formatNumber(Chars,Atom):-
  226   atom_chars(Atom,Chars).
  227
  228/* ----------------------------------------------------------------------
   Building the subcat frame (items are accumulated in reverse order)
  230---------------------------------------------------------------------- */ 
  231
  232subcat([],Acc,Acc).
  233subcat([E|L],Acc1,Acc3):- cat(E,Acc1,Acc2), subcat(L,Acc2,Acc3).
  234
  235
  236/* ----------------------------------------------------------------------
  237   Converting Verbnet Roles to LIRICS
  238---------------------------------------------------------------------- */ 
  239
  240roles2roles([],[]).
  241roles2roles([X|L1],[Y|L2]):- old2new(X,Y), roles2roles(L1,L2).
  242
  243
  244/* ----------------------------------------------------------------------
  245   Constructing CCG category
  246---------------------------------------------------------------------- */ 
  247
  248% terminating
  249%
  250ccg([np:_,pp],X^C,C,[],[]):- !, X=pp.
  251ccg([np:_,prep:_],X^C,C,[],[]):- !, X=pp.
  252ccg([vp_ng:_,prep:_],X^C,C,[],[]):- !, X=pp.
  253ccg([vp_to:_,prep:_],X^C,C,[],[]):- !, X=pp.
  254ccg([np:_,lex:as],X^C,C,[],[]):- !, X=pp.
  255ccg([np:R],np^C,C,[],[R]):- !.
  256ccg([s:R],(s:'_')^C,C,[],[R]):- !.
  257%ccg([(s:ng\np):R],(s:ng\np)^C,C,[],[R]):- !.
  258ccg([pp:_],pp^C,C,[],[]):- !. % needed?
  259ccg([X],X^C,C,[],[]):- !.
  260
  261% recursive
  262%
  263ccg([np:_,lex:as|L],X^Old,New,M,Roles):- !, ccg(L,X^(Old/pp),New,M,Roles).
  264ccg([np:_,prep:_|L],X^Old,New,M,Roles):- !, ccg(L,X^(Old/pp),New,M,Roles).
  265ccg([s:_,prep:_|L],X^Old,New,M,Roles):- !, ccg(L,X^(Old/pp),New,M,Roles).
  266ccg([vp_to:_,prep:_|L],X^Old,New,M,Roles):- !, ccg(L,X^(Old/pp),New,M,Roles).
  267ccg([vp_ng:_,prep:_|L],X^Old,New,M,Roles):- !, ccg(L,X^(Old/pp),New,M,Roles).
  268ccg([np:_,pp|L],X^Old,New,M,Roles):- !, ccg(L,X^(Old/pp),New,M,Roles).
  269ccg([np:R|L],X^Old,New,M,[R|Oles]):- !, ccg(L,X^(Old/np),New,M,Oles).
  270ccg([s_to:R|L],X^Old,New,M,[R|Oles]):- !, ccg(L,X^(Old/(s:to\np)),New,M,Oles).
  271ccg([vp_ng:R|L],X^Old,New,M,[R|Oles]):- !, ccg(L,X^(Old/(s:ng\np)),New,M,Oles).
  272ccg([vp_to:R|L],X^Old,New,M,[R|Oles]):- !, ccg(L,X^(Old/(s:ng\np)),New,M,Oles).
  273ccg([vp_b:R|L],X^Old,New,M,[R|Oles]):- !, ccg(L,X^(Old/(s:b\np)),New,M,Oles).
  274ccg([s:R|L],X^Old,New,M,[R|Oles]):- !, ccg(L,X^(Old/s:'_'),New,M,Oles).
  275ccg([v|L],X^Old,New,M,Roles):- !, X=(s:'_'\Y), ccg(L,Y^Old,New,M,Roles).
  276ccg([adv|L],Old,New,M,Roles):- !, ccg(L,Old,New,M,Roles).
  277%ccg([lex:(\'s)|L],Old,New,M,Roles):- !, ccg(L,Old,New,M,Roles).  %%% not always correct!
  278ccg([U|L],Old,New,[U|M],Roles):- !, ccg(L,Old,New,M,Roles).
  279
  280
  281/* ----------------------------------------------------------------------
  282   Syntactic Restrictions
  283---------------------------------------------------------------------- */ 
  284
  285restr(Restr,Type):- 
  286  Restr = [element('SYNRESTRS',[],L)],
  287  member(element('SYNRESTR',['Value'='+',type=Type],[]),L), !.
  288
  289ing(acc_ing).
  290ing(oc_ing).
  291ing(ac_ing).
  292ing(be_sc_ing).
  293ing(np_omit_ing). 
  294
  295inf(oc_to_inf).
  296inf(ac_to_inf).
  297inf(sc_to_inf).
  298inf(vc_to_inf).
  299inf(rs_to_inf). 
  300inf(to_inf_rs). 
  301
  302bare(oc_bare_inf).
  303
  304s_restr(np_to_inf).
  305s_restr(that_comp).
  306s_restr(for_comp).
  307s_restr(wh_comp).
  308s_restr(quotation).
  309
  310% s_restr(np_ppart).     % ??? 
  311%s_restr(np_p_ing).     % ???
  312s_restr(np_ing).       % ???
  313
  314s_restr(how_extract).
  315s_restr(what_extract).
  316
  317s_restr(wh_inf).
  318s_restr(what_inf).
  319s_restr(wheth_inf).
  320
  321
  322
  323
  324/* ----------------------------------------------------------------------
   Mapping a syntax element to subcat item(s)
  326---------------------------------------------------------------------- */ 
  327
  328cat(element('NP', [value=Value], R),A,[vp_ng:Value|A]):- ing(Ing), restr(R,Ing), !.
  329cat(element('NP', [value=Value], R),A,[vp_to:Value|A]):- inf(Inf), restr(R,Inf), !.
  330cat(element('NP', [value=Value], R),A,[vp_b:Value|A]):- bare(B), restr(R,B), !.
  331cat(element('NP', [value=Value], R),A,[s:Value|A]):-     s_restr(S), restr(R,S), !.
  332cat(element('NP', [value=Value], _),A,[np:Value|A]):- !.
  333cat(element('PREP', [], _),A,[pp|A]):- !.
  334cat(element('PREP', [value=Value], _),A,[prep:Value|A]):- !.
  335cat(element('LEX', [value='[+be]'], _),A,[lex:be|A]):- !. 
  336cat(element('LEX', [value='it[+be]'], _),A,[lex:be,lex:it|A]):- !.
  337cat(element('LEX', [value=at], _),A,[prep:at|A]):- !.
  338cat(element('LEX', [value=of], _),A,[prep:of|A]):- !.
  339cat(element('LEX', [value=Value], _),A,[lex:Value|A]):- !.
  340cat(element('VERB',[],[]),A,[v|A]):- !.
  341cat(element('ADJ',[],[]),A,[adj|A]):- !.
  342cat(element('ADV',[],[]),A,[adv|A]):- !.
  343cat(U,A,[unk:U|A]):- !.
  344
  345
  346/* ----------------------------------------------------------------------
  347   Processing elements of the XML tree
  348---------------------------------------------------------------------- */ 
  349
  350elements([element(X,F,L)|_],[X],f(F,L)).
  351elements([element(X,_,L)|_],[X|R],A):- elements(L,R,A).
  352elements([_|L],X,A):- elements(L,X,A).
  353
  354
  355/* ----------------------------------------------------------------------
  356   Accessing a value
  357---------------------------------------------------------------------- */ 
  358
  359value([Name=Value|_],Name,Value):- !.
  360value([_|L],Name,Value):- value(L,Name,Value).
  361
  362
  363/* ----------------------------------------------------------------------
  364   VerbNet Directory
  365---------------------------------------------------------------------- */ 
  366
% verbnet_dir(-Dir): directory holding the VerbNet XML files.  run/0
% checks that it exists and concatenates the wildCard/1 pattern directly
% after it, so the trailing slash is required.
verbnet_dir('ext/VerbNet/').
  368
  369
  370/* ----------------------------------------------------------------------
  371   Processing all XML files
  372---------------------------------------------------------------------- */ 
  373
  374process([]):- 
  375   planB.
  376
  377process([File|L]):-
  378   verbnet2prolog(File), !,
  379   process(L).
  380
  381
  382/* ----------------------------------------------------------------------
  383   Plan B (verbs that are not in VerbNet)
  384---------------------------------------------------------------------- */ 
  385
  386planB:- 
  387   setof(X,A^B^verbnet(X,A,B),L), 
  388   format('~n%%% Most frequent roles for a particular CCG category.~n%%%~n',[]),
  389   format('verbnet(_, ~p, [~q], []). % ~n',[s:adj\np,'Theme']),
  390   planB(L).
  391
  392planB([]).
  393
  394planB([CCG|L]):-
  395   verbnet(CCG,R,N), \+ (verbnet(CCG,_,M), M>N),
  396   write('verbnet(_, '), 
  397   write(CCG), 
  398   format(' , ~q, []). % n=~p~n',[R,N]), 
  399   planB(L).
  400
  401
  402/* ----------------------------------------------------------------------
  403   Header
  404---------------------------------------------------------------------- */ 
  405
  406header:-
  407   format('%%% automatically generated by src/prolog/lib/verbnet2boxer.pl~n%%%~n',[]),
  408   format(':- module(verbnet,[verbnet/3,verbnet/4]).~n',[]),
  409   format(':- use_module(boxer(slashes)).~n~n',[]),
  410   format('%%% wrapper~n%%%~nverbnet(A,B,C):- verbnet(A,B,C,_).~n').
  411
  412
  413/* ----------------------------------------------------------------------
  414   Wildcard for XML files to be processed
  415---------------------------------------------------------------------- */ 
  416
% wildCard(-Pattern): file name pattern that run/0 appends to the VerbNet
% directory and expands to the list of files to process.  The
% commented-out alternatives select a single class file instead.
wildCard('*.xml').
%wildCard('addict-96.xml').
%wildCard('accompany-51.7.xml').
%wildCard('cooperate-73.xml').
%wildCard('manner_speaking-37.3.xml').
%wildCard('put-9.1.xml').
%wildCard('matter-91.xml').
%wildCard('run-51.3.2.xml').
%wildCard('adjust-26.9.xml').
%wildCard('amalgamate-22.2.xml').
%wildCard('instr_communication-37.4.xml').
  428
  429
  430/* ----------------------------------------------------------------------
  431   Start Predicate
  432---------------------------------------------------------------------- */ 
  433
  434run:- 
  435   verbnet_dir(Dir), 
  436   exists_directory(Dir),
  437   wildCard(WildCard),
  438   atom_concat(Dir,WildCard,Expand),
  439   expand_file_name(Expand,Files),
  440   header,
  441   process(Files), 
  442   halt.
  443
  444:- run.