:- module(binarise,[binarise/3]). :- use_module(library(lists),[member/2,select/3,append/3]). :- use_module(printError,[error/2,error/3]). /* ----------------------------------------------------------------------- Binarising: put branches in a list ----------------------------------------------------------------------- */ binarise(Tree,Name,Bin):- Tree =.. [tree,Node|Branches], !, binarise(t(Node,Branches),Name,Bin). /* ----------------------------------------------------------------------- Binarising: definite prepositions ------------------------------------------------------------------------ */ binarise(t(Cat,[h:T1,a:T2]),B,C):- T1 = tree('PREP',Tok1), T2 = tree(DPARG, h:tree('Dbar', h:tree('ART~DE', Tok2), a:Tree)), constituent(DPARG,'DP-ARG'), atom_codes(Tok1,Codes1), atom_codes(Tok2,Codes2), append(_,[47|Codes],Codes1), append(_,[47|Codes],Codes2), !, binarise(t(Cat,[h:tree('DEFPREP',Tok1),a:Tree]),B,C). /* ----------------------------------------------------------------------- Binarising: repair of embedded and relative clauses ------------------------------------------------------------------------ */ binarise(t(Cat,[h:T1,a:T2]),B,C):- Cat = 'CONJP-OBJ', T1 = tree('CONJ',_), T2 =.. [tree,'S-ARG'|_], !, binarise(t(Cat,[m:T1,h:T2]),B,C). %binarise(t(Cat,[a:T1,h:T2]),B,C):- % Cat = 'S-RMOD+RELCL', % T1 = tree('NP-SUBJ',_), % T2 =.. [tree,'Sbar'|_], !, % binarise(t(Cat,[h:T1,a:T2]),B,C). /* ----------------------------------------------------------------------- Binarising: repair of double-headed preposition ------------------------------------------------------------------------ */ binarise(t(Cat,[h:T1,h:T2]),B,C):- T1 =.. [tree,'PREP'|_], T2 =.. [tree,'PP-CONTIN+PREP'|_], !, binarise(t(Cat,[h:T1,a:T2]),B,C). /* ----------------------------------------------------------------------- Binarising: repair of name/noun compounds ------------------------------------------------------------------------ */ binarise(t(Cat,L1),B,C):- sublist([h:T1,h:T2],L1,Before,After), T1 =.. [tree,'Nbar'|_], T2 =.. [tree,'NP-CONTIN+DENOM'|_], !, sublist([h:t(Cat,[m:T1,h:T2])],L2,Before,After), binarise(t(Cat,L2),B,C). binarise(t(Cat,L1),B,C):- sublist([h:T1,h:T2],L1,Before,After), T1 =.. [tree,'Nbar'|_], T2 =.. [tree,'NOU~CA-CONTIN'|_], !, sublist([h:t(Cat,[m:T1,h:T2])],L2,Before,After), binarise(t(Cat,L2),B,C). /* ----------------------------------------------------------------------- Binarising: repair of numerical compounds ------------------------------------------------------------------------ */ binarise(t(Cat,L1),B,C):- sublist([h:T1,h:T2],L1,Before,After), T1 =.. [tree,'NUMR'|_], T2 =.. [tree,'DP-CONTIN+NUM'|_], !, sublist([h:t(Cat,[a:T1,h:T2])],L2,Before,After), binarise(t(Cat,L2),B,C). binarise(t(Cat,L1),B,C):- sublist([h:T1,h:T2],L1,Before,After), T1 =.. [tree,'NUMR'|_], T2 =.. [tree,NUMRC|_], constituent(NUMRC,'NUMR-CONTIN+NUM'), !, sublist([h:t(Cat,[a:T1,h:T2])],L2,Before,After), binarise(t(Cat,L2),B,C). binarise(t(Cat,L1),B,C):- sublist([h:T1,h:T2],L1,Before,After), T1 =.. [tree,'NUMR'|_], T2 =.. [tree,'Dbar-CONTIN+NUM'|_], !, sublist([h:t(Cat,[a:T1,h:T2])],L2,Before,After), binarise(t(Cat,L2),B,C). /* ----------------------------------------------------------------------- Binarising: repair of comparative ------------------------------------------------------------------------ */ binarise(t(Cat,L1),B,C):- sublist([h:T1,h:T2],L1,Before,After), T1 =.. [tree,'ADVB-COORDANTEC+COMPAR'|_], T2 =.. [tree,'ADJ~QU'|_], !, sublist([h:t(Cat,[h:T1,a:T2])],L2,Before,After), binarise(t(Cat,L2),B,C). /* ----------------------------------------------------------------------- Binarising: repair of percentage expressions ------------------------------------------------------------------------ */ binarise(t(Cat,L1),B,C):- sublist([h:T1,a:T2,h:T3],L1,Before,After), T1 =.. [tree,'NUMR'|_], T2 =.. [tree,'SPECIAL-ARG-PERCENT'|_], T3 =.. [tree,'PP-ARG-PARTITIVE'|_], !, sublist([h:t(Cat,[h:T1,a:T2,a:T3])],L2,Before,After), binarise(t(Cat,L2),B,C). binarise(t(Cat,L1),B,C):- sublist([h:T1,a:T2,h:T3],L1,Before,After), T1 =.. [tree,'NUMR'|_], T2 =.. [tree,'PP-ARG-PERCENT'|_], T3 =.. [tree,'PP-ARG-PARTITIVE'|_], !, sublist([h:t(Cat,[h:T1,a:T2,a:T3])],L2,Before,After), binarise(t(Cat,L2),B,C). binarise(t(Cat,L1),B,C):- sublist([h:T1,a:T2,a:T3,h:T4],L1,Before,After), T1 =.. [tree,'NUMR'|_], T2 =.. [tree,'SPECIAL-ARG-PERCENT'|_], T3 =.. [tree,'PP-ARG'|_], T4 =.. [tree,'PP-ARG-PARTITIVE'|_], !, sublist([h:t(Cat,[h:T1,a:T2,a:T3,a:T4])],L2,Before,After), binarise(t(Cat,L2),B,C). /* ----------------------------------------------------------------------- Binarising: repair of sentences with VP as subject ------------------------------------------------------------------------ */ binarise(t(Cat,L1),B,C):- sublist([h:T1,h:T2],L1,Before,After), T1 =.. [tree,VPSUBJ|_], constituent(VPSUBJ,'VP-SUBJ'), T2 =.. [tree,'Sbar'|_], !, sublist([h:t(Cat,[a:T1,h:T2])],L2,Before,After), binarise(t(Cat,L2),B,C). /* ----------------------------------------------------------------------- Binarising: possessives (ALB-19) ------------------------------------------------------------------------ */ binarise(t(Cat,L1),B,C):- sublist([a:T1,a:T2],L1,Before,After), T1 =.. [tree,'ADJ~PO'|_], T2 =.. [tree,'NOU~CS-ARG'|_], !, sublist([h:t(Cat,[h:T1,a:T2])],L2,Before,After), binarise(t(Cat,L2),B,C). /* ----------------------------------------------------------------------- Binarising: DP modifiers (ALB-33, ALB-381) ------------------------------------------------------------------------ */ binarise(t(Cat,L1),B,C):- sublist([m:T1,a:T2],L1,Before,After), T1 =.. [tree,'ADVP-RMOD'|_], T2 =.. [tree,'Dbar'|_], !, sublist([h:t(Cat,[m:T1,h:T2])],L2,Before,After), binarise(t(Cat,L2),B,C). binarise(t(Cat,L1),B,C):- sublist([m:T1,a:T2],L1,Before,After), T1 =.. [tree,'PRDT-RMOD'|_], T2 =.. [tree,'Dbar'|_], !, sublist([h:t(Cat,[m:T1,h:T2])],L2,Before,After), binarise(t(Cat,L2),B,C). binarise(t(Cat,L1),B,C):- sublist([a:T1,a:T2],L1,Before,After), T1 =.. [tree,ADJ|_], constituent(ADJ,'ADJ'), T2 =.. [tree,NOU|_], constituent(NOU,'NOU'), !, sublist([h:t(Cat,[h:T1,a:T2])],L2,Before,After), binarise(t(Cat,L2),B,C). /* ----------------------------------------------------------------------- Binarising: embedded clauses (CHIAM-47, V-19) ------------------------------------------------------------------------ */ binarise(t(Cat,L1),B,C):- Cat = 'Vbar', sublist([a:T1,m:T2],L1,Before,After), T1 =.. [tree,VMA|_], constituent(VMA,'VMA~'), T2 =.. [tree,CON|_], constituent(CON,'CONJP-OBJ'), !, sublist([h:t(Cat,[h:T1,a:T2])],L2,Before,After), binarise(t(Cat,L2),B,C). /* ------------------------------------------------------ Binarising: headless monsters ------------------------------------------------------ */ binarise(t(Cat,Branches),Name,_):- Branches = [_,_|_], \+ member(h:_,Branches), error('headless monster in ~p (~p)~n',[Name,Cat]), printBranches(Branches), !, fail. /* ------------------------------------------------------ Binarising: two headed monsters ------------------------------------------------------ */ binarise(t(Cat,Branches),Name,_):- select(h:_,Branches,Rest), member(h:_,Rest), error('two-headed monster in ~p (~p)~n',[Name,Cat]), printBranches(Branches), !, fail. /* ------------------------------------------------------ Binarising: traces ------------------------------------------------------ */ binarise(t(Cat,[]),_,trace(S,Trace)):- atom_chars(Cat,Chars), append(Pre,['*','['|List],Chars), !, atom_chars(Trace,['['|List]), atom_chars(S,Pre). binarise(t(Node,_,[h:tree('Nbar',h:tree(_,Word))]),_,trace(Node,'[]')):- atom(Word), atom_chars(Word,Chars), append(_,['*','[',']'],Chars), !. /* ------------------------------------------------------ Binarising: leaf nodes ------------------------------------------------------ */ binarise(t(Cat,[Word]),_,leaf(NewCat,Number,Token)):- atom(Word), atom_codes(Word,Codes), append(NumCodes,[47|TokenCodes],Codes), !, number_codes(Number,NumCodes), atom_codes(Token,TokenCodes), ( atom_codes(Cat,CatCodes), append(NewCatCodes,[42|_],CatCodes), !, atom_codes(NewCat,NewCatCodes) ; NewCat = Cat ). /* ------------------------------------------------------ Binarising: unary branching ------------------------------------------------------ */ binarise(t(N,[T:A]),Name,tree(N,T:B)):- !, binarise(A,Name,B). /* ------------------------------------------------------ Binarising: binary branching ------------------------------------------------------ */ binarise(t(N,[T1:A1,T2:A2]),Name,tree(N,T1:B1,T2:B2)):- !, binarise(A1,Name,B1), binarise(A2,Name,B2). /* ------------------------------------------------------ Binarising: aux verbs ------------------------------------------------------ */ binarise(t(Node,B1),Name,Bin):- Node = 'Vbar', sublist([m:T1,h:T2],B1,Before,After), T1 =.. [tree,'VP'|_], T2 =.. [tree,Verb|_], member(Verb,['VMA~GE','VMA~PA','VMO~PA']), !, sublist([h:t(Node,[m:T1,h:T2])],B2,Before,After), binarise(t(Node,B2),Name,Bin). /* ------------------------------------------------------ Binarising: punctuation (itemisation) ------------------------------------------------------ */ binarise(t(Node,Branches),Name,Bin):- Branches = [Mod1,Mod2|Rest], Mod1 = m:T1, T1 =.. [tree,'DP-RMOD-LISTPOS'|_], Mod2 = m:T2, T2 =.. [tree,'PUNCT-SEPARATOR'|_], !, Tree = t(Node,[m:tree('LISTPOS',a:T1,h:T2)|Rest]), binarise(Tree,Name,Bin). /* ------------------------------------------------------ Binarising: punctuation (separators) ------------------------------------------------------ */ binarise(t(Node,Branches),Name,Bin):- Mod = m:tree('PUNCT-SEPARATOR',_), member(Mod,Branches), append([a:Tree3,Mod],Rest,Branches), !, Tree = t(Node,[a:tree(Node,h:Tree3,Mod)|Rest]), binarise(Tree,Name,Bin). /* ------------------------------------------------------ Binarising: punctuation (brackets, quotes) ------------------------------------------------------ */ binarise(t(Node,Branches),Name,Bin):- member(Open, ['PUNCT-OPEN+QUOTES', 'PUNCT-OPEN+PARENTHETICAL']), member(Close,['PUNCT-CLOSE+QUOTES','PUNCT-CLOSE+PARENTHETICAL']), TFi = m:tree(Open,Fi), TLa = m:tree(Close,La), append(Before,[TFi|Temp],Branches), append(Mid,[TLa|After],Temp), !, NTFi = m:tree(Open,Fi), NTLa = m:tree(Close,La), New = h:tree(Node,h:tree('PUNCT',NTFi,h:t(Node,Mid)),NTLa), sublist([New],NewBranches,Before,After), binarise(t(Node,NewBranches),Name,Bin). /* ------------------------------------------------------ Binarising: ternary (or more) branching ------------------------------------------------------ */ binarise(t(Node,B1),Name,Bin):- member(Pattern,[[h:_,a:_],[a:_,h:_],[h:_,m:_],[m:_,h:_]]), sublist( Pattern, B1,Before,After), !, sublist([h:t(Node,Pattern)],B2,Before,After), binarise(t(Node,B2),Name,Bin). /* =========================================================== SubList =========================================================== */ sublist(Sub,List,Before,After):- append(Temp,After,List), append(Before,Sub,Temp),!. /* =========================================================== Print branches =========================================================== */ printBranches([]):- error('.............~n',[]). printBranches([X|L]):- error(' ~p~n',[X]), printBranches(L). /* =================================================================== Constituent checking =================================================================== */ constituent(Cons,Goal):- atom(Cons), atom(Goal), atom_chars(Goal,GoalChars), atom_chars(Cons,ConsChars), append(GoalChars,_,ConsChars), !.