1%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2%% WN_CONNECT v1.3 : wn_gen_prox_equations 3%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4/* 5AUTHORS: Pascual Julia´n-Iranzo (Universidad de Castilla-La Mancha, Spain) 6Fernando Sa´enz-Pe´rez (Universidad Complutense de Madrid, Spain) 7 8WN_CONNECT is licensed for research and educational purposes only and it is 9distributed with NO WARRANTY OF ANY KIND. You are freely allowed to use, copy 10and distribute WN_CONNECT provided that you make no modifications to any of its 11files and give credit to its original authors. 12*******************************************************************************/ 13%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 14% Generation of proximity equations based on WordNet 15 16:- module(wn_gen_prox_equations, [ 17 wn_gen_ontology_file/3, % +ListOfListOfWords, +File, +Measure 18 wn_gen_prox_equations_list/3, % +ListOfListOfWords, +Measure, -Equations 19 wn_auto_gen_prox_equations/4 % +Directives, +Rules, -InEquations, -OutEquations 20 ] 21 ). 22 23:- use_module(wn_sim_measures). 24:- use_module(wn_utilities). 25%%:- use_module(utilities). 26%:- use_module(library(ordsets)). 27 28 29%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 30 31 32%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 33%%% wn_gen_ontology_file(+ListOfListOfWords, +File, +Measure) 34%%% Given a list of list of words, ListOfListOfWords, the name of a file, File, 35%%% and the acronym of a measure, Measure (by now [path, wup, lch, res, lin, jcn, 36%%% yarm]), it generates a set of proximity equations and stored them into the file File. 37%%% 38wn_gen_ontology_file(ListOfListOfWords, File, Measure) :- 39 file_name_extension(_, Extension, File), 40 ((Extension = ont) -> File_ont = File; 41 (Extension = '') -> file_name_extension(File, ont, File_ont) 42 ), 43 (exists_file(File_ont) -> 44 write('The file '), write(File), write(' or '), write(File_ont), write(' does exists.'), nl 45 ; 46 (member(Measure, [path, wup, lch, res, lin, jcn, hso, lesk, vector, yar]) -> 47 wn_gen_prox_equations_list(ListOfListOfWords, Measure, Equations), 48 open(File_ont, write, OutputStream), 49 write(OutputStream,'%% PROXIMITY EQUATIONS'), nl(OutputStream), 50 write_equations(Equations, OutputStream), 51 close(OutputStream) 52 ; 53 write(Measure), write(' is not a similarity or relatedness measure.'), nl 54 ) 55 ). 56 57%%% write_equations(+Equations, +OutputStream) 58%%% 59write_equations([], _). 60write_equations([sim(W1,W2,D)|Equations], OutputStream) :- 61 concat_atom([W1,'~', W2, '=', D, '.'], ProxEqu), 62 write(OutputStream,ProxEqu), nl(OutputStream), 63 write_equations(Equations, OutputStream). 64 65 66%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 67%%% wn_auto_gen_prox_equations(+Directives, +Rules, -InEquations, -OutEquations) 68%%% 69%%% If Directives is [:- directive(wn_gen_prox_equations, [Measure, Auto])], then 70%%% return in OutEquations the equations in InEquations plus all the proximity 71%%% equations derived from the following: 72%%% - Extract three sets from Rules: constant, functor and predicate identifiers, 73%%% - For each word W1 in a set compare it to any other word W2 in the same set to 74%%% determine their relatedness degree D, and generate a proximity equation 75%%% sim(W1, W2, D) in OutEquations. 76%%% Otherwise, just return InEquations in OutEquations 77%%% 78 79wn_auto_gen_prox_equations([:- directive(wn_gen_prox_equations, [Measure, Auto])], Rules, InEquations, OutEquations) :- 80 \+ is_list(Auto), 81 atoms_functors_in_term(Rules, AllAtoms, AllFunctors), 82 exception_words(ExceptionWords), 83 ordsets:ord_subtract(AllAtoms, ExceptionWords, Atoms), 84 ordsets:ord_subtract(AllFunctors, ExceptionWords, AllValidFunctors), 85 bpl_predicates(AllValidFunctors, Functors, Predicates), 86 !, 87 wn_gen_prox_equations_list([Atoms, Functors, Predicates], Measure, NewEquations), 88 lists:append(InEquations, NewEquations, OutEquations). 89 90wn_auto_gen_prox_equations(_Directives, _Rules, Equations, Equations). 91 92 93% List of words that are not to be related with WordNet words. 94% This must be an ordered set 95exception_words([ 96 ':-', 97 true % true is the body of a fact, it is dismissed 98 ]). 99 100 101 102%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 103%%% wn_gen_prox_equations_list(+ListOfListOfWords, +Measure, -Equations) 104%%% Given a ListOfListOfWords computes all proximity equations that can be formed paring 105%%% the words of each list between them and then computing their proximity degree using 106%%% the measure Measure. 107%%% 108%%% NOTES: Each list of ListOfListOfWords must be compounded by words of the same part of 109%%% speech (either nouns, verbs or adjectives) 110%%% 111%%% "sim(Word1, Word2, Degree)" is the internal Bousi~Prolog representation of a 112%%% proximity equation "Word1 ~ Word2 = Degree" (i.e., Word1 is close to Word2 with 113%%% approximation degree Degree). 114%%% 115 116wn_gen_prox_equations_list(ListOfListOfWords, Measure, Equations) :- 117 wn_gen_prox_equations_list(ListOfListOfWords, Measure, Equations, []). 118 119%%% 120%%% wn_gen_prox_equations_list/4 is implemented by a set of Definite Clause Grammar 121%%% rules. Grammar rules are expanded automatically into Prolog clauses with two extra 122%%% arguments added as the two arguments of the predicate in order to represent the 123%%% input (or output) tokens as a difference list. 124%%% 125%%% 126 127wn_gen_prox_equations_list([], _Measure) --> 128 []. 129 130wn_gen_prox_equations_list([ListOfWords|ListOfListOfWords], Measure) --> 131 wn_gen_prox_equations(ListOfWords, Measure), 132 wn_gen_prox_equations_list(ListOfListOfWords, Measure). 133 134 135wn_gen_prox_equations([], _Measure) --> 136 []. 137 138wn_gen_prox_equations([Word|ListOfWords], Measure) --> 139 wn_gen_prox_equations(ListOfWords, Word, Measure), 140 wn_gen_prox_equations(ListOfWords, Measure). 141 142 143wn_gen_prox_equations([], _Word1, _Measure) --> 144 []. 145 146wn_gen_prox_equations([Word2|ListOfWords], Word1, Measure) --> 147 {gen_prox_equation(Measure, Word1, Word2, Equation), 148 !}, 149 [Equation], 150 wn_gen_prox_equations(ListOfWords, Word1, Measure). 151 152wn_gen_prox_equations([_Word2|ListOfWords], Word1, Measure) --> % Word1 and Word2 are not related 153 wn_gen_prox_equations(ListOfWords, Word1, Measure). 154 155 156%%% gen_prox_equation(+Measure, +Pattern1, +Pattern2, -Equation) 157%%% 158%%% Return the Equation for the given Measure and words. 159%%% Words come expressed as the term Word:Type:SenseNumber 160 161gen_prox_equation(Measure, Word1:Type1:Sense1, Word2:Type2:Sense2, Equation) :- 162 atom(Word1), 163 atom(Word2), 164 valid_word_type(Type1), 165 valid_word_type(Type2), 166 number(Sense1), 167 number(Sense2), 168 !, 169 gen_prox_equation_aux(Measure, Word1:Type1:Sense1, Word2:Type2:Sense2, Equation). 170 171gen_prox_equation(Measure, Word1, Word2, Equation) :- 172 atom(Word1), 173 atom(Word2), 174 !, 175 gen_prox_equation_aux(Measure, Word1:Type:1, Word2:Type:1, Equation). 176 177gen_prox_equation(_Measure, Word1, Word2, _Equation) :- 178 format('ERROR: Incorrect pattern for ~p and/or ~p. Expected either plain words or patterns Word:Type:Sense, where Type is in {n,v} and Sense is a number.', [Word1, Word2]), 179 fail. 180 181 182gen_prox_equation_aux(Measure, Pattern1, Pattern2, sim(Word1, Word2, NormalizedDegree)) :- 183 wn_measure_module_goal(Measure, Module, MeasureGoalName), 184 MeasureGoal =.. [MeasureGoalName, Pattern1, Pattern2, Degree], 185 Module:MeasureGoal, 186 measure_max_value(Measure, Max), 187 NormalizedDegree is Degree/Max, 188 Pattern1 = Word1:_:_, 189 Pattern2 = Word2:_:_. 190 191 192%%% valid_word_type(+Type). 193%%% Valid word types. Currently, only nouns (n) and verbs (v) 194 195valid_word_type(n). 196 197valid_word_type(v). 198 199 200%%% wn_measure_module_goal(?Measure, ?Module, ?MeasureGoalName) 201%%% This predicate stores a list of parameters: 202%%% Measure: it is the name of the measure used to compute the similarity or 203%%% relatedness of two ListOfWords. 204%%% Module: it is the module where is implemented the corresponding measure. 205%%% MeasureGoalName: it is the name of the predicate that implements the measure. 206%%% 207 208wn_measure_module_goal(path, wn_sim_measures, wn_path). 209wn_measure_module_goal(wup, wn_sim_measures, wn_wup). 210wn_measure_module_goal(lch, wn_sim_measures, wn_lch). 211wn_measure_module_goal(res, wn_ic_measures, wn_res). 212wn_measure_module_goal(jcn, wn_ic_measures, wn_jcn). 213wn_measure_module_goal(lin, wn_ic_measures, wn_lin). 214wn_measure_module_goal(yarm, wn_rel_measures, wn_yarm). 215 216 217%%% measure_max_value(?Measure, ?Value) 218%%% Measure: Name of the measure (path, lch, ...). 219%%% Value: Maximum value the measure can take. 220%%% 221 222measure_max_value(path, 1). 223measure_max_value(wup, 1). 224measure_max_value(lch, 3.6888794541139363). 225measure_max_value(res, 1) :- % WARNING: Check this value! 226 nl, write('WARNING: Normalization is not checked.'), nl. 227measure_max_value(jcn, 1) :- % WARNING: Check this value! 228 nl, write('WARNING: Normalization is not checked.'), nl. 229measure_max_value(lin, 1). 230%measure_max_value(lesk, ???). 231measure_max_value(hso, 16). 232measure_max_value(yarm, 1)