%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% WN_CONNECT v1.3 : wn_gen_prox_equations
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
/*
AUTHORS: Pascual Julián-Iranzo (Universidad de Castilla-La Mancha, Spain)

WN_CONNECT is licensed for research and educational purposes only and it is
distributed with NO WARRANTY OF ANY KIND. You are freely allowed to use, copy
and distribute WN_CONNECT provided that you make no modifications to any of its
files and give credit to its original authors.
*******************************************************************************/
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Generation of proximity equations based on WordNet
15
% Module interface: the three predicates below are the only exported ones.
:- module(wn_gen_prox_equations,
          [ wn_gen_ontology_file/3,       % +ListOfListOfWords, +File, +Measure
            wn_gen_prox_equations_list/3, % +ListOfListOfWords, +Measure, -Equations
            wn_auto_gen_prox_equations/4  % +Directives, +Rules, +InEquations, -OutEquations
          ]).

:- use_module(wn_sim_measures).
:- use_module(wn_utilities).
%%:- use_module(utilities).
%:- use_module(library(ordsets)).
27
28
29%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
30
31
32%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% wn_gen_ontology_file(+ListOfListOfWords, +File, +Measure)
%%%
%%% Given a list of lists of words, ListOfListOfWords, the name of a file, File,
%%% and the acronym of a measure, Measure, it generates the corresponding set of
%%% proximity equations and stores them in an ontology file: a header line
%%% followed by one equation "W1~W2=Degree." per line.
%%%
%%% - File must either have the extension 'ont' or no extension at all (then
%%%   'ont' is added). For any other extension the predicate simply fails.
%%% - If the target file already exists, a message is written on the current
%%%   output and nothing is generated.
%%% - If Measure is not one of the accepted acronyms, a message is written on
%%%   the current output and no file is created.
%%%
%%% NOTE: hso, lesk and vector are accepted here, but this file defines no
%%%       wn_measure_module_goal/3 entry for them.
%%%
wn_gen_ontology_file(ListOfListOfWords, File, Measure) :-
    file_name_extension(_, Extension, File),
    (   Extension == ont
    ->  File_ont = File
    ;   Extension == ''
    ->  file_name_extension(File, ont, File_ont)
    ),
    (   exists_file(File_ont)
    ->  write('The file '), write(File), write(' or '), write(File_ont), write(' does exists.'), nl
    ;   % 'yarm' (not 'yar') is the acronym used by wn_measure_module_goal/3
        % and measure_max_value/2.
        memberchk(Measure, [path, wup, lch, res, lin, jcn, hso, lesk, vector, yarm])
    ->  wn_gen_prox_equations_list(ListOfListOfWords, Measure, Equations),
        % setup_call_cleanup/3 guarantees the stream is closed even if
        % writing fails or raises an exception.
        setup_call_cleanup(
            open(File_ont, write, OutputStream),
            (   write(OutputStream, '%% PROXIMITY EQUATIONS'), nl(OutputStream),
                write_equations(Equations, OutputStream)
            ),
            close(OutputStream))
    ;   write(Measure), write(' is not a similarity or relatedness measure.'), nl
    ).
56
%%% write_equations(+Equations, +OutputStream)
%%%
%%% Writes every term sim(W1, W2, D) of the list Equations on OutputStream as
%%% one line of the form "W1~W2=D.".
%%%
write_equations([], _).
write_equations([sim(W1,W2,D)|Equations], OutputStream) :-
    % atomic_list_concat/2 is the current replacement for the deprecated
    % concat_atom/2.
    atomic_list_concat([W1, '~', W2, '=', D, '.'], ProxEqu),
    write(OutputStream, ProxEqu), nl(OutputStream),
    write_equations(Equations, OutputStream).
64
65
66%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% wn_auto_gen_prox_equations(+Directives, +Rules, +InEquations, -OutEquations)
%%%
%%% If Directives is [:- directive(wn_gen_prox_equations, [Measure, Auto])] and
%%% Auto is not a list, OutEquations is InEquations followed by the proximity
%%% equations derived as follows:
%%% - Three sets are extracted from Rules: constant, functor and predicate
%%%   identifiers (the words in exception_words/1 are removed first).
%%% - Each word W1 of a set is compared with every other word W2 of the same
%%%   set to determine their relatedness degree D, and the proximity equation
%%%   sim(W1, W2, D) is generated.
%%% In any other case (including the failure of any of the extraction steps),
%%% OutEquations is just InEquations.
%%%

wn_auto_gen_prox_equations(Directives, Rules, InEquations, OutEquations) :-
  Directives = [(:- directive(wn_gen_prox_equations, [Measure, Auto]))],
  \+ is_list(Auto),
  exception_words(Excluded),
  atoms_functors_in_term(Rules, RuleAtoms, RuleFunctors),
  ordsets:ord_subtract(RuleAtoms, Excluded, Atoms),
  ordsets:ord_subtract(RuleFunctors, Excluded, ValidFunctors),
  bpl_predicates(ValidFunctors, Functors, Predicates),
  !,  % commit: from here on the fallback clause below is no longer tried
  wn_gen_prox_equations_list([Atoms, Functors, Predicates], Measure, Generated),
  lists:append(InEquations, Generated, OutEquations).

wn_auto_gen_prox_equations(_Directives, _Rules, Equations, Equations).
91
92
% exception_words(-Words)
% Words is the list of words that are not to be related with WordNet words.
% It is used as an argument of ord_subtract/3, so it must be kept as an
% ordered set.
exception_words([ ':-'
                , true   % true is the body of a fact, it is dismissed
                ]).
99
100
101
102%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% wn_gen_prox_equations_list(+ListOfListOfWords, +Measure, -Equations)
%%%
%%% Given a ListOfListOfWords, computes all the proximity equations that can be
%%% formed by pairing the words of each list with each other and computing their
%%% proximity degree with the measure Measure. Pairs for which no degree can be
%%% computed produce no equation.
%%%
%%% NOTES: Each list of ListOfListOfWords must be composed of words of the same
%%%        part of speech (either nouns, verbs or adjectives).
%%%
%%%        "sim(Word1, Word2, Degree)" is the internal Bousi~Prolog
%%%        representation of a proximity equation "Word1 ~ Word2 = Degree"
%%%        (i.e., Word1 is close to Word2 with approximation degree Degree).
%%%

wn_gen_prox_equations_list(ListOfListOfWords, Measure, Equations) :-
    % The generator is a DCG non-terminal; phrase/2 is the supported way of
    % calling it, instead of relying on the two extra arguments added by the
    % DCG translation.
    phrase(wn_gen_prox_equations_list(ListOfListOfWords, Measure), Equations).
118
%%%
%%% wn_gen_prox_equations_list(+ListOfListOfWords, +Measure)//
%%%
%%% Definite Clause Grammar non-terminal (wn_gen_prox_equations_list/4 once
%%% expanded: the grammar translation adds two extra arguments that represent
%%% the generated equations as a difference list). It describes the equations
%%% of every list of words in ListOfListOfWords, one list after another.
%%%

wn_gen_prox_equations_list([], _) --> [].
wn_gen_prox_equations_list([Words|RemainingLists], Measure) -->
  wn_gen_prox_equations(Words, Measure),
  wn_gen_prox_equations_list(RemainingLists, Measure).
133
134
% wn_gen_prox_equations(+ListOfWords, +Measure)//
% Equations for a single list of words: every word is paired with each of the
% words that follow it in the list.
wn_gen_prox_equations([], _) --> [].
wn_gen_prox_equations([First|Others], Measure) -->
  wn_gen_prox_equations(Others, First, Measure),   % First against the rest
  wn_gen_prox_equations(Others, Measure).          % then the rest among themselves
141
142
% wn_gen_prox_equations(+ListOfWords, +Word1, +Measure)//
% Equations relating Word1 with each word of ListOfWords. Only the first
% solution of gen_prox_equation/4 is used for each pair; a pair for which it
% fails contributes no equation.
wn_gen_prox_equations([], _Word1, _Measure) --> [].
wn_gen_prox_equations([Word2|Others], Word1, Measure) -->
  (   { gen_prox_equation(Measure, Word1, Word2, Equation) }
  ->  [Equation]
  ;   []                      % Word1 and Word2 are not related
  ),
  wn_gen_prox_equations(Others, Word1, Measure).
154
155
%%% gen_prox_equation(+Measure, +Pattern1, +Pattern2, -Equation)
%%%
%%%   Return the Equation for the given Measure and words.
%%%   Both words must be given in the same form:
%%%   - as terms Word:Type:SenseNumber, where Word is an atom, Type is a valid
%%%     word type (see valid_word_type/1) and SenseNumber is a number; or
%%%   - as plain atoms, in which case sense number 1 is used and the type is
%%%     left unbound (the same variable for both words).
%%%   Any other combination prints an error message on the current output and
%%%   fails. The predicate also fails (silently) when no equation can be
%%%   computed for well-formed patterns.

gen_prox_equation(Measure, Word1:Type1:Sense1, Word2:Type2:Sense2, Equation) :-
  atom(Word1),
  atom(Word2),
  valid_word_type(Type1),
  valid_word_type(Type2),
  number(Sense1),
  number(Sense2),
  !,
  gen_prox_equation_aux(Measure, Word1:Type1:Sense1, Word2:Type2:Sense2, Equation).

gen_prox_equation(Measure, Word1, Word2, Equation) :-
  atom(Word1),
  atom(Word2),
  !,
  gen_prox_equation_aux(Measure, Word1:Type:1, Word2:Type:1, Equation).

gen_prox_equation(_Measure, Word1, Word2, _Equation) :-
  % The message ends with ~n so that consecutive diagnostics (one per
  % offending pair) do not run together on a single line.
  format('ERROR: Incorrect pattern for ~p and/or ~p. Expected either plain words or patterns Word:Type:Sense, where Type is in {n,v} and Sense is a number.~n', [Word1, Word2]),
  fail.
180
181
% gen_prox_equation_aux(+Measure, +Pattern1, +Pattern2, -Equation)
% Looks up the module and predicate implementing Measure, calls it on the two
% patterns to obtain a degree, divides the degree by the maximum value declared
% for the measure, and builds sim(Word1, Word2, NormalizedDegree) from the word
% components of the patterns.
gen_prox_equation_aux(Measure, Pattern1, Pattern2, sim(Word1, Word2, NormalizedDegree)) :-
   wn_measure_module_goal(Measure, Module, GoalName),
   call(Module:GoalName, Pattern1, Pattern2, Degree),
   measure_max_value(Measure, MaxValue),
   NormalizedDegree is Degree/MaxValue,
   Word1:_:_ = Pattern1,
   Word2:_:_ = Pattern2.
190
191
%%% valid_word_type(+Type)
%%%   Succeeds if Type is a valid word type. Currently, only n (nouns) and
%%%   v (verbs) are accepted.

valid_word_type(n).   % noun
valid_word_type(v).   % verb
198
199
%%% wn_measure_module_goal(?Measure, ?Module, ?MeasureGoalName)
%%%
%%% Table relating each measure to the predicate that implements it:
%%% - Measure: acronym of the measure used to compute the similarity or
%%%   relatedness of two words.
%%% - Module: module where the measure is implemented.
%%% - MeasureGoalName: name of the predicate implementing the measure.
%%%

% Measures implemented in wn_sim_measures
wn_measure_module_goal(path, wn_sim_measures, wn_path).
wn_measure_module_goal(wup,  wn_sim_measures, wn_wup).
wn_measure_module_goal(lch,  wn_sim_measures, wn_lch).
% Measures implemented in wn_ic_measures
wn_measure_module_goal(res,  wn_ic_measures,  wn_res).
wn_measure_module_goal(jcn,  wn_ic_measures,  wn_jcn).
wn_measure_module_goal(lin,  wn_ic_measures,  wn_lin).
% Measures implemented in wn_rel_measures
wn_measure_module_goal(yarm, wn_rel_measures, wn_yarm).
215
216
%%% measure_max_value(?Measure, ?Value)
%%%
%%% Measure: Name of the measure (path, lch, ...).
%%% Value: Maximum value the measure can take. It is the divisor used to
%%%        normalize the degrees returned by the measure.
%%%
%%% NOTE: the clauses for res and jcn print a warning every time they are
%%%       used, because their maximum value has not been checked.
%%%

measure_max_value(path, 1).
measure_max_value(wup, 1).
% Numerically equal to ln(40); presumably -ln(1/(2*D)) for a maximum taxonomy
% depth D = 20 -- TODO confirm.
measure_max_value(lch, 3.6888794541139363).
measure_max_value(res, 1) :- % WARNING: Check this value!
  nl, write('WARNING: Normalization is not checked.'), nl.
measure_max_value(jcn, 1) :- % WARNING: Check this value!
  nl, write('WARNING: Normalization is not checked.'), nl.
measure_max_value(lin, 1).
%measure_max_value(lesk,  ???).
measure_max_value(hso, 16).
measure_max_value(yarm, 1).