1% This file is part of the Attempto Parsing Engine (APE). 2% Copyright 2008-2013, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch). 3% 4% The Attempto Parsing Engine (APE) is free software: you can redistribute it and/or modify it 5% under the terms of the GNU Lesser General Public License as published by the Free Software 6% Foundation, either version 3 of the License, or (at your option) any later version. 7% 8% The Attempto Parsing Engine (APE) is distributed in the hope that it will be useful, but WITHOUT 9% ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 10% PURPOSE. See the GNU Lesser General Public License for more details. 11% 12% You should have received a copy of the GNU Lesser General Public License along with the Attempto 13% Parsing Engine (APE). If not, see http://www.gnu.org/licenses/. 14 15 16:- module(ape_utils, [ 17 cpu_time/2, 18 handle_unknown_words/4, 19 new_npid/1 20 ]).
31:- use_module(grammar_words). 32:- use_module('../lexicon/spellcheck'). 33:- use_module('../lexicon/is_in_lexicon'). 34:- use_module('../lexicon/functionwords'). 35:- use_module('../lexicon/chars', [is_capitalized/1]). 36:- use_module('../logger/error_logger', [ 37 add_error_messagelist/4, 38 add_error_message/4 39 ]). 40 41:- use_module(sentence_failure, [ 42 get_all_sentence_errors/2 43 ]). 44 45:- use_module('../utils/ace_niceace', [ 46 ace_niceace/2 47 ]). 48 49 50:- op(400, fy, -). 51:- op(400, fy, ~). 52:- op(500, xfx, =>). 53:- op(500, xfx, v). 54 55%:- debug(known_word). 56 57cpu_time(Goal, Duration) :- 58 statistics(runtime, [Start | _]), 59 ignore(Goal), 60 statistics(runtime, [Finish | _]), 61 Duration is (Finish-Start) * 0.001. 62 63% @deprecated 64read_text_from_commandline([C|R]) :- 65 get0(C), 66 line_continues(C), 67 !, 68 read_text_from_commandline(R). 69 70read_text_from_commandline([]). 71 72% @deprecated 73line_continues(C) :- C \== 10, C \== -1, !.
80list_close([], []) :- !. 81 82list_close([Head | Tail], [Head | ClosedTail]) :- 83 list_close(Tail, ClosedTail).
96member_of_domain(E, [F | _]) :- 97 E==F, 98 !. 99 100member_of_domain(E, [_ | R]) :- 101 nonvar(R), % catches a bug 102 member_of_domain(E,R).
We have to do it recursively for the following tests to work.
122list_of_conds_and_anaphors(Tail, [], []) :- var(Tail), !. 123 124list_of_conds_and_anaphors([], [], []). 125 126list_of_conds_and_anaphors([Head | Tail], CondsTail, [Head|AnaAnte]) :- 127 functor(Head, antecedent, _), 128 !, 129 list_of_conds_and_anaphors(Tail, CondsTail, AnaAnte). 130 131list_of_conds_and_anaphors([Head | Tail], CondsTail, [Head|AnaAnte]) :- 132 Head = anaphor(_, _, _, AnaphorConds, _, _, _, _, _, _, _), 133 !, 134 list_of_conds_and_anaphors(Tail, CondsTailTemp1, AnaAnte), 135 append(AnaphorConds, CondsTailTemp1, CondsTailTemp2), 136 sort(CondsTailTemp2, CondsTail). 137 138list_of_conds_and_anaphors([Condition | CondsTail], [ConditionPruned | CondsTailPruned], AnaAnte) :- 139 handle_condition(Condition, ConditionPruned), 140 !, 141 list_of_conds_and_anaphors(CondsTail, CondsTailPruned, AnaAnte). 142 143% Simple condition. 144list_of_conds_and_anaphors([Head | Tail], [Head | CondsTail], AnaAnte) :- 145 list_of_conds_and_anaphors(Tail, CondsTail, AnaAnte).
152handle_condition([First|Rest], CondsPruned) :- 153 list_of_conds_and_anaphors([First|Rest], CondsPruned, _). 154 155handle_condition(-drs(Dom, Conds), -drs(Dom, CondsPruned)) :- 156 list_of_conds_and_anaphors(Conds, CondsPruned, _). 157 158handle_condition(Label:drs(Dom, Conds), Label:drs(Dom, CondsPruned)) :- 159 list_of_conds_and_anaphors(Conds, CondsPruned, _). 160 161handle_condition(can(drs(Dom, Conds)), can(drs(Dom, CondsPruned))) :- 162 list_of_conds_and_anaphors(Conds, CondsPruned, _). 163 164handle_condition(must(drs(Dom, Conds)), must(drs(Dom, CondsPruned))) :- 165 list_of_conds_and_anaphors(Conds, CondsPruned, _). 166 167handle_condition(should(drs(Dom, Conds)), should(drs(Dom, CondsPruned))) :- 168 list_of_conds_and_anaphors(Conds, CondsPruned, _). 169 170handle_condition(may(drs(Dom, Conds)), may(drs(Dom, CondsPruned))) :- 171 list_of_conds_and_anaphors(Conds, CondsPruned, _). 172 173handle_condition(~drs(Dom, Conds), ~drs(Dom, CondsPruned)) :- 174 list_of_conds_and_anaphors(Conds, CondsPruned, _). 175 176handle_condition(drs(Dom1, Conds1) => drs(Dom2, Conds2), drs(Dom1, Conds1Pruned) => drs(Dom2, Conds2Pruned)) :- 177 list_of_conds_and_anaphors(Conds1, Conds1Pruned, _), 178 list_of_conds_and_anaphors(Conds2, Conds2Pruned, _). 179 180handle_condition(drs(Dom1, Conds1) v drs(Dom2, Conds2), drs(Dom1, Conds1Pruned) v drs(Dom2, Conds2Pruned)) :- 181 list_of_conds_and_anaphors(Conds1, Conds1Pruned, _), 182 list_of_conds_and_anaphors(Conds2, Conds2Pruned, _).
197report_failed_sentence(SentenceID, FirstSentence) :-
198 get_unparsed_tokens_number(UnparsedTokensNumber),
199 length(UnparsedTokens, UnparsedTokensNumber),
200 append(ParsedTokens, UnparsedTokens, FirstSentence),
201 append(ParsedTokens, ['<>'|UnparsedTokens], FirstSentenceX),
202 get_all_sentence_errors(FirstSentenceX, ErrorTextList),
203 ace_niceace(FirstSentenceX, FirstSentenceNice),
204 concat_atom(FirstSentenceNice, ' ', FirstSentenceAtom),
205 length(FirstSentence, AllTokensNumber),
206 TokenID is (AllTokensNumber - UnparsedTokensNumber),
207 add_error_messagelist(sentence, SentenceID-TokenID, FirstSentenceAtom, ErrorTextList).
214spellcheck_and_report_unknown_words(UnknownTokenList, N) :- 215 spellcheck:damerau_rules(UnknownTokenList, Candidates, NoSolutions), 216 !, 217 report_candidates(Candidates, N), 218 report_nosolutions(NoSolutions, N). 219 220report_candidates([], _). 221 222report_candidates([Bad-Good | T], N) :- 223 add_error_message(word, N-'', Bad, Good), 224 report_candidates(T, N). 225 226report_nosolutions([], _). 227 228report_nosolutions([Bad | T], N) :- 229 add_error_message(word, N-'', Bad, 'Use the prefix n:, v:, a: or p:.'), 230 report_nosolutions(T, N).
235handle_unknown_words(_, [], [], _). 236 237handle_unknown_words(on, [SentenceIn|RestIn], [SentenceOut|RestOut], _) :- 238 !, 239 prefix_unknown_words(SentenceIn, SentenceOut), 240 handle_unknown_words(on, RestIn, RestOut, _). 241 242handle_unknown_words(G, [Sentence|Rest], [Sentence|Rest], N) :- 243 get_unknown_words(Sentence, UnknownTokens), 244 spellcheck_and_report_unknown_words(UnknownTokens, N), 245 NextN is N + 1, 246 handle_unknown_words(G, Rest, Rest, NextN).
255get_unknown_words(TokenList, UnknownTokenList) :- 256 get_unknown_words(TokenList, [x, x], UnknownTokenList). 257 258get_unknown_words([], _, []). 259 260get_unknown_words([Token | TokensTail], [Left2, Left1], UnknownTokenList) :- 261 get_right_context(TokensTail, Right1), 262 ( 263 known_or_capitalized(Left2, Left1, Token, Right1) -> UnknownTokenList = UnknownTokenListTail 264 ; 265 UnknownTokenList = [Token | UnknownTokenListTail] 266 ), 267 get_unknown_words(TokensTail, [Left1, Token], UnknownTokenListTail).
273known_or_capitalized(_, _, WordForm, _) :- 274 chars:is_capitalized(WordForm), 275 !. 276 277known_or_capitalized(Left2, Left1, WordForm, Right1) :- 278 debug(known_word, "~w ~w *~w* ~w~n", [Left2, Left1, WordForm, Right1]), 279 known_word(Left2, Left1, WordForm, Right1), 280 debug(known_word, "ok~n", []).
Note that propernames are difficult to guess since they are not introduced by a marker (like 'a' or 'every'). Adding a variable prefix makes the variable unify also with function words (e.g. 'if'). This causes a performance problem and doesn't work anyway. So, for the time being, we simply don't guess propernames (eventhough propernames deserve guessing the most, being a very open class).
296prefix_unknown_words(TokenList, TokenListWithPrefixes) :- 297 prefix_unknown_words(TokenList, [x, x], TokenListWithPrefixes). 298 299prefix_unknown_words([], _, []). 300 301prefix_unknown_words([Token | TokensTail], [Left2, Left1], TokenListWithPrefixes1) :- 302 get_right_context(TokensTail, Right1), 303 ( 304 known_or_capitalized(Left2, Left1, Token, Right1) -> 305 TokenListWithPrefixes1 = [Token | TokenListWithPrefixes] 306 ; 307 TokenListWithPrefixes1 = [unknowncat, :, Token | TokenListWithPrefixes] 308 ), 309 prefix_unknown_words(TokensTail, [Left1, Token], TokenListWithPrefixes).
318get_right_context([], x). 319 320get_right_context([Head | _], Head).
it is in quotation marks; it is prefixed; it is a prefix symbol; it is in the lexicon.
332known_word(_, _, String, _) :- 333 atom_concat('"', S, String), 334 atom_concat(_, '"', S). 335 336known_word(Left2, ':', _, _) :- 337 ( 338 functionwords:propername_prefix(Left2, _) 339 ; 340 functionwords:noun_prefix(Left2, _) 341 ; 342 functionwords:verb_prefix(Left2) 343 ; 344 functionwords:modif_prefix(Left2) 345 ), 346 !. 347 348known_word(_, _, Prefix, ':') :- 349 ( 350 functionwords:propername_prefix(Prefix, _) 351 ; 352 functionwords:noun_prefix(Prefix, _) 353 ; 354 functionwords:verb_prefix(Prefix) 355 ; 356 functionwords:modif_prefix(Prefix) 357 ), 358 !. 359 360known_word(_, _, WordForm, _) :- 361 is_in_lexicon:is_in_lexicon(WordForm). 362 363 364%---------------------------------------------------------------------------- 365% new_npid(-NPID:number) is det. 366% 367% new_npid delivers the next idenfifier (ID) for an NP. 368% NP IDs are used for anaphora resolution recency calculations. 369%---------------------------------------------------------------------------- 370 371new_npid(NewNPID) :- 372 b_getval(npid, OldNPID), 373 NewNPID is OldNPID + 1, 374 b_setval(npid, NewNPID)
APE utils
*/