% This file is part of the Attempto Parsing Engine (APE).
% Copyright 2008-2013, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
%
% The Attempto Parsing Engine (APE) is free software: you can redistribute it and/or modify it
% under the terms of the GNU Lesser General Public License as published by the Free Software
% Foundation, either version 3 of the License, or (at your option) any later version.
%
% The Attempto Parsing Engine (APE) is distributed in the hope that it will be useful, but WITHOUT
% ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
% PURPOSE. See the GNU Lesser General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License along with the Attempto
% Parsing Engine (APE). If not, see http://www.gnu.org/licenses/.


:- module(is_in_lexicon, [
        is_in_lexicon/1,
        is_functionword/1,
        is_contentword/1
    ]).

/** <module> Is a token in the lexicon?

Membership tests for ACE wordforms: function words, content words,
and the combination of both plus the illegal words.

@author Kaarel Kaljurand
@author Tobias Kuhn
@version 2007-12-06
*/

:- use_module(functionwords).
:- use_module(lexicon_interface).
:- use_module(illegalwords).
:- use_module(chars).


%% is_in_lexicon(+WordForm:atom) is semidet.
%
% @param WordForm is an ACE wordform
%
% Succeeds if WordForm is among the ACE words, possibly
% one of the illegal words like `any' or `this'.
%
% WordForm may match more than one of the alternatives checked below.
% For a bound WordForm only the first match is used, so the predicate
% succeeds at most once and leaves no choicepoint, as the mode line
% promises. If WordForm is unbound (outside the documented mode) the
% alternatives are still tried one after the other on backtracking,
% which is what this predicate did before it was made deterministic.
%
is_in_lexicon(WordForm) :-
    nonvar(WordForm),
    !,
    once(is_in_lexicon_(WordForm)).
is_in_lexicon(WordForm) :-
    is_in_lexicon_(WordForm).


%% is_in_lexicon_(?WordForm) is nondet.
%
% Private helper of is_in_lexicon/1: the raw three-way check
% (function word, content word, illegal word) without any commitment
% to the first solution.
%
is_in_lexicon_(WordForm) :-
    (
        is_functionword(WordForm)
    ;
        is_contentword(WordForm)
    ;
        % NOTE(review): is_illegalword/2 is not defined in this file;
        % presumably imported from the illegalwords module.
        is_illegalword(WordForm, _)
    ).


%% is_functionword(+WordForm:atom) is nondet.
%
% @param WordForm is an ACE wordform
%
% Succeeds if WordForm is among the ACE function words.
% Three predicates of the functionwords module are consulted in turn:
% rawnumber_number/2, functionword/1 and variable/1.
%
is_functionword(WordForm) :-
    (
        functionwords:rawnumber_number(WordForm, _)
    ;
        functionwords:functionword(WordForm)
    ;
        functionwords:variable(WordForm)
    ).


%% is_contentword(+WordForm:atom) is nondet.
%
% @param WordForm is an ACE wordform
%
% Succeeds if WordForm is in the content word lexicon.
% Every lexicon predicate listed below is tried with WordForm as its
% first argument; the remaining arguments are ignored. The predicate
% can succeed once per matching lexicon entry.
%
% NOTE(review): the lexicon predicates (adv/2 ... prep/2) are not
% defined in this file; presumably they come from lexicon_interface.
%
is_contentword(WordForm) :-
    (
        adv(WordForm, _)
    ;
        adv_comp(WordForm, _)
    ;
        adv_sup(WordForm, _)
    ;
        adj_itr(WordForm, _)
    ;
        adj_itr_comp(WordForm, _)
    ;
        adj_itr_sup(WordForm, _)
    ;
        adj_tr(WordForm, _, _)
    ;
        adj_tr_comp(WordForm, _, _)
    ;
        adj_tr_sup(WordForm, _, _)
    ;
        noun_sg(WordForm, _, _)
    ;
        noun_pl(WordForm, _, _)
    ;
        noun_mass(WordForm, _, _)
    ;
        mn_sg(WordForm, _)
    ;
        mn_pl(WordForm, _)
    ;
        pn_sg(WordForm, _, _)
    ;
        pn_pl(WordForm, _, _)
    ;
        pndef_sg(WordForm, _, _)
    ;
        pndef_pl(WordForm, _, _)
    ;
        iv_finsg(WordForm, _)
    ;
        iv_infpl(WordForm, _)
    ;
        tv_finsg(WordForm, _)
    ;
        tv_infpl(WordForm, _)
    ;
        tv_pp(WordForm, _)
    ;
        dv_finsg(WordForm, _, _)
    ;
        dv_infpl(WordForm, _, _)
    ;
        dv_pp(WordForm, _, _)
    ;
        % Unlike the other categories, prepositions are looked up by
        % the lowercased wordform.
        (
            chars:to_lowercase(WordForm, WordFormL),
            prep(WordFormL, _)
        )
    ).