% This file is part of the Attempto Parsing Engine (APE).
% Copyright 2008-2013, Attempto Group, University of Zurich (see http://attempto.ifi.uzh.ch).
%
% The Attempto Parsing Engine (APE) is free software: you can redistribute it and/or modify it
% under the terms of the GNU Lesser General Public License as published by the Free Software
% Foundation, either version 3 of the License, or (at your option) any later version.
%
% The Attempto Parsing Engine (APE) is distributed in the hope that it will be useful, but WITHOUT
% ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
% PURPOSE. See the GNU Lesser General Public License for more details.
%
% You should have received a copy of the GNU Lesser General Public License along with the Attempto
% Parsing Engine (APE). If not, see http://www.gnu.org/licenses/.
/* APE client for the Unix command-line.
@author Tobias Kuhn
@author Kaarel Kaljurand
Building the executable file:
==
swipl -g "working_directory(_, 'prolog/parser'), [fit_to_plp], halt."
swipl -O -F none -g "[ape], qsave_program('ape.exe', [goal(ape), toplevel(halt)])." -t halt
==
TODO:
- Do better checking of which arguments can be used together. E.g. if the user uses both -server
and -httpserver, what should ape.exe do then?
- All: Should we drop ulexreload? Otherwise all the interfaces should support it, and one would
need to add also "acetextreload" for reasons of uniformity.
- All: make timelimits configurable, currently they are hard-coded
- Socket: The socket server currently supports exactly the arguments that get_ape_results/2 supports.
This differs from the other interfaces, which also support further arguments (e.g. for fetching
files from URLs). The interfaces should be unified.
- Java: assuming that it supports the file-parameter, how are the possible errors
communicated to Java?
- HTTP: Add logging
*/
:- consult('pack.pl').
:- use_module('prolog/ape', [
get_ape_results/2,
get_ape_results_timelimit/3,
get_ape_results_timelimit/4
]).
:- use_module('prolog/logger/error_logger').
% Default encoding used for opening files in text mode.
:- set_prolog_flag(encoding, utf8).
% At initialization time, set the handler of the interrupt signal (`int`)
% to `default`; the previously installed handler is ignored.
:- initialization on_signal(int, _, default).
%% argument(?Arg, -Value, -Desc)
%
% Table of the command-line arguments that are accepted by ape.exe.
% It is consulted by arglist_namevaluelist/2 (to validate the arguments)
% and by show_help/0 (to print the help page).
%
% @param Arg is the argument as it is typed on the command line, including the leading '-'
% @param Value is the placeholder shown for the argument's value in the help page;
%        the empty atom '' marks an argument that takes no value
% @param Desc is the help text of the argument; the atom =hidden= keeps the
%        argument out of the help page
%
% @tbd maybe we could describe the types more formally, so that they could be checked,
% e.g. the type of the solo value is one of [drs, drsxml, ...], which could be checked
% with memberchk/2.
%
argument('-text', '"TEXT"', 'The input ACE text. If neither -text nor -file is present then the ACE text is read from stdin.').
argument('-file', 'FILENAME', 'The name or URL of the input file containing the ACE text.').
argument('-ulextext', '"TEXT"', 'The user lexicon (taken from a string).').
argument('-ulexfile', 'FILENAME', 'The user lexicon (taken from a file or URL).').
argument('-solo', 'OUTPUT', 'Output just one output component. OUTPUT has to be one of {paraphrase,paraphrase1,paraphrase2,owlfss,owlfsspp,owlrdf,owlxml,ruleml,fol,pnf,tptp,tokens,syntax,syntaxpp,syntaxd,syntaxdpp,drs,drsxml,drspp,drshtml}.').
argument('-cinput', '', hidden).
argument('-cdrs', '', 'Output the DRS as a Prolog term.').
argument('-cdrsxml', '', 'Output the DRS in XML.').
argument('-cdrspp', '', 'Output the DRS in pretty-printed form in plain text.').
argument('-cdrshtml', '', 'Output the DRS in pretty-printed form in HTML.').
argument('-cparaphrase', '', 'Output a paraphrase which is a "best-effort" combination of paraphrase1 and paraphrase2.').
argument('-cparaphrase1', '', 'Output a paraphrase which uses full sentences instead of relative clauses.').
argument('-cparaphrase2', '', 'Output a paraphrase which uses relative clauses instead of full sentences.').
argument('-ctokens', '', 'Output tokens as a Prolog list of lists.').
argument('-csentences', '', 'Output sentences as a Prolog list.').
argument('-csyntax', '', 'Output simplified syntax trees as a Prolog list.').
argument('-csyntaxpp', '', 'Output simplified syntax trees in pretty-printed form.').
argument('-csyntaxd', '', 'Output plain syntax trees as a Prolog list (for debugging).').
argument('-csyntaxdpp', '', 'Output plain syntax trees in pretty-printed form (for debugging).').
argument('-cowlfss', '', 'Output OWL/SWRL in the Functional-Style Syntax representation (as Prolog term).').
argument('-cowlfsspp', '', 'Output OWL/SWRL in the Functional-Style Syntax representation (pretty-printed).').
argument('-cowlxml', '', 'Output OWL/SWRL in the XML representation.').
argument('-cowlrdf', '', 'Output OWL/SWRL in the RDF/XML representation. DEPRECATED').
argument('-cruleml', '', 'Output RuleML representation of the DRS.').
argument('-cfol', '', 'Output standard first-order logic representations (default form) of the DRS as a Prolog term.').
argument('-cpnf', '', 'Output standard first-order logic representations (prenex normal form) of the DRS as a Prolog term.').
argument('-ctptp', '', 'Output TPTP representation of the DRS.').
argument('-uri', 'URI', 'URI for the OWL outputs.').
argument('-noclex', '', 'Ignore the lexicon entries that are compiled into the executable.').
argument('-guess', '', 'Guess the word-class of unknown words.').
argument('-server', '', 'Launch a socket interface to APE at port 2766 (0xACE).').
argument('-httpserver', '', 'Launch an HTTP interface to APE at port 8000.').
argument('-port', 'NUMBER', 'Override the default port of either the socket or the HTTP interface.').
argument('-version', '', 'Shows version information.').
argument('-help', '', 'Shows this help page.').
%% ape is det.
%
% Entry point of =|ape.exe|=.
% Fetches the command-line arguments, converts them into a list of
% Name=Value pairs and hands that list over to process_input/1.
% Any exception raised while converting or processing the arguments
% is pretty-printed for the terminal instead of being propagated.
%
ape :-
    current_prolog_flag(argv, Argv),
    get_arglist(Argv, Args),
    Work = ( arglist_namevaluelist(Args, Inputs), process_input(Inputs) ),
    catch(Work, Error, format_error_for_terminal(Error)).
%% get_arglist(+RawArgList, -ArgList)
%
% Extracts the actual program arguments from the raw argv list.
% In SWI v6.6.0+ the raw list is already the wanted list, i.e.
%
%    get_arglist(ArgList) :-
%        current_prolog_flag(argv, ArgList).
%
% For backwards compatibility two older layouts are still handled:
%
% 1. the raw list contains '--': everything after the first '--' is returned;
% 2. the raw list starts with an element that does not begin with '-'
%    (e.g. the path to ape.exe): that first element is dropped.
%    (TODO: on which OS is this needed?)
%
% In all other cases the raw list is returned unchanged.
%
get_arglist(RawArgList, ArgList) :-
    (   append(_, ['--'|AfterSeparator], RawArgList)
    ->  ArgList = AfterSeparator
    ;   RawArgList = [First|Rest],
        \+ atom_concat('-', _, First)
    ->  ArgList = Rest
    ;   ArgList = RawArgList
    ).
%% process_input(+InputList:list) is det.
%
% Decides what ape.exe has to do, based on the parsed command-line arguments:
%
% * no arguments, or =help= among them: print the help page;
% * =version= among them: print the version;
% * exactly =server= (optionally with =port=): start the socket interface;
% * exactly =httpserver= (optionally with =port=): start the HTTP interface;
% * any other list that mentions =server=, =httpserver= or =port=: illegal,
%   an exception is thrown (previously such lists silently fell through
%   to the parsing branch, which could block waiting for stdin);
% * otherwise: load the user lexicon and the ACE text, run APE and print
%   the result to stdout as UTF-8.
%
% @param InputList is a list of input parameters (Name=Value pairs)
% @throws error('Illegal combination of arguments', context(process_input/1, Arg))
%         where Arg is the first offending command-line argument
%
process_input(InputList) :-
    ( InputList = [] ; member(help=on, InputList) ),
    !,
    show_help.
process_input(InputList) :-
    memberchk(version=on, InputList),
    !,
    show_version.
process_input([server=on]) :-
    !,
    server.
process_input([server=on, port=Port]) :-
    !,
    server(Port).
process_input([port=Port, server=on]) :-
    !,
    server(Port).
process_input([httpserver=on]) :-
    !,
    http_server.
process_input([httpserver=on, port=Port]) :-
    !,
    http_server(Port).
process_input([port=Port, httpserver=on]) :-
    !,
    http_server(Port).
% Every legal usage of server, httpserver and port has been handled above.
process_input(InputList) :-
    member(Name=_, InputList),
    memberchk(Name, [server, httpserver, port]),
    !,
    atom_concat('-', Name, Arg),
    throw(error('Illegal combination of arguments', context(process_input/1, Arg))).
process_input(InputList1) :-
    read_ulex(InputList1, InputList2),
    read_file(InputList2, InputList3),
    get_ape_results(InputList3, Content),
    set_utf8_encoding(user_output),
    writeln(Content).
%% show_help is det.
%
% Prints the help page: the version line, the copyright/licence notice and
% one line per command-line argument (see argument/3). Arguments whose
% description is =hidden= are not listed.
%
show_help :-
    show_version,
    forall(
        member(Line, [
            'Copyright 2008-2013, Attempto Group, University of Zurich\n',
            'This program comes with ABSOLUTELY NO WARRANTY.\n',
            'This is free software, and you are welcome to redistribute it under certain conditions.\n',
            'Please visit http://attempto.ifi.uzh.ch for details.\n'
        ]),
        write(Line)
    ),
    nl,
    write('Command-line arguments:\n'),
    % Failure-driven loop over the argument table.
    argument(Arg, Value, Desc),
    Desc \== hidden,
    format('~w ~w~20|~w~n', [Arg, Value, Desc]),
    fail.
show_help.
%% show_version is det.
%
% Writes a single line with the supported ACE version and the
% version of APE (as given by version/1) to the current output.
%
show_version :-
    version(ApeVersion),
    format("Attempto Parsing Engine for ACE 6.7, version ~w~n", [ApeVersion]).
%% arglist_namevaluelist(+ArgList:list, -NameValueList:list) is det.
%
% Converts the command-line arguments into Name=Value pairs, where Name is
% the argument without its leading '-'. An argument that takes no value
% (its placeholder in argument/3 is '') gets the value =on=. For the other
% arguments the next list element is taken as the value, unless that element
% is itself a known argument; values that atom_number/2 can convert are
% stored as numbers.
%
% @param ArgList is a list of arguments
% @param NameValueList is a list of ArgumentName=ArgumentValue pairs
% @throws error('Illegal argument', context(arglist_namevaluelist/2, Arg))
%         if Arg is not listed in argument/3
% @throws error('Missing value for argument', context(arglist_namevaluelist/2, Arg))
%         if Arg requires a value but none follows
arglist_namevaluelist([], []).
arglist_namevaluelist([Arg|Rest], [Name=Value|Pairs]) :-
    (   argument(Arg, Placeholder, _)
    ->  true
    ;   throw(error('Illegal argument', context(arglist_namevaluelist/2, Arg)))
    ),
    atom_concat('-', Name, Arg),
    (   Placeholder == ''
    ->  Value = on,
        Remaining = Rest
    ;   Rest = [ValueAtom|Remaining],
        \+ argument(ValueAtom, _, _)
    ->  (   catch(atom_number(ValueAtom, Number), _, fail)
        ->  Value = Number
        ;   Value = ValueAtom
        )
    ;   throw(error('Missing value for argument', context(arglist_namevaluelist/2, Arg)))
    ),
    arglist_namevaluelist(Remaining, Pairs).
%% read_file(+InputListIn:list, -InputListOut:list) is det.
%
% Makes sure that the parameter list carries the ACE text as text=AceText:
%
% * a file=AceFile parameter is replaced by the content of that file (or URL);
% * otherwise, if a text parameter is already present, the list is left as it is;
% * otherwise the ACE text is read from stdin (with the prompt switched off).
%
% @param InputListIn is a list of APE parameters
% @param InputListOut is a modified list of APE parameters
%
read_file(InputListIn, InputListOut) :-
    (   select(file=AceFile, InputListIn, Others)
    ->  filename_to_filecontent(AceFile, AceText),
        InputListOut = [text=AceText|Others]
    ;   memberchk(text=_, InputListIn)
    ->  InputListOut = InputListIn
    ;   prompt(_, ''),
        read_stream_to_codes(user_input, Codes),
        atom_codes(AceText, Codes),
        InputListOut = [text=AceText|InputListIn]
    ).
%% read_ulex(+InputListIn:list, -InputListOut:list) is det.
%
% If the parameter list contains ulexfile=UlexFile then that parameter is
% replaced by ulextext=UlexText, where UlexText is the content of the
% file (or URL). Without a ulexfile parameter the list is returned unchanged.
%
% @param InputListIn is a list of APE parameters
% @param InputListOut is a modified list of APE parameters
%
read_ulex(InputListIn, InputListOut) :-
    (   select(ulexfile=UlexFile, InputListIn, Others)
    ->  filename_to_filecontent(UlexFile, UlexText),
        InputListOut = [ulextext=UlexText|Others]
    ;   InputListOut = InputListIn
    ).
% Socket interface.
%
% Note: we use: set_stream(In, encoding(utf8))
% This makes characters travel correctly through the socket.
% There might be other (better) solutions though.
:- use_module(library(streampool)).
% Switch off the singleton-variable style check.
:- style_check(-singleton).
%% port(-Port)
%
% Default port of the socket interface.
port(2766). % ape (0xACE)
%port(2767). % ape-alpha (0xACF)
%port(2768). % ape-old (0xAD0)

%% server
%
% Starts the socket interface at the default port (see port/1).
server :-
    port(DefaultPort),
    server(DefaultPort).

%% server(+Port)
%
% Starts the socket interface at the given port: announces the start on
% stderr, opens a listening socket and enters the stream pool main loop.
% Incoming connections are handled by accept/1.
server(Port) :-
    get_time_formatted(Now),
    format(user_error, "~w: Starting a socket interface for APE at port ~w ...~n", [Now, Port]),
    tcp_socket(ListenSocket),
    tcp_bind(ListenSocket, Port),
    tcp_listen(ListenSocket, 5),
    tcp_open_socket(ListenSocket, AcceptStream, _),
    add_stream_to_pool(AcceptStream, accept(ListenSocket)),
    stream_pool_main_loop.
%% accept(+Socket)
%
% Accepts a pending connection on the listening socket, switches both
% streams of the connection to UTF-8 and registers the input stream in
% the stream pool, so that client/3 is called once input is available.
accept(Socket) :-
    tcp_accept(Socket, ClientSocket, Peer),
    tcp_open_socket(ClientSocket, In, Out),
    maplist(set_utf8_encoding, [In, Out]),
    add_stream_to_pool(In, client(In, Out, Peer)).
%% client(+In, +Out, +Peer)
%
% Serves one client connection by calling client_x/3. If client_x/3 throws,
% the exception is printed to stderr and the connection is cleaned up.
%
% The input stream is always removed from the stream pool, also when it has
% been closed already: client_x/3 closes In before it produces the reply,
% so an exception raised while producing the reply used to leave a closed
% stream in the pool. Streams that are still open are closed with
% force(true), so that an I/O error during the cleanup cannot escape
% from the handler.
client(In, Out, Peer) :-
    catch(
        client_x(In, Out, Peer),
        Error,
        (   format(user_error, "~w~n", [Error]),
            delete_stream_from_pool(In),
            ( is_stream(In) -> close(In, [force(true)]) ; true ),
            ( is_stream(Out) -> close(Out, [force(true)]) ; true )
        )
    ).
%% client_x(+In, +Out, +Peer)
%
% Reads one Prolog term from the client. If the term has the form get(I)
% then APE is run on the parameter list I (with a time limit of 20 seconds)
% and the result is written back; any other request (including an unbound
% variable) is answered with =fail=.
%
% NB: 'APESERVERSTREAMEND' on a separate line marks the end of the stream.
% @bug should we use write_canonical/2 here?
client_x(In, Out, _Peer) :-
    % We suppress warnings of the atom being longer than 5 lines.
    % This declaration seems to have effect only here.
    style_check(-atom),
    read(In, ClientRequest),
    close(In),
    % Deregister the input stream right away, so that an exception raised
    % below cannot leave a closed stream in the pool.
    delete_stream_from_pool(In),
    (
        nonvar(ClientRequest),
        ClientRequest = get(I)
    ->
        get_ape_results_timelimit(I, O, 20), % 20 seconds timelimit
        format(Out, '~w~nAPESERVERSTREAMEND~n', [O])
    ;
        % format/3 is required here: format/2 would take Out as the format string.
        format(Out, 'fail~nAPESERVERSTREAMEND~n', [])
    ),
    close(Out).
/**
The HTTP interface relies on SWI-Prolog HTTP support.
@see http://www.swi-prolog.org/packages/http.html
Note that the number of workers is set to 1 (default is 5).
Multiple workers would share the same assert/retract space and we do not want that
because APE is not completely thread-safe.
A better solution would be to make APE thread-safe and let the user decide on
the command-line on the number of workers (because the best-performing
number depends on the number of processor cores).
Setting the number of workers to 1 seems to make the server unresponsive at times,
thus lowering the timeout from the default 60 sec to 5 sec.
At Prolog prompt, stop the server by:
==
?- http_stop_server(8000, []).
==
Note that http_stop_server/2 does not make the port immediately available,
you have to wait a few (two?) minutes. Observe it with 'netstat -na'.
*/
:- use_module(library('http/thread_httpd')).
:- use_module(library('http/http_dispatch')).
:- use_module(library('http/http_parameters')).
:- use_module(library('http/http_client')).
:- use_module(library('http/http_cors')).
:- use_module(library('http/http_files')).
% Configure the port.
% http_port(-Port): the default port of the HTTP interface, used by http_server/0.
http_port(8000).
% Allow any origin.
% TODO: make configurable on the command-line
:- set_setting(http:cors, [*]).
% Configure the www root.
% Requests to '/' are answered by ape/1.
:- http_handler('/', ape, []).
%:- http_handler('/ape/', ape, []).
% Optional static file serving: if both environment variables
% APE_HTTPSERVER_FILES_PATH and APE_HTTPSERVER_FILES_DIR are set, a prefix
% handler is registered at Path that replies with the files found in Dir.
% If one of the variables is not set, the `; true` branch makes the
% directive succeed without registering a handler.
:-
getenv('APE_HTTPSERVER_FILES_PATH', Path),
getenv('APE_HTTPSERVER_FILES_DIR', Dir),
http_handler(Path, http_reply_from_files(Dir, []), [prefix]);
true.
%% parameters(-Parameters:list)
%
% Configure the APE webservice parameters.
% The list is a parameter specification in the format expected by
% http_parameters/2: each element names a parameter, leaves its value
% unbound and lists the options of the parameter.
%
% @bug unify this with the main argument description
% Note that while 'text' and 'file' are optional, at least one of them must be present.
% It is probably impossible to say this declaratively.
%
% The parameter 'cpnf' is listed here as well, so that the HTTP interface
% accepts the same output selectors as the command-line interface
% (which offers -cpnf) and as the 'solo' parameter (which offers pnf).
parameters([
    text(_, [optional(true)]),
    file(_, [optional(true)]),
    ulextext(_, [optional(true)]),
    ulexfile(_, [optional(true)]),
    noclex(_, [oneof([on, off]), optional(true)]),
    guess(_, [oneof([on, off]), optional(true)]),
    % ulexreload(_, [optional(true)]), % @tbd
    uri(_, [default('http://attempto.ifi.uzh.ch/ontologies/owlswrl/test')]),
    cinput(_, [oneof([on, off]), optional(true)]), % @bug deprecated
    cdrs(_, [oneof([on, off]), optional(true)]),
    cdrsxml(_, [oneof([on, off]), optional(true)]),
    cdrspp(_, [oneof([on, off]), optional(true)]),
    cdrshtml(_, [oneof([on, off]), optional(true)]),
    cparaphrase(_, [oneof([on, off]), optional(true)]),
    cparaphrase1(_, [oneof([on, off]), optional(true)]),
    cparaphrase2(_, [oneof([on, off]), optional(true)]),
    ctokens(_, [oneof([on, off]), optional(true)]),
    csentences(_, [oneof([on, off]), optional(true)]),
    csyntax(_, [oneof([on, off]), optional(true)]),
    csyntaxpp(_, [oneof([on, off]), optional(true)]),
    csyntaxd(_, [oneof([on, off]), optional(true)]),
    csyntaxdpp(_, [oneof([on, off]), optional(true)]),
    cowlfss(_, [oneof([on, off]), optional(true)]),
    cowlfsspp(_, [oneof([on, off]), optional(true)]),
    cowlrdf(_, [oneof([on, off]), optional(true)]),
    cowlxml(_, [oneof([on, off]), optional(true)]),
    cruleml(_, [oneof([on, off]), optional(true)]),
    cfol(_, [oneof([on, off]), optional(true)]),
    cpnf(_, [oneof([on, off]), optional(true)]),
    ctptp(_, [oneof([on, off]), optional(true)]),
    solo(_, [oneof([drs, drsxml, drspp, drshtml,
        paraphrase, paraphrase1, paraphrase2,
        tokens, sentences,
        syntax, syntaxpp, syntaxd, syntaxdpp,
        owlfss, owlfsspp, owlrdf, owlxml,
        ruleml,
        fol, pnf, tptp]), optional(true)])
]).
%% http_server
%
% Starts the HTTP interface at the default port (see http_port/1).
http_server :-
    http_port(DefaultPort),
    http_server(DefaultPort).

%% http_server(+Port)
%
% Starts the HTTP interface at the given port with a single worker and a
% timeout of 5 seconds, after announcing the start on stderr. The calling
% thread then waits for a thread message and halts once one arrives.
http_server(Port) :-
    get_time_formatted(Now),
    format(user_error, "~w: Starting an HTTP interface for APE at port ~w ...~n", [Now, Port]),
    ServerOptions = [port(Port), workers(1), timeout(5)],
    http_server(http_dispatch, ServerOptions),
    thread_get_message(_),
    halt.
%% ape(+Request) is det.
%
% Toplevel of the HTTP interface: computes the reply for the request,
% enables CORS for GET and writes the reply (content type header,
% empty line, content) to the current output.
%
ape(Request) :-
    ape_reply(Request, ContentType, Content),
    cors_enable(Request, [methods([get])]),
    format('Content-type: ~w\r\n\r\n~w', [ContentType, Content]).

%% ape_reply(+Request, -ContentType, -Content)
%
% Reads the HTTP parameters of the request, converts them into APE
% parameters and runs APE with a time limit of 10 seconds. If any of
% these steps throws, the reply is the formatted error message instead.
%
ape_reply(Request, ContentType, Content) :-
    parameters(Parameters),
    catch(
        (   http_parameters(Request, Parameters),
            http_parameters_to_ape_parameters(Parameters, ApeParameters),
            get_ape_results_timelimit(ApeParameters, ContentType, Content, 10) % 10 second timelimit
        ),
        Error,
        format_error_for_http(Error, ContentType, Content)
    ).
%% http_parameters_to_ape_parameters(+HttpParameters:list, -ApeParameters:list) is det.
%
% Translates the parameter terms that http_parameters/2 has filled in into
% the parameter format of APE. A parameter whose first argument is still
% unbound was not part of the request and is dropped; every other
% parameter is converted by key_value_to_parameter/3.
%
% @param HttpParameters is the result of http_parameters/2
% @param ApeParameters is the inputlist for get_ape_results_timelimit/4
%
http_parameters_to_ape_parameters([], []).
http_parameters_to_ape_parameters([Parameter|Rest], ApeParameters) :-
    Parameter =.. [Key, Value|_],
    (   var(Value)
    ->  ApeParameters = ApeRest
    ;   key_value_to_parameter(Key, Value, ApeParameter),
        ApeParameters = [ApeParameter|ApeRest]
    ),
    http_parameters_to_ape_parameters(Rest, ApeRest).
%% key_value_to_parameter(+Key:atom, +Value:atom, -ApeParameter:term) is det.
%
% Builds the APE parameter (a term of the form Key = Value) for an HTTP
% parameter. The keys 'file' and 'ulexfile' are special: their value is
% taken to be an HTTP URL, the resource is fetched and the parameter
% becomes 'text' respectively 'ulextext' with the fetched content as value.
%
% Note that for security reasons, the value of 'file' and 'ulexfile' cannot point
% to a local file (e.g. /etc/passwd). Only http-urls are allowed.
% The following example shows an illegal query.
%
%==
% http://attempto.ifi.uzh.ch:8000/?file=/etc/passwd
%==
%
% @param Key is a parameter name
% @param Value is the parameter value
% @param ApeParameter is the corresponding APE parameter
%
key_value_to_parameter(Key, Value, ApeParameter) :-
    (   url_parameter_target(Key, ContentKey)
    ->  httpurl_to_filecontent(Value, Content),
        ApeParameter = (ContentKey = Content)
    ;   ApeParameter = (Key = Value)
    ).

% url_parameter_target(?UrlKey, ?ContentKey)
%
% Parameters whose value is a URL, and the parameter that carries
% the fetched content instead.
url_parameter_target(file, text).
url_parameter_target(ulexfile, ulextext).
%% filename_to_filecontent(+FileName:atom, -FileContent:atom) is det.
%
% Returns the content of the given file as an atom. If FileName looks like
% an HTTP URL (see is_http_url/1) then the content is fetched by an HTTP
% request, otherwise FileName is read as a local file encoded in UTF-8.
%
% @param FileName is the name (possibly a URL) of a file
% @param FileContent is the content (as an atom) of the file
%
filename_to_filecontent(FileName, FileContent) :-
    is_http_url(FileName),
    !,
    httpurl_to_filecontent(FileName, FileContent).
filename_to_filecontent(FileName, FileContent) :-
    read_file_to_codes(FileName, Codes, [encoding(utf8)]),
    atom_codes(FileContent, Codes).
%% httpurl_to_filecontent(+HttpUrl:atom, -FileContent:atom) is det.
%
% Fetches a resource with an HTTP GET request and checks the status that
% is reported in the reply header.
%
% @param HttpUrl is a URL (expected to start as 'http://')
% @param FileContent is the content (as an atom) of the resource
% @throws socket_error (and other exceptions by http_get/3)
% @throws 'HTTP request failed' in case HTTP status code was not 'ok' (200)
%
% @bug Check if http_get expects UTF-8 or can handle other encodings as well
% @bug Make sure that http_get/3 never accesses local files (e.g. that it does not support 'file://')
%
httpurl_to_filecontent(HttpUrl, FileContent) :-
    GetOptions = [ user_agent('ape.exe (http://attempto.ifi.uzh.ch)'), reply_header(Header) ],
    http_get(HttpUrl, FileContent, GetOptions),
    memberchk(status(Code, Message), Header),
    (   Code \= ok
    ->  format(atom(Detail), "~s: ~w", [Message, HttpUrl]),
        throw(error('HTTP request failed', context(httpurl_to_filecontent/2, Detail)))
    ;   true
    ).
%% is_http_url(+Atom:atom) is semidet.
%
% Succeeds if the atom starts with 'http://'.
% The test is quite naive.
%
% @param Atom is an atom to be tested.
%
% @bug Make use of SWI library url.pl
% @bug What is faster for substring matching sub_atom/5 or concat_atom/2,
% e.g. concat_atom(['http://', _], FileName)
%
is_http_url(Atom) :-
    Prefix = 'http://',
    atom_length(Prefix, PrefixLength),
    sub_atom(Atom, 0, PrefixLength, _, Prefix).
%% format_error_for_terminal(+Exception:term) is det.
%
% Writes a one-line error message for the exception to stderr.
% Exceptions of the usual form are printed as "Formal: Predicate: Message"
% (with an empty message if the message is unbound), any other term is
% printed as it is.
%
% @param Exception is the exception term, usually in the form
% error(Formal, context(Module:Name/Arity, Message))
%
format_error_for_terminal(Exception) :-
    (   Exception = error(Formal, context(Predicate, Message))
    ->  format_message(Message, Text),
        format(user_error, "ERROR: ~w: ~w: ~w~n", [Formal, Predicate, Text])
    ;   format(user_error, "ERROR: ~w~n", [Exception])
    ).
%% format_error_for_http(+Exception:term, -ContentType:atom, -Content:term) is det.
%
% Generates an error message from the exception term. The message is an
% XML element =error=, because it is delivered with the content type
% text/xml. For exceptions of the usual form the element has the
% attribute =type= (the name of the formal part of the exception) and the
% text "Formal: Predicate: Message"; for any other term the text is the
% term itself. The characters &, <, > and " are escaped.
%
% Before this change the format string had three directives but four
% arguments, so that formatting the usual form of exceptions raised an
% error itself.
%
% @param Exception is the exception term, usually in the form
% error(Formal, context(Module:Name/Arity, Message))
% @param ContentType is the content type that message is formatted into, e.g. text/xml
% @param Content is the formatted error message
%
% @tbd return the same error messages as apews.perl
%
format_error_for_http(error(Formal, context(Predicate, Message)), 'text/xml', Xml) :-
    !,
    % functor/3 would throw on an unbound formal part.
    (   var(Formal)
    ->  Name = error
    ;   functor(Formal, Name, _)
    ),
    format_message(Message, FMessage),
    format(atom(Text), "~w: ~w: ~w", [Formal, Predicate, FMessage]),
    xml_escaped_atom(Name, SafeName),
    xml_escaped_atom(Text, SafeText),
    format(atom(Xml), "<error type=\"~w\">~w</error>", [SafeName, SafeText]).
format_error_for_http(Error, 'text/xml', Xml) :-
    xml_escaped_atom(Error, SafeError),
    format(atom(Xml), "<error>~w</error>", [SafeError]).

% xml_escaped_atom(+Term, -Escaped:atom)
%
% Writes the term (as write/1 does) and replaces the characters that
% are special in XML by the corresponding entities.
xml_escaped_atom(Term, Escaped) :-
    format(codes(Codes), "~w", [Term]),
    xml_escape_codes(Codes, EscapedCodes),
    atom_codes(Escaped, EscapedCodes).

% xml_escape_codes(+Codes:list, -EscapedCodes:list)
%
% Copies the code list, expanding every special character into its entity.
xml_escape_codes([], []).
xml_escape_codes([Code|Codes], Escaped) :-
    (   xml_entity(Code, Entity)
    ->  atom_codes(Entity, EntityCodes),
        append(EntityCodes, Rest, Escaped)
    ;   Escaped = [Code|Rest]
    ),
    xml_escape_codes(Codes, Rest).

% xml_entity(?Code, ?Entity)
%
% The character codes that must be escaped in XML text and attribute values.
xml_entity(38, '&amp;').  % &
xml_entity(60, '&lt;').   % <
xml_entity(62, '&gt;').   % >
xml_entity(34, '&quot;'). % "
%% format_message(+Message:term, -FormattedMessage:atom)
%
% Normalizes the message that may come with an exception term.
% The message is often left unbound; in that case the empty atom is
% returned, otherwise the message is returned unchanged.
format_message(Message, FormattedMessage) :-
    (   var(Message)
    ->  FormattedMessage = ''
    ;   FormattedMessage = Message
    ).
%% set_utf8_encoding(+Stream)
%
% Switches the encoding of the given stream to UTF-8. For some unknown
% reason, set_stream/2 sometimes throws an error under Windows XP;
% any such error is caught here and ignored.
set_utf8_encoding(Stream) :-
    SetEncoding = set_stream(Stream, encoding(utf8)),
    catch(SetEncoding, _IgnoredError, true).
%% get_time_formatted(-FormattedTimestamp)
%
% Returns the current time as an atom that is formatted by format_time/3
% with the pattern '%F %T%z'.
%
get_time_formatted(FormattedTimestamp) :-
    get_time(Now),
    format_time(atom(FormattedTimestamp), '%F %T%z', Now).