14
15
16:- module(tokens_to_sentences, [
17 tokens_to_sentences/2,
18 tokens_to_paragraphs/2
19 ]). 20
21
22:- use_module('../lexicon/chars', [
23 is_sentence_end_symbol/1
24 ]).
%% tokens_to_sentences(+Tokens, -Sentences)
%
% Splits the flat list Tokens into a list of sentences.
% Every sentence in Sentences is a list that starts with the marker
% atom ^ and continues with the tokens that first_sentence/3 carved
% off the front of the remaining input.
% An empty token list yields an empty list of sentences.
% Fails whenever first_sentence/3 cannot extract a sentence from the
% tokens that are still left.
%
% Intended use is presumably with Tokens bound and Sentences unbound.
tokens_to_sentences([], []).
tokens_to_sentences(TokenList, [[^|Body]|MoreSentences]) :-
	% Take one sentence off the front, then split whatever is left over.
	first_sentence(TokenList, Body, Leftover),
	tokens_to_sentences(Leftover, MoreSentences).
%% first_sentence(+Tokens, -Sentence, -RestTokens)
%
% Carves the first sentence off the front of Tokens.
% Sentence is the list of tokens up to and including the first token
% accepted by is_sentence_end_symbol/1; RestTokens is everything after it.
% Paragraph markers ('<p>') met on the way are dropped from Sentence.
%
% There is no clause for the empty list here, so the predicate fails if
% Tokens runs out before a sentence end symbol has been found.
first_sentence([EndSymbol | Leftover], [EndSymbol], Leftover) :-
	% Tokens may be unbound variables: only test tokens that are bound,
	% so that the end-symbol check is never called with a fresh variable.
	nonvar(EndSymbol),
	is_sentence_end_symbol(EndSymbol),
	!.
first_sentence([Head | Tail], Sentence, Leftover) :-
	(   Head == '<p>'
	->  % ==/2 (not unification) so that a variable token is never
	    % mistaken for, or bound to, the paragraph marker.
	    first_sentence(Tail, Sentence, Leftover)
	;   % Ordinary token: keep it and continue with the rest.
	    Sentence = [Head | SentenceTail],
	    first_sentence(Tail, SentenceTail, Leftover)
	).
%% tokens_to_paragraphs(+Tokens, -Paragraphs)
%
% Splits the flat list Tokens into a list of paragraphs, where each
% paragraph is the list of tokens that first_paragraph/3 takes off the
% front of the remaining input. The paragraph marker itself does not
% appear in the result.
% An empty token list yields an empty list of paragraphs.
%
% The recursive clause only applies to a non-empty token list. Without
% that restriction it would also match [], and since first_paragraph/3
% succeeds on [] without consuming anything, backtracking would produce
% an endless series of spurious answers ([[]], [[],[]], ...).
tokens_to_paragraphs([], []).
tokens_to_paragraphs([Token | Tokens], [Paragraph | Paragraphs]) :-
	first_paragraph([Token | Tokens], Paragraph, RestTokens),
	tokens_to_paragraphs(RestTokens, Paragraphs).
%% first_paragraph(+Tokens, -Paragraph, -RestTokens)
%
% Takes the first paragraph off the front of Tokens.
% Paragraph is the list of tokens that precede the first '<p>' marker
% and RestTokens is everything after that marker; the marker itself is
% dropped. If Tokens contains no marker, Paragraph is all of Tokens and
% RestTokens is [].
first_paragraph([Token | RestTokens], [], RestTokens) :-
	% ==/2 rather than unification, so that an unbound token is never
	% bound to (or taken for) the paragraph marker.
	Token == '<p>',
	!.

first_paragraph([Token | RestTokens], [Token | RestParagraph], RestTokens2) :-
	first_paragraph(RestTokens, RestParagraph, RestTokens2).

% End of input closes the current paragraph.
first_paragraph([], [], []).
APE Sentence splitter
Converts a flat list of tokens into a list of sentences, each of which is a list of tokens. Sentences end with one of the three symbols: '.', '?', and '!'.
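For example, the following list of tokens:

    [john, waits, '.', who, waits, '?']

is converted into the following list of sentences (each sentence is prefixed with the marker ^):

    [[^, john, waits, '.'], [^, who, waits, '?']]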
*/