1:- module(
2 xml_ext,
3 [
4 call_on_xml/3, % +In, +Names, :Goal_1
5 'Char'//1, % +Version
6 'Char'//2, % +Version, ?Code
7 load_xml/2, % +Source, -Dom
8 xml_encoding/2, % +In, -Encoding
9 xml_file_encoding/2 % +File, -Encoding
10 ]
11).
20:- use_module(library(apply)). 21:- use_module(library(pure_input)). 22:- use_module(library(sgml)). 23:- use_module(library(yall)). 24 25:- use_module(library(atom_ext)). 26:- use_module(library(dcg)). 27:- use_module(library(file_ext)). 28:- use_module(library(stream_ext)). 29 30:- meta_predicate 31 call_on_xml( , , ).
42call_on_xml(In, Names, Goal_1) :- 43 b_setval(xml_stream_record_names, Names), 44 b_setval(xml_stream_goal, Goal_1), 45 setup_call_cleanup( 46 new_sgml_parser(Parser, []), 47 ( 48 maplist(set_sgml_parser(Parser), [dialect(xml),space(remove)]), 49 sgml_parse(Parser, [call(begin,on_begin_),source(In)]) 50 ), 51 free_sgml_parser(Parser) 52 ). 53 54on_begin_(Name, Attr, Parser) :- 55 b_getval(xml_stream_goal, Goal_1), 56 b_getval(xml_stream_record_names, Names), 57 memberchk(Name, Names), !, 58 sgml_parse(Parser, [document(Dom),parse(content)]), 59 ( call(Goal_1, [element(Name,Attr,Dom)]) 60 -> true 61 ; print_message(warning, xml_error(element(Name,Attr,Dom))) 62 ).
Char ::= #x9 // Horizontal tab | #xA // Line feed | #xD // Carriage return | [#x20-#xD7FF] // Space, punctuation, numbers, letters | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
Avoid comapatibility characters [Unicode, section 2.3]. Avoid the following characters (control characters, permanently undefined Unicode characters):
[#x7F-#x84] // Delete, ... [#x86-#x9F] [#xFDD0-#xFDEF], [#x1FFFE-#x1FFFF] [#x2FFFE-#x2FFFF] [#x3FFFE-#x3FFFF] [#x4FFFE-#x4FFFF] [#x5FFFE-#x5FFFF] [#x6FFFE-#x6FFFF] [#x7FFFE-#x7FFFF] [#x8FFFE-#x8FFFF] [#x9FFFE-#x9FFFF] [#xAFFFE-#xAFFFF] [#xBFFFE-#xBFFFF] [#xCFFFE-#xCFFFF] [#xDFFFE-#xDFFFF] [#xEFFFE-#xEFFFF] [#xFFFFE-#xFFFFF] [#x10FFFE-#x10FFFF]
Char ::= [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */
122'Char'(Version) --> 123 'Char'(Version, _). 124 125 126'Char'(version(1,0), 0x9) --> [0x9]. 127'Char'(version(1,0), 0xA) --> [0xA]. 128'Char'(version(1,0), 0xD) --> [0xD]. 129'Char'(version(1,0), Code) --> dcg_between(0x20, 0xD7FF, Code). 130'Char'(version(1,0), Code) --> dcg_between(0xE000, 0xFFFD, Code). 131'Char'(version(1,0), Code) --> dcg_between(0x10000, 0x10FFFF, Code). 132'Char'(version(1,1), Code) --> dcg_between(0x1, 0xD7FF, Code). 133'Char'(version(1,1), Code) --> dcg_between(0xE000, 0xFFFD, Code). 134'Char'(version(1,1), Code) --> dcg_between(0x10000, 0x10FFFF, Code).
140load_xml(Source, Dom) :-
141 load_xml(Source, Dom, [space(remove)]).
147xml_encoding(In, Encoding) :- 148 phrase_from_stream(xml_encoding(Encoding0), In), 149 nonvar(Encoding0), 150 stream_ext:clean_encoding_(Encoding0, Encoding). 151 152xml_encoding(Encoding) --> 153 'XMLDecl'(_,Encoding,_), 154 remainder(_).
160xml_file_encoding(File, Encoding) :- 161 read_from_file(File, {Encoding}/[In0]>>xml_encoding(In0, Encoding)). 162 163 164 165 166 167% GRAMMAR %
EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
compat XML 1.0.5 [81] compat XML 1.1.2 [81]
178'EncName'(Encoding) --> 179 alpha(H), 180 'enc_name_char*'(T), 181 {atom_codes(Encoding, [H|T])}. 182 183'enc_name_char*'([H|T]) --> 184 enc_name_char(H), !, 185 'enc_name_char*'(T). 186'enc_name_char*'([]) --> "". 187 188enc_name_char(Code) --> alphanum(Code). 189enc_name_char(0'.) --> ".". 190enc_name_char(0'_) --> "_". 191enc_name_char(0'-) --> "-".
EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
204'EncodingDecl'(Encoding) -->
205 'S+',
206 "encoding",
207 'Eq',
208 ( "\""
209 -> 'EncName'(Encoding),
210 must_see_code(0'")%"
211 ; "'"
212 -> 'EncName'(Encoding),
213 must_see_code(0'')
214 ).
Eq ::= S? '=' S?
227'Eq' -->
228 'S*',
229 "=",
230 'S*'.
S ::= ( #x20 | #x9 | #xD | #xA )+ // Any consecutive number of spaces, // carriage returns, line feeds, and // horizontal tabs.
The presence of carriage_return// in the above production is maintained purely for backward compatibility with the First Edition. All `#xD` characters literally present in an XML document are either removed or replaced by line_feed// (i.e., `#xA`) characters before any other processing is done.
250'S' --> [0x20]. 251'S' --> [0x9]. 252'S' --> [0xD]. 253'S' --> [0xA]. 254 255'S+' --> 256 'S', 257 'S*'. 258 259'S*' --> 260 'S', !, 261 'S*'. 262'S*' --> "".
SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
279'SDDecl'(Standalone) --> 280 'S+', 281 "standalone", 282 'Eq', 283 ( "'" 284 -> yesno(Standalone), 285 must_see_code(0'') 286 ; "\"" 287 -> yesno(Standalone), 288 must_see_code(0'")%" 289 ). 290 291yesno(true) -->"yes". 292yesno(false) --> "no".
XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
305'XMLDecl'(Version, Encoding, Standalone) -->
306 "<?xml",
307 'VersionInfo'(Version),
308 ('EncodingDecl'(Encoding) -> "" ; ""),
309 ('SDDecl'(Standalone) -> "" ; ""),
310 'S*',
311 "?>".
VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
324'VersionInfo'(Version) -->
325 'S+',
326 "version",
327 'Eq',
328 ( "'"
329 -> 'VersionNum'(Version),
330 "'"
331 ; "\""
332 -> 'VersionNum'(Version),
333 "\""
334 ).
VersionNum ::= '1.' [0-9]+
VersionNum ::= '1.1'
358'VersionNum'(version(1,Minor)) --> 359 "1.", 360 integer(Minor). 361'VersionNum'(version(1,1)) --> 362 "1.1". 363 364 365 366 367 368% HELPERS %
372must_see_code(Code) -->
373 must_see_code(Code, 'S*')
Extended support for XML
Extends the support for working with XML provided by the SWI-Prolog standard library.
*/