View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2000-2020, University of Amsterdam
    7                              VU University Amsterdam
    8                              CWI, Amsterdam
    9    All rights reserved.
   10
   11    Redistribution and use in source and binary forms, with or without
   12    modification, are permitted provided that the following conditions
   13    are met:
   14
   15    1. Redistributions of source code must retain the above copyright
   16       notice, this list of conditions and the following disclaimer.
   17
   18    2. Redistributions in binary form must reproduce the above copyright
   19       notice, this list of conditions and the following disclaimer in
   20       the documentation and/or other materials provided with the
   21       distribution.
   22
   23    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   24    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   25    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   26    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   27    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   28    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   29    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   30    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   31    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   33    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   34    POSSIBILITY OF SUCH DAMAGE.
   35*/
   36
   37:- module(sgml,
   38          [ load_html/3,                % +Input, -DOM, +Options
   39            load_xml/3,                 % +Input, -DOM, +Options
   40            load_sgml/3,                % +Input, -DOM, +Options
   41
   42            load_sgml_file/2,           % +File, -ListOfContent
   43            load_xml_file/2,            % +File, -ListOfContent
   44            load_html_file/2,           % +File, -Document
   45
   46            load_structure/3,           % +File, -Term, +Options
   47
   48            load_dtd/2,                 % +DTD, +File
   49            load_dtd/3,                 % +DTD, +File, +Options
   50            dtd/2,                      % +Type, -DTD
   51            dtd_property/2,             % +DTD, ?Property
   52
   53            new_dtd/2,                  % +Doctype, -DTD
   54            free_dtd/1,                 % +DTD
   55            open_dtd/3,                 % +DTD, +Options, -Stream
   56
   57            new_sgml_parser/2,          % -Parser, +Options
   58            free_sgml_parser/1,         % +Parser
   59            set_sgml_parser/2,          % +Parser, +Options
   60            get_sgml_parser/2,          % +Parser, +Options
   61            sgml_parse/2,               % +Parser, +Options
   62
   63            sgml_register_catalog_file/2, % +File, +StartOrEnd
   64
   65            xml_quote_attribute/3,      % +In, -Quoted, +Encoding
   66            xml_quote_cdata/3,          % +In, -Quoted, +Encoding
   67            xml_quote_attribute/2,      % +In, -Quoted
   68            xml_quote_cdata/2,          % +In, -Quoted
   69            xml_name/1,                 % +In
   70            xml_name/2,                 % +In, +Encoding
   71
   72            xsd_number_string/2,        % ?Number, ?String
   73            xsd_time_string/3,          % ?Term, ?Type, ?String
   74
   75            xml_basechar/1,             % +Code
   76            xml_ideographic/1,          % +Code
   77            xml_combining_char/1,       % +Code
   78            xml_digit/1,                % +Code
   79            xml_extender/1,             % +Code
   80
   81            iri_xml_namespace/2,        % +IRI, -Namespace
   82            iri_xml_namespace/3,        % +IRI, -Namespace, -LocalName
   83            xml_is_dom/1                % +Term
   84          ]).   85:- autoload(library(error),[instantiation_error/1]).   86:- autoload(library(iostream),[open_any/5,close_any/1]).   87:- autoload(library(lists),[member/2,selectchk/3]).   88:- autoload(library(option),[select_option/3,merge_options/3]).   89
   90:- meta_predicate
   91    load_structure(+, -, :),
   92    load_html(+, -, :),
   93    load_xml(+, -, :),
   94    load_sgml(+, -, :).   95
   96:- predicate_options(load_structure/3, 3,
   97                     [ charpos(integer),
   98                       cdata(oneof([atom,string])),
   99                       defaults(boolean),
  100                       dialect(oneof([html,html4,html5,sgml,xhtml,xhtml5,xml,xmlns])),
  101                       doctype(atom),
  102                       dtd(any),
  103                       encoding(oneof(['iso-8859-1', 'utf-8', 'us-ascii'])),
  104                       entity(atom,atom),
  105                       keep_prefix(boolean),
  106                       file(atom),
  107                       line(integer),
  108                       offset(integer),
  109                       number(oneof([token,integer])),
  110                       qualify_attributes(boolean),
  111                       shorttag(boolean),
  112                       case_sensitive_attributes(boolean),
  113                       case_preserving_attributes(boolean),
  114                       system_entities(boolean),
  115                       max_memory(integer),
  116                       space(oneof([sgml,preserve,default,remove,strict])),
  117                       xmlns(atom),
  118                       xmlns(atom,atom),
  119                       pass_to(sgml_parse/2, 2)
  120                     ]).  121:- predicate_options(load_html/3, 3,
  122                     [ pass_to(load_structure/3, 3)
  123                     ]).  124:- predicate_options(load_xml/3, 3,
  125                     [ pass_to(load_structure/3, 3)
  126                     ]).  127:- predicate_options(load_sgml/3, 3,
  128                     [ pass_to(load_structure/3, 3)
  129                     ]).  130:- predicate_options(load_dtd/3, 3,
  131                     [ dialect(oneof([sgml,xml,xmlns])),
  132                       pass_to(open/4, 4)
  133                     ]).  134:- predicate_options(sgml_parse/2, 2,
  135                     [ call(oneof([begin,end,cdata,pi,decl,error,xmlns,urlns]),
  136                            callable),
  137                       cdata(oneof([atom,string])),
  138                       content_length(integer),
  139                       document(-any),
  140                       max_errors(integer),
  141                       parse(oneof([file,element,content,declaration,input])),
  142                       source(any),
  143                       syntax_errors(oneof([quiet,print,style])),
  144                       xml_no_ns(oneof([error,quiet]))
  145                     ]).  146:- predicate_options(new_sgml_parser/2, 2,
  147                     [ dtd(any)
  148                     ]).

SGML, XML and HTML parser

This library allows you to parse SGML, XML and HTML data into a Prolog data structure. The library defines several families of predicates:

High-level predicates
Most users will only use load_html/3, load_xml/3 or load_sgml/3 to parse arbitrary input into a DOM structure. These predicates all call load_structure/3, which provides more options and may be used for processing non-standard documents.

The DOM structure can be used by library(xpath) to extract information from the document.

The low-level parser
The actual parser is written in C and consists of two parts: one for processing DTD (Document Type Definitions) and one for parsing data. The data can either be parsed to a Prolog (DOM) term or the parser can perform callbacks for the DOM events.
Utility predicates
Finally, this library provides primitives for classifying characters and strings according to the XML specification, such as xml_name/1 to verify whether an atom is a valid XML name (identifier). It also provides primitives to quote attributes and CDATA elements. */
  178:- multifile user:file_search_path/2.  179:- dynamic   user:file_search_path/2.  180
  181user:file_search_path(dtd, '.').
  182user:file_search_path(dtd, swi('library/DTD')).
  183
  %!  sgml_register_catalog_file(+File, +Location)
  %
  %   Register the catalog File.  File is given in Prolog notation and
  %   is converted to the notation of the operating system before it
  %   is handed to the foreign predicate.  Location is passed through
  %   unchanged (the export list calls it StartOrEnd).

  sgml_register_catalog_file(PlFile, Where) :-
      prolog_to_os_filename(PlFile, NativeFile),
      '_sgml_register_catalog_file'(NativeFile, Where).
  187
  188:- use_foreign_library(foreign(sgml2pl)).  189
  %   register_catalog(+Base)
  %
  %   Locate the readable file dtd(Base) with extension `soc` and
  %   register it at the end of the catalog list.  Fails silently
  %   (file_errors(fail)) if no such file exists.

  register_catalog(Base) :-
      FindOptions = [ extensions([soc]),
                      access(read),
                      file_errors(fail)
                    ],
      absolute_file_name(dtd(Base), FindOptions, Catalog),
      sgml_register_catalog_file(Catalog, end).
  198
  199:- initialization
  200    ignore(register_catalog('HTML4')).  201
  202
  203                 /*******************************
  204                 *         DTD HANDLING         *
  205                 *******************************/
  206
  207/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  208Note that concurrent access to DTD objects  is not allowed, and hence we
  209will allocate and destroy them in each   thread.  Possibly it would be
  210nicer to find out why  concurrent  access   to  DTD's  is  flawed. It is
  211diagnosed to mess with the entity resolution by Fabien Todescato.
  212- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  213
  214:- thread_local
  215    current_dtd/2.  216:- volatile
  217    current_dtd/2.  218:- thread_local
  219    registered_cleanup/0.  220:- volatile
  221    registered_cleanup/0.  222
  223:- multifile
  224    dtd_alias/2.  225
  226:- create_prolog_flag(html_dialect, html5, [type(atom)]).  227
  %   dtd_alias(?Alias, ?Base)
  %
  %   Map a dialect alias onto the base name of its DTD file.  The
  %   alias `html` resolves to whatever dialect the Prolog flag
  %   `html_dialect` currently names.  The predicate is declared
  %   multifile, so other files may contribute clauses.

  dtd_alias(html4, 'HTML4').
  dtd_alias(html5, 'HTML5').
  dtd_alias(html, Base) :-
      current_prolog_flag(html_dialect, Current),
      dtd_alias(Current, Base).
 dtd(+Type, -DTD) is det
DTD is a DTD object created from the file dtd(Type). Loaded DTD objects are cached. Note that DTD objects may not be shared between threads. Therefore, dtd/2 maintains the pool of DTD objects using a thread_local predicate. DTD objects are destroyed if a thread terminates.
Errors
- existence_error(source_sink, dtd(Type))
  %!  dtd(+Type, -DTD) is det.
  %
  %   DTD is the DTD object for Type.  A DTD cached for this thread in
  %   current_dtd/2 is reused.  Otherwise Type is mapped through
  %   dtd_alias/2 (falling back to Type itself), the file dtd(Base)
  %   with extension `dtd` is located and loaded into a fresh DTD
  %   object, and the result is cached.
  %
  %   The file is resolved *before* the DTD object is allocated, and
  %   the object is released again if loading raises.  This way a
  %   missing or unreadable DTD file does not leave an orphaned DTD
  %   object behind.
  %
  %   @error existence_error(source_sink, dtd(Base)) if the DTD file
  %          cannot be found (raised by absolute_file_name/3).

  dtd(Type, DTD) :-
      current_dtd(Type, DTD),
      !.
  dtd(Type, DTD) :-
      (   dtd_alias(Type, Base)
      ->  true
      ;   Base = Type
      ),
      absolute_file_name(dtd(Base),
                         [ extensions([dtd]),
                           access(read)
                         ], DtdFile),
      new_dtd(Type, DTD),
      catch(load_dtd(DTD, DtdFile),
            Error,
            ( free_dtd(DTD),              % do not leak the fresh object
              throw(Error)
            )),
      register_cleanup,                   % make sure the cache is emptied
      asserta(current_dtd(Type, DTD)).
 load_dtd(+DTD, +DtdFile, +Options)
Load DtdFile into a DTD. Defined options are:
dialect(+Dialect)
Dialect to use (xml, xmlns, sgml)
encoding(+Encoding)
Encoding of DTD file
Arguments:
DTD - is a fresh DTD object, normally created using new_dtd/2.
  %   load_dtd(+DTD, +DtdFile) and load_dtd(+DTD, +DtdFile, +Options)
  %
  %   Feed the content of DtdFile into DTD.  Options are split by
  %   sgml_open_options/3 into options for open/4 and options for
  %   open_dtd/3.  Both the DTD output stream and the file input
  %   stream are closed, also if copying raises an exception.

  load_dtd(DTD, DtdFile) :-
      load_dtd(DTD, DtdFile, []).

  load_dtd(DTD, DtdFile, Options) :-
      sgml_open_options(sgml:Options, OpenOptions, sgml:DTDOptions),
      setup_call_cleanup(
          open_dtd(DTD, DTDOptions, DtdOut),
          copy_file_to_dtd(DtdFile, OpenOptions, DtdOut),
          close(DtdOut)).

  %   copy_file_to_dtd(+File, +OpenOptions, +Out)
  %
  %   Open File for reading and copy all its data to the stream Out.

  copy_file_to_dtd(File, OpenOptions, Out) :-
      setup_call_cleanup(
          open(File, read, In, OpenOptions),
          copy_stream_data(In, Out),
          close(In)).
 destroy_dtds
Destroy DTDs cached by this thread as they will become unreachable anyway.
  291:- public
  292    destroy_dtds/0.                     % called through register_cleanup/0
  293
  %   destroy_dtds
  %
  %   Call free_dtd/1 on every DTD recorded in current_dtd/2 for this
  %   thread.  Always succeeds; a failing free_dtd/1 does not stop
  %   the remaining DTDs from being processed.

  destroy_dtds :-
      forall(current_dtd(_Type, DTD),
             ignore(free_dtd(DTD))).
 register_cleanup
Register cleanup of DTDs created for this thread.
  %   register_cleanup
  %
  %   Arrange for destroy_dtds/0 to run when this thread ends.  The
  %   thread-local flag registered_cleanup/0 ensures the hook is
  %   installed only once per thread.

  register_cleanup :-
      (   registered_cleanup
      ->  true
      ;   install_dtd_cleanup,
          assert(registered_cleanup)
      ).

  %   install_dtd_cleanup
  %
  %   The main thread cleans up at halt.  Other threads clean up on
  %   thread exit if the system supports threads.  Otherwise nothing
  %   is installed.

  install_dtd_cleanup :-
      thread_self(main),
      !,
      at_halt(destroy_dtds).
  install_dtd_cleanup :-
      current_prolog_flag(threads, true),
      !,
      prolog_listen(this_thread_exit, destroy_dtds).
  install_dtd_cleanup.
  316
  317
  318                 /*******************************
  319                 *          EXAMINE DTD         *
  320                 *******************************/
  321
  %   prop(?Property, +DTD)
  %
  %   Enumerate the property shapes accepted by dtd_property/2.  For
  %   properties that are about a named object (entity, element,
  %   attribute, notation) the name is enumerated from the DTD if it
  %   is unbound, so the foreign '$dtd_property'/2 is always called
  %   with the name instantiated.

  prop(doctype(_), _).
  prop(elements(_), _).
  prop(entities(_), _).
  prop(notations(_), _).
  prop(entity(Entity, _), DTD) :-
      bound_or_listed(Entity, DTD, entities(Names), Names).
  prop(element(Element, _, _), DTD) :-
      bound_or_listed(Element, DTD, elements(Names), Names).
  prop(attributes(Element, _), DTD) :-
      bound_or_listed(Element, DTD, elements(Names), Names).
  prop(attribute(Element, Attr, _, _), DTD) :-
      bound_or_listed(Element, DTD, elements(Names), Names),
      bound_or_listed(Attr, DTD, attributes(Element, Attrs), Attrs).
  prop(notation(Notation, _), DTD) :-
      bound_or_listed(Notation, DTD, notations(Names), Names).

  %   bound_or_listed(?Name, +DTD, +Query, -List)
  %
  %   True if Name is already instantiated.  Otherwise ask the DTD
  %   for Query, which binds List, and enumerate Name from List on
  %   backtracking.

  bound_or_listed(Name, _DTD, _Query, _List) :-
      nonvar(Name),
      !.
  bound_or_listed(Name, DTD, Query, List) :-
      '$dtd_property'(DTD, Query),
      member(Name, List).

  %   dtd_property(+DTD, ?Property)
  %
  %   True if Property is a property of DTD.  Property shapes are
  %   generated by prop/2 and then resolved by the foreign
  %   '$dtd_property'/2.

  dtd_property(DTD, Property) :-
      prop(Property, DTD),
      '$dtd_property'(DTD, Property).
  365
  366
  367                 /*******************************
  368                 *             SGML             *
  369                 *******************************/
 load_structure(+Source, -ListOfContent, :Options) is det
Parse Source and return the resulting structure in ListOfContent. Source is handed to open_any/5, which allows for processing an extensible set of input sources.

A proper XML document contains only a single toplevel element whose name matches the document type. Nevertheless, a list is returned for consistency with the representation of element content.

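The encoding(+Encoding) option is treated specially for compatibility reasons: if Encoding is one of the encodings handled by the SGML parser itself (iso-8859-1, us-ascii, utf-8, utf8, iso_latin_1 or ascii, compared case-insensitively), the input is opened in binary mode and the option is passed to the parser. Otherwise the option is used for opening the input stream and is not passed to the parser.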

  %   load_structure(+Source, -DOM, :Options)
  %
  %   Open Source using open_any/5, parse it into DOM and close the
  %   input again, also if parsing raises an exception.  Options are
  %   first split into stream options and parser options by
  %   sgml_open_options/3.

  load_structure(Source, DOM, Options) :-
      sgml_open_options(Options, StreamOptions, ParserOptions),
      setup_call_cleanup(
          open_any(Source, read, Stream, CloseHandle, StreamOptions),
          load_structure_from_stream(Stream, DOM, ParserOptions),
          close_any(CloseHandle)).
  399
  %   sgml_open_options(:Options, -OpenOptions, :SGMLOptions)
  %
  %   Split the module-qualified Options into options for opening the
  %   input (OpenOptions, unqualified) and options for the parser
  %   (SGMLOptions, qualified with the same module).
  %
  %     - No encoding/1 option: open as binary, parser gets all
  %       options.
  %     - encoding/1 names an encoding the parser handles itself: open
  %       as binary (without the encoding option), parser gets all
  %       options.
  %     - Any other encoding: the stream is opened with the options as
  %       given and the encoding option is withheld from the parser.

  sgml_open_options(M:Plain, OpenOptions, SGMLOptions) :-
      (   select_option(encoding(Encoding), Plain, WithoutEnc)
      ->  encoding_open_options(Encoding, M, Plain, WithoutEnc,
                                OpenOptions, SGMLOptions)
      ;   merge_options(Plain, [type(binary)], OpenOptions),
          SGMLOptions = M:Plain
      ).

  encoding_open_options(Encoding, M, Plain, WithoutEnc,
                        OpenOptions, SGMLOptions) :-
      (   sgml_encoding(Encoding)
      ->  merge_options(WithoutEnc, [type(binary)], OpenOptions),
          SGMLOptions = M:Plain
      ;   OpenOptions = Plain,
          SGMLOptions = M:WithoutEnc
      ).

  %   sgml_encoding(+Encoding)
  %
  %   True if Encoding, compared case-insensitively, is listed in
  %   sgml_encoding_l/1.

  sgml_encoding(Encoding) :-
      downcase_atom(Encoding, Lower),
      sgml_encoding_l(Lower).

  sgml_encoding_l('iso-8859-1').
  sgml_encoding_l('us-ascii').
  sgml_encoding_l('utf-8').
  sgml_encoding_l('utf8').
  sgml_encoding_l('iso_latin_1').
  sgml_encoding_l('ascii').
  423
  %   load_structure_from_stream(+In, -Term, :Options)
  %
  %   Parse the stream In into Term using a temporary parser.  If
  %   Options holds dtd(DTD), that DTD is used and kept.  Otherwise
  %   new_sgml_parser/2 is called with an unbound DTD and that DTD is
  %   freed after parsing.  The dialect/1 option is moved to the front
  %   because it sets defaults that later options may override.
  %
  %   NOTE(review): the implicitly created DTD is only freed on the
  %   success path.  If parse/4 raises or fails, free_dtd/1 is not
  %   reached -- confirm whether free_sgml_parser/1 alone releases it.

  load_structure_from_stream(In, Term, M:Options) :-
      (   select_option(dtd(DTD), Options, Options1)
      ->  ExplicitDTD = true
      ;   Options1 = Options,
          ExplicitDTD = false
      ),
      move_front(Options1, dialect(_), Options2),
      setup_call_cleanup(
          new_sgml_parser(Parser, [dtd(DTD)]),
          parse(Parser, M:Options2, TermRead, In),
          free_sgml_parser(Parser)),
      finish_dtd(ExplicitDTD, DTD),
      Term = TermRead.

  %   finish_dtd(+Explicit, +DTD)
  %
  %   For a caller-supplied DTD, try to unify the second argument of
  %   the dtd/2 term with the doctype reported by the DTD; this step
  %   never fails.  For an implicit DTD, free it.

  finish_dtd(true, DTD) :-
      !,
      ignore(( DTD = dtd(_, DocType),
               dtd_property(DTD, doctype(DocType))
             )).
  finish_dtd(_, DTD) :-
      free_dtd(DTD).
  446
  %   move_front(+Options0, ?Opt, -Options)
  %
  %   If Options0 has an element unifying with Opt, Options is
  %   Options0 with the first such element moved to the head.
  %   Otherwise Options is Options0.

  move_front(Options0, Opt, Options) :-
      (   selectchk(Opt, Options0, Others)
      ->  Options = [Opt|Others]
      ;   Options = Options0
      ).
  452
  453
  %   parse(+Parser, :Options, -Document, +In)
  %
  %   Apply the options that configure Parser (or the input stream),
  %   module-qualify the call/2 options among the remaining ones, set
  %   the input location if it was not given explicitly, and finally
  %   run sgml_parse/2 on In, collecting the result in Document.

  parse(Parser, M:Options, Document, In) :-
      set_parser_options(Options, Parser, In, Unhandled),
      parser_meta_options(Unhandled, M, ParseOptions),
      set_input_location(Parser, In),
      sgml_parse(Parser,
                 [ document(Document),
                   source(In)
                 | ParseOptions
                 ]).
  463
  %   set_parser_options(+Options, +Parser, +In, -Unhandled)
  %
  %   Try to apply each option through set_parser_option/3.  Options
  %   for which that fails are collected, in order, in Unhandled.

  set_parser_options([], _, _, []).
  set_parser_options([Opt|Opts], Parser, In, Unhandled) :-
      (   set_parser_option(Opt, Parser, In)
      ->  Unhandled = Rest
      ;   Unhandled = [Opt|Rest]
      ),
      set_parser_options(Opts, Parser, In, Rest).
  471
  %   set_parser_option(+Option, +Parser, +In)
  %
  %   Apply a single option.  The alternatives are tried in order and
  %   the first applicable one is committed to:
  %
  %     1. An unbound option raises an instantiation error.
  %     2. entity/2, xmlns/1 and xmlns/2 are handled by def_entity/2.
  %     3. offset(Offset) seeks In to Offset from the start.
  %     4. Options listed in parser_option/1 go to set_sgml_parser/2.
  %     5. Name=Value is retried as Name(Value).
  %
  %   Fails for any other option.

  set_parser_option(Option, Parser, In) :-
      (   var(Option)
      ->  instantiation_error(Option)
      ;   def_entity(Option, Parser)
      ->  true
      ;   Option = offset(Offset)
      ->  seek(In, Offset, bof, _)
      ;   parser_option(Option)
      ->  set_sgml_parser(Parser, Option)
      ;   Option = (Name=Value),
          Term =.. [Name,Value],
          set_parser_option(Term, Parser, In)
      ).


  %   parser_option(?Option)
  %
  %   Options that are passed unchanged to set_sgml_parser/2.

  parser_option(dialect(_)).
  parser_option(shorttag(_)).
  parser_option(case_sensitive_attributes(_)).
  parser_option(case_preserving_attributes(_)).
  parser_option(system_entities(_)).
  parser_option(max_memory(_)).
  parser_option(file(_)).
  parser_option(line(_)).
  parser_option(space(_)).
  parser_option(number(_)).
  parser_option(defaults(_)).
  parser_option(doctype(_)).
  parser_option(qualify_attributes(_)).
  parser_option(encoding(_)).
  parser_option(keep_prefix(_)).
  506
  507
  %   def_entity(+Option, +Parser)
  %
  %   Handle the options that add definitions to the parser:
  %   entity(Name, Value) writes an <!ENTITY ...> declaration, with
  %   Value quoted as an XML attribute, into the DTD of Parser;
  %   xmlns(URI) and xmlns(NS, URI) are set on the parser itself.

  def_entity(entity(Name, Value), Parser) :-
      get_sgml_parser(Parser, dtd(DTD)),
      xml_quote_attribute(Value, Quoted),
      declare_entity(DTD, Name, Quoted).
  def_entity(xmlns(URI), Parser) :-
      set_sgml_parser(Parser, xmlns(URI)).
  def_entity(xmlns(NS, URI), Parser) :-
      set_sgml_parser(Parser, xmlns(NS, URI)).

  %   declare_entity(+DTD, +Name, +QuotedValue)
  %
  %   Write the entity declaration to DTD.  The DTD stream is closed
  %   even if writing raises an exception.

  declare_entity(DTD, Name, Quoted) :-
      setup_call_cleanup(
          open_dtd(DTD, [], Out),
          format(Out, '<!ENTITY ~w "~w">~n', [Name, Quoted]),
          close(Out)).
 parser_meta_options(+Options0, +Module, -Options)
Qualify meta-calling options to the parser.
  %   parser_meta_options(+Options0, +Module, -Options)
  %
  %   Options is Options0 in which the closure of every
  %   call(When, Closure) option is qualified with Module.  All other
  %   options are copied unchanged.

  parser_meta_options([], _, []).
  parser_meta_options([Opt0|Opts0], M, [Opt|Opts]) :-
      qualify_call_option(Opt0, M, Opt),
      parser_meta_options(Opts0, M, Opts).

  qualify_call_option(call(When, Closure), M, call(When, M:Closure)) :-
      !.
  qualify_call_option(Opt, _, Opt).
 set_input_location(+Parser, +In:stream) is det
Set the input location if this was not set explicitly
  %!  set_input_location(+Parser, +In:stream) is det.
  %
  %   Set the input location of Parser from the stream In, unless a
  %   file was already set explicitly:
  %
  %     - If Parser already has a file/1 property, do nothing.
  %     - Otherwise, if In has a file_name/1 property, set that file
  %       on the parser and, if the stream also reports a position/1
  %       property, set the position too.
  %     - Otherwise do nothing.
  %
  %   A stream that has a file name but does not maintain position
  %   information no longer makes this predicate fail, which would
  %   make the enclosing parse fail silently.  In that case only the
  %   file is set.

  set_input_location(Parser, _In) :-
      get_sgml_parser(Parser, file(_)),
      !.
  set_input_location(Parser, In) :-
      stream_property(In, file_name(File)),
      !,
      set_sgml_parser(Parser, file(File)),
      (   stream_property(In, position(Pos))
      ->  set_sgml_parser(Parser, position(Pos))
      ;   true                            % position is not recorded
      ).
  set_input_location(_, _).
  546
  547                 /*******************************
  548                 *           UTILITIES          *
  549                 *******************************/
 load_sgml_file(+File, -DOM) is det
Load SGML from File and unify the resulting DOM structure with DOM.
deprecated
- New code should use load_sgml/3.
  %   load_sgml_file(+File, -DOM)
  %
  %   Same as load_sgml/3 with an empty option list.

  load_sgml_file(File, DOM) :-
      NoOptions = [],
      load_sgml(File, DOM, NoOptions).
 load_xml_file(+File, -DOM) is det
Load XML from File and unify the resulting DOM structure with DOM.
deprecated
- New code should use load_xml/3.
  %   load_xml_file(+File, -DOM)
  %
  %   Same as load_xml/3 with an empty option list.

  load_xml_file(File, DOM) :-
      NoOptions = [],
      load_xml(File, DOM, NoOptions).
 load_html_file(+File, -DOM) is det
Load HTML from File and unify the resulting DOM structure with DOM.
deprecated
- New code should use load_html/3.
  %   load_html_file(+File, -DOM)
  %
  %   Same as load_html/3 with an empty option list.

  load_html_file(File, Document) :-
      NoOptions = [],
      load_html(File, Document, NoOptions).
 load_html(+Input, -DOM, +Options) is det
Load HTML text from Input and unify the resulting DOM structure with DOM. Options are passed to load_structure/3, after adding the following default options:
dtd(DTD)
Pass the DTD for HTML as obtained using dtd(html, DTD).
dialect(Dialect)
Current dialect from the Prolog flag html_dialect
max_errors(-1)
syntax_errors(quiet)
Most HTML encountered in the wild contains errors. Even in the context of errors, the resulting DOM term is often a reasonable guess at the intent of the author.

You may also want to use the library(http/http_open) to support loading from HTTP and HTTPS URLs. For example:

:- use_module(library(http/http_open)).
:- use_module(library(sgml)).

load_html_url(URL, DOM) :-
    load_html(URL, DOM, []).
  %   load_html(+Input, -DOM, :Options)
  %
  %   Parse Input as HTML.  The dialect is taken from the Prolog flag
  %   `html_dialect` and the matching DTD is obtained through dtd/2.
  %   These, together with max_errors(-1) and syntax_errors(quiet),
  %   are merged as defaults into Options before load_structure/3 is
  %   called.

  load_html(Input, DOM, M:Options) :-
      current_prolog_flag(html_dialect, Dialect),
      dtd(Dialect, DTD),
      Defaults = [ dtd(DTD),
                   dialect(Dialect),
                   max_errors(-1),
                   syntax_errors(quiet)
                 ],
      merge_options(Options, Defaults, WithDefaults),
      load_structure(Input, DOM, M:WithDefaults).
 load_xml(+Input, -DOM, +Options) is det
Load XML text from Input and unify the resulting DOM structure with DOM. Options are passed to load_structure/3, after adding the following default option: dialect(xml).
  %   load_xml(+Input, -DOM, :Options)
  %
  %   Parse Input through load_structure/3, with dialect(xml) merged
  %   into Options as a default.

  load_xml(Input, DOM, M:Options) :-
      Defaults = [dialect(xml)],
      merge_options(Options, Defaults, WithDefaults),
      load_structure(Input, DOM, M:WithDefaults).
 load_sgml(+Input, -DOM, +Options) is det
Load SGML text from Input and unify the resulting DOM structure with DOM. Options are passed to load_structure/3, after adding the following default option: dialect(sgml).
  %   load_sgml(+Input, -DOM, :Options)
  %
  %   Parse Input through load_structure/3, with dialect(sgml) merged
  %   into Options as a default.

  load_sgml(Input, DOM, M:Options) :-
      Defaults = [dialect(sgml)],
      merge_options(Options, Defaults, WithDefaults),
      load_structure(Input, DOM, M:WithDefaults).
  646
  647
  648
  649                 /*******************************
  650                 *            ENCODING          *
  651                 *******************************/
 xml_quote_attribute(+In, -Quoted) is det
 xml_quote_cdata(+In, -Quoted) is det
Backward-compatible versions of xml_quote_attribute/3 and xml_quote_cdata/3 that take no encoding argument and use ascii. All characters that cannot fit the encoding are mapped to XML character entities (&#dd;). Using ASCII is the safest value.
  %   xml_quote_attribute(+In, -Quoted) and xml_quote_cdata(+In, -Quoted)
  %
  %   Two-argument versions of xml_quote_attribute/3 and
  %   xml_quote_cdata/3 that use the encoding `ascii`.

  xml_quote_attribute(Text, Quoted) :-
      Encoding = ascii,
      xml_quote_attribute(Text, Quoted, Encoding).

  xml_quote_cdata(Text, Quoted) :-
      Encoding = ascii,
      xml_quote_cdata(Text, Quoted, Encoding).
 xml_name(+Atom) is semidet
True if Atom is a valid XML name.
  %   xml_name(+In)
  %
  %   Same as xml_name/2 using the encoding `ascii`.

  xml_name(Text) :-
      Encoding = ascii,
      xml_name(Text, Encoding).
  673
  674
  675                 /*******************************
  676                 *    XML CHARACTER CLASSES     *
  677                 *******************************/
 xml_basechar(+CodeOrChar) is semidet
 xml_ideographic(+CodeOrChar) is semidet
 xml_combining_char(+CodeOrChar) is semidet
 xml_digit(+CodeOrChar) is semidet
 xml_extender(+CodeOrChar) is semidet
XML character classification predicates. Each of these predicates accepts both a character (one-character atom) and a code (integer).
See also
- http://www.w3.org/TR/2006/REC-xml-20060816
  692                 /*******************************
  693                 *         TYPE CHECKING        *
  694                 *******************************/
 xml_is_dom(@Term) is semidet
True if Term satisfies the structure as returned by load_structure/3 and friends.
  %   xml_is_dom(@Term)
  %
  %   True if Term is a list of content nodes or a single element/3
  %   term.  An unbound Term is rejected and left unbound.

  xml_is_dom(Term) :-
      nonvar(Term),
      (   is_list(Term)
      ->  xml_is_content_list(Term)
      ;   xml_is_element(Term)
      ).

  xml_is_content_list([]).
  xml_is_content_list([Node|Nodes]) :-
      xml_is_content(Node),
      xml_is_content_list(Nodes).

  %   xml_is_content(@Node)
  %
  %   A content node is pi(Atom), an atom, a string or an element.
  %   Unbound nodes are rejected.

  xml_is_content(Node) :-
      nonvar(Node),
      (   Node = pi(Pi)
      ->  atom(Pi)
      ;   atom(Node)
      ->  true
      ;   string(Node)
      ->  true
      ;   xml_is_element(Node)
      ).

  xml_is_element(element(Name, Attributes, Content)) :-
      dom_name(Name),
      dom_attributes(Attributes),
      xml_is_content_list(Content).

  %   dom_name(@Name)
  %
  %   A name is an atom or NS:Local with both parts atoms.

  dom_name(Name) :-
      (   Name = NS:Local,
          atom(NS),
          atom(Local)
      ->  true
      ;   atom(Name)
      ).

  %   dom_attributes(@Attributes)
  %
  %   A proper list of Name=Value pairs.  Unbound lists and unbound
  %   tails are rejected.

  dom_attributes(Attributes) :-
      nonvar(Attributes),
      dom_attribute_list(Attributes).

  dom_attribute_list([]).
  dom_attribute_list([Attr|Attrs]) :-
      dom_attribute(Attr),
      dom_attributes(Attrs).

  dom_attribute(Name=Value) :-
      dom_name(Name),
      atomic(Value).
  748
  749
  750                 /*******************************
  751                 *            MESSAGES          *
  752                 *******************************/
  753:- multifile
  754    prolog:message/3.  755
  756%       Catch messages.  sgml/4 is generated by the SGML2PL binding.
  757
  %   prolog:message(+Message)//
  %
  %   Format sgml(Parser, File, Line, Message) terms.  The dialect of
  %   Parser is looked up and printed along with the location.

  prolog:message(sgml(Parser, File, Line, Text)) -->
      { get_sgml_parser(Parser, dialect(Dialect)) },
      [ 'SGML2PL(~w): ~w:~w: ~w'-[Dialect, File, Line, Text] ].
  762
  763
  764                 /*******************************
  765                 *         XREF SUPPORT         *
  766                 *******************************/
  767
  768:- multifile
  769    prolog:called_by/2.  770
  %   prolog:called_by(+Goal, -Called)
  %
  %   Cross-referencer hook: for a call to sgml_parse/2, Called lists
  %   Closure+N for every call(Event, Closure) option, where N is the
  %   number of arguments the parser adds for that event.

  prolog:called_by(sgml_parse(_, Options), Called) :-
      findall(Meta, meta_call_term(_, Meta, Options), Called).

  meta_call_term(call(Event, Closure), Closure+Extra, Options) :-
      pmember(call(Event, Closure), Options),
      call_params(Event, Params),
      functor(Params, _, Extra).

  %   pmember(?X, +List)
  %
  %   Like member/2, but stops at an unbound (tail of the) list
  %   instead of extending it.

  pmember(X, List) :-
      nonvar(List),
      List = [X|_].
  pmember(X, List) :-
      nonvar(List),
      List = [_|Tail],
      pmember(X, Tail).

  %   call_params(?Event, ?Template)
  %
  %   Template shows the arguments added to the closure for Event.

  call_params(begin, begin(tag,attributes,parser)).
  call_params(end,   end(tag,parser)).
  call_params(cdata, cdata(cdata,parser)).
  call_params(pi,    pi(cdata,parser)).
  call_params(decl,  decl(cdata,parser)).
  call_params(error, error(severity,message,parser)).
  call_params(xmlns, xmlns(namespace,url,parser)).
  call_params(urlns, urlns(url,url,parser)).
  795
  796                 /*******************************
  797                 *           SANDBOX            *
  798                 *******************************/
  799
  800:- multifile
  801    sandbox:safe_primitive/1,
  802    sandbox:safe_meta_predicate/1.  803
  %   Sandbox declarations.  load_structure/3 is declared a safe
  %   meta-predicate.  dtd/2 is considered safe only for dialects
  %   known to dtd_alias/2.  The remaining predicates listed here are
  %   declared safe for any arguments.
  %
  %   The final clause is terminated with a full stop; without it the
  %   file ends in a syntax error.

  sandbox:safe_meta_predicate(sgml:load_structure/3).
  sandbox:safe_primitive(sgml:dtd(Dialect, _)) :-
      dtd_alias(Dialect, _).
  sandbox:safe_primitive(sgml:xml_quote_attribute(_,_,_)).
  sandbox:safe_primitive(sgml:xml_quote_cdata(_,_,_)).
  sandbox:safe_primitive(sgml:xml_name(_,_)).
  sandbox:safe_primitive(sgml:xml_basechar(_)).
  sandbox:safe_primitive(sgml:xml_ideographic(_)).
  sandbox:safe_primitive(sgml:xml_combining_char(_)).
  sandbox:safe_primitive(sgml:xml_digit(_)).
  sandbox:safe_primitive(sgml:xml_extender(_)).
  sandbox:safe_primitive(sgml:iri_xml_namespace(_,_,_)).
  sandbox:safe_primitive(sgml:xsd_number_string(_,_)).
  sandbox:safe_primitive(sgml:xsd_time_string(_,_,_)).