View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2012-2023, University of Amsterdam
    7                              VU University Amsterdam
    8                              SWI-Prolog Solutions b.v.
    9    All rights reserved.
   10
   11    Redistribution and use in source and binary forms, with or without
   12    modification, are permitted provided that the following conditions
   13    are met:
   14
   15    1. Redistributions of source code must retain the above copyright
   16       notice, this list of conditions and the following disclaimer.
   17
   18    2. Redistributions in binary form must reproduce the above copyright
   19       notice, this list of conditions and the following disclaimer in
   20       the documentation and/or other materials provided with the
   21       distribution.
   22
   23    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   24    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   25    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   26    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   27    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   28    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   29    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   30    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   31    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   33    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   34    POSSIBILITY OF SUCH DAMAGE.
   35*/
   36
   37:- module(dcg_basics,
   38          [ white//0,                   % <white inside line>
   39            whites//0,                  % <white inside line>*
   40            blank//0,                   % <blank>
   41            blanks//0,                  % <blank>*
   42            nonblank//1,                % <nonblank>
   43            nonblanks//1,               % <nonblank>* --> chars         (long)
   44            blanks_to_nl//0,            % [space,tab,ret]*nl
   45            string//1,                  % <any>* -->chars               (short)
   46            string_without//2,          % Exclude, -->chars             (long)
   47                                        % Characters
   48            alpha_to_lower//1,          % Get lower|upper, return lower
   49                                        % Decimal numbers
   50            digits//1,                  % [0-9]* -->chars
   51            digit//1,                   % [0-9] --> char
   52            integer//1,                 % [+-][0-9]+ --> integer
   53            float//1,                   % [+-]?[0-9]+(.[0-9]*)?(e[+-]?[0-9]+)? --> float
   54            number//1,                  % integer | float
   55                                        % Hexadecimal numbers
   56            xdigits//1,                 % [0-9A-Fa-f]* --> 0-15*
   57            xdigit//1,                  % [0-9A-Fa-f] --> 0-15
   58            xinteger//1,                % [0-9A-Fa-f]+ --> integer
   59
   60            prolog_var_name//1,         % Read a Prolog variable name
   61            csym//1,                    % Read a C symbol
   62
   63            eol//0,                     % End of line
   64            eos//0,                     % Test end of input.
   65            remainder//1,               % -List
   66
   67                                        % generation (TBD)
   68            atom//1                     % generate atom
   69          ]).   70:- use_module(library(lists)).   71:- use_module(library(error)).   72
   73
   74/** <module> Various general DCG utilities
   75
   76This library provides various commonly  used   DCG  primitives acting on
   77list  of  character  *codes*.  Character   classification  is  based  on
   78code_type/2.
   79
   80This module started its life as  library(http/dcg_basics) to support the
   81HTTP protocol. Since then, it was increasingly  used in code that has no
   82relation to HTTP and therefore  this  library   was  moved  to  the core
   83library.
   84
   85@tbd    This is just a starting point. We need a comprehensive set of
   86        generally useful DCG primitives.
   87*/
   88
   89%!  string_without(+EndCodes, -Codes)// is det.
   90%
   91%   Take as many codes from the input  until the next character code
   92%   appears in the list EndCodes.  The   terminating  code itself is
   93%   left on the input.  Typical  use  is   to  read  upto  a defined
   94%   delimiter such as a newline  or   other  reserved character. For
   95%   example:
   96%
   97%       ==
   98%           ...,
   99%           string_without("\n", RestOfLine)
  100%       ==
  101%
  102%   @arg EndCodes is a list of character codes.
  103%   @see string//1.
  104
  105string_without(End, Codes) -->
  106    { string(End),
  107      !,
  108      string_codes(End, EndCodes)
  109    },
  110    list_string_without(EndCodes, Codes).
  111string_without(End, Codes) -->
  112    list_string_without(End, Codes).
  113
  114list_string_without(Not, [C|T]) -->
  115    [C],
  116    { \+ memberchk(C, Not)
  117    },
  118    !,
  119    list_string_without(Not, T).
  120list_string_without(_, []) -->
  121    [].
  122
  123%!  string(-Codes)// is nondet.
  124%
  125%   Take as few as possible tokens from the input, taking one more
  126%   each time on backtracking. This code is normally followed by a
  127%   test for a delimiter.  For example:
  128%
  129%   ==
  130%   upto_colon(Atom) -->
  131%           string(Codes), ":", !,
  132%           { atom_codes(Atom, Codes) }.
  133%   ==
  134%
  135%   @see string_without//2.
  136
  137string([]) -->
  138    [].
  139string([H|T]) -->
  140    [H],
  141    string(T).
  142
  143%!  blanks// is det.
  144%
  145%   Skip zero or more white-space characters.
  146
  147blanks -->
  148    blank,
  149    !,
  150    blanks.
  151blanks -->
  152    [].
  153
  154%!  blank// is semidet.
  155%
  156%   Take next =space= character from input. Space characters include
  157%   newline.
  158%
  159%   @see white//0
  160
  161blank -->
  162    [C],
  163    { nonvar(C),
  164      code_type(C, space)
  165    }.
  166
  167%!  nonblanks(-Codes)// is det.
  168%
  169%   Take all =graph= characters
  170
  171nonblanks([H|T]) -->
  172    [H],
  173    { code_type(H, graph)
  174    },
  175    !,
  176    nonblanks(T).
  177nonblanks([]) -->
  178    [].
  179
  180%!  nonblank(-Code)// is semidet.
  181%
  182%   Code is the next non-blank (=graph=) character.
  183
  184nonblank(H) -->
  185    [H],
  186    { code_type(H, graph)
  187    }.
  188
  189%!  blanks_to_nl// is semidet.
  190%
  191%   Take a sequence of blank//0 codes if blanks are followed by a
  192%   newline or end of the input.
  193
  194blanks_to_nl -->
  195    "\n",
  196    !.
  197blanks_to_nl -->
  198    blank,
  199    !,
  200    blanks_to_nl.
  201blanks_to_nl -->
  202    eos.
  203
  204%!  whites// is det.
  205%
  206%   Skip white space _inside_ a line.
  207%
  208%   @see blanks//0 also skips newlines.
  209
  210whites -->
  211    white,
  212    !,
  213    whites.
  214whites -->
  215    [].
  216
  217%!  white// is semidet.
  218%
  219%   Take next =white= character from input. White characters do
  220%   _not_ include newline.
  221
  222white -->
  223    [C],
  224    { nonvar(C),
  225      code_type(C, white)
  226    }.
  227
  228
  229                 /*******************************
  230                 *       CHARACTER STUFF        *
  231                 *******************************/
  232
  233%!  alpha_to_lower(?C)// is semidet.
  234%
  235%   Read a letter (class  =alpha=)  and   return  it  as a lowercase
  236%   letter. If C is instantiated and the  DCG list is already bound,
  237%   C must be =lower= and matches both a lower and uppercase letter.
  238%   If the output list is unbound, its first element is bound to C.
  239%   For example:
  240%
  241%     ==
  242%     ?- alpha_to_lower(0'a, `AB`, R).
  243%     R = [66].
  244%     ?- alpha_to_lower(C, `AB`, R).
  245%     C = 97, R = [66].
  246%     ?- alpha_to_lower(0'a, L, R).
  247%     L = [97|R].
  248%     ==
  249
  250alpha_to_lower(L) -->
  251    [C],
  252    {   nonvar(C)
  253    ->  code_type(C, alpha),
  254        code_type(C, to_upper(L))
  255    ;   L = C
  256    }.
  257
  258
  259                 /*******************************
  260                 *            NUMBERS           *
  261                 *******************************/
  262
  263%!  digits(?Chars)// is det.
  264%!  digit(?Char)// is det.
  265%!  integer(?Integer)// is det.
  266%
  267%   Number processing. The predicate  digits//1   matches a possibly
  268%   empty set of digits,  digit//1  processes   a  single  digit and
  269%   integer processes an  optional  sign   followed  by  a non-empty
  270%   sequence of digits into an integer.
  271
  272digits([H|T]) -->
  273    digit(H),
  274    !,
  275    digits(T).
  276digits([]) -->
  277    [].
  278
  279digit(C) -->
  280    [C],
  281    { code_type(C, digit)
  282    }.
  283
  284integer(I, Head, Tail) :-
  285    nonvar(I),
  286    !,
  287    format(codes(Head, Tail), '~d', [I]).
  288integer(I) -->
  289    int_codes(Codes),
  290    { number_codes(I, Codes)
  291    }.
  292
  293int_codes([C,D0|D]) -->
  294    sign(C),
  295    !,
  296    digit(D0),
  297    digits(D).
  298int_codes([D0|D]) -->
  299    digit(D0),
  300    digits(D).
  301
  302
  303%!  float(?Float)// is det.
  304%
  305%   Process a floating  point  number.   The  actual  conversion  is
  306%   controlled by number_codes/2.
  307
  308float(F, Head, Tail) :-
  309    float(F),
  310    !,
  311    with_output_to(codes(Head, Tail), write(F)).
  312float(F) -->
  313    number(F),
  314    { float(F) }.
  315
  316%!  number(+Number)// is det.
  317%!  number(-Number)// is semidet.
  318%
  319%   Generate extract a number. Handles   both  integers and floating
  320%   point numbers.
  321
  322number(N, Head, Tail) :-
  323    number(N),
  324    !,
  325    format(codes(Head, Tail), '~w', N).
  326number(N) -->
  327    { var(N)
  328    },
  329    !,
  330    int_codes(I),
  331    (   dot,
  332        digit(DF0),
  333        digits(DF)
  334    ->  {F = [0'., DF0|DF]}
  335    ;   {F = []}
  336    ),
  337    (   exp
  338    ->  int_codes(DI),
  339        {E=[0'e|DI]}
  340    ;   {E = []}
  341    ),
  342    { append([I, F, E], Codes),
  343      number_codes(N, Codes)
  344    }.
  345number(N) -->
  346    { type_error(number, N) }.
  347
  348sign(0'-) --> "-".
  349sign(0'+) --> "+".
  350
  351dot --> ".".
  352
  353exp --> "e".
  354exp --> "E".
  355
  356                 /*******************************
  357                 *          HEX NUMBERS         *
  358                 *******************************/
  359
  360%!  xinteger(+Integer)// is det.
  361%!  xinteger(-Integer)// is semidet.
  362%
  363%   Generate or extract an integer from   a  sequence of hexadecimal
  364%   digits. Hexadecimal characters include both  uppercase (A-F) and
  365%   lowercase (a-f) letters. The value may   be  preceded by  a sign
  366%   (+/-)
  367
  368xinteger(Val, Head, Tail) :-
  369    integer(Val),
  370    !,
  371    format(codes(Head, Tail), '~16r', [Val]).
  372xinteger(Val) -->
  373    sign(C),
  374    !,
  375    xdigit(D0),
  376    xdigits(D),
  377    { mkval([D0|D], 16, Val0),
  378      (   C == 0'-
  379      ->  Val is -Val0
  380      ;   Val = Val0
  381      )
  382    }.
  383xinteger(Val) -->
  384    xdigit(D0),
  385    xdigits(D),
  386    { mkval([D0|D], 16, Val)
  387    }.
  388
  389%!  xdigit(-Weight)// is semidet.
  390%
  391%   True if the next code is a  hexdecimal digit with Weight. Weight
  392%   is  between  0  and  15.  Hexadecimal  characters  include  both
  393%   uppercase (A-F) and lowercase (a-f) letters.
  394
  395xdigit(D) -->
  396    [C],
  397    { code_type(C, xdigit(D))
  398    }.
  399
  400%!  xdigits(-WeightList)// is det.
  401%
  402%   List of weights of a sequence   of hexadecimal codes. WeightList
  403%   may be empty. Hexadecimal  characters   include  both  uppercase
  404%   (A-F) and lowercase (a-f) letters.
  405
  406xdigits([D0|D]) -->
  407    xdigit(D0),
  408    !,
  409    xdigits(D).
  410xdigits([]) -->
  411    [].
  412
  413mkval([W0|Weights], Base, Val) :-
  414    mkval(Weights, Base, W0, Val).
  415
  416mkval([], _, W, W).
  417mkval([H|T], Base, W0, W) :-
  418    W1 is W0*Base+H,
  419    mkval(T, Base, W1, W).
  420
  421
  422                 /*******************************
  423                 *         END-OF-STRING        *
  424                 *******************************/
  425
  426%!  eol//
  427%
  428%   Matches end-of-line. Matching \r\n, \n or end of input (eos//0).
  429
  430eol --> "\n", !.
  431eol --> "\r\n", !.
  432eol --> eos.
  433
  434%!  eos//
  435%
  436%   Matches  end-of-input.  The  implementation    behaves   as  the
  437%   following portable implementation:
  438%
  439%     ==
  440%     eos --> call(eos_).
  441%     eos_([], []).
  442%     ==
  443%
  444%   @tbd    This is a difficult concept and violates the _context free_
  445%           property of DCGs.  Explain the exact problems.
  446
  447eos([], []).
  448
  449%!  remainder(-List)//
  450%
  451%   Unify List with the remainder of the input.
  452
  453remainder(List, List, []).
  454
  455
  456                 /*******************************
  457                 *         PROLOG SYNTAX                *
  458                 *******************************/
  459
  460%!  prolog_var_name(-Name:atom)// is semidet.
  461%
  462%   Matches a Prolog variable name. Primarily  intended to deal with
  463%   quasi quotations that embed Prolog variables.
  464
  465prolog_var_name(Name) -->
  466    [C0], { code_type(C0, prolog_var_start) },
  467    !,
  468    prolog_id_cont(CL),
  469    { atom_codes(Name, [C0|CL]) }.
  470
  471prolog_id_cont([H|T]) -->
  472    [H], { code_type(H, prolog_identifier_continue) },
  473    !,
  474    prolog_id_cont(T).
  475prolog_id_cont([]) --> "".
  476
  477
  478                 /*******************************
  479                 *          IDENTIFIERS         *
  480                 *******************************/
  481
  482%!  csym(?Symbol:atom)// is semidet.
  483%
  484%   Recognise a C symbol according to   the  `csymf` and `csym` code
  485%   type classification provided by the C library.
  486
  487csym(Name, Head, Tail) :-
  488    nonvar(Name),
  489    format(codes(Head, Tail), '~w', [Name]).
  490csym(Name) -->
  491    [F], {code_type(F, csymf)},
  492    csyms(Rest),
  493    { atom_codes(Name, [F|Rest]) }.
  494
  495csyms([H|T]) -->
  496    [H], {code_type(H, csym)},
  497    !,
  498    csyms(T).
  499csyms([]) -->
  500    "".
  501
  502
  503                 /*******************************
  504                 *           GENERATION         *
  505                 *******************************/
  506
  507%!  atom(++Atom)// is det.
  508%
  509%   Generate codes of Atom.  Current implementation uses write/1,
  510%   dealing with any Prolog term.  Atom must be ground though.
  511
  512atom(Atom, Head, Tail) :-
  513    must_be(ground, Atom),
  514    format(codes(Head, Tail), '~w', [Atom])