View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (C): 2008-2016, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(pure_input,
   37          [ phrase_from_file/2,         % :Grammar, +File
   38            phrase_from_file/3,         % :Grammar, +File, +Options
   39            phrase_from_stream/2,       % :Grammar, +Stream
   40            stream_to_lazy_list/2,      % :Stream -List
   41
   42            syntax_error//1,            % +ErrorTerm
   43                                        % Low level interface
   44            lazy_list_location//1,      % -Location
   45            lazy_list_character_count//1 % -CharacterCount
   46          ]).   47:- autoload(library(error),[type_error/2,permission_error/3]).   48
   49
   50:- set_prolog_flag(generate_debug_info, false).

Pure Input from files and streams

This module is part of pio.pl, dealing with pure input: processing input streams from the outside world using pure predicates, notably grammar rules (DCG). Using pure predicates makes non-deterministic processing of input much simpler.

Pure input uses attributed variables to read input from the external source into a list on demand. The overhead of lazy reading is more than compensated for by using block reads based on read_pending_codes/3.

Ulrich Neumerkel came up with the idea to use coroutining for creating a lazy list. His implementation repositioned the file to deal with re-reading that can be necessary on backtracking. The current implementation uses destructive assignment together with more low-level attribute handling to realise pure input on any (buffered) stream.

To be done
- Provide support for alternative input readers, e.g. reading terms, tokens, etc. */
   73:- predicate_options(phrase_from_file/3, 3,
   74                     [ pass_to(system:open/4, 4)
   75                     ]).
 phrase_from_file(:Grammar, +File) is nondet
Process the content of File using the DCG rule Grammar. The space usage of this mechanism depends on the length of the not committed part of Grammar. Committed parts of the temporary list are reclaimed by the garbage collector, while the list is extended on demand due to unification of the attributed tail variable. Below is an example that counts the number of times a string appears in a file. The library dcg/basics provides string//1 matching an arbitrary string and remainder//1 which matches the remainder of the input without parsing.
:- use_module(library(dcg/basics)).

file_contains(File, Pattern) :-
        phrase_from_file(match(Pattern), File).

match(Pattern) -->
        string(_),
        string(Pattern),
        remainder(_).

match_count(File, Pattern, Count) :-
        aggregate_all(count, file_contains(File, Pattern), Count).

This can be called as (note that the pattern must be a string (code list)):

?- match_count('pure_input.pl', `file`, Count).
  111:- meta_predicate
  112    phrase_from_file(//, +),
  113    phrase_from_file(//, +, +),
  114    phrase_from_stream(//, +).  115
  116phrase_from_file(Grammar, File) :-
  117    phrase_from_file(Grammar, File, []).
 phrase_from_file(:Grammar, +File, +Options) is nondet
As phrase_from_file/2, providing additional Options. Options are passed to open/4.
  124phrase_from_file(Grammar, File, Options) :-
  125    setup_call_cleanup(
  126        open(File, read, In, Options),
  127        phrase_from_stream(Grammar, In),
  128        close(In)).
 phrase_from_stream(:Grammar, +Stream)
Run Grammer against the character codes on Stream. Stream must be buffered.
  135phrase_from_stream(Grammar, In) :-
  136    stream_to_lazy_list(In, List),
  137    phrase(Grammar, List).
 syntax_error(+Error)//
Throw the syntax error Error at the current location of the input. This predicate is designed to be called from the handler of phrase_from_file/3.
throws
- error(syntax_error(Error), Location)
  147syntax_error(Error) -->
  148    lazy_list_location(Location),
  149    { throw(error(syntax_error(Error), Location))
  150    }.
 lazy_list_location(-Location)// is det
Determine current (error) location in a lazy list. True when Location is an (error) location term that represents the current location in the DCG list.
Arguments:
Location- is a term file(Name, Line, LinePos, CharNo) or stream(Stream, Line, LinePos, CharNo) if no file is associated to the stream RestLazyList. Finally, if the Lazy list is fully materialized (ends in []), Location is unified with end_of_file-CharCount.
See also
- lazy_list_character_count//1 only provides the character count.
  166lazy_list_location(Location, Here, Here) :-
  167    lazy_list_location(Here, Location).
  168
  169lazy_list_location(Here, Location) :-
  170    '$skip_list'(Skipped, Here, Tail),
  171    (   attvar(Tail)
  172    ->  get_attr(Tail, pure_input, State),
  173        State = lazy_input(Stream, PrevPos, Pos, _),
  174        Details = [Line, LinePos, CharNo],
  175        (   stream_property(Stream, file_name(File))
  176        ->  PosParts = [file, File|Details]
  177        ;   PosParts = [stream, Stream|Details]
  178        ),
  179        Location =.. PosParts,
  180        (   PrevPos == (-)                  % nothing is read.
  181        ->  Line = 1, LinePos = 0, CharNo = 0
  182        ;   stream_position_data(char_count, Pos, EndRecordCharNo),
  183            CharNo is EndRecordCharNo - Skipped,
  184            set_stream_position(Stream, PrevPos),
  185            stream_position_data(char_count, PrevPos, StartRecordCharNo),
  186            Skip is CharNo-StartRecordCharNo,
  187            forall(between(1, Skip, _), get_code(Stream, _)),
  188            stream_property(Stream, position(ErrorPos)),
  189            stream_position_data(line_count, ErrorPos, Line),
  190            stream_position_data(line_position, ErrorPos, LinePos)
  191        )
  192    ;   Tail == []
  193    ->  Location = end_of_file-Skipped
  194    ;   type_error(lazy_list, Here)
  195    ).
 lazy_list_character_count(-CharCount)//
True when CharCount is the current character count in the Lazy list. The character count is computed by finding the distance to the next frozen tail of the lazy list. CharCount is one of:
See also
- lazy_list_location//1 provides full details of the location for error reporting.
  210lazy_list_character_count(Location, Here, Here) :-
  211    lazy_list_character_count(Here, Location).
  212
  213lazy_list_character_count(Here, CharNo) :-
  214    '$skip_list'(Skipped, Here, Tail),
  215    (   attvar(Tail)
  216    ->  get_attr(Tail, pure_input, State),
  217        arg(3, State, Pos),
  218        stream_position_data(char_count, Pos, EndRecordCharNo),
  219        CharNo is EndRecordCharNo - Skipped
  220    ;   Tail == []
  221    ->  CharNo = end_of_file-Skipped
  222    ;   type_error(lazy_list, Here)
  223    ).
 stream_to_lazy_list(+Stream, -List) is det
Create a lazy list representing the character codes in Stream. List is a partial list ending in an attributed variable. Unifying this variable reads the next block of data. The block is stored with the attribute value such that there is no need to re-read it.
Compatibility
- Unlike the previous version of this predicate this version does not require a repositionable stream. It does require a buffer size of at least the maximum number of bytes of a multi-byte sequence (6).
  239stream_to_lazy_list(Stream, List) :-
  240    (   stream_property(Stream, buffer(false))
  241    ->  permission_error(create, lazy_list, Stream)
  242    ;   true
  243    ),
  244    stream_to_lazy_list(Stream, -, List).
  245
  246stream_to_lazy_list(Stream, PrevPos, List) :-
  247    stream_property(Stream, position(Pos)),
  248    put_attr(List, pure_input, lazy_input(Stream, PrevPos, Pos, _)).
  249
  250attr_unify_hook(State, Value) :-
  251    '$notrace'(attr_unify_hook_ndebug(State, Value)).
  252
  253attr_unify_hook_ndebug(State, Value) :-
  254    State = lazy_input(Stream, _PrevPos, Pos, Read),
  255    (   var(Read)
  256    ->  fill_buffer(Stream),
  257        read_pending_codes(Stream, NewList, Tail),
  258        (   Tail == []
  259        ->  nb_setarg(4, State, []),
  260            Value = []
  261        ;   stream_to_lazy_list(Stream, Pos, Tail),
  262            nb_linkarg(4, State, NewList),
  263            Value = NewList
  264        )
  265    ;   Value = Read
  266    )