34
35:- module(c99_tokens,
36 [ c99_tokens//1, 37 c99_token//1 38 ]). 39:- use_module(library(dcg/basics),
40 [ blanks//0, string//1, string_without//2, eos//0]).
70c99_tokens(List) -->
71 c99_token(H), !,
72 ( {skip_token(H)}
73 -> c99_tokens(List)
74 ; {List = [H|T]},
75 c99_tokens(T)
76 ).
77c99_tokens([]) -->
78 blanks.
79
80skip_token(pp(_)).
81
82c99_token(Token) -->
83 blanks,
84 token(Token).
90token(T) --> keyword(T), \+ identifier_cont_char(_).
91token(T) --> identifier(T).
92token(T) --> constant(T).
93token(T) --> string_literal(T).
94token(T) --> pp_line(T).
95token(T) --> punctuator(T).
96token(T) --> header_name(T).
97token(T) --> pp_number(T).
98
99keyword(auto) --> "auto".
100keyword(break) --> "break".
101keyword(case) --> "case".
102keyword(char) --> "char".
103keyword(const) --> "const".
104keyword(continue) --> "continue".
105keyword(default) --> "default".
106keyword(do) --> "do".
107keyword(double) --> "double".
108keyword(else) --> "else".
109keyword(enum) --> "enum".
110keyword(extern) --> "extern".
111keyword(float) --> "float".
112keyword(for) --> "for".
113keyword(goto) --> "goto".
114keyword(if) --> "if".
115keyword(inline) --> "inline".
116keyword(int) --> "int".
117keyword(size_t) --> "size_t". 118keyword(long) --> "long".
119keyword(register) --> "register".
120keyword(restrict) --> "restrict".
121keyword(return) --> "return".
122keyword(short) --> "short".
123keyword(signed) --> "signed".
124keyword(sizeof) --> "sizeof".
125keyword(static) --> "static".
126keyword(struct) --> "struct".
127keyword(switch) --> "switch".
128keyword(typedef) --> "typedef".
129keyword(union) --> "union".
130keyword(unsigned) --> "unsigned".
131keyword(void) --> "void".
132keyword(volatile) --> "volatile".
133keyword(while) --> "while".
134keyword('_Bool') --> "_Bool".
135keyword('_Complex') --> "_Complex".
136keyword('_Imaginary') --> "_Imaginary".
137keyword('_Float128') --> "_Float128". 138keyword('__attribute__') --> "__attribute__". 139keyword('__restrict__') --> "__restrict__".
140keyword('__restrict__') --> "__restrict".
141keyword('__extension__') --> "__extension__".
142keyword(inline) --> "__inline__".
143keyword(inline) --> "__inline".
144keyword('__builtin_va_list') --> "__builtin_va_list".
145keyword('__gnuc_va_list') --> "__gnuc_va_list".
146keyword('__asm__') --> "__asm__".
147keyword('__asm__') --> "__asm".
148keyword('__alignof__') --> "__alignof__".
149keyword('_Nonnull') --> "_Nonnull".
150keyword('_Nullable') --> "_Nullable".
151
152identifier(Id) --> identifier_nondigit(H), identifier_cont(T),
153 {atom_chars(I, [H|T]), Id = id(I)}.
154
155identifier_cont([H|T]) -->
156 identifier_cont_char(H), !,
157 identifier_cont(T).
158identifier_cont([]) --> [].
159
160identifier_cont_char(H) -->
161 identifier_nondigit(H), !.
162identifier_cont_char(H) -->
163 digit(H).
164
165identifier_nondigit(I) --> nondigit(I).
166identifier_nondigit(I) --> universal_character_name(I).
168
169term_expansion((nondigit(_) --> "_"), Clauses) :-
170 findall((nondigit(C) --> S), nondigit_code(C, S), Clauses).
171
172nondigit_code(Char, [C]) :-
173 ( C = 0'_ ; between(0'a,0'z,C) ; between(0'A,0'Z,C) ),
174 char_code(Char, C).
175
176nondigit(_) --> "_".
177
178digit('0') --> "0".
179digit('1') --> "1".
180digit('2') --> "2".
181digit('3') --> "3".
182digit('4') --> "4".
183digit('5') --> "5".
184digit('6') --> "6".
185digit('7') --> "7".
186digit('8') --> "8".
187digit('9') --> "9".
188
189universal_character_name(C) -->
190 "\\u", hex_quad(V),
191 { char_code(C, V) }.
192universal_character_name(C) -->
193 "\\U", hex_quad(V), hex_quad(W),
194 { Code is (V<<32) + W, char_code(C, Code) }.
195
196hex_quad(V) -->
197 hexadecimal_digit(X1),
198 hexadecimal_digit(X2),
199 hexadecimal_digit(X3),
200 hexadecimal_digit(X4),
201 { V is (X1<<12) + (X2<<8) + (X3<<4) + X4 }.
202
203
204constant(C) --> floating_constant(C).
205constant(C) --> integer_constant(C).
206constant(C) --> enumeration_constant(C).
207constant(C) --> character_constant(C).
208
209integer_constant(C) -->
210 decimal_constant(D), opt_integer_suffix(S), {mkic(S,D,C)}.
211integer_constant(C) -->
212 hexadecimal_constant(D), opt_integer_suffix(S), {mkic(S,D,C)}.
213integer_constant(C) -->
214 octal_constant(D), opt_integer_suffix(S), {mkic(S,D,C)}.
215
216mkic(Suffix, Value, Token) :- Token =.. [Suffix,Value].
217
218decimal_constant(D) -->
219 nonzero_digit(D0), digits(DT), { number_chars(D, [D0|DT]) }.
220
221octal_constant(D) -->
222 "0", octal_digits(L), { L == [] -> D = 0 ; number_chars(D, ['0',o|L]) }.
223
224digits([H|T]) --> digit(H), !, digits(T).
225digits([]) --> [].
226
227octal_digits([H|T]) --> octal_digit(H), !, octal_digits(T).
228octal_digits([]) --> [].
229
230hexadecimal_constant(D) -->
231 hexadecimal_prefix,
232 hexadecimal_digits(0, D).
233
234hexadecimal_prefix --> "0x".
235hexadecimal_prefix --> "0X".
236
237hexadecimal_digits(V0, V) -->
238 hexadecimal_digit(D),
239 !,
240 { V1 is V0*16+D },
241 hexadecimal_digits(V1, V).
242hexadecimal_digits(V, V) -->
243 [].
244
245nonzero_digit('1') --> "1".
246nonzero_digit('2') --> "2".
247nonzero_digit('3') --> "3".
248nonzero_digit('4') --> "4".
249nonzero_digit('5') --> "5".
250nonzero_digit('6') --> "6".
251nonzero_digit('7') --> "7".
252nonzero_digit('8') --> "8".
253nonzero_digit('9') --> "9".
254
255octal_digit('0') --> "0".
256octal_digit('1') --> "1".
257octal_digit('2') --> "2".
258octal_digit('3') --> "3".
259octal_digit('4') --> "4".
260octal_digit('5') --> "5".
261octal_digit('6') --> "6".
262octal_digit('7') --> "7".
263
264hexadecimal_digit(0) --> "0".
265hexadecimal_digit(1) --> "1".
266hexadecimal_digit(2) --> "2".
267hexadecimal_digit(3) --> "3".
268hexadecimal_digit(4) --> "4".
269hexadecimal_digit(5) --> "5".
270hexadecimal_digit(6) --> "6".
271hexadecimal_digit(7) --> "7".
272hexadecimal_digit(8) --> "8".
273hexadecimal_digit(9) --> "9".
274hexadecimal_digit(10) --> "a".
275hexadecimal_digit(11) --> "b".
276hexadecimal_digit(12) --> "c".
277hexadecimal_digit(13) --> "d".
278hexadecimal_digit(14) --> "e".
279hexadecimal_digit(15) --> "f".
280hexadecimal_digit(10) --> "A".
281hexadecimal_digit(11) --> "B".
282hexadecimal_digit(12) --> "C".
283hexadecimal_digit(13) --> "D".
284hexadecimal_digit(14) --> "E".
285hexadecimal_digit(15) --> "F".
291opt_integer_suffix(S) --> unsigned_suffix, long_suffix(L), !, {mkuis(L, S)}.
292opt_integer_suffix(S) --> long_suffix(L), unsigned_suffix, !, {mkuis(L, S)}.
293opt_integer_suffix(u) --> unsigned_suffix, !.
294opt_integer_suffix(L) --> long_suffix(L), !.
295opt_integer_suffix(i) --> [].
296
297mkuis(l, ul).
298mkuis(ll, ull).
299
300unsigned_suffix --> "u".
301unsigned_suffix --> "U".
302
303long_suffix(ll) --> "ll".
304long_suffix(ll) --> "LL".
305long_suffix(l) --> "l".
306long_suffix(l) --> "L".
307
308floating_constant(F) --> decimal_floating_constant(F).
309floating_constant(F) --> hexadecimal_floating_constant(F).
310
311decimal_floating_constant(F) -->
312 fractional_constant(FC),
313 opt_exponent_part(E, _),
314 floating_suffix(FS, _),
315 { mkf(FS, FC, E, F) }.
316decimal_floating_constant(F) -->
317 digit_sequence_value(FC),
318 opt_exponent_part(E, Expl),
319 floating_suffix(FS, Expl),
320 { Expl == true,
321 mkf(FS, FC, E, F)
322 }.
323
324hexadecimal_floating_constant(F) -->
325 hexadecimal_prefix,
326 ( "."
327 -> hexadecimal_fractional_part(FC)
328 ; hexadecimal_digits(0, IC),
329 ".",
330 hexadecimal_fractional_part(FCP),
331 { FC is IC+FCP }
332 ),
333 binary_exponent_part(E),
334 floating_suffix(FS, _),
335 { mkf(FS, FC, E, F) }.
336
337mkf(float, FC, E, float(V)) :- V is FC*E.
338mkf(double, FC, E, double(V)) :- V is FC*E.
339
340fractional_constant(FC) -->
341 digit_sequence(DC1), ".",
342 digits(DC2),
343 { DC2 == []
344 -> number_chars(FC, DC1)
345 ; append(DC1, [.|DC2], S),
346 number_chars(FC, S)
347 }.
348
349digit_sequence([D0|DL]) -->
350 digit(D0), digits(DL).
351
352digit_sequence_value(Value) -->
353 digit_sequence(S),
354 { number_chars(Value, S) }.
355
356hexadecimal_fractional_part(V) -->
357 hexadecimal_fractional_part(10, 0, V).
358
359hexadecimal_fractional_part(I, V0, V) -->
360 hexadecimal_digit(D), !,
361 { V1 is V0+D/I,
362 I2 is I/10
363 },
364 hexadecimal_fractional_part(I2, V1, V).
365hexadecimal_fractional_part(_, V, V) --> [].
366
367opt_exponent_part(M, true) -->
368 exp_e, !,
369 sign(S),
370 digit_sequence_value(V),
371 { M is 10**(S*V) }.
372opt_exponent_part(1, _) -->
373 [].
374
375binary_exponent_part(M) -->
376 bin_e,
377 sign(S),
378 digit_sequence_value(V),
379 { M is 10**(S*V) }.
380
381exp_e --> "e".
382exp_e --> "E".
383
384bin_e --> "p".
385bin_e --> "P".
386
387sign(-1) --> "-", !.
388sign(1) --> "+", !.
389sign(1) --> "".
390
391floating_suffix(float, true) --> "f", !.
392floating_suffix(float, true) --> "F", !.
393floating_suffix(double, _) --> "l", !.
394floating_suffix(double, _) --> "L", !.
395floating_suffix(double, _) --> "".
399enumeration_constant(enum_value(ID)) -->
400 identifier(ID).
401
402character_constant(C) -->
403 "'", c_char_sequence(V), "'",
404 { C = char(V) }.
405character_constant(C) -->
406 "L'", c_char_sequence(V), "'",
407 { C = wchar(V) }.
408
409c_char_sequence([H|T]) -->
410 c_char(H),
411 c_char_sequence_z(T).
412
413c_char_sequence_z([H|T]) --> c_char(H), !, c_char_sequence_z(T).
414c_char_sequence_z([]) --> "".
415
416c_char(C) --> [C], { \+ no_c_char(C) }, !.
417c_char(C) --> escape_sequence(C).
418
419no_c_char(0'\').
420no_c_char(0'\\).
421no_c_char(0'\n).
422
423escape_sequence(C) --> simple_escape_sequence(C).
424escape_sequence(C) --> octal_escape_sequence(C).
425escape_sequence(C) --> hexadecimal_escape_sequence(C).
426escape_sequence(C) --> universal_character_name(C).
427
428simple_escape_sequence(0'\') --> "\\'".
429simple_escape_sequence(0'\") --> "\\\"".
430simple_escape_sequence(0'?) --> "\\?".
431simple_escape_sequence(0'\a) --> "\\a".
432simple_escape_sequence(0'\b) --> "\\b".
433simple_escape_sequence(0'\f) --> "\\f".
434simple_escape_sequence(0'\n) --> "\\n".
435simple_escape_sequence(0'\r) --> "\\r".
436simple_escape_sequence(0'\t) --> "\\t".
437simple_escape_sequence(0'\v) --> "\\v".
438
439octal_escape_sequence(C) -->
440 "\\",
441 octal_digit(D0),
442 ( octal_digit(D1)
443 -> ( octal_digit(D2)
444 -> {number_chars(C, ['0',o,D0,D1,D2])}
445 ; {number_chars(C, ['0',o,D0,D1])}
446 )
447 ; {number_chars(C, ['0',o,D0])}
448 ).
449
450hexadecimal_escape_sequence(C) -->
451 "\\x",
452 hexadecimal_digits(0, Code),
453 { char_code(C, Code) }.
454
455string_literal(S) -->
456 "\"", s_char_sequence(Chars), "\"",
457 sstring_literal_cont(More),
458 { mkstring(Chars, More, Str),
459 S = str(Str)
460 }.
461string_literal(S) -->
462 "L\"", s_char_sequence(Chars), "\"",
463 wstring_literal_cont(More),
464 { mkstring(Chars, More, Str),
465 S = wstr(Str)
466 }.
467
468s_char_sequence([H|T]) --> s_char(H), !, s_char_sequence(T).
469s_char_sequence([]) --> "".
470
471s_char(C) --> [C], { \+ no_s_char(C) }, !.
472s_char(C) --> escape_sequence(C).
473
474no_s_char(0'\").
475no_s_char(0'\\).
476no_s_char(0'\n).
477
478sstring_literal_cont([H|T]) -->
479 blanks,
480 "\"", s_char_sequence(Chars), "\"", !,
481 { string_codes(H, Chars) },
482 sstring_literal_cont(T).
483sstring_literal_cont([]) --> "".
484
485wstring_literal_cont([H|T]) -->
486 blanks,
487 "L\"", s_char_sequence(Chars), "\"", !,
488 { string_codes(H, Chars) },
489 wstring_literal_cont(T).
490wstring_literal_cont([]) --> "".
491
492mkstring(Chars, [], Str) :- !,
493 string_codes(Str, Chars).
494mkstring(Chars, More, Str) :-
495 string_codes(Str0, Chars),
496 atomics_to_string([Str0|More], Str).
502punctuator('[') --> "[".
503punctuator(']') --> "]".
504punctuator('(') --> "(".
505punctuator(')') --> ")".
506punctuator('{') --> "{".
507punctuator('}') --> "}".
508punctuator('...') --> "...".
509punctuator('.') --> ".".
510punctuator('->') --> "->".
511punctuator('++') --> "++".
512punctuator('--') --> "--".
513punctuator('&=') --> "&=".
514punctuator('&&') --> "&&".
515punctuator('&') --> "&".
516punctuator('*=') --> "*=".
517punctuator('*') --> "*".
518punctuator('+=') --> "+=".
519punctuator('+') --> "+".
520punctuator('-=') --> "-=".
521punctuator('-') --> "-".
522punctuator('~') --> "~".
523punctuator('!=') --> "!=".
524punctuator('!') --> "!".
525punctuator('/=') --> "/=".
526punctuator('/') --> "/".
527punctuator('%=') --> "%=".
528punctuator('%>') --> "%>".
529punctuator('%:%:') --> "%:%:".
530punctuator('%:') --> "%:".
531punctuator('%') --> "%".
532punctuator('<<=') --> "<<=".
533punctuator('>>=') --> ">>=".
534punctuator('<<') --> "<<".
535punctuator('>>') --> ">>".
536punctuator('<:') --> "<:".
537punctuator('<=') --> "<=".
538punctuator('<%') --> "<%".
539punctuator('<') --> "<".
540punctuator(':>') --> ":>".
541punctuator('>=') --> ">=".
542punctuator('>') --> ">".
543punctuator('?') --> "?".
544punctuator(':') --> ":".
545punctuator(';') --> ";".
546punctuator('==') --> "==".
547punctuator('=') --> "=".
548punctuator(',') --> ",".
549punctuator('##') --> "##".
550punctuator('#') --> "#".
551punctuator('^=') --> "^=".
552punctuator('^') --> "^".
553punctuator('||') --> "||".
554punctuator('|=') --> "|=".
555punctuator('|') --> "|".
556
557check_punctuators :- 558 findall(P, punctuator(P,_,_), L),
559 ( append(_, [P|T], L),
560 member(P2, T),
561 sub_atom(P2, 0, _, _, P),
562 format('~p should be before ~p~n', [P2, P]),
563 fail
564 ; true
565 ).
566
567:- check_punctuators. 568
(Header) -->
570 "<", string_without(">\n", Codes), ">", !,
571 { atom_codes(Name, Codes),
572 Header = header(ab, Name)
573 }.
574header_name(Header) -->
575 "\"", string_without("\"\n", Codes), "\"", !,
576 { atom_codes(Name, Codes),
577 Header = header(dq, Name)
578 }.
579
580pp_number(PP) -->
581 digits(D1a),
582 ( ".",
583 digits(D2a)
584 -> {append(D1a, [.|D2a], D1)}
585 ; {D1 = D1a}
586 ),
587 ( identifier_nondigit(NDa)
588 -> {D2 = [NDa]}
589 ; {D2 = []}
590 ),
591 pp_e(D3),
592 ( "."
593 -> {D4 = [.]}
594 ; {D4 = []}
595 ),
596 { append([D1,D2,D3,D4], D),
597 string_chars(S, D),
598 PP = pp(S)
599 }.
600
601pp_e([C1|C2]) -->
602 pp_ee(C1),
603 pp_sign(C2).
604
605pp_ee(e) --> "e".
606pp_ee('E') --> "e".
607pp_ee(p) --> "p".
608pp_ee('P') --> "P".
609
610pp_sign('-') --> "-".
611pp_sign('+') --> "+".
612
613pp_line(pp(Line)) -->
614 "#", string(Codes), eol, !,
615 { string_codes(Line, [0'#|Codes]) }.
616
617eol --> "\n", !.
618eol --> eos