34
35:- module(c99_tokens,
36 [ c99_tokens//1, 37 c99_token//1 38 ]). 39:- use_module(library(dcg/basics),
40 [ blanks//0, string//1, string_without//2, eos//0]).
70c99_tokens(List) -->
71 c99_token(H), !,
72 ( {skip_token(H)}
73 -> c99_tokens(List)
74 ; {List = [H|T]},
75 c99_tokens(T)
76 ).
77c99_tokens([]) -->
78 blanks.
79
80skip_token(pp(_)).
81
82c99_token(Token) -->
83 blanks,
84 token(Token).
90token(T) --> keyword(T), \+ identifier_cont_char(_).
91token(T) --> identifier(T).
92token(T) --> constant(T).
93token(T) --> string_literal(T).
94token(T) --> pp_line(T).
95token(T) --> punctuator(T).
96token(T) --> header_name(T).
97token(T) --> pp_number(T).
98
99keyword(auto) --> "auto".
100keyword(break) --> "break".
101keyword(case) --> "case".
102keyword(char) --> "char".
103keyword(const) --> "const".
104keyword(continue) --> "continue".
105keyword(default) --> "default".
106keyword(do) --> "do".
107keyword(double) --> "double".
108keyword(else) --> "else".
109keyword(enum) --> "enum".
110keyword(extern) --> "extern".
111keyword(float) --> "float".
112keyword(for) --> "for".
113keyword(goto) --> "goto".
114keyword(if) --> "if".
115keyword(inline) --> "inline".
116keyword(int) --> "int".
117keyword(size_t) --> "size_t". 118keyword(long) --> "long".
119keyword(register) --> "register".
120keyword(restrict) --> "restrict".
121keyword(return) --> "return".
122keyword(short) --> "short".
123keyword(signed) --> "signed".
124keyword(sizeof) --> "sizeof".
125keyword(static) --> "static".
126keyword(struct) --> "struct".
127keyword(switch) --> "switch".
128keyword(typedef) --> "typedef".
129keyword(union) --> "union".
130keyword(unsigned) --> "unsigned".
131keyword(void) --> "void".
132keyword(volatile) --> "volatile".
133keyword(while) --> "while".
134keyword('_Bool') --> "_Bool".
135keyword('_Complex') --> "_Complex".
136keyword('_Imaginary') --> "_Imaginary".
137keyword('__signed__') --> "__signed__".
138keyword('__int128_t') --> "__int128_t".
139keyword('__uint128_t') --> "__uint128_t".
140keyword('_Float16') --> "_Float16". 141keyword('_Float32') --> "_Float32". 142keyword('_Float32x') --> "_Float32x". 143keyword('_Float64') --> "_Float64". 144keyword('_Float64x') --> "_Float64x". 145keyword('_Float80') --> "_Float80". 146keyword('_Float128') --> "_Float128". 147keyword('__attribute__') --> "__attribute__". 148keyword('__restrict__') --> "__restrict__".
149keyword('__restrict__') --> "__restrict".
150keyword('__extension__') --> "__extension__".
151keyword(inline) --> "__inline__".
152keyword(inline) --> "__inline".
153keyword('__builtin_va_list') --> "__builtin_va_list".
154keyword('__gnuc_va_list') --> "__gnuc_va_list".
155keyword('__asm__') --> "__asm__".
156keyword('__asm__') --> "__asm".
157keyword('__alignof__') --> "__alignof__".
158keyword('_Nonnull') --> "_Nonnull".
159keyword('_Nullable') --> "_Nullable".
160
161identifier(Id) --> identifier_nondigit(H), identifier_cont(T),
162 {atom_chars(I, [H|T]), Id = id(I)}.
163
164identifier_cont([H|T]) -->
165 identifier_cont_char(H), !,
166 identifier_cont(T).
167identifier_cont([]) --> [].
168
169identifier_cont_char(H) -->
170 identifier_nondigit(H), !.
171identifier_cont_char(H) -->
172 digit(H).
173
174identifier_nondigit(I) --> nondigit(I).
175identifier_nondigit(I) --> universal_character_name(I).
177
178term_expansion((nondigit(_) --> "_"), Clauses) :-
179 findall((nondigit(C) --> S), nondigit_code(C, S), Clauses).
180
181nondigit_code(Char, [C]) :-
182 ( C = 0'_ ; between(0'a,0'z,C) ; between(0'A,0'Z,C) ),
183 char_code(Char, C).
184
185nondigit(_) --> "_".
186
187digit('0') --> "0".
188digit('1') --> "1".
189digit('2') --> "2".
190digit('3') --> "3".
191digit('4') --> "4".
192digit('5') --> "5".
193digit('6') --> "6".
194digit('7') --> "7".
195digit('8') --> "8".
196digit('9') --> "9".
197
198universal_character_name(C) -->
199 "\\u", hex_quad(V),
200 { char_code(C, V) }.
201universal_character_name(C) -->
202 "\\U", hex_quad(V), hex_quad(W),
203 { Code is (V<<32) + W, char_code(C, Code) }.
204
205hex_quad(V) -->
206 hexadecimal_digit(X1),
207 hexadecimal_digit(X2),
208 hexadecimal_digit(X3),
209 hexadecimal_digit(X4),
210 { V is (X1<<12) + (X2<<8) + (X3<<4) + X4 }.
211
212
213constant(C) --> floating_constant(C).
214constant(C) --> integer_constant(C).
215constant(C) --> enumeration_constant(C).
216constant(C) --> character_constant(C).
217
218integer_constant(C) -->
219 decimal_constant(D), opt_integer_suffix(S), {mkic(S,D,C)}.
220integer_constant(C) -->
221 hexadecimal_constant(D), opt_integer_suffix(S), {mkic(S,D,C)}.
222integer_constant(C) -->
223 octal_constant(D), opt_integer_suffix(S), {mkic(S,D,C)}.
224
225mkic(Suffix, Value, Token) :- Token =.. [Suffix,Value].
226
227decimal_constant(D) -->
228 nonzero_digit(D0), digits(DT), { number_chars(D, [D0|DT]) }.
229
230octal_constant(D) -->
231 "0", octal_digits(L), { L == [] -> D = 0 ; number_chars(D, ['0',o|L]) }.
232
233digits([H|T]) --> digit(H), !, digits(T).
234digits([]) --> [].
235
236octal_digits([H|T]) --> octal_digit(H), !, octal_digits(T).
237octal_digits([]) --> [].
238
239hexadecimal_constant(D) -->
240 hexadecimal_prefix,
241 hexadecimal_digits(0, D).
242
243hexadecimal_prefix --> "0x".
244hexadecimal_prefix --> "0X".
245
246hexadecimal_digits(V0, V) -->
247 hexadecimal_digit(D),
248 !,
249 { V1 is V0*16+D },
250 hexadecimal_digits(V1, V).
251hexadecimal_digits(V, V) -->
252 [].
253
254nonzero_digit('1') --> "1".
255nonzero_digit('2') --> "2".
256nonzero_digit('3') --> "3".
257nonzero_digit('4') --> "4".
258nonzero_digit('5') --> "5".
259nonzero_digit('6') --> "6".
260nonzero_digit('7') --> "7".
261nonzero_digit('8') --> "8".
262nonzero_digit('9') --> "9".
263
264octal_digit('0') --> "0".
265octal_digit('1') --> "1".
266octal_digit('2') --> "2".
267octal_digit('3') --> "3".
268octal_digit('4') --> "4".
269octal_digit('5') --> "5".
270octal_digit('6') --> "6".
271octal_digit('7') --> "7".
272
273hexadecimal_digit(0) --> "0".
274hexadecimal_digit(1) --> "1".
275hexadecimal_digit(2) --> "2".
276hexadecimal_digit(3) --> "3".
277hexadecimal_digit(4) --> "4".
278hexadecimal_digit(5) --> "5".
279hexadecimal_digit(6) --> "6".
280hexadecimal_digit(7) --> "7".
281hexadecimal_digit(8) --> "8".
282hexadecimal_digit(9) --> "9".
283hexadecimal_digit(10) --> "a".
284hexadecimal_digit(11) --> "b".
285hexadecimal_digit(12) --> "c".
286hexadecimal_digit(13) --> "d".
287hexadecimal_digit(14) --> "e".
288hexadecimal_digit(15) --> "f".
289hexadecimal_digit(10) --> "A".
290hexadecimal_digit(11) --> "B".
291hexadecimal_digit(12) --> "C".
292hexadecimal_digit(13) --> "D".
293hexadecimal_digit(14) --> "E".
294hexadecimal_digit(15) --> "F".
300opt_integer_suffix(S) --> unsigned_suffix, long_suffix(L), !, {mkuis(L, S)}.
301opt_integer_suffix(S) --> long_suffix(L), unsigned_suffix, !, {mkuis(L, S)}.
302opt_integer_suffix(u) --> unsigned_suffix, !.
303opt_integer_suffix(L) --> long_suffix(L), !.
304opt_integer_suffix(i) --> [].
305
306mkuis(l, ul).
307mkuis(ll, ull).
308
309unsigned_suffix --> "u".
310unsigned_suffix --> "U".
311
312long_suffix(ll) --> "ll".
313long_suffix(ll) --> "LL".
314long_suffix(l) --> "l".
315long_suffix(l) --> "L".
316
317floating_constant(F) --> decimal_floating_constant(F).
318floating_constant(F) --> hexadecimal_floating_constant(F).
319
320decimal_floating_constant(F) -->
321 fractional_constant(FC),
322 opt_exponent_part(E, _),
323 floating_suffix(FS, _),
324 { mkf(FS, FC, E, F) }.
325decimal_floating_constant(F) -->
326 digit_sequence_value(FC),
327 opt_exponent_part(E, Expl),
328 floating_suffix(FS, Expl),
329 { Expl == true,
330 mkf(FS, FC, E, F)
331 }.
332
333hexadecimal_floating_constant(F) -->
334 hexadecimal_prefix,
335 ( "."
336 -> hexadecimal_fractional_part(FC)
337 ; hexadecimal_digits(0, IC),
338 ".",
339 hexadecimal_fractional_part(FCP),
340 { FC is IC+FCP }
341 ),
342 binary_exponent_part(E),
343 floating_suffix(FS, _),
344 { mkf(FS, FC, E, F) }.
345
346mkf(float, FC, E, float(V)) :- V is FC*E.
347mkf(double, FC, E, double(V)) :- V is FC*E.
348
349fractional_constant(FC) -->
350 digit_sequence(DC1), ".",
351 digits(DC2),
352 { DC2 == []
353 -> number_chars(FC, DC1)
354 ; append(DC1, [.|DC2], S),
355 number_chars(FC, S)
356 }.
357
358digit_sequence([D0|DL]) -->
359 digit(D0), digits(DL).
360
361digit_sequence_value(Value) -->
362 digit_sequence(S),
363 { number_chars(Value, S) }.
364
365hexadecimal_fractional_part(V) -->
366 hexadecimal_fractional_part(10, 0, V).
367
368hexadecimal_fractional_part(I, V0, V) -->
369 hexadecimal_digit(D), !,
370 { V1 is V0+D/I,
371 I2 is I/10
372 },
373 hexadecimal_fractional_part(I2, V1, V).
374hexadecimal_fractional_part(_, V, V) --> [].
375
376opt_exponent_part(M, true) -->
377 exp_e, !,
378 sign(S),
379 digit_sequence_value(V),
380 { M is 10**(S*V) }.
381opt_exponent_part(1, _) -->
382 [].
383
384binary_exponent_part(M) -->
385 bin_e,
386 sign(S),
387 digit_sequence_value(V),
388 { M is 10**(S*V) }.
389
390exp_e --> "e".
391exp_e --> "E".
392
393bin_e --> "p".
394bin_e --> "P".
395
396sign(-1) --> "-", !.
397sign(1) --> "+", !.
398sign(1) --> "".
399
400floating_suffix(float, true) --> "f", !.
401floating_suffix(float, true) --> "F", !.
402floating_suffix(double, _) --> "l", !.
403floating_suffix(double, _) --> "L", !.
404floating_suffix(double, _) --> "".
408enumeration_constant(enum_value(ID)) -->
409 identifier(ID).
410
411character_constant(C) -->
412 "'", c_char_sequence(V), "'",
413 { C = char(V) }.
414character_constant(C) -->
415 "L'", c_char_sequence(V), "'",
416 { C = wchar(V) }.
417
418c_char_sequence([H|T]) -->
419 c_char(H),
420 c_char_sequence_z(T).
421
422c_char_sequence_z([H|T]) --> c_char(H), !, c_char_sequence_z(T).
423c_char_sequence_z([]) --> "".
424
425c_char(C) --> [C], { \+ no_c_char(C) }, !.
426c_char(C) --> escape_sequence(C).
427
428no_c_char(0'\').
429no_c_char(0'\\).
430no_c_char(0'\n).
431
432escape_sequence(C) --> simple_escape_sequence(C).
433escape_sequence(C) --> octal_escape_sequence(C).
434escape_sequence(C) --> hexadecimal_escape_sequence(C).
435escape_sequence(C) --> universal_character_name(C).
436
437simple_escape_sequence(0'\') --> "\\'".
438simple_escape_sequence(0'\") --> "\\\"".
439simple_escape_sequence(0'?) --> "\\?".
440simple_escape_sequence(0'\a) --> "\\a".
441simple_escape_sequence(0'\b) --> "\\b".
442simple_escape_sequence(0'\f) --> "\\f".
443simple_escape_sequence(0'\n) --> "\\n".
444simple_escape_sequence(0'\r) --> "\\r".
445simple_escape_sequence(0'\t) --> "\\t".
446simple_escape_sequence(0'\v) --> "\\v".
447
448octal_escape_sequence(C) -->
449 "\\",
450 octal_digit(D0),
451 ( octal_digit(D1)
452 -> ( octal_digit(D2)
453 -> {number_chars(C, ['0',o,D0,D1,D2])}
454 ; {number_chars(C, ['0',o,D0,D1])}
455 )
456 ; {number_chars(C, ['0',o,D0])}
457 ).
458
459hexadecimal_escape_sequence(C) -->
460 "\\x",
461 hexadecimal_digits(0, Code),
462 { char_code(C, Code) }.
463
464string_literal(S) -->
465 "\"", s_char_sequence(Chars), "\"",
466 sstring_literal_cont(More),
467 { mkstring(Chars, More, Str),
468 S = str(Str)
469 }.
470string_literal(S) -->
471 "L\"", s_char_sequence(Chars), "\"",
472 wstring_literal_cont(More),
473 { mkstring(Chars, More, Str),
474 S = wstr(Str)
475 }.
476
477s_char_sequence([H|T]) --> s_char(H), !, s_char_sequence(T).
478s_char_sequence([]) --> "".
479
480s_char(C) --> [C], { \+ no_s_char(C) }, !.
481s_char(C) --> escape_sequence(C).
482
483no_s_char(0'\").
484no_s_char(0'\\).
485no_s_char(0'\n).
486
487sstring_literal_cont([H|T]) -->
488 blanks,
489 "\"", s_char_sequence(Chars), "\"", !,
490 { string_codes(H, Chars) },
491 sstring_literal_cont(T).
492sstring_literal_cont([]) --> "".
493
494wstring_literal_cont([H|T]) -->
495 blanks,
496 "L\"", s_char_sequence(Chars), "\"", !,
497 { string_codes(H, Chars) },
498 wstring_literal_cont(T).
499wstring_literal_cont([]) --> "".
500
501mkstring(Chars, [], Str) :- !,
502 string_codes(Str, Chars).
503mkstring(Chars, More, Str) :-
504 string_codes(Str0, Chars),
505 atomics_to_string([Str0|More], Str).
511punctuator('[') --> "[".
512punctuator(']') --> "]".
513punctuator('(') --> "(".
514punctuator(')') --> ")".
515punctuator('{') --> "{".
516punctuator('}') --> "}".
517punctuator('...') --> "...".
518punctuator('.') --> ".".
519punctuator('->') --> "->".
520punctuator('++') --> "++".
521punctuator('--') --> "--".
522punctuator('&=') --> "&=".
523punctuator('&&') --> "&&".
524punctuator('&') --> "&".
525punctuator('*=') --> "*=".
526punctuator('*') --> "*".
527punctuator('+=') --> "+=".
528punctuator('+') --> "+".
529punctuator('-=') --> "-=".
530punctuator('-') --> "-".
531punctuator('~') --> "~".
532punctuator('!=') --> "!=".
533punctuator('!') --> "!".
534punctuator('/=') --> "/=".
535punctuator('/') --> "/".
536punctuator('%=') --> "%=".
537punctuator('%>') --> "%>".
538punctuator('%:%:') --> "%:%:".
539punctuator('%:') --> "%:".
540punctuator('%') --> "%".
541punctuator('<<=') --> "<<=".
542punctuator('>>=') --> ">>=".
543punctuator('<<') --> "<<".
544punctuator('>>') --> ">>".
545punctuator('<:') --> "<:".
546punctuator('<=') --> "<=".
547punctuator('<%') --> "<%".
548punctuator('<') --> "<".
549punctuator(':>') --> ":>".
550punctuator('>=') --> ">=".
551punctuator('>') --> ">".
552punctuator('?') --> "?".
553punctuator(':') --> ":".
554punctuator(';') --> ";".
555punctuator('==') --> "==".
556punctuator('=') --> "=".
557punctuator(',') --> ",".
558punctuator('##') --> "##".
559punctuator('#') --> "#".
560punctuator('^=') --> "^=".
561punctuator('^') --> "^".
562punctuator('||') --> "||".
563punctuator('|=') --> "|=".
564punctuator('|') --> "|".
565
566check_punctuators :- 567 findall(P, punctuator(P,_,_), L),
568 ( append(_, [P|T], L),
569 member(P2, T),
570 sub_atom(P2, 0, _, _, P),
571 format('~p should be before ~p~n', [P2, P]),
572 fail
573 ; true
574 ).
575
576:- check_punctuators. 577
(Header) -->
579 "<", string_without(">\n", Codes), ">", !,
580 { atom_codes(Name, Codes),
581 Header = header(ab, Name)
582 }.
583header_name(Header) -->
584 "\"", string_without("\"\n", Codes), "\"", !,
585 { atom_codes(Name, Codes),
586 Header = header(dq, Name)
587 }.
588
589pp_number(PP) -->
590 digits(D1a),
591 ( ".",
592 digits(D2a)
593 -> {append(D1a, [.|D2a], D1)}
594 ; {D1 = D1a}
595 ),
596 ( identifier_nondigit(NDa)
597 -> {D2 = [NDa]}
598 ; {D2 = []}
599 ),
600 pp_e(D3),
601 ( "."
602 -> {D4 = [.]}
603 ; {D4 = []}
604 ),
605 { append([D1,D2,D3,D4], D),
606 string_chars(S, D),
607 PP = pp(S)
608 }.
609
610pp_e([C1|C2]) -->
611 pp_ee(C1),
612 pp_sign(C2).
613
614pp_ee(e) --> "e".
615pp_ee('E') --> "e".
616pp_ee(p) --> "p".
617pp_ee('P') --> "P".
618
619pp_sign('-') --> "-".
620pp_sign('+') --> "+".
621
622pp_line(pp(Line)) -->
623 "#", string(Codes), eol, !,
624 { string_codes(Line, [0'#|Codes]) }.
625
626eol --> "\n", !.
627eol --> eos