|
|
|
@@ -6,12 +6,12 @@ module;
|
|
|
|
|
import cstdio, cstring, cctype, cstdlib, common;
|
|
|
|
|
|
|
|
|
|
const
|
|
|
|
|
CHUNK_SIZE := 85536;
|
|
|
|
|
CHUNK_SIZE := 85536u;
|
|
|
|
|
|
|
|
|
|
type
|
|
|
|
|
(*
|
|
|
|
|
* Classification table assigns each possible character to a group (class). All
|
|
|
|
|
* characters of the same group a handled equivalently.
|
|
|
|
|
* characters of the same group are handled equivalently.
|
|
|
|
|
*
|
|
|
|
|
* Classification:
|
|
|
|
|
*)
|
|
|
|
@@ -119,7 +119,7 @@ type
|
|
|
|
|
range,
|
|
|
|
|
left_paren,
|
|
|
|
|
right_paren,
|
|
|
|
|
lefts_quare,
|
|
|
|
|
left_square,
|
|
|
|
|
right_square,
|
|
|
|
|
greater_equal,
|
|
|
|
|
less_equal,
|
|
|
|
@@ -409,16 +409,16 @@ begin
|
|
|
|
|
if lexer^.start.iterator^ = '"' then
|
|
|
|
|
text_length := cast(lexer^.current.iterator - lexer^.start.iterator + 1: Word);
|
|
|
|
|
|
|
|
|
|
token^.stringKind := String(malloc(text_length), text_length);
|
|
|
|
|
memcpy(@token^.stringKind.ptr, lexer^.start.iterator, text_length);
|
|
|
|
|
token^.value.stringKind := String(cast(malloc(text_length): ^Char), text_length);
|
|
|
|
|
memcpy(cast(token^.value.stringKind.ptr: Pointer), cast(lexer^.start.iterator: Pointer), text_length);
|
|
|
|
|
|
|
|
|
|
token^.kind := LexerKind.character
|
|
|
|
|
end;
|
|
|
|
|
if lexer^.start.iterator^ = "'" then
|
|
|
|
|
if lexer^.start.iterator^ = '\'' then
|
|
|
|
|
text_length := cast(lexer^.current.iterator - lexer^.start.iterator + 1: Word);
|
|
|
|
|
|
|
|
|
|
token^.stringKind := String(malloc(text_length), text_length);
|
|
|
|
|
memcpy(@token^.stringKind.ptr, lexer^.start.iterator, text_length);
|
|
|
|
|
token^.value.stringKind := String(cast(malloc(text_length): ^Char), text_length);
|
|
|
|
|
memcpy(cast(token^.value.stringKind.ptr: Pointer), cast(lexer^.start.iterator: Pointer), text_length);
|
|
|
|
|
|
|
|
|
|
token^.kind := LexerKind.string
|
|
|
|
|
end;
|
|
|
|
@@ -430,8 +430,8 @@ proc transition_action_key_id(lexer: ^Lexer, token: ^LexerToken);
|
|
|
|
|
begin
|
|
|
|
|
token^.kind := LexerKind.identifier;
|
|
|
|
|
|
|
|
|
|
token^.identifierKind[1] := cast(lexer^.current.iterator - lexer^.start.iterator: Char);
|
|
|
|
|
memcpy(@token^.identifierKind[2], lexer^.start.iterator, cast(token^.identifierKind[1]: Word));
|
|
|
|
|
token^.value.identifierKind[1] := cast(lexer^.current.iterator - lexer^.start.iterator: Char);
|
|
|
|
|
memcpy(cast(@token^.value.identifierKind[2]: Pointer), cast(lexer^.start.iterator: Pointer), cast(token^.value.identifierKind[1]: Word));
|
|
|
|
|
|
|
|
|
|
if compare_keyword("program", lexer^.start, lexer^.current.iterator) then
|
|
|
|
|
token^.kind := LexerKind._program
|
|
|
|
@@ -519,11 +519,11 @@ begin
|
|
|
|
|
end;
|
|
|
|
|
if compare_keyword("TRUE", lexer^.start, lexer^.current.iterator) then
|
|
|
|
|
token^.kind := LexerKind.boolean;
|
|
|
|
|
token^.booleanKind := true
|
|
|
|
|
token^.value.booleanKind := true
|
|
|
|
|
end;
|
|
|
|
|
if compare_keyword("FALSE", lexer^.start, lexer^.current.iterator) then
|
|
|
|
|
token^.kind := LexerKind.boolean;
|
|
|
|
|
token^.booleanKind := false
|
|
|
|
|
token^.value.booleanKind := false
|
|
|
|
|
end
|
|
|
|
|
end;
|
|
|
|
|
|
|
|
|
@@ -576,25 +576,24 @@ begin
|
|
|
|
|
if lexer^.current.iterator^ = '|' then
|
|
|
|
|
token^.kind := LexerKind.pipe
|
|
|
|
|
end;
|
|
|
|
|
increment(@lexer^.current.iterator)
|
|
|
|
|
increment(@lexer^.current)
|
|
|
|
|
end;
|
|
|
|
|
|
|
|
|
|
(* Handle an integer literal. *)
|
|
|
|
|
proc transition_action_integer(lexer: ^Lexer, token: ^LexerToken);
|
|
|
|
|
var
|
|
|
|
|
buffer: String;
|
|
|
|
|
integer_length: Int;
|
|
|
|
|
integer_length: Word;
|
|
|
|
|
found: Bool;
|
|
|
|
|
begin
|
|
|
|
|
token^.kind := LexerKind.integer;
|
|
|
|
|
|
|
|
|
|
integer_length := lexer^.current.iterator - lexer^.start.iterator;
|
|
|
|
|
memset(@token^.identifierKind, 0, #size(Identifier));
|
|
|
|
|
memcpy(@token^.identifierKind[1], lexer^.start.iterator, integer_length);
|
|
|
|
|
integer_length := cast(lexer^.current.iterator - lexer^.start.iterator: Word);
|
|
|
|
|
memset(cast(token^.value.identifierKind.ptr: Pointer), 0, #size(Identifier));
|
|
|
|
|
memcpy(cast(@token^.value.identifierKind[1]: Pointer), cast(lexer^.start.iterator: Pointer), integer_length);
|
|
|
|
|
|
|
|
|
|
buffer := InitStringCharStar(@token^.identifierKind[1]);
|
|
|
|
|
token^.integerKind := StringToInteger(buffer, 10, found);
|
|
|
|
|
buffer := KillString(buffer)
|
|
|
|
|
token^.value.identifierKind[cast(token^.value.identifierKind[1]: Int) + 2] := '\0';
|
|
|
|
|
token^.value.integerKind := atoi(@token^.value.identifierKind[2])
|
|
|
|
|
end;
|
|
|
|
|
|
|
|
|
|
proc set_default_transition(current_state: TransitionState, default_action: TransitionAction, next_state: TransitionState) -> Int;
|
|
|
|
@@ -893,10 +892,10 @@ end;
|
|
|
|
|
proc lexer_make*(lexer: ^Lexer, input: ^FILE);
|
|
|
|
|
begin
|
|
|
|
|
lexer^.input := input;
|
|
|
|
|
lexer^.length := 0;
|
|
|
|
|
lexer^.length := 0u;
|
|
|
|
|
|
|
|
|
|
lexer^.buffer := malloc(CHUNK_SIZE);
|
|
|
|
|
memset(lexer^.buffer, 0, CHUNK_SIZE);
|
|
|
|
|
lexer^.buffer := cast(malloc(CHUNK_SIZE): ^Char);
|
|
|
|
|
memset(cast(lexer^.buffer: Pointer), 0, CHUNK_SIZE);
|
|
|
|
|
lexer^.size := CHUNK_SIZE
|
|
|
|
|
end;
|
|
|
|
|
|
|
|
|
@@ -937,10 +936,10 @@ proc lexer_lex*(lexer: ^Lexer) -> LexerToken;
|
|
|
|
|
var
|
|
|
|
|
result: LexerToken;
|
|
|
|
|
begin
|
|
|
|
|
if lexer^.length = 0 then
|
|
|
|
|
lexer^.length := ReadNBytes(lexer^.input, CHUNK_SIZE, lexer^.buffer);
|
|
|
|
|
lexer^.current.location.column := 1;
|
|
|
|
|
lexer^.current.location.line := 1;
|
|
|
|
|
if lexer^.length = 0u then
|
|
|
|
|
lexer^.length := fread(cast(lexer^.buffer: Pointer), CHUNK_SIZE, 1u, lexer^.input);
|
|
|
|
|
lexer^.current.location.column := 1u;
|
|
|
|
|
lexer^.current.location.line := 1u;
|
|
|
|
|
lexer^.current.iterator := lexer^.buffer
|
|
|
|
|
end;
|
|
|
|
|
lexer^.start := lexer^.current;
|
|
|
|
@@ -951,7 +950,7 @@ end;
|
|
|
|
|
|
|
|
|
|
proc lexer_destroy*(lexer: ^Lexer);
|
|
|
|
|
begin
|
|
|
|
|
free(lexer^.buffer)
|
|
|
|
|
free(cast(lexer^.buffer: Pointer))
|
|
|
|
|
end;
|
|
|
|
|
|
|
|
|
|
proc lexer_initialize();
|