Fix errors in the lexer module

2025-08-16 15:28:07 +02:00
parent f880e3d2d7
commit 569139d44a
5 changed files with 189 additions and 276 deletions

View File

@@ -239,8 +239,10 @@ namespace elna::gcc
 {
 visit(static_cast<boot::unit *>(program));
-tree declaration_type = build_function_type_list(integer_type_node, elna_int_type_node,
-build_global_pointer_type(build_global_pointer_type(elna_char_type_node)), NULL_TREE);
+tree declaration_type = build_function_type_list(elna_int_type_node,
+elna_int_type_node,
+build_global_pointer_type(build_global_pointer_type(elna_char_type_node)),
+NULL_TREE);
 tree fndecl = build_fn_decl("main", declaration_type);
 tree resdecl = build_decl(UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, integer_type_node);

View File

@@ -8,6 +8,8 @@ proc free(ptr: Pointer); extern;
 proc calloc(nmemb: Word, size: Word) -> Pointer; extern;
 proc realloc(ptr: Pointer, size: Word) -> Pointer; extern;
+proc atoi(str: ^Char) -> Int; extern;
 proc exit(code: Int) -> !; extern;
 end.

View File

@@ -3,7 +3,7 @@
 obtain one at https://mozilla.org/MPL/2.0/. *)
 module;
-proc memset(ptr: Pointer, c: Int, n: Int) -> ^Char; extern;
+proc memset(ptr: Pointer, c: Int, n: Word) -> ^Char; extern;
 proc memcpy(dst: Pointer, src: Pointer, n: Word); extern;
 proc strcmp(s1: ^Char, s2: ^Char) -> Int; extern;

View File

@@ -6,12 +6,12 @@ module;
 import cstdio, cstring, cctype, cstdlib, common;
 const
-CHUNK_SIZE := 85536;
+CHUNK_SIZE := 85536u;
 type
 (*
 * Classification table assigns each possible character to a group (class). All
-* characters of the same group a handled equivalently.
+* characters of the same group are handled equivalently.
 *
 * Classification:
 *)
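The comment kept in the hunk above describes the lexer's table-driven design: every possible character is assigned to a class once, and the rest of the lexer dispatches on that class, so all members of a class are treated identically. As a rough illustration only — a minimal C++ sketch that shares no names with the Elna sources and collapses the state machine into a single switch, whereas the module itself appears to drive TransitionState/TransitionAction tables through set_default_transition and the transition_action_* procedures further down:

#include <array>
#include <initializer_list>
#include <iostream>
#include <string_view>

enum class CharClass { space, digit, alpha, op, other };

int main()
{
    // Classification table: one entry per possible character value.
    std::array<CharClass, 256> classes{};
    classes.fill(CharClass::other);
    for (unsigned char c = '0'; c <= '9'; ++c) classes[c] = CharClass::digit;
    for (unsigned char c = 'a'; c <= 'z'; ++c) classes[c] = CharClass::alpha;
    for (unsigned char c = 'A'; c <= 'Z'; ++c) classes[c] = CharClass::alpha;
    classes['_'] = CharClass::alpha;
    for (char c : {' ', '\t', '\n'}) classes[static_cast<unsigned char>(c)] = CharClass::space;
    for (char c : {'+', '-', '*', '/', ':', '='}) classes[static_cast<unsigned char>(c)] = CharClass::op;

    // The driver looks only at the class, never at the raw character,
    // which is what "handled equivalently" means in the comment above.
    for (unsigned char c : std::string_view("count := count + 42"))
    {
        switch (classes[c])
        {
        case CharClass::digit: std::cout << c << " -> digit\n"; break;
        case CharClass::alpha: std::cout << c << " -> identifier character\n"; break;
        case CharClass::op:    std::cout << c << " -> operator\n"; break;
        case CharClass::space: break; // whitespace is skipped
        default:               std::cout << c << " -> unknown\n"; break;
        }
    }
    return 0;
}

Mapping characters to classes up front keeps a transition table small: its rows are indexed by class rather than by all 256 possible characters.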
@@ -88,7 +88,7 @@ type
 current: BufferPosition
 end;
 LexerKind* = (
-eof,
+unknown,
 identifier,
 _if,
 _then,
@@ -99,7 +99,7 @@ type
 _proc,
 _begin,
 _end,
-_xor,
+_extern,
 _const,
 _var,
 _case,
@@ -113,13 +113,15 @@ type
 null,
 and,
 _or,
-tilde,
+_xor,
+not,
 _return,
-_defer,
-range,
+_cast,
+shift_left,
+shift_right,
 left_paren,
 right_paren,
-lefts_quare,
+left_square,
 right_square,
 greater_equal,
 less_equal,
@@ -132,7 +134,7 @@ type
 comma,
 plus,
 minus,
-asterisk,
+multiplication,
 division,
 remainder,
 assignment,
@@ -144,10 +146,10 @@ type
 word,
 character,
 string,
-from,
-pointer,
-array,
+_defer,
+exclamation,
 arrow,
+trait,
 _program,
 _module,
 _import
@@ -322,7 +324,7 @@ end;
 (* Reached the end of file. *)
 proc transition_action_eof(lexer: ^Lexer, token: ^LexerToken);
 begin
-token^.kind := LexerKind.eof
+token^.kind := LexerKind.unknown
 end;
 proc increment(position: ^BufferPosition);
@@ -374,9 +376,6 @@ begin
 if (lexer^.start.iterator^ = '>') & (lexer^.current.iterator^ = '=') then
 token^.kind := LexerKind.greater_equal
 end;
-if (lexer^.start.iterator^ = '.') & (lexer^.current.iterator^ = '.') then
-token^.kind := LexerKind.range
-end;
 if (lexer^.start.iterator^ = ':') & (lexer^.current.iterator^ = '=') then
 token^.kind := LexerKind.assignment
 end;
@@ -409,16 +408,16 @@ begin
 if lexer^.start.iterator^ = '"' then
 text_length := cast(lexer^.current.iterator - lexer^.start.iterator + 1: Word);
-token^.stringKind := String(malloc(text_length), text_length);
-memcpy(@token^.stringKind.ptr, lexer^.start.iterator, text_length);
+token^.value.stringKind := String(cast(malloc(text_length): ^Char), text_length);
+memcpy(cast(token^.value.stringKind.ptr: Pointer), cast(lexer^.start.iterator: Pointer), text_length);
 token^.kind := LexerKind.character
 end;
-if lexer^.start.iterator^ = "'" then
+if lexer^.start.iterator^ = '\'' then
 text_length := cast(lexer^.current.iterator - lexer^.start.iterator + 1: Word);
-token^.stringKind := String(malloc(text_length), text_length);
-memcpy(@token^.stringKind.ptr, lexer^.start.iterator, text_length);
+token^.value.stringKind := String(cast(malloc(text_length): ^Char), text_length);
+memcpy(cast(token^.value.stringKind.ptr: Pointer), cast(lexer^.start.iterator: Pointer), text_length);
 token^.kind := LexerKind.string
 end;
@@ -430,8 +429,8 @@ proc transition_action_key_id(lexer: ^Lexer, token: ^LexerToken);
 begin
 token^.kind := LexerKind.identifier;
-token^.identifierKind[1] := cast(lexer^.current.iterator - lexer^.start.iterator: Char);
-memcpy(@token^.identifierKind[2], lexer^.start.iterator, cast(token^.identifierKind[1]: Word));
+token^.value.identifierKind[1] := cast(lexer^.current.iterator - lexer^.start.iterator: Char);
+memcpy(cast(@token^.value.identifierKind[2]: Pointer), cast(lexer^.start.iterator: Pointer), cast(token^.value.identifierKind[1]: Word));
 if compare_keyword("program", lexer^.start, lexer^.current.iterator) then
 token^.kind := LexerKind._program
@@ -502,28 +501,19 @@ begin
 if compare_keyword("OF", lexer^.start, lexer^.current.iterator) then
 token^.kind := LexerKind._of
 end;
-if compare_keyword("FROM", lexer^.start, lexer^.current.iterator) then
-token^.kind := LexerKind.from
-end;
 if compare_keyword("module", lexer^.start, lexer^.current.iterator) then
 token^.kind := LexerKind._module
 end;
 if compare_keyword("xor", lexer^.start, lexer^.current.iterator) then
 token^.kind := LexerKind._xor
 end;
-if compare_keyword("POINTER", lexer^.start, lexer^.current.iterator) then
-token^.kind := LexerKind.pointer
-end;
-if compare_keyword("ARRAY", lexer^.start, lexer^.current.iterator) then
-token^.kind := LexerKind.array
-end;
 if compare_keyword("TRUE", lexer^.start, lexer^.current.iterator) then
 token^.kind := LexerKind.boolean;
-token^.booleanKind := true
+token^.value.booleanKind := true
 end;
 if compare_keyword("FALSE", lexer^.start, lexer^.current.iterator) then
 token^.kind := LexerKind.boolean;
-token^.booleanKind := false
+token^.value.booleanKind := false
 end
 end;
@@ -541,7 +531,7 @@ begin
 token^.kind := LexerKind.comma
 end;
 if lexer^.current.iterator^ = '~' then
-token^.kind := LexerKind.tilde
+token^.kind := LexerKind.not
 end;
 if lexer^.current.iterator^ = ')' then
 token^.kind := LexerKind.right_paren
@@ -562,7 +552,7 @@ begin
 token^.kind := LexerKind.plus
 end;
 if lexer^.current.iterator^ = '*' then
-token^.kind := LexerKind.asterisk
+token^.kind := LexerKind.multiplication
 end;
 if lexer^.current.iterator^ = '/' then
 token^.kind := LexerKind.division
@@ -576,25 +566,24 @@ begin
 if lexer^.current.iterator^ = '|' then
 token^.kind := LexerKind.pipe
 end;
-increment(@lexer^.current.iterator)
+increment(@lexer^.current)
 end;
 (* Handle an integer literal. *)
 proc transition_action_integer(lexer: ^Lexer, token: ^LexerToken);
 var
 buffer: String;
-integer_length: Int;
+integer_length: Word;
 found: Bool;
 begin
 token^.kind := LexerKind.integer;
-integer_length := lexer^.current.iterator - lexer^.start.iterator;
-memset(@token^.identifierKind, 0, #size(Identifier));
-memcpy(@token^.identifierKind[1], lexer^.start.iterator, integer_length);
-buffer := InitStringCharStar(@token^.identifierKind[1]);
-token^.integerKind := StringToInteger(buffer, 10, found);
-buffer := KillString(buffer)
+integer_length := cast(lexer^.current.iterator - lexer^.start.iterator: Word);
+memset(cast(token^.value.identifierKind.ptr: Pointer), 0, #size(Identifier));
+memcpy(cast(@token^.value.identifierKind[1]: Pointer), cast(lexer^.start.iterator: Pointer), integer_length);
+token^.value.identifierKind[cast(token^.value.identifierKind[1]: Int) + 2] := '\0';
+token^.value.integerKind := atoi(@token^.value.identifierKind[2])
 end;
 proc set_default_transition(current_state: TransitionState, default_action: TransitionAction, next_state: TransitionState) -> Int;
@@ -893,10 +882,10 @@ end;
 proc lexer_make*(lexer: ^Lexer, input: ^FILE);
 begin
 lexer^.input := input;
-lexer^.length := 0;
-lexer^.buffer := malloc(CHUNK_SIZE);
-memset(lexer^.buffer, 0, CHUNK_SIZE);
+lexer^.length := 0u;
+lexer^.buffer := cast(malloc(CHUNK_SIZE): ^Char);
+memset(cast(lexer^.buffer: Pointer), 0, CHUNK_SIZE);
 lexer^.size := CHUNK_SIZE
 end;
@@ -937,10 +926,10 @@ proc lexer_lex*(lexer: ^Lexer) -> LexerToken;
 var
 result: LexerToken;
 begin
-if lexer^.length = 0 then
-lexer^.length := ReadNBytes(lexer^.input, CHUNK_SIZE, lexer^.buffer);
-lexer^.current.location.column := 1;
-lexer^.current.location.line := 1;
+if lexer^.length = 0u then
+lexer^.length := fread(cast(lexer^.buffer: Pointer), CHUNK_SIZE, 1u, lexer^.input);
+lexer^.current.location.column := 1u;
+lexer^.current.location.line := 1u;
 lexer^.current.iterator := lexer^.buffer
 end;
 lexer^.start := lexer^.current;
@@ -951,7 +940,7 @@ end;
 proc lexer_destroy*(lexer: ^Lexer);
 begin
-free(lexer^.buffer)
+free(cast(lexer^.buffer: Pointer))
 end;
 proc lexer_initialize();

View File

@@ -3,87 +3,9 @@
 obtain one at https://mozilla.org/MPL/2.0/. *)
 program;
-import cstdio, cctype, common, command_line_interface, Lexer;
+import cstdio, cctype, common, command_line_interface, lexer;
-const
-SEEK_SET* := 0;
-SEEK_CUR* := 1;
-SEEK_END* := 2;
-STDIN := 0;
-STDOUT := 1;
-STDERR := 2;
 type
-TokenKind* = (
-unknown,
-identifier,
-_if,
-_then,
-_else,
-_elsif,
-_while,
-_do,
-_proc,
-_begin,
-_end,
-_extern,
-_const,
-_var,
-_case,
-_of,
-_type,
-_record,
-_union,
-pipe,
-to,
-boolean,
-null,
-and,
-_or,
-not,
-_return,
-_cast,
-shift_left,
-shift_right,
-left_paren,
-right_paren,
-left_square,
-right_square,
-greater_equal,
-less_equal,
-greater_than,
-less_than,
-not_equal,
-equal,
-semicolon,
-dot,
-comma,
-plus,
-minus,
-multiplication,
-division,
-remainder,
-assignment,
-colon,
-hat,
-at,
-comment,
-integer,
-word,
-character,
-string,
-_defer,
-exclamation,
-arrow,
-trait,
-_program,
-_module,
-_import
-);
-Location* = record
-first: TextLocation;
-last: TextLocation
-end;
 SourceFile* = record
 buffer: [1024]Char;
 handle: ^FILE;
@@ -104,14 +26,13 @@ type
 head: proc(Pointer) -> Char
 end;
 Token* = record
-kind: TokenKind;
+kind: LexerKind;
 value: union
 int_value: Int;
 string: String;
 boolean_value: Bool;
 char_value: Char
-end;
-location: Location
+end
 end;
 Tokenizer* = record
 length: Word;
@@ -421,63 +342,63 @@ var
 current_token: Token;
 begin
 if token_content = "if" then
-current_token.kind := TokenKind._if
+current_token.kind := LexerKind._if
 elsif token_content = "then" then
-current_token.kind := TokenKind._then
+current_token.kind := LexerKind._then
 elsif token_content = "else" then
-current_token.kind := TokenKind._else
+current_token.kind := LexerKind._else
 elsif token_content = "elsif" then
-current_token.kind := TokenKind._elsif
+current_token.kind := LexerKind._elsif
 elsif token_content = "while" then
-current_token.kind := TokenKind._while
+current_token.kind := LexerKind._while
 elsif token_content = "do" then
-current_token.kind := TokenKind._do
+current_token.kind := LexerKind._do
 elsif token_content = "proc" then
-current_token.kind := TokenKind._proc
+current_token.kind := LexerKind._proc
 elsif token_content = "begin" then
-current_token.kind := TokenKind._begin
+current_token.kind := LexerKind._begin
 elsif token_content = "end" then
-current_token.kind := TokenKind._end
+current_token.kind := LexerKind._end
 elsif token_content = "extern" then
-current_token.kind := TokenKind._extern
+current_token.kind := LexerKind._extern
 elsif token_content = "const" then
-current_token.kind := TokenKind._const
+current_token.kind := LexerKind._const
 elsif token_content = "var" then
-current_token.kind := TokenKind._var
+current_token.kind := LexerKind._var
 elsif token_content = "case" then
-current_token.kind := TokenKind._case
+current_token.kind := LexerKind._case
 elsif token_content = "of" then
-current_token.kind := TokenKind._of
+current_token.kind := LexerKind._of
 elsif token_content = "type" then
-current_token.kind := TokenKind._type
+current_token.kind := LexerKind._type
 elsif token_content = "record" then
-current_token.kind := TokenKind._record
+current_token.kind := LexerKind._record
 elsif token_content = "union" then
-current_token.kind := TokenKind._union
+current_token.kind := LexerKind._union
 elsif token_content = "true" then
-current_token.kind := TokenKind.boolean;
+current_token.kind := LexerKind.boolean;
 current_token.value.boolean_value := true
 elsif token_content = "false" then
-current_token.kind := TokenKind.boolean;
+current_token.kind := LexerKind.boolean;
 current_token.value.boolean_value := false
 elsif token_content = "nil" then
-current_token.kind := TokenKind.null
+current_token.kind := LexerKind.null
 elsif token_content = "or" then
-current_token.kind := TokenKind._or
+current_token.kind := LexerKind._or
 elsif token_content = "return" then
-current_token.kind := TokenKind._return
+current_token.kind := LexerKind._return
 elsif token_content = "cast" then
-current_token.kind := TokenKind._cast
+current_token.kind := LexerKind._cast
 elsif token_content = "defer" then
-current_token.kind := TokenKind._defer
+current_token.kind := LexerKind._defer
 elsif token_content = "program" then
-current_token.kind := TokenKind._program
+current_token.kind := LexerKind._program
 elsif token_content = "module" then
-current_token.kind := TokenKind._module
+current_token.kind := LexerKind._module
 elsif token_content = "import" then
-current_token.kind := TokenKind._import
+current_token.kind := LexerKind._import
 else
-current_token.kind := TokenKind.identifier;
+current_token.kind := LexerKind.identifier;
 current_token.value.string := string_dup(token_content)
 end;
@@ -500,7 +421,7 @@ var
 current_token: Token;
 first_char: Char;
 begin
-current_token.kind := TokenKind.unknown;
+current_token.kind := LexerKind.unknown;
 first_char := source_code_head(source_code);
@@ -511,158 +432,158 @@ begin
 source_code_advance(@source_code);
 lexer_identifier(@source_code, token_buffer);
-current_token.kind := TokenKind.trait;
+current_token.kind := LexerKind.trait;
 current_token.value.string := string_dup(string_buffer_clear(token_buffer))
 elsif isdigit(cast(first_char: Int)) <> 0 then
 lexer_number(@source_code, @current_token.value.int_value);
 if source_code_expect(@source_code, 'u') then
-current_token.kind := TokenKind.word;
+current_token.kind := LexerKind.word;
 source_code_advance(@source_code)
 else
-current_token.kind := TokenKind.integer
+current_token.kind := LexerKind.integer
 end
 elsif first_char = '(' then
 source_code_advance(@source_code);
 if source_code_empty(@source_code) then
-current_token.kind := TokenKind.left_paren
+current_token.kind := LexerKind.left_paren
 elsif source_code_head(source_code) = '*' then
 source_code_advance(@source_code);
 if lexer_comment(@source_code, token_buffer) then
 current_token.value.string := string_dup(string_buffer_clear(token_buffer));
-current_token.kind := TokenKind.comment
+current_token.kind := LexerKind.comment
 else
-current_token.kind := TokenKind.unknown
+current_token.kind := LexerKind.unknown
 end
 else
-current_token.kind := TokenKind.left_paren
+current_token.kind := LexerKind.left_paren
 end
 elsif first_char = ')' then
-current_token.kind := TokenKind.right_paren;
+current_token.kind := LexerKind.right_paren;
 source_code_advance(@source_code)
 elsif first_char = '\'' then
 source_code_advance(@source_code);
 if lexer_character(@source_code, @current_token.value.char_value) & source_code_expect(@source_code, '\'') then
-current_token.kind := TokenKind.character;
+current_token.kind := LexerKind.character;
 source_code_advance(@source_code)
 else
-current_token.kind := TokenKind.unknown
+current_token.kind := LexerKind.unknown
 end
 elsif first_char = '"' then
 source_code_advance(@source_code);
 if lexer_string(@source_code, token_buffer) then
-current_token.kind := TokenKind.string;
+current_token.kind := LexerKind.string;
 current_token.value.string := string_dup(string_buffer_clear(token_buffer))
 else
-current_token.kind := TokenKind.unknown
+current_token.kind := LexerKind.unknown
 end
 elsif first_char = '[' then
-current_token.kind := TokenKind.left_square;
+current_token.kind := LexerKind.left_square;
 source_code_advance(@source_code)
 elsif first_char = ']' then
-current_token.kind := TokenKind.right_square;
+current_token.kind := LexerKind.right_square;
 source_code_advance(@source_code)
 elsif first_char = '>' then
 source_code_advance(@source_code);
 if source_code_empty(@source_code) then
-current_token.kind := TokenKind.greater_than
+current_token.kind := LexerKind.greater_than
 elsif source_code_head(source_code) = '=' then
-current_token.kind := TokenKind.greater_equal;
+current_token.kind := LexerKind.greater_equal;
 source_code_advance(@source_code)
 elsif source_code_head(source_code) = '>' then
-current_token.kind := TokenKind.shift_right;
+current_token.kind := LexerKind.shift_right;
 source_code_advance(@source_code)
 else
-current_token.kind := TokenKind.greater_than
+current_token.kind := LexerKind.greater_than
 end
 elsif first_char = '<' then
 source_code_advance(@source_code);
 if source_code_empty(@source_code) then
-current_token.kind := TokenKind.less_than
+current_token.kind := LexerKind.less_than
 elsif source_code_head(source_code) = '=' then
-current_token.kind := TokenKind.less_equal;
+current_token.kind := LexerKind.less_equal;
 source_code_advance(@source_code)
 elsif source_code_head(source_code) = '<' then
-current_token.kind := TokenKind.shift_left;
+current_token.kind := LexerKind.shift_left;
 source_code_advance(@source_code)
 elsif source_code_head(source_code) = '>' then
-current_token.kind := TokenKind.not_equal;
+current_token.kind := LexerKind.not_equal;
 source_code_advance(@source_code)
 else
-current_token.kind := TokenKind.less_than
+current_token.kind := LexerKind.less_than
 end
 elsif first_char = '=' then
-current_token.kind := TokenKind.equal;
+current_token.kind := LexerKind.equal;
 source_code_advance(@source_code)
 elsif first_char = ';' then
-current_token.kind := TokenKind.semicolon;
+current_token.kind := LexerKind.semicolon;
 source_code_advance(@source_code)
 elsif first_char = '.' then
-current_token.kind := TokenKind.dot;
+current_token.kind := LexerKind.dot;
 source_code_advance(@source_code)
 elsif first_char = ',' then
-current_token.kind := TokenKind.comma;
+current_token.kind := LexerKind.comma;
 source_code_advance(@source_code)
 elsif first_char = '+' then
-current_token.kind := TokenKind.plus;
+current_token.kind := LexerKind.plus;
 source_code_advance(@source_code)
 elsif first_char = '-' then
 source_code_advance(@source_code);
 if source_code_empty(@source_code) then
-current_token.kind := TokenKind.minus
+current_token.kind := LexerKind.minus
 elsif source_code_head(source_code) = '>' then
-current_token.kind := TokenKind.arrow;
+current_token.kind := LexerKind.arrow;
 source_code_advance(@source_code)
 else
-current_token.kind := TokenKind.minus
+current_token.kind := LexerKind.minus
 end
 elsif first_char = '*' then
-current_token.kind := TokenKind.multiplication;
+current_token.kind := LexerKind.multiplication;
 source_code_advance(@source_code)
 elsif first_char = '/' then
-current_token.kind := TokenKind.division;
+current_token.kind := LexerKind.division;
 source_code_advance(@source_code)
 elsif first_char = '%' then
-current_token.kind := TokenKind.remainder;
+current_token.kind := LexerKind.remainder;
 source_code_advance(@source_code)
 elsif first_char = ':' then
 source_code_advance(@source_code);
 if source_code_empty(@source_code) then
-current_token.kind := TokenKind.colon
+current_token.kind := LexerKind.colon
 elsif source_code_head(source_code) = '=' then
-current_token.kind := TokenKind.assignment;
+current_token.kind := LexerKind.assignment;
 source_code_advance(@source_code)
 else
-current_token.kind := TokenKind.colon
+current_token.kind := LexerKind.colon
 end
 elsif first_char = '^' then
-current_token.kind := TokenKind.hat;
+current_token.kind := LexerKind.hat;
 source_code_advance(@source_code)
 elsif first_char = '@' then
-current_token.kind := TokenKind.at;
+current_token.kind := LexerKind.at;
 source_code_advance(@source_code)
 elsif first_char = '!' then
-current_token.kind := TokenKind.exclamation;
+current_token.kind := LexerKind.exclamation;
 source_code_advance(@source_code)
 elsif first_char = '&' then
-current_token.kind := TokenKind.and;
+current_token.kind := LexerKind.and;
 source_code_advance(@source_code)
 elsif first_char = '~' then
-current_token.kind := TokenKind.not;
+current_token.kind := LexerKind.not;
 source_code_advance(@source_code)
 elsif first_char = '|' then
-current_token.kind := TokenKind.pipe;
+current_token.kind := LexerKind.pipe;
 source_code_advance(@source_code)
 else
-current_token.kind := TokenKind.unknown;
+current_token.kind := LexerKind.unknown;
 source_code_advance(@source_code)
 end;
@@ -684,7 +605,7 @@ begin
 while ~source_code_empty(@source_code) do
 current_token := lexer_next(source_code, @token_buffer);
-if current_token.kind <> TokenKind.unknown then
+if current_token.kind <> LexerKind.unknown then
 lexer_add_token(@lexer, current_token);
 lexer_spaces(@source_code)
 else
@@ -711,142 +632,142 @@ begin
 current_token := tokens + i;
 case current_token^.kind of
-TokenKind._if:
+LexerKind._if:
 write_s("IF")
-| TokenKind._then:
+| LexerKind._then:
 write_s("THEN")
-| TokenKind._else:
+| LexerKind._else:
 write_s("ELSE")
-| TokenKind._elsif:
+| LexerKind._elsif:
 write_s("ELSIF")
-| TokenKind._while:
+| LexerKind._while:
 write_s("WHILE")
-| TokenKind._do:
+| LexerKind._do:
 write_s("DO")
-| TokenKind._proc:
+| LexerKind._proc:
 write_s("PROC")
-| TokenKind._begin:
+| LexerKind._begin:
 write_s("BEGIN")
-| TokenKind._end:
+| LexerKind._end:
 write_s("END")
-| TokenKind._extern:
+| LexerKind._extern:
 write_s("EXTERN")
-| TokenKind._const:
+| LexerKind._const:
 write_s("CONST")
-| TokenKind._var:
+| LexerKind._var:
 write_s("VAR")
-| TokenKind._case:
+| LexerKind._case:
 write_s("CASE")
-| TokenKind._of:
+| LexerKind._of:
 write_s("OF")
-| TokenKind._type:
+| LexerKind._type:
 write_s("TYPE")
-| TokenKind._record:
+| LexerKind._record:
 write_s("RECORD")
-| TokenKind._union:
+| LexerKind._union:
 write_s("UNION")
-| TokenKind.pipe:
+| LexerKind.pipe:
 write_s("|")
-| TokenKind.to:
+| LexerKind.to:
 write_s("TO")
-| TokenKind.boolean:
+| LexerKind.boolean:
 write_s("BOOLEAN<");
 write_b(current_token^.value.boolean_value);
 write_c('>')
-| TokenKind.null:
+| LexerKind.null:
 write_s("NIL")
-| TokenKind.and:
+| LexerKind.and:
 write_s("&")
-| TokenKind._or:
+| LexerKind._or:
 write_s("OR")
-| TokenKind.not:
+| LexerKind.not:
 write_s("~")
-| TokenKind._return:
+| LexerKind._return:
 write_s("RETURN")
-| TokenKind._cast:
+| LexerKind._cast:
 write_s("CAST")
-| TokenKind.shift_left:
+| LexerKind.shift_left:
 write_s("<<")
-| TokenKind.shift_right:
+| LexerKind.shift_right:
 write_s(">>")
-| TokenKind.identifier:
+| LexerKind.identifier:
 write_c('<');
 write_s(current_token^.value.string);
 write_c('>')
-| TokenKind.trait:
+| LexerKind.trait:
 write_c('#');
 write_s(current_token^.value.string)
-| TokenKind.left_paren:
+| LexerKind.left_paren:
 write_s("(")
-| TokenKind.right_paren:
+| LexerKind.right_paren:
 write_s(")")
-| TokenKind.left_square:
+| LexerKind.left_square:
 write_s("[")
-| TokenKind.right_square:
+| LexerKind.right_square:
 write_s("]")
-| TokenKind.greater_equal:
+| LexerKind.greater_equal:
 write_s(">=")
-| TokenKind.less_equal:
+| LexerKind.less_equal:
 write_s("<=")
-| TokenKind.greater_than:
+| LexerKind.greater_than:
 write_s(">")
-| TokenKind.less_than:
+| LexerKind.less_than:
 write_s("<")
-| TokenKind.equal:
+| LexerKind.equal:
 write_s("=")
-| TokenKind.not_equal:
+| LexerKind.not_equal:
 write_s("<>")
-| TokenKind.semicolon:
+| LexerKind.semicolon:
 write_c(';')
-| TokenKind.dot:
+| LexerKind.dot:
 write_c('.')
-| TokenKind.comma:
+| LexerKind.comma:
 write_c(',')
-| TokenKind.plus:
+| LexerKind.plus:
 write_c('+')
-| TokenKind.minus:
+| LexerKind.minus:
 write_c('-')
-| TokenKind.multiplication:
+| LexerKind.multiplication:
 write_c('*')
-| TokenKind.division:
+| LexerKind.division:
 write_c('/')
-| TokenKind.remainder:
+| LexerKind.remainder:
 write_c('%')
-| TokenKind.assignment:
+| LexerKind.assignment:
 write_s(":=")
-| TokenKind.colon:
+| LexerKind.colon:
 write_c(':')
-| TokenKind.hat:
+| LexerKind.hat:
 write_c('^')
-| TokenKind.at:
+| LexerKind.at:
 write_c('@')
-| TokenKind.comment:
+| LexerKind.comment:
 write_s("(* COMMENT *)")
-| TokenKind.integer:
+| LexerKind.integer:
 write_c('<');
 write_i(current_token^.value.int_value);
 write_c('>')
-| TokenKind.word:
+| LexerKind.word:
 write_c('<');
 write_i(current_token^.value.int_value);
 write_s("u>")
-| TokenKind.character:
+| LexerKind.character:
 write_c('<');
 write_i(cast(current_token^.value.char_value: Int));
 write_s("c>")
-| TokenKind.string:
+| LexerKind.string:
 write_s("\"...\"")
-| TokenKind._defer:
+| LexerKind._defer:
 write_s("DEFER")
-| TokenKind.exclamation:
+| LexerKind.exclamation:
 write_c('!')
-| TokenKind.arrow:
+| LexerKind.arrow:
 write_s("->")
-| TokenKind._program:
+| LexerKind._program:
 write_s("PROGRAM")
-| TokenKind._module:
+| LexerKind._module:
 write_s("MODULE")
-| TokenKind._import:
+| LexerKind._import:
 write_s("IMPORT")
 else
 write_s("UNKNOWN<");
@@ -922,6 +843,5 @@ begin
 return return_code
 end;
-begin
-exit(process(count, parameters))
+return process(count, parameters)
 end.