Fix errors in the lexer module

This commit is contained in:
2025-08-16 15:28:07 +02:00
parent f880e3d2d7
commit 569139d44a
5 changed files with 189 additions and 276 deletions

View File

@@ -3,87 +3,9 @@
obtain one at https://mozilla.org/MPL/2.0/. *)
program;
import cstdio, cctype, common, command_line_interface, Lexer;
const
SEEK_SET* := 0;
SEEK_CUR* := 1;
SEEK_END* := 2;
STDIN := 0;
STDOUT := 1;
STDERR := 2;
import cstdio, cctype, common, command_line_interface, lexer;
type
TokenKind* = (
unknown,
identifier,
_if,
_then,
_else,
_elsif,
_while,
_do,
_proc,
_begin,
_end,
_extern,
_const,
_var,
_case,
_of,
_type,
_record,
_union,
pipe,
to,
boolean,
null,
and,
_or,
not,
_return,
_cast,
shift_left,
shift_right,
left_paren,
right_paren,
left_square,
right_square,
greater_equal,
less_equal,
greater_than,
less_than,
not_equal,
equal,
semicolon,
dot,
comma,
plus,
minus,
multiplication,
division,
remainder,
assignment,
colon,
hat,
at,
comment,
integer,
word,
character,
string,
_defer,
exclamation,
arrow,
trait,
_program,
_module,
_import
);
Location* = record
first: TextLocation;
last: TextLocation
end;
SourceFile* = record
buffer: [1024]Char;
handle: ^FILE;
@@ -104,14 +26,13 @@ type
head: proc(Pointer) -> Char
end;
Token* = record
kind: TokenKind;
kind: LexerKind;
value: union
int_value: Int;
string: String;
boolean_value: Bool;
char_value: Char
end;
location: Location
end
end;
Tokenizer* = record
length: Word;
@@ -421,63 +342,63 @@ var
current_token: Token;
begin
if token_content = "if" then
current_token.kind := TokenKind._if
current_token.kind := LexerKind._if
elsif token_content = "then" then
current_token.kind := TokenKind._then
current_token.kind := LexerKind._then
elsif token_content = "else" then
current_token.kind := TokenKind._else
current_token.kind := LexerKind._else
elsif token_content = "elsif" then
current_token.kind := TokenKind._elsif
current_token.kind := LexerKind._elsif
elsif token_content = "while" then
current_token.kind := TokenKind._while
current_token.kind := LexerKind._while
elsif token_content = "do" then
current_token.kind := TokenKind._do
current_token.kind := LexerKind._do
elsif token_content = "proc" then
current_token.kind := TokenKind._proc
current_token.kind := LexerKind._proc
elsif token_content = "begin" then
current_token.kind := TokenKind._begin
current_token.kind := LexerKind._begin
elsif token_content = "end" then
current_token.kind := TokenKind._end
current_token.kind := LexerKind._end
elsif token_content = "extern" then
current_token.kind := TokenKind._extern
current_token.kind := LexerKind._extern
elsif token_content = "const" then
current_token.kind := TokenKind._const
current_token.kind := LexerKind._const
elsif token_content = "var" then
current_token.kind := TokenKind._var
current_token.kind := LexerKind._var
elsif token_content = "case" then
current_token.kind := TokenKind._case
current_token.kind := LexerKind._case
elsif token_content = "of" then
current_token.kind := TokenKind._of
current_token.kind := LexerKind._of
elsif token_content = "type" then
current_token.kind := TokenKind._type
current_token.kind := LexerKind._type
elsif token_content = "record" then
current_token.kind := TokenKind._record
current_token.kind := LexerKind._record
elsif token_content = "union" then
current_token.kind := TokenKind._union
current_token.kind := LexerKind._union
elsif token_content = "true" then
current_token.kind := TokenKind.boolean;
current_token.kind := LexerKind.boolean;
current_token.value.boolean_value := true
elsif token_content = "false" then
current_token.kind := TokenKind.boolean;
current_token.kind := LexerKind.boolean;
current_token.value.boolean_value := false
elsif token_content = "nil" then
current_token.kind := TokenKind.null
current_token.kind := LexerKind.null
elsif token_content = "or" then
current_token.kind := TokenKind._or
current_token.kind := LexerKind._or
elsif token_content = "return" then
current_token.kind := TokenKind._return
current_token.kind := LexerKind._return
elsif token_content = "cast" then
current_token.kind := TokenKind._cast
current_token.kind := LexerKind._cast
elsif token_content = "defer" then
current_token.kind := TokenKind._defer
current_token.kind := LexerKind._defer
elsif token_content = "program" then
current_token.kind := TokenKind._program
current_token.kind := LexerKind._program
elsif token_content = "module" then
current_token.kind := TokenKind._module
current_token.kind := LexerKind._module
elsif token_content = "import" then
current_token.kind := TokenKind._import
current_token.kind := LexerKind._import
else
current_token.kind := TokenKind.identifier;
current_token.kind := LexerKind.identifier;
current_token.value.string := string_dup(token_content)
end;
@@ -500,7 +421,7 @@ var
current_token: Token;
first_char: Char;
begin
current_token.kind := TokenKind.unknown;
current_token.kind := LexerKind.unknown;
first_char := source_code_head(source_code);
@@ -511,158 +432,158 @@ begin
source_code_advance(@source_code);
lexer_identifier(@source_code, token_buffer);
current_token.kind := TokenKind.trait;
current_token.kind := LexerKind.trait;
current_token.value.string := string_dup(string_buffer_clear(token_buffer))
elsif isdigit(cast(first_char: Int)) <> 0 then
lexer_number(@source_code, @current_token.value.int_value);
if source_code_expect(@source_code, 'u') then
current_token.kind := TokenKind.word;
current_token.kind := LexerKind.word;
source_code_advance(@source_code)
else
current_token.kind := TokenKind.integer
current_token.kind := LexerKind.integer
end
elsif first_char = '(' then
source_code_advance(@source_code);
if source_code_empty(@source_code) then
current_token.kind := TokenKind.left_paren
current_token.kind := LexerKind.left_paren
elsif source_code_head(source_code) = '*' then
source_code_advance(@source_code);
if lexer_comment(@source_code, token_buffer) then
current_token.value.string := string_dup(string_buffer_clear(token_buffer));
current_token.kind := TokenKind.comment
current_token.kind := LexerKind.comment
else
current_token.kind := TokenKind.unknown
current_token.kind := LexerKind.unknown
end
else
current_token.kind := TokenKind.left_paren
current_token.kind := LexerKind.left_paren
end
elsif first_char = ')' then
current_token.kind := TokenKind.right_paren;
current_token.kind := LexerKind.right_paren;
source_code_advance(@source_code)
elsif first_char = '\'' then
source_code_advance(@source_code);
if lexer_character(@source_code, @current_token.value.char_value) & source_code_expect(@source_code, '\'') then
current_token.kind := TokenKind.character;
current_token.kind := LexerKind.character;
source_code_advance(@source_code)
else
current_token.kind := TokenKind.unknown
current_token.kind := LexerKind.unknown
end
elsif first_char = '"' then
source_code_advance(@source_code);
if lexer_string(@source_code, token_buffer) then
current_token.kind := TokenKind.string;
current_token.kind := LexerKind.string;
current_token.value.string := string_dup(string_buffer_clear(token_buffer))
else
current_token.kind := TokenKind.unknown
current_token.kind := LexerKind.unknown
end
elsif first_char = '[' then
current_token.kind := TokenKind.left_square;
current_token.kind := LexerKind.left_square;
source_code_advance(@source_code)
elsif first_char = ']' then
current_token.kind := TokenKind.right_square;
current_token.kind := LexerKind.right_square;
source_code_advance(@source_code)
elsif first_char = '>' then
source_code_advance(@source_code);
if source_code_empty(@source_code) then
current_token.kind := TokenKind.greater_than
current_token.kind := LexerKind.greater_than
elsif source_code_head(source_code) = '=' then
current_token.kind := TokenKind.greater_equal;
current_token.kind := LexerKind.greater_equal;
source_code_advance(@source_code)
elsif source_code_head(source_code) = '>' then
current_token.kind := TokenKind.shift_right;
current_token.kind := LexerKind.shift_right;
source_code_advance(@source_code)
else
current_token.kind := TokenKind.greater_than
current_token.kind := LexerKind.greater_than
end
elsif first_char = '<' then
source_code_advance(@source_code);
if source_code_empty(@source_code) then
current_token.kind := TokenKind.less_than
current_token.kind := LexerKind.less_than
elsif source_code_head(source_code) = '=' then
current_token.kind := TokenKind.less_equal;
current_token.kind := LexerKind.less_equal;
source_code_advance(@source_code)
elsif source_code_head(source_code) = '<' then
current_token.kind := TokenKind.shift_left;
current_token.kind := LexerKind.shift_left;
source_code_advance(@source_code)
elsif source_code_head(source_code) = '>' then
current_token.kind := TokenKind.not_equal;
current_token.kind := LexerKind.not_equal;
source_code_advance(@source_code)
else
current_token.kind := TokenKind.less_than
current_token.kind := LexerKind.less_than
end
elsif first_char = '=' then
current_token.kind := TokenKind.equal;
current_token.kind := LexerKind.equal;
source_code_advance(@source_code)
elsif first_char = ';' then
current_token.kind := TokenKind.semicolon;
current_token.kind := LexerKind.semicolon;
source_code_advance(@source_code)
elsif first_char = '.' then
current_token.kind := TokenKind.dot;
current_token.kind := LexerKind.dot;
source_code_advance(@source_code)
elsif first_char = ',' then
current_token.kind := TokenKind.comma;
current_token.kind := LexerKind.comma;
source_code_advance(@source_code)
elsif first_char = '+' then
current_token.kind := TokenKind.plus;
current_token.kind := LexerKind.plus;
source_code_advance(@source_code)
elsif first_char = '-' then
source_code_advance(@source_code);
if source_code_empty(@source_code) then
current_token.kind := TokenKind.minus
current_token.kind := LexerKind.minus
elsif source_code_head(source_code) = '>' then
current_token.kind := TokenKind.arrow;
current_token.kind := LexerKind.arrow;
source_code_advance(@source_code)
else
current_token.kind := TokenKind.minus
current_token.kind := LexerKind.minus
end
elsif first_char = '*' then
current_token.kind := TokenKind.multiplication;
current_token.kind := LexerKind.multiplication;
source_code_advance(@source_code)
elsif first_char = '/' then
current_token.kind := TokenKind.division;
current_token.kind := LexerKind.division;
source_code_advance(@source_code)
elsif first_char = '%' then
current_token.kind := TokenKind.remainder;
current_token.kind := LexerKind.remainder;
source_code_advance(@source_code)
elsif first_char = ':' then
source_code_advance(@source_code);
if source_code_empty(@source_code) then
current_token.kind := TokenKind.colon
current_token.kind := LexerKind.colon
elsif source_code_head(source_code) = '=' then
current_token.kind := TokenKind.assignment;
current_token.kind := LexerKind.assignment;
source_code_advance(@source_code)
else
current_token.kind := TokenKind.colon
current_token.kind := LexerKind.colon
end
elsif first_char = '^' then
current_token.kind := TokenKind.hat;
current_token.kind := LexerKind.hat;
source_code_advance(@source_code)
elsif first_char = '@' then
current_token.kind := TokenKind.at;
current_token.kind := LexerKind.at;
source_code_advance(@source_code)
elsif first_char = '!' then
current_token.kind := TokenKind.exclamation;
current_token.kind := LexerKind.exclamation;
source_code_advance(@source_code)
elsif first_char = '&' then
current_token.kind := TokenKind.and;
current_token.kind := LexerKind.and;
source_code_advance(@source_code)
elsif first_char = '~' then
current_token.kind := TokenKind.not;
current_token.kind := LexerKind.not;
source_code_advance(@source_code)
elsif first_char = '|' then
current_token.kind := TokenKind.pipe;
current_token.kind := LexerKind.pipe;
source_code_advance(@source_code)
else
current_token.kind := TokenKind.unknown;
current_token.kind := LexerKind.unknown;
source_code_advance(@source_code)
end;
@@ -684,7 +605,7 @@ begin
while ~source_code_empty(@source_code) do
current_token := lexer_next(source_code, @token_buffer);
if current_token.kind <> TokenKind.unknown then
if current_token.kind <> LexerKind.unknown then
lexer_add_token(@lexer, current_token);
lexer_spaces(@source_code)
else
@@ -711,142 +632,142 @@ begin
current_token := tokens + i;
case current_token^.kind of
TokenKind._if:
LexerKind._if:
write_s("IF")
| TokenKind._then:
| LexerKind._then:
write_s("THEN")
| TokenKind._else:
| LexerKind._else:
write_s("ELSE")
| TokenKind._elsif:
| LexerKind._elsif:
write_s("ELSIF")
| TokenKind._while:
| LexerKind._while:
write_s("WHILE")
| TokenKind._do:
| LexerKind._do:
write_s("DO")
| TokenKind._proc:
| LexerKind._proc:
write_s("PROC")
| TokenKind._begin:
| LexerKind._begin:
write_s("BEGIN")
| TokenKind._end:
| LexerKind._end:
write_s("END")
| TokenKind._extern:
| LexerKind._extern:
write_s("EXTERN")
| TokenKind._const:
| LexerKind._const:
write_s("CONST")
| TokenKind._var:
| LexerKind._var:
write_s("VAR")
| TokenKind._case:
| LexerKind._case:
write_s("CASE")
| TokenKind._of:
| LexerKind._of:
write_s("OF")
| TokenKind._type:
| LexerKind._type:
write_s("TYPE")
| TokenKind._record:
| LexerKind._record:
write_s("RECORD")
| TokenKind._union:
| LexerKind._union:
write_s("UNION")
| TokenKind.pipe:
| LexerKind.pipe:
write_s("|")
| TokenKind.to:
| LexerKind.to:
write_s("TO")
| TokenKind.boolean:
| LexerKind.boolean:
write_s("BOOLEAN<");
write_b(current_token^.value.boolean_value);
write_c('>')
| TokenKind.null:
| LexerKind.null:
write_s("NIL")
| TokenKind.and:
| LexerKind.and:
write_s("&")
| TokenKind._or:
| LexerKind._or:
write_s("OR")
| TokenKind.not:
| LexerKind.not:
write_s("~")
| TokenKind._return:
| LexerKind._return:
write_s("RETURN")
| TokenKind._cast:
| LexerKind._cast:
write_s("CAST")
| TokenKind.shift_left:
| LexerKind.shift_left:
write_s("<<")
| TokenKind.shift_right:
| LexerKind.shift_right:
write_s(">>")
| TokenKind.identifier:
| LexerKind.identifier:
write_c('<');
write_s(current_token^.value.string);
write_c('>')
| TokenKind.trait:
| LexerKind.trait:
write_c('#');
write_s(current_token^.value.string)
| TokenKind.left_paren:
| LexerKind.left_paren:
write_s("(")
| TokenKind.right_paren:
| LexerKind.right_paren:
write_s(")")
| TokenKind.left_square:
| LexerKind.left_square:
write_s("[")
| TokenKind.right_square:
| LexerKind.right_square:
write_s("]")
| TokenKind.greater_equal:
| LexerKind.greater_equal:
write_s(">=")
| TokenKind.less_equal:
| LexerKind.less_equal:
write_s("<=")
| TokenKind.greater_than:
| LexerKind.greater_than:
write_s(">")
| TokenKind.less_than:
| LexerKind.less_than:
write_s("<")
| TokenKind.equal:
| LexerKind.equal:
write_s("=")
| TokenKind.not_equal:
| LexerKind.not_equal:
write_s("<>")
| TokenKind.semicolon:
| LexerKind.semicolon:
write_c(';')
| TokenKind.dot:
| LexerKind.dot:
write_c('.')
| TokenKind.comma:
| LexerKind.comma:
write_c(',')
| TokenKind.plus:
| LexerKind.plus:
write_c('+')
| TokenKind.minus:
| LexerKind.minus:
write_c('-')
| TokenKind.multiplication:
| LexerKind.multiplication:
write_c('*')
| TokenKind.division:
| LexerKind.division:
write_c('/')
| TokenKind.remainder:
| LexerKind.remainder:
write_c('%')
| TokenKind.assignment:
| LexerKind.assignment:
write_s(":=")
| TokenKind.colon:
| LexerKind.colon:
write_c(':')
| TokenKind.hat:
| LexerKind.hat:
write_c('^')
| TokenKind.at:
| LexerKind.at:
write_c('@')
| TokenKind.comment:
| LexerKind.comment:
write_s("(* COMMENT *)")
| TokenKind.integer:
| LexerKind.integer:
write_c('<');
write_i(current_token^.value.int_value);
write_c('>')
| TokenKind.word:
| LexerKind.word:
write_c('<');
write_i(current_token^.value.int_value);
write_s("u>")
| TokenKind.character:
| LexerKind.character:
write_c('<');
write_i(cast(current_token^.value.char_value: Int));
write_s("c>")
| TokenKind.string:
| LexerKind.string:
write_s("\"...\"")
| TokenKind._defer:
| LexerKind._defer:
write_s("DEFER")
| TokenKind.exclamation:
| LexerKind.exclamation:
write_c('!')
| TokenKind.arrow:
| LexerKind.arrow:
write_s("->")
| TokenKind._program:
| LexerKind._program:
write_s("PROGRAM")
| TokenKind._module:
| LexerKind._module:
write_s("MODULE")
| TokenKind._import:
| LexerKind._import:
write_s("IMPORT")
else
write_s("UNKNOWN<");
@@ -922,6 +843,5 @@ begin
return return_code
end;
begin
exit(process(count, parameters))
return process(count, parameters)
end.