Fix errors in the lexer module

2025-08-16 15:28:07 +02:00
parent f880e3d2d7
commit 569139d44a
5 changed files with 189 additions and 276 deletions
--- a/source/main.elna
+++ b/source/main.elna
@@ -3,87 +3,9 @@
  obtain one at https://mozilla.org/MPL/2.0/. *)
 program;

-import cstdio, cctype, common, command_line_interface, Lexer;
-
-const
-  SEEK_SET* := 0;
-  SEEK_CUR* := 1;
-  SEEK_END* := 2;
-  STDIN := 0;
-  STDOUT := 1;
-  STDERR := 2;
+import cstdio, cctype, common, command_line_interface, lexer;

 type
-  TokenKind* = (
-    unknown,
-    identifier,
-    _if,
-    _then,
-    _else,
-    _elsif,
-    _while,
-    _do,
-    _proc,
-    _begin,
-    _end,
-    _extern,
-    _const,
-    _var,
-    _case,
-    _of,
-    _type,
-    _record,
-    _union,
-    pipe,
-    to,
-    boolean,
-    null,
-    and,
-    _or,
-    not,
-    _return,
-    _cast,
-    shift_left,
-    shift_right,
-    left_paren,
-    right_paren,
-    left_square,
-    right_square,
-    greater_equal,
-    less_equal,
-    greater_than,
-    less_than,
-    not_equal,
-    equal,
-    semicolon,
-    dot,
-    comma,
-    plus,
-    minus,
-    multiplication,
-    division,
-    remainder,
-    assignment,
-    colon,
-    hat,
-    at,
-    comment,
-    integer,
-    word,
-    character,
-    string,
-    _defer,
-    exclamation,
-    arrow,
-    trait,
-    _program,
-    _module,
-    _import
-  );
-  Location* = record
-    first: TextLocation;
-    last: TextLocation
-  end;
  SourceFile* = record
    buffer: [1024]Char;
    handle: ^FILE;
@@ -104,14 +26,13 @@ type
    head: proc(Pointer) -> Char
  end;
  Token* = record
-    kind: TokenKind;
+    kind: LexerKind;
    value: union
      int_value: Int;
      string: String;
      boolean_value: Bool;
      char_value: Char
-    end;
-    location: Location
+    end
  end;
  Tokenizer* = record
    length: Word;
@@ -421,63 +342,63 @@ var
  current_token: Token;
 begin
  if token_content = "if" then
-    current_token.kind := TokenKind._if
+    current_token.kind := LexerKind._if
  elsif token_content = "then" then
-    current_token.kind := TokenKind._then
+    current_token.kind := LexerKind._then
  elsif token_content = "else" then
-    current_token.kind := TokenKind._else
+    current_token.kind := LexerKind._else
  elsif token_content = "elsif" then
-    current_token.kind := TokenKind._elsif
+    current_token.kind := LexerKind._elsif
  elsif token_content = "while" then
-    current_token.kind := TokenKind._while
+    current_token.kind := LexerKind._while
  elsif token_content = "do" then
-    current_token.kind := TokenKind._do
+    current_token.kind := LexerKind._do
  elsif token_content = "proc" then
-    current_token.kind := TokenKind._proc
+    current_token.kind := LexerKind._proc
  elsif token_content = "begin" then
-    current_token.kind := TokenKind._begin
+    current_token.kind := LexerKind._begin
  elsif token_content = "end" then
-    current_token.kind := TokenKind._end
+    current_token.kind := LexerKind._end
  elsif token_content = "extern" then
-    current_token.kind := TokenKind._extern
+    current_token.kind := LexerKind._extern
  elsif token_content = "const" then
-    current_token.kind := TokenKind._const
+    current_token.kind := LexerKind._const
  elsif token_content = "var" then
-    current_token.kind := TokenKind._var
+    current_token.kind := LexerKind._var
  elsif token_content = "case" then
-    current_token.kind := TokenKind._case
+    current_token.kind := LexerKind._case
  elsif token_content = "of" then
-    current_token.kind := TokenKind._of
+    current_token.kind := LexerKind._of
  elsif token_content = "type" then
-    current_token.kind := TokenKind._type
+    current_token.kind := LexerKind._type
  elsif token_content = "record" then
-    current_token.kind := TokenKind._record
+    current_token.kind := LexerKind._record
  elsif token_content = "union" then
-    current_token.kind := TokenKind._union
+    current_token.kind := LexerKind._union
  elsif token_content = "true" then
-    current_token.kind := TokenKind.boolean;
+    current_token.kind := LexerKind.boolean;
    current_token.value.boolean_value := true
  elsif token_content = "false" then
-    current_token.kind := TokenKind.boolean;
+    current_token.kind := LexerKind.boolean;
    current_token.value.boolean_value := false
  elsif token_content = "nil" then
-    current_token.kind := TokenKind.null
+    current_token.kind := LexerKind.null
  elsif token_content = "or" then
-    current_token.kind := TokenKind._or
+    current_token.kind := LexerKind._or
  elsif token_content = "return" then
-    current_token.kind := TokenKind._return
+    current_token.kind := LexerKind._return
  elsif token_content = "cast" then
-    current_token.kind := TokenKind._cast
+    current_token.kind := LexerKind._cast
  elsif token_content = "defer" then
-    current_token.kind := TokenKind._defer
+    current_token.kind := LexerKind._defer
  elsif token_content = "program" then
-    current_token.kind := TokenKind._program
+    current_token.kind := LexerKind._program
  elsif token_content = "module" then
-    current_token.kind := TokenKind._module
+    current_token.kind := LexerKind._module
  elsif token_content = "import" then
-    current_token.kind := TokenKind._import
+    current_token.kind := LexerKind._import
  else
-    current_token.kind := TokenKind.identifier;
+    current_token.kind := LexerKind.identifier;
    current_token.value.string := string_dup(token_content)
  end;

@@ -500,7 +421,7 @@ var
  current_token: Token;
  first_char: Char;
 begin
-  current_token.kind := TokenKind.unknown;
+  current_token.kind := LexerKind.unknown;

  first_char := source_code_head(source_code);

@@ -511,158 +432,158 @@ begin
    source_code_advance(@source_code);
    lexer_identifier(@source_code, token_buffer);

-    current_token.kind := TokenKind.trait;
+    current_token.kind := LexerKind.trait;
    current_token.value.string := string_dup(string_buffer_clear(token_buffer))
  elsif isdigit(cast(first_char: Int)) <> 0 then
    lexer_number(@source_code, @current_token.value.int_value);

    if source_code_expect(@source_code, 'u') then
-      current_token.kind := TokenKind.word;
+      current_token.kind := LexerKind.word;
        source_code_advance(@source_code)
    else
-      current_token.kind := TokenKind.integer
+      current_token.kind := LexerKind.integer
    end
  elsif first_char = '(' then
    source_code_advance(@source_code);

    if source_code_empty(@source_code) then
-      current_token.kind := TokenKind.left_paren
+      current_token.kind := LexerKind.left_paren
    elsif source_code_head(source_code) = '*' then
      source_code_advance(@source_code);

      if lexer_comment(@source_code, token_buffer) then
        current_token.value.string := string_dup(string_buffer_clear(token_buffer));
-        current_token.kind := TokenKind.comment
+        current_token.kind := LexerKind.comment
      else
-        current_token.kind := TokenKind.unknown
+        current_token.kind := LexerKind.unknown
      end
    else
-      current_token.kind := TokenKind.left_paren
+      current_token.kind := LexerKind.left_paren
    end
  elsif first_char = ')' then
-    current_token.kind := TokenKind.right_paren;
+    current_token.kind := LexerKind.right_paren;
    source_code_advance(@source_code)
  elsif first_char = '\'' then
    source_code_advance(@source_code);

    if lexer_character(@source_code, @current_token.value.char_value) & source_code_expect(@source_code, '\'') then
-      current_token.kind := TokenKind.character;
+      current_token.kind := LexerKind.character;
      source_code_advance(@source_code)
    else
-      current_token.kind := TokenKind.unknown
+      current_token.kind := LexerKind.unknown
    end
  elsif first_char = '"' then
    source_code_advance(@source_code);

    if lexer_string(@source_code, token_buffer) then
-      current_token.kind := TokenKind.string;
+      current_token.kind := LexerKind.string;
      current_token.value.string := string_dup(string_buffer_clear(token_buffer))
    else
-      current_token.kind := TokenKind.unknown
+      current_token.kind := LexerKind.unknown
    end
  elsif first_char = '[' then
-    current_token.kind := TokenKind.left_square;
+    current_token.kind := LexerKind.left_square;
    source_code_advance(@source_code)
  elsif first_char = ']' then
-    current_token.kind := TokenKind.right_square;
+    current_token.kind := LexerKind.right_square;
    source_code_advance(@source_code)
  elsif first_char = '>' then
    source_code_advance(@source_code);

    if source_code_empty(@source_code) then
-      current_token.kind := TokenKind.greater_than
+      current_token.kind := LexerKind.greater_than
    elsif source_code_head(source_code) = '=' then
-      current_token.kind := TokenKind.greater_equal;
+      current_token.kind := LexerKind.greater_equal;
      source_code_advance(@source_code)
    elsif source_code_head(source_code) = '>' then
-      current_token.kind := TokenKind.shift_right;
+      current_token.kind := LexerKind.shift_right;
      source_code_advance(@source_code)
    else
-      current_token.kind := TokenKind.greater_than
+      current_token.kind := LexerKind.greater_than
    end
  elsif first_char = '<' then
    source_code_advance(@source_code);

    if source_code_empty(@source_code) then
-      current_token.kind := TokenKind.less_than
+      current_token.kind := LexerKind.less_than
    elsif source_code_head(source_code) = '=' then
-      current_token.kind := TokenKind.less_equal;
+      current_token.kind := LexerKind.less_equal;
      source_code_advance(@source_code)
    elsif source_code_head(source_code) = '<' then
-      current_token.kind := TokenKind.shift_left;
+      current_token.kind := LexerKind.shift_left;
      source_code_advance(@source_code)
    elsif source_code_head(source_code) = '>' then
-      current_token.kind := TokenKind.not_equal;
+      current_token.kind := LexerKind.not_equal;
      source_code_advance(@source_code)
    else
-      current_token.kind := TokenKind.less_than
+      current_token.kind := LexerKind.less_than
    end
  elsif first_char = '=' then
-    current_token.kind := TokenKind.equal;
+    current_token.kind := LexerKind.equal;
    source_code_advance(@source_code)
  elsif first_char = ';' then
-    current_token.kind := TokenKind.semicolon;
+    current_token.kind := LexerKind.semicolon;
    source_code_advance(@source_code)
  elsif first_char = '.' then
-    current_token.kind := TokenKind.dot;
+    current_token.kind := LexerKind.dot;
    source_code_advance(@source_code)
  elsif first_char = ',' then
-    current_token.kind := TokenKind.comma;
+    current_token.kind := LexerKind.comma;
    source_code_advance(@source_code)
  elsif first_char = '+' then
-    current_token.kind := TokenKind.plus;
+    current_token.kind := LexerKind.plus;
    source_code_advance(@source_code)
  elsif first_char = '-' then
    source_code_advance(@source_code);

    if source_code_empty(@source_code) then
-      current_token.kind := TokenKind.minus
+      current_token.kind := LexerKind.minus
    elsif source_code_head(source_code) = '>' then
-      current_token.kind := TokenKind.arrow;
+      current_token.kind := LexerKind.arrow;
      source_code_advance(@source_code)
    else
-      current_token.kind := TokenKind.minus
+      current_token.kind := LexerKind.minus
    end
  elsif first_char = '*' then
-    current_token.kind := TokenKind.multiplication;
+    current_token.kind := LexerKind.multiplication;
    source_code_advance(@source_code)
  elsif first_char = '/' then
-    current_token.kind := TokenKind.division;
+    current_token.kind := LexerKind.division;
    source_code_advance(@source_code)
  elsif first_char = '%' then
-    current_token.kind := TokenKind.remainder;
+    current_token.kind := LexerKind.remainder;
    source_code_advance(@source_code)
  elsif first_char = ':' then
    source_code_advance(@source_code);

    if source_code_empty(@source_code) then
-      current_token.kind := TokenKind.colon
+      current_token.kind := LexerKind.colon
    elsif source_code_head(source_code) = '=' then
-      current_token.kind := TokenKind.assignment;
+      current_token.kind := LexerKind.assignment;
      source_code_advance(@source_code)
    else
-      current_token.kind := TokenKind.colon
+      current_token.kind := LexerKind.colon
    end
  elsif first_char = '^' then
-    current_token.kind := TokenKind.hat;
+    current_token.kind := LexerKind.hat;
    source_code_advance(@source_code)
  elsif first_char = '@' then
-    current_token.kind := TokenKind.at;
+    current_token.kind := LexerKind.at;
    source_code_advance(@source_code)
  elsif first_char = '!' then
-    current_token.kind := TokenKind.exclamation;
+    current_token.kind := LexerKind.exclamation;
    source_code_advance(@source_code)
  elsif first_char = '&' then
-    current_token.kind := TokenKind.and;
+    current_token.kind := LexerKind.and;
    source_code_advance(@source_code)
  elsif first_char = '~' then
-    current_token.kind := TokenKind.not;
+    current_token.kind := LexerKind.not;
    source_code_advance(@source_code)
  elsif first_char = '|' then
-    current_token.kind := TokenKind.pipe;
+    current_token.kind := LexerKind.pipe;
    source_code_advance(@source_code)
  else
-    current_token.kind := TokenKind.unknown;
+    current_token.kind := LexerKind.unknown;
    source_code_advance(@source_code)
  end;

@@ -684,7 +605,7 @@ begin
  while ~source_code_empty(@source_code) do
    current_token := lexer_next(source_code, @token_buffer);

-    if current_token.kind <> TokenKind.unknown then
+    if current_token.kind <> LexerKind.unknown then
      lexer_add_token(@lexer, current_token);
        lexer_spaces(@source_code)
    else
@@ -711,142 +632,142 @@ begin
    current_token := tokens + i;

    case current_token^.kind of
-      TokenKind._if:
+      LexerKind._if:
        write_s("IF")
-      | TokenKind._then:
+      | LexerKind._then:
        write_s("THEN")
-      | TokenKind._else:
+      | LexerKind._else:
        write_s("ELSE")
-      | TokenKind._elsif:
+      | LexerKind._elsif:
        write_s("ELSIF")
-      | TokenKind._while:
+      | LexerKind._while:
        write_s("WHILE")
-      | TokenKind._do:
+      | LexerKind._do:
        write_s("DO")
-      | TokenKind._proc:
+      | LexerKind._proc:
        write_s("PROC")
-      | TokenKind._begin:
+      | LexerKind._begin:
        write_s("BEGIN")
-      | TokenKind._end:
+      | LexerKind._end:
        write_s("END")
-      | TokenKind._extern:
+      | LexerKind._extern:
        write_s("EXTERN")
-      | TokenKind._const:
+      | LexerKind._const:
        write_s("CONST")
-      | TokenKind._var:
+      | LexerKind._var:
        write_s("VAR")
-     | TokenKind._case:
+     | LexerKind._case:
        write_s("CASE")
-      | TokenKind._of:
+      | LexerKind._of:
        write_s("OF")
-      | TokenKind._type:
+      | LexerKind._type:
        write_s("TYPE")
-      | TokenKind._record:
+      | LexerKind._record:
        write_s("RECORD")
-      | TokenKind._union:
+      | LexerKind._union:
        write_s("UNION")
-      | TokenKind.pipe:
+      | LexerKind.pipe:
        write_s("|")
-      | TokenKind.to:
+      | LexerKind.to:
        write_s("TO")
-      | TokenKind.boolean:
+      | LexerKind.boolean:
        write_s("BOOLEAN<");
        write_b(current_token^.value.boolean_value);
        write_c('>')
-      | TokenKind.null:
+      | LexerKind.null:
        write_s("NIL")
-      | TokenKind.and:
+      | LexerKind.and:
        write_s("&")
-      | TokenKind._or:
+      | LexerKind._or:
        write_s("OR")
-      | TokenKind.not:
+      | LexerKind.not:
        write_s("~")
-      | TokenKind._return:
+      | LexerKind._return:
        write_s("RETURN")
-      | TokenKind._cast:
+      | LexerKind._cast:
        write_s("CAST")
-      | TokenKind.shift_left:
+      | LexerKind.shift_left:
        write_s("<<")
-      | TokenKind.shift_right:
+      | LexerKind.shift_right:
        write_s(">>")
-      | TokenKind.identifier:
+      | LexerKind.identifier:
        write_c('<');
        write_s(current_token^.value.string);
        write_c('>')
-      | TokenKind.trait:
+      | LexerKind.trait:
        write_c('#');
        write_s(current_token^.value.string)
-      | TokenKind.left_paren:
+      | LexerKind.left_paren:
        write_s("(")
-      | TokenKind.right_paren:
+      | LexerKind.right_paren:
        write_s(")")
-      | TokenKind.left_square:
+      | LexerKind.left_square:
        write_s("[")
-      | TokenKind.right_square:
+      | LexerKind.right_square:
        write_s("]")
-      | TokenKind.greater_equal:
+      | LexerKind.greater_equal:
        write_s(">=")
-      | TokenKind.less_equal:
+      | LexerKind.less_equal:
        write_s("<=")
-      | TokenKind.greater_than:
+      | LexerKind.greater_than:
        write_s(">")
-      | TokenKind.less_than:
+      | LexerKind.less_than:
        write_s("<")
-      | TokenKind.equal:
+      | LexerKind.equal:
        write_s("=")
-      | TokenKind.not_equal:
+      | LexerKind.not_equal:
        write_s("<>")
-      | TokenKind.semicolon:
+      | LexerKind.semicolon:
        write_c(';')
-      | TokenKind.dot:
+      | LexerKind.dot:
        write_c('.')
-      | TokenKind.comma:
+      | LexerKind.comma:
        write_c(',')
-      | TokenKind.plus:
+      | LexerKind.plus:
        write_c('+')
-      | TokenKind.minus:
+      | LexerKind.minus:
        write_c('-')
-      | TokenKind.multiplication:
+      | LexerKind.multiplication:
        write_c('*')
-      | TokenKind.division:
+      | LexerKind.division:
        write_c('/')
-      | TokenKind.remainder:
+      | LexerKind.remainder:
        write_c('%')
-      | TokenKind.assignment:
+      | LexerKind.assignment:
        write_s(":=")
-      | TokenKind.colon:
+      | LexerKind.colon:
        write_c(':')
-      | TokenKind.hat:
+      | LexerKind.hat:
        write_c('^')
-      | TokenKind.at:
+      | LexerKind.at:
        write_c('@')
-      | TokenKind.comment:
+      | LexerKind.comment:
        write_s("(* COMMENT *)")
-      | TokenKind.integer:
+      | LexerKind.integer:
        write_c('<');
        write_i(current_token^.value.int_value);
        write_c('>')
-      | TokenKind.word:
+      | LexerKind.word:
        write_c('<');
        write_i(current_token^.value.int_value);
        write_s("u>")
-      | TokenKind.character:
+      | LexerKind.character:
        write_c('<');
        write_i(cast(current_token^.value.char_value: Int));
        write_s("c>")
-      | TokenKind.string:
+      | LexerKind.string:
        write_s("\"...\"")
-      | TokenKind._defer:
+      | LexerKind._defer:
        write_s("DEFER")
-      | TokenKind.exclamation:
+      | LexerKind.exclamation:
        write_c('!')
-      | TokenKind.arrow:
+      | LexerKind.arrow:
        write_s("->")
-      | TokenKind._program:
+      | LexerKind._program:
        write_s("PROGRAM")
-      | TokenKind._module:
+      | LexerKind._module:
      write_s("MODULE")
-      | TokenKind._import:
+      | LexerKind._import:
      write_s("IMPORT")
      else
        write_s("UNKNOWN<");
@@ -922,6 +843,5 @@ begin
  return return_code
 end;

-begin
-  exit(process(count, parameters))
+  return process(count, parameters)
 end.