Support one hardcoded import

2025-07-10 00:43:17 +02:00
parent 181b19eefe
commit 34abb6b4f5
18 changed files with 396 additions and 312 deletions

View File

@@ -1,12 +0,0 @@
DEFINITION MODULE Common;
TYPE
ShortString = ARRAY[1..256] OF CHAR;
Identifier = ARRAY[1..256] OF CHAR;
PIdentifier = POINTER TO Identifier;
TextLocation = RECORD
line: CARDINAL;
column: CARDINAL
END;
END Common.

View File

@@ -1,3 +1,13 @@
module;
type
ShortString = [256]Char;
Identifier = [256]Char;
PIdentifier = ^Identifier;
TextLocation* = record
line: Word;
column: Word
end;
FILE* = record end;
end.

View File

@@ -1,107 +0,0 @@
DEFINITION MODULE Lexer;
FROM FIO IMPORT File;
FROM Common IMPORT Identifier, ShortString, TextLocation;
TYPE
PLexerBuffer = POINTER TO CHAR;
BufferPosition = RECORD
iterator: PLexerBuffer;
location: TextLocation
END;
PBufferPosition = POINTER TO BufferPosition;
Lexer = RECORD
input: File;
buffer: PLexerBuffer;
size: CARDINAL;
length: CARDINAL;
start: BufferPosition;
current: BufferPosition
END;
PLexer = POINTER TO Lexer;
LexerKind = (
lexerKindEof,
lexerKindIdentifier,
lexerKindIf,
lexerKindThen,
lexerKindElse,
lexerKindElsif,
lexerKindWhile,
lexerKindDo,
lexerKindProc,
lexerKindBegin,
lexerKindEnd,
lexerKindXor,
lexerKindConst,
lexerKindVar,
lexerKindCase,
lexerKindOf,
lexerKindType,
lexerKindRecord,
lexerKindUnion,
lexerKindPipe,
lexerKindTo,
lexerKindBoolean,
lexerKindNull,
lexerKindAnd,
lexerKindOr,
lexerKindTilde,
lexerKindReturn,
lexerKindDefer,
lexerKindRange,
lexerKindLeftParen,
lexerKindRightParen,
lexerKindLeftSquare,
lexerKindRightSquare,
lexerKindGreaterEqual,
lexerKindLessEqual,
lexerKindGreaterThan,
lexerKindLessThan,
lexerKindNotEqual,
lexerKindEqual,
lexerKindSemicolon,
lexerKindDot,
lexerKindComma,
lexerKindPlus,
lexerKindMinus,
lexerKindAsterisk,
lexerKindDivision,
lexerKindRemainder,
lexerKindAssignment,
lexerKindColon,
lexerKindHat,
lexerKindAt,
lexerKindComment,
lexerKindInteger,
lexerKindWord,
lexerKindCharacter,
lexerKindString,
lexerKindFrom,
lexerKindPointer,
lexerKindArray,
lexerKindArrow,
lexerKindProgram,
lexerKindModule,
lexerKindImport
);
LexerToken = RECORD
CASE kind: LexerKind OF
lexerKindBoolean: booleanKind: BOOLEAN |
lexerKindIdentifier: identifierKind: Identifier |
lexerKindInteger: integerKind: INTEGER |
lexerKindString: stringKind: ShortString
END;
start_location: TextLocation;
end_location: TextLocation
END;
PLexerToken = POINTER TO LexerToken;
PROCEDURE lexer_initialize(lexer: PLexer; input: File);
PROCEDURE lexer_destroy(lexer: PLexer);
(* Returns the last read token. *)
PROCEDURE lexer_current(lexer: PLexer): LexerToken;
(* Read and return the next token. *)
PROCEDURE lexer_lex(lexer: PLexer): LexerToken;
END Lexer.
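
The definition module above documents a pull-style interface: lexer_lex reads and returns the next token, while lexer_current re-reads the last one without consuming input. Below is a minimal C sketch of that calling pattern; the type and function names are simplified placeholders, not this project's API.

#include <stdio.h>

typedef enum { TOKEN_EOF, TOKEN_IDENTIFIER, TOKEN_INTEGER } TokenKind;
typedef struct { TokenKind kind; } Token;

typedef struct {
    FILE *input;   /* source of characters */
    Token last;    /* most recent token produced by lexer_lex() */
} Lexer;

/* Returns the last read token without consuming any input. */
static Token lexer_current(const Lexer *lexer) { return lexer->last; }

/* Reads and returns the next token (scanning is stubbed out here). */
static Token lexer_lex(Lexer *lexer) {
    Token token = { TOKEN_EOF };   /* a real implementation scans lexer->input */
    lexer->last = token;
    return token;
}

int main(void) {
    Lexer lexer = { stdin, { TOKEN_EOF } };
    Token token;
    do {
        token = lexer_lex(&lexer);      /* pull the next token */
    } while (token.kind != TOKEN_EOF);  /* stop at end of input */
    return lexer_current(&lexer).kind;  /* peek at the last token read */
}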

View File

@@ -1,17 +1,9 @@
module;
from FIO import ReadNBytes;
from SYSTEM import ADR, TSIZE;
from DynamicStrings import String, InitStringCharStar, KillString;
from StringConvert import StringToInteger;
from Storage import DEALLOCATE, ALLOCATE;
from Strings import Length;
from MemUtils import MemCopy, MemZero;
from StrCase import Lower;
import Common;
const
CHUNK_SIZE = 85536;
CHUNK_SIZE := 85536;
type
(*
@@ -62,20 +54,109 @@ type
transitionStateDecimalSuffix,
transitionStateEnd
);
TransitionAction = proc(PLexer, PLexerToken);
LexerToken = record
kind: LexerKind;
value: union
booleanKind: Bool;
identifierKind: Identifier;
integerKind: Int;
stringKind: ShortString
end;
start_location: TextLocation;
end_location: TextLocation
end;
TransitionAction = proc(^Lexer, ^LexerToken);
Transition = record
action: TransitionAction;
next_state: TransitionState
end;
TransitionClasses = [22]Transition;
BufferPosition* = record
iterator: ^Char;
location: TextLocation
end;
Lexer* = record
input: ^FILE;
buffer: ^Char;
size: Word;
length: Word;
start: BufferPosition;
current: BufferPosition
end;
LexerKind* = (
lexerKindEof,
lexerKindIdentifier,
lexerKindIf,
lexerKindThen,
lexerKindElse,
lexerKindElsif,
lexerKindWhile,
lexerKindDo,
lexerKindProc,
lexerKindBegin,
lexerKindEnd,
lexerKindXor,
lexerKindConst,
lexerKindVar,
lexerKindCase,
lexerKindOf,
lexerKindType,
lexerKindRecord,
lexerKindUnion,
lexerKindPipe,
lexerKindTo,
lexerKindBoolean,
lexerKindNull,
lexerKindAnd,
lexerKindOr,
lexerKindTilde,
lexerKindReturn,
lexerKindDefer,
lexerKindRange,
lexerKindLeftParen,
lexerKindRightParen,
lexerKindLeftSquare,
lexerKindRightSquare,
lexerKindGreaterEqual,
lexerKindLessEqual,
lexerKindGreaterThan,
lexerKindLessThan,
lexerKindNotEqual,
lexerKindEqual,
lexerKindSemicolon,
lexerKindDot,
lexerKindComma,
lexerKindPlus,
lexerKindMinus,
lexerKindAsterisk,
lexerKindDivision,
lexerKindRemainder,
lexerKindAssignment,
lexerKindColon,
lexerKindHat,
lexerKindAt,
lexerKindComment,
lexerKindInteger,
lexerKindWord,
lexerKindCharacter,
lexerKindString,
lexerKindFrom,
lexerKindPointer,
lexerKindArray,
lexerKindArrow,
lexerKindProgram,
lexerKindModule,
lexerKindImport
);
var
classification: [128]TransitionClass;
transitions: [16]TransitionClasses;
proc initialize_classification();
var
i: CARDINAL;
i: Word;
begin
classification[1] := transitionClassEof; (* NUL *)
classification[2] := transitionClassInvalid; (* SOH *)
@@ -213,12 +294,12 @@ begin
end
end;
proc compare_keyword(keyword: ARRAY OF CHAR, token_start: BufferPosition, token_end: PLexerBuffer) -> BOOLEAN;
proc compare_keyword(keyword: String, token_start: BufferPosition, token_end: ^Char) -> Bool;
var
result: BOOLEAN;
index: CARDINAL;
keyword_length: CARDINAL;
continue: BOOLEAN;
result: Bool;
index: Word;
keyword_length: Word;
continue: Bool;
begin
index := 0;
result := true;
@@ -237,25 +318,25 @@ begin
end;
(* Reached the end of file. *)
proc transition_action_eof(lexer: PLexer, token: PLexerToken);
proc transition_action_eof(lexer: ^Lexer, token: ^LexerToken);
begin
token^.kind := lexerKindEof
end;
proc increment(position: PBufferPosition);
proc increment(position: ^BufferPosition);
begin
INC(position^.iterator)
end;
(* Add the character to the token currently read and advance to the next character. *)
proc transition_action_accumulate(lexer: PLexer, token: PLexerToken);
proc transition_action_accumulate(lexer: ^Lexer, token: ^LexerToken);
begin
increment(ADR(lexer^.current))
end;
(* The current character is not a part of the token. Finish the token already
* read. Don't advance to the next character. *)
proc transition_action_finalize(lexer: PLexer, token: PLexerToken);
proc transition_action_finalize(lexer: ^Lexer, token: ^LexerToken);
begin
if lexer^.start.iterator^ = ':' then
token^.kind := lexerKindColon
@@ -278,7 +359,7 @@ begin
end;
(* An action for tokens containing multiple characters. *)
proc transition_action_composite(lexer: PLexer, token: PLexerToken);
proc transition_action_composite(lexer: ^Lexer, token: ^LexerToken);
begin
if lexer^.start.iterator^ = '<' then
if lexer^.current.iterator^ = '>' then
@@ -304,7 +385,7 @@ begin
end;
(* Skip a space. *)
proc transition_action_skip(lexer: PLexer, token: PLexerToken);
proc transition_action_skip(lexer: ^Lexer, token: ^LexerToken);
begin
increment(ADR(lexer^.start));
@@ -316,9 +397,9 @@ begin
end;
(* Delimited string action. *)
proc transition_action_delimited(lexer: PLexer, token: PLexerToken);
proc transition_action_delimited(lexer: ^Lexer, token: ^LexerToken);
var
text_length: CARDINAL;
text_length: Word;
begin
if lexer^.start.iterator^ = '(' then
token^.kind := lexerKindComment
@@ -347,7 +428,7 @@ begin
end;
(* Finalize keyword or identifier. *)
proc transition_action_key_id(lexer: PLexer, token: PLexerToken);
proc transition_action_key_id(lexer: ^Lexer, token: ^LexerToken);
begin
token^.kind := lexerKindIdentifier;
@@ -355,95 +436,95 @@ begin
DEC(token^.identifierKind[1], lexer^.start.iterator);
MemCopy(lexer^.start.iterator, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2]));
if compare_keyword('program', lexer^.start, lexer^.current.iterator) then
if compare_keyword("program", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindProgram
end;
if compare_keyword('import', lexer^.start, lexer^.current.iterator) then
if compare_keyword("import", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindImport
end;
if compare_keyword('const', lexer^.start, lexer^.current.iterator) then
if compare_keyword("const", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindConst
end;
if compare_keyword('var', lexer^.start, lexer^.current.iterator) then
if compare_keyword("var", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindVar
end;
if compare_keyword('if', lexer^.start, lexer^.current.iterator) then
if compare_keyword("if", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindIf
end;
if compare_keyword('then', lexer^.start, lexer^.current.iterator) then
if compare_keyword("then", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindThen
end;
if compare_keyword('elsif', lexer^.start, lexer^.current.iterator) then
if compare_keyword("elsif", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindElsif
end;
if compare_keyword('else', lexer^.start, lexer^.current.iterator) then
if compare_keyword("else", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindElse
end;
if compare_keyword('while', lexer^.start, lexer^.current.iterator) then
if compare_keyword("while", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindWhile
end;
if compare_keyword('do', lexer^.start, lexer^.current.iterator) then
if compare_keyword("do", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindDo
end;
if compare_keyword('proc', lexer^.start, lexer^.current.iterator) then
if compare_keyword("proc", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindProc
end;
if compare_keyword('begin', lexer^.start, lexer^.current.iterator) then
if compare_keyword("begin", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindBegin
end;
if compare_keyword('end', lexer^.start, lexer^.current.iterator) then
if compare_keyword("end", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindEnd
end;
if compare_keyword('type', lexer^.start, lexer^.current.iterator) then
if compare_keyword("type", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindType
end;
if compare_keyword('record', lexer^.start, lexer^.current.iterator) then
if compare_keyword("record", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindRecord
end;
if compare_keyword('union', lexer^.start, lexer^.current.iterator) then
if compare_keyword("union", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindUnion
end;
if compare_keyword('NIL', lexer^.start, lexer^.current.iterator) then
if compare_keyword("NIL", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindNull
end;
if compare_keyword('or', lexer^.start, lexer^.current.iterator) then
if compare_keyword("or", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindOr
end;
if compare_keyword('return', lexer^.start, lexer^.current.iterator) then
if compare_keyword("return", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindReturn
end;
if compare_keyword('defer', lexer^.start, lexer^.current.iterator) then
if compare_keyword("defer", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindDefer
end;
if compare_keyword('TO', lexer^.start, lexer^.current.iterator) then
if compare_keyword("TO", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindTo
end;
if compare_keyword('CASE', lexer^.start, lexer^.current.iterator) then
if compare_keyword("CASE", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindCase
end;
if compare_keyword('OF', lexer^.start, lexer^.current.iterator) then
if compare_keyword("OF", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindOf
end;
if compare_keyword('FROM', lexer^.start, lexer^.current.iterator) then
if compare_keyword("FROM", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindFrom
end;
if compare_keyword('module', lexer^.start, lexer^.current.iterator) then
if compare_keyword("module", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindModule
end;
if compare_keyword('xor', lexer^.start, lexer^.current.iterator) then
if compare_keyword("xor", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindXor
end;
if compare_keyword('POINTER', lexer^.start, lexer^.current.iterator) then
if compare_keyword("POINTER", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindPointer
end;
if compare_keyword('ARRAY', lexer^.start, lexer^.current.iterator) then
if compare_keyword("ARRAY", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindArray
end;
if compare_keyword('TRUE', lexer^.start, lexer^.current.iterator) then
if compare_keyword("TRUE", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindBoolean;
token^.booleanKind := true
end;
if compare_keyword('FALSE', lexer^.start, lexer^.current.iterator) then
if compare_keyword("FALSE", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindBoolean;
token^.booleanKind := false
end
@@ -451,7 +532,7 @@ end;
(* Action for tokens containing only one character. The character cannot be
* followed by other characters forming a composite token. *)
proc transition_action_single(lexer: PLexer, token: PLexerToken);
proc transition_action_single(lexer: ^Lexer, token: ^LexerToken);
begin
if lexer^.current.iterator^ = '&' then
token^.kind := lexerKindAnd
@@ -502,11 +583,11 @@ begin
end;
(* Handle an integer literal. *)
proc transition_action_integer(lexer: PLexer, token: PLexerToken);
proc transition_action_integer(lexer: ^Lexer, token: ^LexerToken);
var
buffer: String;
integer_length: CARDINAL;
found: BOOLEAN;
integer_length: Word;
found: Bool;
begin
token^.kind := lexerKindInteger;
@@ -805,7 +886,7 @@ begin
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].next_state := transitionStateEnd
end;
proc lexer_initialize(lexer: PLexer, input: File);
proc lexer_make*(lexer: ^Lexer, input: ^FILE);
begin
lexer^.input := input;
lexer^.length := 0;
@@ -815,19 +896,20 @@ begin
lexer^.size := CHUNK_SIZE
end;
proc lexer_current(lexer: PLexer) -> LexerToken;
(* Returns the last read token. *)
proc lexer_current*(lexer: ^Lexer) -> LexerToken;
var
current_class: TransitionClass;
current_state: TransitionState;
current_transition: Transition;
result: LexerToken;
index1: CARDINAL;
index2: CARDINAL;
index1: Word;
index2: Word;
begin
lexer^.current := lexer^.start;
current_state := transitionStateStart;
while current_state <> transitionStateEnd DO
while current_state <> transitionStateEnd do
index1 := ORD(lexer^.current.iterator^);
INC(index1);
current_class := classification[index1];
@@ -849,7 +931,8 @@ begin
return result
end;
proc lexer_lex(lexer: PLexer) -> LexerToken;
(* Read and return the next token. *)
proc lexer_lex*(lexer: ^Lexer) -> LexerToken;
var
result: LexerToken;
begin
@@ -865,12 +948,15 @@ begin
return result
end;
proc lexer_destroy(lexer: PLexer);
proc lexer_destroy*(lexer: ^Lexer);
begin
DEALLOCATE(lexer^.buffer, lexer^.size)
end;
proc lexer_initialize();
begin
initialize_classification();
initialize_transitions()
end;
end.
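
The scanner above is table-driven: classification maps every character code to a transition class, and transitions[state][class] yields an action plus the next state until transitionStateEnd is reached. A minimal C sketch of that technique follows; the classes, states and actions here are invented for illustration and do not match the project's tables.

#include <stdio.h>

typedef enum { CLASS_EOF, CLASS_ALPHA, CLASS_DIGIT, CLASS_SPACE, CLASS_COUNT } Class;
typedef enum { STATE_START, STATE_WORD, STATE_END, STATE_COUNT } State;

typedef struct Scanner { const char *start, *current; } Scanner;
typedef void (*Action)(Scanner *);

static void accumulate(Scanner *s) { s->current++; }       /* keep the byte in the token */
static void skip(Scanner *s) { s->start = ++s->current; }  /* drop a space before the token */
static void finalize(Scanner *s) { (void)s; }              /* token is complete, do not advance */

typedef struct { Action action; State next_state; } Transition;

static Class classify(char c) {
    if (c == '\0') return CLASS_EOF;
    if (c == ' ' || c == '\n') return CLASS_SPACE;
    if (c >= '0' && c <= '9') return CLASS_DIGIT;
    return CLASS_ALPHA;
}

static const Transition transitions[STATE_COUNT][CLASS_COUNT] = {
    /* START */ { { finalize, STATE_END }, { accumulate, STATE_WORD },
                  { accumulate, STATE_WORD }, { skip, STATE_START } },
    /* WORD  */ { { finalize, STATE_END }, { accumulate, STATE_WORD },
                  { accumulate, STATE_WORD }, { finalize, STATE_END } },
    /* END   */ { { NULL, STATE_END }, { NULL, STATE_END },
                  { NULL, STATE_END }, { NULL, STATE_END } },
};

/* Scan one token: [s->start, s->current) delimits its text afterwards. */
static void scan_token(Scanner *s) {
    State state = STATE_START;
    while (state != STATE_END) {
        Transition t = transitions[state][classify(*s->current)];
        if (t.action != NULL) t.action(s);
        state = t.next_state;
    }
}

int main(void) {
    const char *text = "two words";
    Scanner s = { text, text };
    scan_token(&s);
    printf("first token: %.*s\n", (int)(s.current - s.start), s.start);
    return 0;
}

The appeal of this layout is that supporting a new token kind usually means editing table entries and adding an action, not changing the scanning loop itself.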

View File

@@ -3,7 +3,7 @@
obtain one at https://mozilla.org/MPL/2.0/. *)
program;
import dummy;
import Common, Lexer;
const
SEEK_SET* := 0;
@@ -80,13 +80,9 @@ type
_module,
_import
);
Position* = record
line: Word;
column: Word
end;
Location* = record
first: Position;
last: Position
first: TextLocation;
last: TextLocation
end;
SourceFile* = record
buffer: [1024]Char;
@@ -94,14 +90,13 @@ type
size: Word;
index: Word
end;
FILE* = record end;
StringBuffer* = record
data: Pointer;
size: Word;
capacity: Word
end;
SourceCode = record
position: Position;
position: TextLocation;
input: Pointer;
empty: proc(Pointer) -> Bool;
@@ -123,7 +118,7 @@ type
lex: Bool;
parse: Bool
end;
Lexer* = record
Tokenizer* = record
length: Word;
data: ^Token
end;
@@ -592,7 +587,7 @@ begin
return current_token
end;
proc lexer_add_token(lexer: ^Lexer, token: Token);
proc lexer_add_token(lexer: ^Tokenizer, token: Token);
var
new_length: Word;
begin
@@ -778,13 +773,13 @@ begin
end;
(* Split the source text into tokens. *)
proc lexer_text(source_code: SourceCode) -> Lexer;
proc lexer_text(source_code: SourceCode) -> Tokenizer;
var
current_token: Token;
token_buffer: StringBuffer;
lexer: Lexer;
lexer: Tokenizer;
begin
lexer := Lexer(0u, nil);
lexer := Tokenizer(0u, nil);
token_buffer := string_buffer_new();
lexer_spaces(@source_code);
@@ -1024,7 +1019,7 @@ end;
proc compile_in_stages(command_line: ^CommandLine, source_code: SourceCode) -> Int;
var
return_code: Int;
lexer: Lexer;
lexer: Tokenizer;
begin
return_code := 0;
@@ -1068,7 +1063,7 @@ begin
fclose(source_file^.handle)
end;
source_code.position := Position(1u, 1u);
source_code.position := TextLocation(1u, 1u);
source_code.input := cast(source_file: Pointer);
source_code.empty := source_file_empty;
source_code.head := source_file_head;
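
The Tokenizer record introduced in this file is a growable token array (a length plus a data pointer) filled by lexer_add_token while the source text is split into tokens. Below is a hedged C sketch of that append pattern; the realloc-based growth by one element is an assumption for illustration, not what this commit implements.

#include <stdlib.h>

typedef struct { int kind; } Token;          /* stand-in for the real Token */

typedef struct {
    size_t length;  /* number of tokens stored so far */
    Token *data;    /* heap array holding `length` tokens */
} Tokenizer;

/* Append one token, growing the array by a single element. */
static int tokenizer_add_token(Tokenizer *tokenizer, Token token) {
    size_t new_length = tokenizer->length + 1;
    Token *grown = realloc(tokenizer->data, new_length * sizeof(Token));
    if (grown == NULL) {
        return 0;                            /* allocation failed, array unchanged */
    }
    grown[new_length - 1] = token;
    tokenizer->data = grown;
    tokenizer->length = new_length;
    return 1;
}

int main(void) {
    Tokenizer tokenizer = { 0, NULL };       /* mirrors Tokenizer(0u, nil) above */
    Token token = { 0 };
    tokenizer_add_token(&tokenizer, token);  /* tokenizer.length is now 1 */
    free(tokenizer.data);
    return 0;
}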