Support one hardcoded import

2025-07-10 00:43:17 +02:00
parent 181b19eefe
commit 34abb6b4f5
18 changed files with 396 additions and 312 deletions

View File

@@ -1,12 +0,0 @@
DEFINITION MODULE Common;
TYPE
ShortString = ARRAY[1..256] OF CHAR;
Identifier = ARRAY[1..256] OF CHAR;
PIdentifier = POINTER TO Identifier;
TextLocation = RECORD
line: CARDINAL;
column: CARDINAL
END;
END Common.

View File

@@ -1,3 +1,13 @@
module;
type
ShortString = [256]Char;
Identifier = [256]Char;
PIdentifier = ^Identifier;
TextLocation* = record
line: Word;
column: Word
end;
FILE* = record end;
end.

View File

@@ -1,107 +0,0 @@
DEFINITION MODULE Lexer;
FROM FIO IMPORT File;
FROM Common IMPORT Identifier, ShortString, TextLocation;
TYPE
PLexerBuffer = POINTER TO CHAR;
BufferPosition = RECORD
iterator: PLexerBuffer;
location: TextLocation
END;
PBufferPosition = POINTER TO BufferPosition;
Lexer = RECORD
input: File;
buffer: PLexerBuffer;
size: CARDINAL;
length: CARDINAL;
start: BufferPosition;
current: BufferPosition
END;
PLexer = POINTER TO Lexer;
LexerKind = (
lexerKindEof,
lexerKindIdentifier,
lexerKindIf,
lexerKindThen,
lexerKindElse,
lexerKindElsif,
lexerKindWhile,
lexerKindDo,
lexerKindProc,
lexerKindBegin,
lexerKindEnd,
lexerKindXor,
lexerKindConst,
lexerKindVar,
lexerKindCase,
lexerKindOf,
lexerKindType,
lexerKindRecord,
lexerKindUnion,
lexerKindPipe,
lexerKindTo,
lexerKindBoolean,
lexerKindNull,
lexerKindAnd,
lexerKindOr,
lexerKindTilde,
lexerKindReturn,
lexerKindDefer,
lexerKindRange,
lexerKindLeftParen,
lexerKindRightParen,
lexerKindLeftSquare,
lexerKindRightSquare,
lexerKindGreaterEqual,
lexerKindLessEqual,
lexerKindGreaterThan,
lexerKindLessThan,
lexerKindNotEqual,
lexerKindEqual,
lexerKindSemicolon,
lexerKindDot,
lexerKindComma,
lexerKindPlus,
lexerKindMinus,
lexerKindAsterisk,
lexerKindDivision,
lexerKindRemainder,
lexerKindAssignment,
lexerKindColon,
lexerKindHat,
lexerKindAt,
lexerKindComment,
lexerKindInteger,
lexerKindWord,
lexerKindCharacter,
lexerKindString,
lexerKindFrom,
lexerKindPointer,
lexerKindArray,
lexerKindArrow,
lexerKindProgram,
lexerKindModule,
lexerKindImport
);
LexerToken = RECORD
CASE kind: LexerKind OF
lexerKindBoolean: booleanKind: BOOLEAN |
lexerKindIdentifier: identifierKind: Identifier |
lexerKindInteger: integerKind: INTEGER |
lexerKindString: stringKind: ShortString
END;
start_location: TextLocation;
end_location: TextLocation
END;
PLexerToken = POINTER TO LexerToken;
PROCEDURE lexer_initialize(lexer: PLexer; input: File);
PROCEDURE lexer_destroy(lexer: PLexer);
(* Returns the last read token. *)
PROCEDURE lexer_current(lexer: PLexer): LexerToken;
(* Read and return the next token. *)
PROCEDURE lexer_lex(lexer: PLexer): LexerToken;
END Lexer.
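
The definition module above documents a pull-style interface: lexer_lex reads and returns the next token, while lexer_current re-reads the last one without consuming input. Below is a minimal C sketch of that calling pattern; the type and function names are simplified placeholders, not this project's API.

#include <stdio.h>

typedef enum { TOKEN_EOF, TOKEN_IDENTIFIER, TOKEN_INTEGER } TokenKind;
typedef struct { TokenKind kind; } Token;

typedef struct {
    FILE *input;   /* source of characters */
    Token last;    /* most recent token produced by lexer_lex() */
} Lexer;

/* Returns the last read token without consuming any input. */
static Token lexer_current(const Lexer *lexer) { return lexer->last; }

/* Reads and returns the next token (scanning is stubbed out here). */
static Token lexer_lex(Lexer *lexer) {
    Token token = { TOKEN_EOF };   /* a real implementation scans lexer->input */
    lexer->last = token;
    return token;
}

int main(void) {
    Lexer lexer = { stdin, { TOKEN_EOF } };
    Token token;
    do {
        token = lexer_lex(&lexer);      /* pull the next token */
    } while (token.kind != TOKEN_EOF);  /* stop at end of input */
    return lexer_current(&lexer).kind;  /* peek at the last token read */
}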

View File

@@ -1,17 +1,9 @@
module;
from FIO import ReadNBytes;
from SYSTEM import ADR, TSIZE;
from DynamicStrings import String, InitStringCharStar, KillString;
from StringConvert import StringToInteger;
from Storage import DEALLOCATE, ALLOCATE;
from Strings import Length;
from MemUtils import MemCopy, MemZero;
from StrCase import Lower;
import Common;
const
CHUNK_SIZE = 85536;
CHUNK_SIZE := 85536;
type
(*
@@ -62,20 +54,109 @@ type
transitionStateDecimalSuffix,
transitionStateEnd
);
TransitionAction = proc(PLexer, PLexerToken);
LexerToken = record
kind: LexerKind;
value: union
booleanKind: Bool;
identifierKind: Identifier;
integerKind: Int;
stringKind: ShortString
end;
start_location: TextLocation;
end_location: TextLocation
end;
TransitionAction = proc(^Lexer, ^LexerToken);
Transition = record
action: TransitionAction;
next_state: TransitionState
end;
TransitionClasses = [22]Transition;
BufferPosition* = record
iterator: ^Char;
location: TextLocation
end;
Lexer* = record
input: ^FILE;
buffer: ^Char;
size: Word;
length: Word;
start: BufferPosition;
current: BufferPosition
end;
LexerKind* = (
lexerKindEof,
lexerKindIdentifier,
lexerKindIf,
lexerKindThen,
lexerKindElse,
lexerKindElsif,
lexerKindWhile,
lexerKindDo,
lexerKindProc,
lexerKindBegin,
lexerKindEnd,
lexerKindXor,
lexerKindConst,
lexerKindVar,
lexerKindCase,
lexerKindOf,
lexerKindType,
lexerKindRecord,
lexerKindUnion,
lexerKindPipe,
lexerKindTo,
lexerKindBoolean,
lexerKindNull,
lexerKindAnd,
lexerKindOr,
lexerKindTilde,
lexerKindReturn,
lexerKindDefer,
lexerKindRange,
lexerKindLeftParen,
lexerKindRightParen,
lexerKindLeftSquare,
lexerKindRightSquare,
lexerKindGreaterEqual,
lexerKindLessEqual,
lexerKindGreaterThan,
lexerKindLessThan,
lexerKindNotEqual,
lexerKindEqual,
lexerKindSemicolon,
lexerKindDot,
lexerKindComma,
lexerKindPlus,
lexerKindMinus,
lexerKindAsterisk,
lexerKindDivision,
lexerKindRemainder,
lexerKindAssignment,
lexerKindColon,
lexerKindHat,
lexerKindAt,
lexerKindComment,
lexerKindInteger,
lexerKindWord,
lexerKindCharacter,
lexerKindString,
lexerKindFrom,
lexerKindPointer,
lexerKindArray,
lexerKindArrow,
lexerKindProgram,
lexerKindModule,
lexerKindImport
);
var
classification: [128]TransitionClass;
transitions: [16]TransitionClasses;
proc initialize_classification();
var
i: CARDINAL;
i: Word;
begin
classification[1] := transitionClassEof; (* NUL *)
classification[2] := transitionClassInvalid; (* SOH *)
@@ -213,12 +294,12 @@ begin
end
end;
proc compare_keyword(keyword: ARRAY OF CHAR, token_start: BufferPosition, token_end: PLexerBuffer) -> BOOLEAN;
proc compare_keyword(keyword: String, token_start: BufferPosition, token_end: ^Char) -> Bool;
var
result: BOOLEAN;
index: CARDINAL;
keyword_length: CARDINAL;
continue: BOOLEAN;
result: Bool;
index: Word;
keyword_length: Word;
continue: Bool;
begin
index := 0;
result := true;
@@ -237,25 +318,25 @@ begin
end;
(* Reached the end of file. *)
proc transition_action_eof(lexer: PLexer, token: PLexerToken);
proc transition_action_eof(lexer: ^Lexer, token: ^LexerToken);
begin
token^.kind := lexerKindEof
end;
proc increment(position: PBufferPosition);
proc increment(position: ^BufferPosition);
begin
INC(position^.iterator)
end;
(* Add the character to the token currently read and advance to the next character. *)
proc transition_action_accumulate(lexer: PLexer, token: PLexerToken);
proc transition_action_accumulate(lexer: ^Lexer, token: ^LexerToken);
begin
increment(ADR(lexer^.current))
end;
(* The current character is not a part of the token. Finish the token already
* read. Don't advance to the next character. *)
proc transition_action_finalize(lexer: PLexer, token: PLexerToken);
proc transition_action_finalize(lexer: ^Lexer, token: ^LexerToken);
begin
if lexer^.start.iterator^ = ':' then
token^.kind := lexerKindColon
@@ -278,7 +359,7 @@ begin
end;
(* An action for tokens containing multiple characters. *)
proc transition_action_composite(lexer: PLexer, token: PLexerToken);
proc transition_action_composite(lexer: ^Lexer, token: ^LexerToken);
begin
if lexer^.start.iterator^ = '<' then
if lexer^.current.iterator^ = '>' then
@@ -304,7 +385,7 @@ begin
end;
(* Skip a space. *)
proc transition_action_skip(lexer: PLexer, token: PLexerToken);
proc transition_action_skip(lexer: ^Lexer, token: ^LexerToken);
begin
increment(ADR(lexer^.start));
@@ -316,9 +397,9 @@ begin
end;
(* Delimited string action. *)
proc transition_action_delimited(lexer: PLexer, token: PLexerToken);
proc transition_action_delimited(lexer: ^Lexer, token: ^LexerToken);
var
text_length: CARDINAL;
text_length: Word;
begin
if lexer^.start.iterator^ = '(' then
token^.kind := lexerKindComment
@@ -347,7 +428,7 @@ begin
end;
(* Finalize keyword or identifier. *)
proc transition_action_key_id(lexer: PLexer, token: PLexerToken);
proc transition_action_key_id(lexer: ^Lexer, token: ^LexerToken);
begin
token^.kind := lexerKindIdentifier;
@@ -355,95 +436,95 @@ begin
DEC(token^.identifierKind[1], lexer^.start.iterator);
MemCopy(lexer^.start.iterator, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2]));
if compare_keyword('program', lexer^.start, lexer^.current.iterator) then
if compare_keyword("program", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindProgram
end;
if compare_keyword('import', lexer^.start, lexer^.current.iterator) then
if compare_keyword("import", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindImport
end;
if compare_keyword('const', lexer^.start, lexer^.current.iterator) then
if compare_keyword("const", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindConst
end;
if compare_keyword('var', lexer^.start, lexer^.current.iterator) then
if compare_keyword("var", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindVar
end;
if compare_keyword('if', lexer^.start, lexer^.current.iterator) then
if compare_keyword("if", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindIf
end;
if compare_keyword('then', lexer^.start, lexer^.current.iterator) then
if compare_keyword("then", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindThen
end;
if compare_keyword('elsif', lexer^.start, lexer^.current.iterator) then
if compare_keyword("elsif", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindElsif
end;
if compare_keyword('else', lexer^.start, lexer^.current.iterator) then
if compare_keyword("else", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindElse
end;
if compare_keyword('while', lexer^.start, lexer^.current.iterator) then
if compare_keyword("while", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindWhile
end;
if compare_keyword('do', lexer^.start, lexer^.current.iterator) then
if compare_keyword("do", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindDo
end;
if compare_keyword('proc', lexer^.start, lexer^.current.iterator) then
if compare_keyword("proc", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindProc
end;
if compare_keyword('begin', lexer^.start, lexer^.current.iterator) then
if compare_keyword("begin", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindBegin
end;
if compare_keyword('end', lexer^.start, lexer^.current.iterator) then
if compare_keyword("end", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindEnd
end;
if compare_keyword('type', lexer^.start, lexer^.current.iterator) then
if compare_keyword("type", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindType
end;
if compare_keyword('record', lexer^.start, lexer^.current.iterator) then
if compare_keyword("record", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindRecord
end;
if compare_keyword('union', lexer^.start, lexer^.current.iterator) then
if compare_keyword("union", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindUnion
end;
if compare_keyword('NIL', lexer^.start, lexer^.current.iterator) then
if compare_keyword("NIL", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindNull
end;
if compare_keyword('or', lexer^.start, lexer^.current.iterator) then
if compare_keyword("or", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindOr
end;
if compare_keyword('return', lexer^.start, lexer^.current.iterator) then
if compare_keyword("return", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindReturn
end;
if compare_keyword('defer', lexer^.start, lexer^.current.iterator) then
if compare_keyword("defer", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindDefer
end;
if compare_keyword('TO', lexer^.start, lexer^.current.iterator) then
if compare_keyword("TO", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindTo
end;
if compare_keyword('CASE', lexer^.start, lexer^.current.iterator) then
if compare_keyword("CASE", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindCase
end;
if compare_keyword('OF', lexer^.start, lexer^.current.iterator) then
if compare_keyword("OF", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindOf
end;
if compare_keyword('FROM', lexer^.start, lexer^.current.iterator) then
if compare_keyword("FROM", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindFrom
end;
if compare_keyword('module', lexer^.start, lexer^.current.iterator) then
if compare_keyword("module", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindModule
end;
if compare_keyword('xor', lexer^.start, lexer^.current.iterator) then
if compare_keyword("xor", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindXor
end;
if compare_keyword('POINTER', lexer^.start, lexer^.current.iterator) then
if compare_keyword("POINTER", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindPointer
end;
if compare_keyword('ARRAY', lexer^.start, lexer^.current.iterator) then
if compare_keyword("ARRAY", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindArray
end;
if compare_keyword('TRUE', lexer^.start, lexer^.current.iterator) then
if compare_keyword("TRUE", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindBoolean;
token^.booleanKind := true
end;
if compare_keyword('FALSE', lexer^.start, lexer^.current.iterator) then
if compare_keyword("FALSE", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindBoolean;
token^.booleanKind := false
end
@@ -451,7 +532,7 @@ end;
(* Action for tokens containing only one character. The character cannot be
* followed by other characters forming a composite token. *)
proc transition_action_single(lexer: PLexer, token: PLexerToken);
proc transition_action_single(lexer: ^Lexer, token: ^LexerToken);
begin
if lexer^.current.iterator^ = '&' then
token^.kind := lexerKindAnd
@@ -502,11 +583,11 @@ begin
end;
(* Handle an integer literal. *)
proc transition_action_integer(lexer: PLexer, token: PLexerToken);
proc transition_action_integer(lexer: ^Lexer, token: ^LexerToken);
var
buffer: String;
integer_length: CARDINAL;
found: BOOLEAN;
integer_length: Word;
found: Bool;
begin
token^.kind := lexerKindInteger;
@@ -805,7 +886,7 @@ begin
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].next_state := transitionStateEnd
end;
proc lexer_initialize(lexer: PLexer, input: File);
proc lexer_make*(lexer: ^Lexer, input: ^FILE);
begin
lexer^.input := input;
lexer^.length := 0;
@@ -815,19 +896,20 @@ begin
lexer^.size := CHUNK_SIZE
end;
proc lexer_current(lexer: PLexer) -> LexerToken;
(* Returns the last read token. *)
proc lexer_current*(lexer: ^Lexer) -> LexerToken;
var
current_class: TransitionClass;
current_state: TransitionState;
current_transition: Transition;
result: LexerToken;
index1: CARDINAL;
index2: CARDINAL;
index1: Word;
index2: Word;
begin
lexer^.current := lexer^.start;
current_state := transitionStateStart;
while current_state <> transitionStateEnd DO
while current_state <> transitionStateEnd do
index1 := ORD(lexer^.current.iterator^);
INC(index1);
current_class := classification[index1];
@@ -849,7 +931,8 @@ begin
return result
end;
proc lexer_lex(lexer: PLexer) -> LexerToken;
(* Read and return the next token. *)
proc lexer_lex*(lexer: ^Lexer) -> LexerToken;
var
result: LexerToken;
begin
@@ -865,12 +948,15 @@ begin
return result
end;
proc lexer_destroy(lexer: PLexer);
proc lexer_destroy*(lexer: ^Lexer);
begin
DEALLOCATE(lexer^.buffer, lexer^.size)
end;
proc lexer_initialize();
begin
initialize_classification();
initialize_transitions()
end;
end.
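
The scanner above is table-driven: classification maps every character code to a transition class, and transitions[state][class] yields an action plus the next state until transitionStateEnd is reached. A minimal C sketch of that technique follows; the classes, states and actions here are invented for illustration and do not match the project's tables.

#include <stdio.h>

typedef enum { CLASS_EOF, CLASS_ALPHA, CLASS_DIGIT, CLASS_SPACE, CLASS_COUNT } Class;
typedef enum { STATE_START, STATE_WORD, STATE_END, STATE_COUNT } State;

typedef struct Scanner { const char *start, *current; } Scanner;
typedef void (*Action)(Scanner *);

static void accumulate(Scanner *s) { s->current++; }       /* keep the byte in the token */
static void skip(Scanner *s) { s->start = ++s->current; }  /* drop a space before the token */
static void finalize(Scanner *s) { (void)s; }              /* token is complete, do not advance */

typedef struct { Action action; State next_state; } Transition;

static Class classify(char c) {
    if (c == '\0') return CLASS_EOF;
    if (c == ' ' || c == '\n') return CLASS_SPACE;
    if (c >= '0' && c <= '9') return CLASS_DIGIT;
    return CLASS_ALPHA;
}

static const Transition transitions[STATE_COUNT][CLASS_COUNT] = {
    /* START */ { { finalize, STATE_END }, { accumulate, STATE_WORD },
                  { accumulate, STATE_WORD }, { skip, STATE_START } },
    /* WORD  */ { { finalize, STATE_END }, { accumulate, STATE_WORD },
                  { accumulate, STATE_WORD }, { finalize, STATE_END } },
    /* END   */ { { NULL, STATE_END }, { NULL, STATE_END },
                  { NULL, STATE_END }, { NULL, STATE_END } },
};

/* Scan one token: [s->start, s->current) delimits its text afterwards. */
static void scan_token(Scanner *s) {
    State state = STATE_START;
    while (state != STATE_END) {
        Transition t = transitions[state][classify(*s->current)];
        if (t.action != NULL) t.action(s);
        state = t.next_state;
    }
}

int main(void) {
    const char *text = "two words";
    Scanner s = { text, text };
    scan_token(&s);
    printf("first token: %.*s\n", (int)(s.current - s.start), s.start);
    return 0;
}

The appeal of this layout is that supporting a new token kind usually means editing table entries and adding an action, not changing the scanning loop itself.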

View File

@@ -3,7 +3,7 @@
obtain one at https://mozilla.org/MPL/2.0/. *)
program;
import dummy;
import Common, Lexer;
const
SEEK_SET* := 0;
@@ -80,13 +80,9 @@ type
_module,
_import
);
Position* = record
line: Word;
column: Word
end;
Location* = record
first: Position;
last: Position
first: TextLocation;
last: TextLocation
end;
SourceFile* = record
buffer: [1024]Char;
@@ -94,14 +90,13 @@ type
size: Word;
index: Word
end;
FILE* = record end;
StringBuffer* = record
data: Pointer;
size: Word;
capacity: Word
end;
SourceCode = record
position: Position;
position: TextLocation;
input: Pointer;
empty: proc(Pointer) -> Bool;
@@ -123,7 +118,7 @@ type
lex: Bool;
parse: Bool
end;
Lexer* = record
Tokenizer* = record
length: Word;
data: ^Token
end;
@@ -592,7 +587,7 @@ begin
return current_token
end;
proc lexer_add_token(lexer: ^Lexer, token: Token);
proc lexer_add_token(lexer: ^Tokenizer, token: Token);
var
new_length: Word;
begin
@@ -778,13 +773,13 @@ begin
end;
(* Split the source text into tokens. *)
proc lexer_text(source_code: SourceCode) -> Lexer;
proc lexer_text(source_code: SourceCode) -> Tokenizer;
var
current_token: Token;
token_buffer: StringBuffer;
lexer: Lexer;
lexer: Tokenizer;
begin
lexer := Lexer(0u, nil);
lexer := Tokenizer(0u, nil);
token_buffer := string_buffer_new();
lexer_spaces(@source_code);
@@ -1024,7 +1019,7 @@ end;
proc compile_in_stages(command_line: ^CommandLine, source_code: SourceCode) -> Int;
var
return_code: Int;
lexer: Lexer;
lexer: Tokenizer;
begin
return_code := 0;
@@ -1068,7 +1063,7 @@ begin
fclose(source_file^.handle)
end;
source_code.position := Position(1u, 1u);
source_code.position := TextLocation(1u, 1u);
source_code.input := cast(source_file: Pointer);
source_code.empty := source_file_empty;
source_code.head := source_file_head;
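
The Tokenizer record introduced in this file is a growable token array (a length plus a data pointer) filled by lexer_add_token while the source text is split into tokens. Below is a hedged C sketch of that append pattern; the realloc-based growth by one element is an assumption for illustration, not what this commit implements.

#include <stdlib.h>

typedef struct { int kind; } Token;          /* stand-in for the real Token */

typedef struct {
    size_t length;  /* number of tokens stored so far */
    Token *data;    /* heap array holding `length` tokens */
} Tokenizer;

/* Append one token, growing the array by a single element. */
static int tokenizer_add_token(Tokenizer *tokenizer, Token token) {
    size_t new_length = tokenizer->length + 1;
    Token *grown = realloc(tokenizer->data, new_length * sizeof(Token));
    if (grown == NULL) {
        return 0;                            /* allocation failed, array unchanged */
    }
    grown[new_length - 1] = token;
    tokenizer->data = grown;
    tokenizer->length = new_length;
    return 1;
}

int main(void) {
    Tokenizer tokenizer = { 0, NULL };       /* mirrors Tokenizer(0u, nil) above */
    Token token = { 0 };
    tokenizer_add_token(&tokenizer, token);  /* tokenizer.length is now 1 */
    free(tokenizer.data);
    return 0;
}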