Add lexer and parser sources

This commit is contained in:
2025-06-14 23:57:48 +02:00
parent d5e2d53e9b
commit 50bd223658
25 changed files with 3461 additions and 429 deletions

View File

@ -1,17 +1,17 @@
(* This Source Code Form is subject to the terms of the Mozilla Public License,
v. 2.0. If a copy of the MPL was not distributed with this file, You can
obtain one at https://mozilla.org/MPL/2.0/. *)
program
program;
import dummy
import dummy;
(*
  Numeric constants for file positioning and for the standard streams.
  The starred names are exported. NOTE(review): the values coincide with
  the usual libc SEEK_* values and POSIX descriptor numbers; presumably
  that is intentional, confirm for the target platform.
*)
const
SEEK_SET* := 0
SEEK_CUR* := 1
SEEK_END* := 2
STDIN := 0
STDOUT := 1
STDERR := 2
SEEK_SET* := 0;
SEEK_CUR* := 1;
SEEK_END* := 2;
STDIN := 0;
STDOUT := 1;
STDERR := 2;
type
TokenKind* = (
@ -79,82 +79,82 @@ type
_program,
_module,
_import
)
);
(* A line and column pair; SourceCode keeps one as its current position. *)
Position* = record
line: Word
line: Word;
column: Word
end
end;
(* A range described by its first and last Position; stored in every Token. *)
Location* = record
first: Position
first: Position;
last: Position
end
end;
(*
  SourceFile: state of an input file that is read through a fixed
  1024-character buffer. source_file_head returns buffer[index] and
  source_file_advance increments index; size is compared against zero to
  detect the end of input.

  FILE: an empty record used only through ^FILE by the extern stream
  procedures (presumably an opaque stand-in for the C stdio FILE type).
*)
SourceFile* = record
buffer: [1024]Char
handle: ^FILE
size: Word
buffer: [1024]Char;
handle: ^FILE;
size: Word;
index: Word
end
FILE* = record end
end;
FILE* = record end;
(*
  Character buffer used while building token text. data points to storage
  obtained with malloc, size is the number of characters currently stored
  and capacity is the allocated size (64 when created by
  string_buffer_new).
*)
StringBuffer* = record
data: Pointer
size: Word
data: Pointer;
size: Word;
capacity: Word
end
end;
(*
  Character stream abstraction. input is an opaque handle that is passed
  to the three callbacks: empty reports whether input is exhausted,
  advance moves to the next character and head returns the current one.
  position holds the line and column maintained by the source_code_*
  procedures.
*)
SourceCode = record
position: Position
position: Position;
input: Pointer
empty: proc(Pointer) -> Bool
advance: proc(Pointer)
input: Pointer;
empty: proc(Pointer) -> Bool;
advance: proc(Pointer);
head: proc(Pointer) -> Char
end
end;
(*
  A single token: its kind, a payload and its source location.
  The payload is a union, so only one of int_value, string, boolean_value
  and char_value is meaningful at a time (presumably selected by kind;
  confirm against lexer_next).
*)
Token* = record
kind: TokenKind
kind: TokenKind;
value: union
int_value: Int
string: String
boolean_value: Bool
int_value: Int;
string: String;
boolean_value: Bool;
char_value: Char
end
end;
location: Location
end
end;
(*
  Result of parse_command_line. input is a C string; lex and parse are
  boolean switches (presumably the input path and the requested
  compilation stages; confirm against parse_command_line).
*)
CommandLine* = record
input: ^Char
lex: Bool
input: ^Char;
lex: Bool;
parse: Bool
end
end;
(*
  Token array built by lexer_add_token: data points to the first token and
  length is the number of tokens stored.
*)
Lexer* = record
length: Word
length: Word;
data: ^Token
end
end;
(*
External procedures.
*)
(*
  Declarations only: every procedure below is marked extern and has no
  body here. The names and signatures mirror C library and POSIX routines,
  which is presumably what they are linked against.
  NOTE(review): the third parameter of write is named Word but typed Int;
  it looks like the name and type were mixed up. Confirm before changing,
  since callers pass Int values.
*)
proc fopen(pathname: ^Char, mode: ^Char) -> ^FILE; extern
proc fclose(stream: ^FILE) -> Int; extern
proc fseek(stream: ^FILE, off: Int, whence: Int) -> Int; extern
proc rewind(stream: ^FILE); extern
proc ftell(stream: ^FILE) -> Int; extern
proc fread(ptr: Pointer, size: Word, nmemb: Word, stream: ^FILE) -> Word; extern
proc write(fd: Int, buf: Pointer, Word: Int) -> Int; extern
proc fopen(pathname: ^Char, mode: ^Char) -> ^FILE; extern;
proc fclose(stream: ^FILE) -> Int; extern;
proc fseek(stream: ^FILE, off: Int, whence: Int) -> Int; extern;
proc rewind(stream: ^FILE); extern;
proc ftell(stream: ^FILE) -> Int; extern;
proc fread(ptr: Pointer, size: Word, nmemb: Word, stream: ^FILE) -> Word; extern;
proc write(fd: Int, buf: Pointer, Word: Int) -> Int; extern;
proc malloc(size: Word) -> Pointer; extern
proc free(ptr: Pointer); extern
proc calloc(nmemb: Word, size: Word) -> Pointer; extern
proc realloc(ptr: Pointer, size: Word) -> Pointer; extern
proc malloc(size: Word) -> Pointer; extern;
proc free(ptr: Pointer); extern;
proc calloc(nmemb: Word, size: Word) -> Pointer; extern;
proc realloc(ptr: Pointer, size: Word) -> Pointer; extern;
proc memset(ptr: ^Char, c: Int, n: Int) -> ^Char; extern
proc memset(ptr: ^Char, c: Int, n: Int) -> ^Char; extern;
proc strcmp(s1: ^Char, s2: ^Char) -> Int; extern
proc strncmp(s1: ^Char, s2: ^Char, n: Word) -> Int; extern
proc strncpy(dst: ^Char, src: ^Char, dsize: Word) -> ^Char; extern
proc strcpy(dst: ^Char, src: ^Char) -> ^Char; extern
proc strlen(ptr: ^Char) -> Word; extern
proc strcmp(s1: ^Char, s2: ^Char) -> Int; extern;
proc strncmp(s1: ^Char, s2: ^Char, n: Word) -> Int; extern;
proc strncpy(dst: ^Char, src: ^Char, dsize: Word) -> ^Char; extern;
proc strcpy(dst: ^Char, src: ^Char) -> ^Char; extern;
proc strlen(ptr: ^Char) -> Word; extern;
proc perror(s: ^Char); extern
proc exit(code: Int) -> !; extern
proc perror(s: ^Char); extern;
proc exit(code: Int) -> !; extern;
(*
Standard procedures.
@ -162,17 +162,17 @@ proc exit(code: Int) -> !; extern
(*
  Resizes ptr to n * size by forwarding to realloc and returns realloc's
  result. The multiplication is not checked for overflow.
*)
proc reallocarray(ptr: Pointer, n: Word, size: Word) -> Pointer;
return realloc(ptr, n * size)
end
end;
(*
  Writes the value.length characters starting at value.ptr to standard
  output. The result of write is ignored.

  The descriptor used to be the literal 0, which is standard input: that
  only appears to work while descriptor 0 is a read/write terminal and
  makes the output bypass pipes and redirections.
*)
proc write_s(value: String);
begin
  write(STDOUT, cast(value.ptr: Pointer), cast(value.length: Int))
end
end;
(*
  Writes a NUL-terminated string to standard output; the length is
  measured with strlen. The result of write is ignored.

  The descriptor used to be the literal 0, which is standard input: that
  only appears to work while descriptor 0 is a read/write terminal and
  makes the output bypass pipes and redirections.
*)
proc write_z(value: ^Char);
begin
  write(STDOUT, cast(value: Pointer), cast(strlen(value): Int))
end
end;
proc write_b(value: Bool);
begin
@ -181,18 +181,18 @@ begin
else
write_s("false")
end
end
end;
(*
  Writes the single character value to standard output by passing the
  address of the parameter and a length of 1 to write. The result of
  write is ignored.

  The descriptor used to be the literal 0, which is standard input: that
  only appears to work while descriptor 0 is a read/write terminal and
  makes the output bypass pipes and redirections.
*)
proc write_c(value: Char);
begin
  write(STDOUT, cast(@value: Pointer), 1)
end
end;
proc write_i(value: Int);
var
digit: Int
n: Word
buffer: [10]Char
digit: Int;
n: Word;
buffer: [10]Char;
begin
n := 10u;
@ -210,57 +210,57 @@ begin
n := n + 1u;
write_c(buffer[n])
end
end
end;
(*
  Prints an unsigned value by casting it to Int and delegating to write_i.
  NOTE(review): values that do not fit into Int are presumably printed
  incorrectly after the cast; confirm.
*)
proc write_u(value: Word);
begin
write_i(cast(value: Int))
end
end;
(* Returns true when the code of c lies between '0' and '9' inclusive. *)
proc is_digit(c: Char) -> Bool;
return cast(c: Int) >= cast('0': Int) & cast(c: Int) <= cast('9': Int)
end
end;
(*
  Returns true when c is an ASCII letter, 'A'..'Z' or 'a'..'z'.

  The two ranges are tested separately on purpose: a single 'A'..'z' range
  also accepts the six characters between 'Z' and 'a', which are the
  brackets, backslash, caret, underscore and backtick.
*)
proc is_alpha(c: Char) -> Bool;
  return (cast(c: Int) >= cast('A': Int) & cast(c: Int) <= cast('Z': Int))
    or (cast(c: Int) >= cast('a': Int) & cast(c: Int) <= cast('z': Int))
end
end;
(* Returns true when either is_digit or is_alpha accepts c. *)
proc is_alnum(c: Char) -> Bool;
return is_digit(c) or is_alpha(c)
end
end;
(*
  Returns true for a blank, a line feed or a horizontal tab. No other
  character (a carriage return, for example) counts as a space here.
*)
proc is_space(c: Char) -> Bool;
return c = ' ' or c = '\n' or c = '\t'
end
end;
(*
  Returns a String of length count whose pointer is string.ptr advanced by
  start. Nothing is copied, so the result refers to the storage of the
  original string. Neither start nor count is checked against
  string.length.
*)
proc substring(string: String, start: Word, count: Word) -> String;
return String(string.ptr + start, count)
end
end;
(*
  Returns the tail of string beginning at start, i.e. a substring of
  string.length - start characters. start is not checked against
  string.length.
*)
proc open_substring(string: String, start: Word) -> String;
return substring(string, start, string.length - start)
end
end;
(*
  Copies origin into newly allocated memory and returns a String over the
  copy with the same length. Exactly origin.length is requested from
  malloc, so no room is reserved for a terminating NUL. The malloc result
  is not checked before it is passed to strncpy.
*)
proc string_dup(origin: String) -> String;
var
copy: ^Char
copy: ^Char;
begin
copy := cast(malloc(origin.length): ^Char);
strncpy(copy, origin.ptr, origin.length);
return String(copy, origin.length)
end
end;
(*
  Creates an empty StringBuffer: capacity is set to 64, data is allocated
  with malloc using that capacity and size starts at 0. The malloc result
  is not checked.
*)
proc string_buffer_new() -> StringBuffer;
var
result: StringBuffer
result: StringBuffer;
begin
result.capacity := 64u;
result.data := malloc(result.capacity);
result.size := 0u;
return result
end
end;
proc string_buffer_push(buffer: ^StringBuffer, char: Char);
begin
@ -270,21 +270,21 @@ begin
end;
cast(buffer^.data + buffer^.size: ^Char)^ := cast(char: Char);
buffer^.size := buffer^.size + 1u
end
end;
(*
  Drops the last count characters by decreasing size. count is not checked
  against the current size.
*)
proc string_buffer_pop(buffer: ^StringBuffer, count: Word);
begin
buffer^.size := buffer^.size - count
end
end;
(*
  Returns the current contents as a String and resets size to 0.
  The String is built directly from buffer^.data; nothing is copied or
  freed, so the result refers to the buffer's own storage.
*)
proc string_buffer_clear(buffer: ^StringBuffer) -> String;
var
result: String
result: String;
begin
result := String(cast(buffer^.data: ^Char), buffer^.size);
buffer^.size := 0u;
return result
end
end;
(*
Source code stream procedures.
@ -292,8 +292,8 @@ end
proc read_source(filename: ^Char) -> ^SourceFile;
var
result: ^SourceFile
file_handle: ^FILE
result: ^SourceFile;
file_handle: ^FILE;
begin
file_handle := fopen(filename, "rb\0".ptr);
@ -304,11 +304,11 @@ begin
result^.index := 1u
end;
return result
end
end;
proc source_file_empty(source_input: Pointer) -> Bool;
var
source_file: ^SourceFile
source_file: ^SourceFile;
begin
source_file := cast(source_input: ^SourceFile);
@ -318,49 +318,49 @@ begin
end;
return source_file^.size = 0u
end
end;
(*
  head callback for a SourceFile input: treats source_input as a
  ^SourceFile and returns the buffered character at the current index.
*)
proc source_file_head(source_input: Pointer) -> Char;
var
source_file: ^SourceFile
source_file: ^SourceFile;
begin
source_file := cast(source_input: ^SourceFile);
return source_file^.buffer[source_file^.index]
end
end;
(*
  advance callback for a SourceFile input: treats source_input as a
  ^SourceFile and increments its index by one. The buffer is not refilled
  here.
*)
proc source_file_advance(source_input: Pointer);
var
source_file: ^SourceFile
source_file: ^SourceFile;
begin
source_file := cast(source_input: ^SourceFile);
source_file^.index := source_file^.index + 1u
end
end;
(* Returns the result of the stream's empty callback applied to its input. *)
proc source_code_empty(source_code: ^SourceCode) -> Bool;
return source_code^.empty(source_code^.input)
end
end;
(*
  Returns the current character by invoking the stream's head callback on
  its input. Unlike the other source_code_* procedures it takes the
  SourceCode record by value, so callers pass source_code^.
*)
proc source_code_head(source_code: SourceCode) -> Char;
return source_code.head(source_code.input)
end
end;
(*
  Moves the stream to the next character by invoking its advance callback
  and counts the consumed character in the current column.

  The column used to be assigned to itself, which left it unchanged for
  the whole run; it is now incremented by one per consumed character.
*)
proc source_code_advance(source_code: ^SourceCode);
begin
  source_code^.advance(source_code^.input);
  source_code^.position.column := source_code^.position.column + 1u
end
end;
(*
  Records a line break in the position: the line number is incremented and
  the column is reset to 0. The input itself is not advanced.
*)
proc source_code_break(source_code: ^SourceCode);
begin
source_code^.position.line := source_code^.position.line + 1u;
source_code^.position.column := 0u
end
end;
(*
  Returns true when the stream is not empty and its current character
  equals expected. The character is only inspected, not consumed.
*)
proc source_code_expect(source_code: ^SourceCode, expected: Char) -> Bool;
return ~source_code_empty(source_code) & source_code_head(source_code^) = expected
end
end;
(*
Token procedures.
@ -368,7 +368,7 @@ end
proc lexer_escape(escape: Char, result: ^Char) -> Bool;
var
successful: Bool
successful: Bool;
begin
if escape = 'n' then
result^ := '\n';
@ -410,12 +410,12 @@ begin
successful := false
end;
return successful
end
end;
(* Skip spaces. *)
proc lexer_spaces(source_code: ^SourceCode);
var
current: Char
current: Char;
begin
while ~source_code_empty(source_code) & is_space(source_code_head(source_code^)) do
current := source_code_head(source_code^);
@ -425,26 +425,26 @@ begin
end;
source_code_advance(source_code)
end
end
end;
(*
  Checks whether the character is allowed in an identifier: anything
  accepted by is_alnum, or an underscore.
*)
proc lexer_is_ident(char: Char) -> Bool;
return is_alnum(char) or char = '_'
end
end;
(*
  Reads an identifier: while the stream is not empty and its current
  character satisfies lexer_is_ident, the character is appended to
  token_content and the stream is advanced. Stops at the first character
  that does not belong to an identifier, leaving it unconsumed.

  The former local content_length was never read or written and has been
  removed together with its var section.
*)
proc lexer_identifier(source_code: ^SourceCode, token_content: ^StringBuffer);
begin
  while ~source_code_empty(source_code) & lexer_is_ident(source_code_head(source_code^)) do
    string_buffer_push(token_content, source_code_head(source_code^));
    source_code_advance(source_code)
  end
end
end;
proc lexer_comment(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool;
var
trailing: Word
trailing: Word;
begin
trailing := 0u;
@ -463,11 +463,11 @@ begin
end;
return trailing = 2u
end
end;
proc lexer_character(source_code: ^SourceCode, token_content: ^Char) -> Bool;
var
successful: Bool
successful: Bool;
begin
successful := ~source_code_empty(source_code);
@ -485,14 +485,14 @@ begin
source_code_advance(source_code)
end;
return successful
end
end;
proc lexer_string(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool;
var
token_end, constructed_string: ^Char
token_length: Word
is_valid: Bool
next_char: Char
token_end, constructed_string: ^Char;
token_length: Word;
is_valid: Bool;
next_char: Char;
begin
is_valid := true;
@ -510,7 +510,7 @@ begin
is_valid := false
end;
return is_valid
end
end;
proc lexer_number(source_code: ^SourceCode, token_content: ^Int);
begin
@ -521,12 +521,12 @@ begin
source_code_advance(source_code)
end
end
end;
(* Categorize an identifier. *)
proc lexer_categorize(token_content: String) -> Token;
var
current_token: Token
current_token: Token;
begin
if token_content = "if" then
current_token.kind := TokenKind._if
@ -590,23 +590,23 @@ begin
end;
return current_token
end
end;
(*
  Appends token to the lexer's array. The array is resized to hold one
  more Token on every call, the token is stored at the old length index
  and length is updated afterwards. The reallocarray result is not checked
  before it is used.
*)
proc lexer_add_token(lexer: ^Lexer, token: Token);
var
new_length: Word
new_length: Word;
begin
new_length := lexer^.length + 1u;
lexer^.data := cast(reallocarray(cast(lexer^.data: Pointer), new_length, #size(Token)): ^Token);
(lexer^.data + lexer^.length)^ := token;
lexer^.length := new_length
end
end;
(* Read the next token from the input. *)
proc lexer_next(source_code: SourceCode, token_buffer: ^StringBuffer) -> Token;
var
current_token: Token
first_char: Char
current_token: Token;
first_char: Char;
begin
current_token.kind := TokenKind.unknown;
@ -775,14 +775,14 @@ begin
end;
return current_token
end
end;
(* Split the source text into tokens. *)
proc lexer_text(source_code: SourceCode) -> Lexer;
var
current_token: Token
token_buffer: StringBuffer
lexer: Lexer
current_token: Token;
token_buffer: StringBuffer;
lexer: Lexer;
begin
lexer := Lexer(0u, nil);
token_buffer := string_buffer_new();
@ -803,7 +803,7 @@ begin
end;
return lexer
end
end;
(*
Command line handling.
@ -811,9 +811,9 @@ end
proc parse_command_line*(argc: Int, argv: ^^Char) -> ^CommandLine;
var
parameter: ^^Char
i: Int
result: ^CommandLine
parameter: ^^Char;
i: Int;
result: ^CommandLine;
begin
i := 1;
result := cast(malloc(#size(CommandLine)): ^CommandLine);
@ -852,7 +852,7 @@ begin
end;
return result
end
end;
(*
Parser.
@ -860,8 +860,8 @@ end
proc parse(tokens: ^Token, tokens_size: Word);
var
current_token: ^Token
i: Word
current_token: ^Token;
i: Word;
begin
i := 0u;
while i < tokens_size do
@ -1015,7 +1015,7 @@ begin
i := i + 1u
end;
write_c('\n')
end
end;
(*
Compilation entry.
@ -1023,8 +1023,8 @@ end
proc compile_in_stages(command_line: ^CommandLine, source_code: SourceCode) -> Int;
var
return_code: Int
lexer: Lexer
return_code: Int;
lexer: Lexer;
begin
return_code := 0;
@ -1036,16 +1036,16 @@ begin
end;
return return_code
end
end;
proc process(argc: Int, argv: ^^Char) -> Int;
var
tokens: ^Token
tokens_size: Word
source_code: SourceCode
command_line: ^CommandLine
return_code: Int
source_file: ^SourceFile
tokens: ^Token;
tokens_size: Word;
source_code: SourceCode;
command_line: ^CommandLine;
return_code: Int;
source_file: ^SourceFile;
begin
return_code := 0;
@ -1077,7 +1077,7 @@ begin
return_code := compile_in_stages(command_line, source_code)
end;
return return_code
end
end;
begin
exit(process(count, parameters))