Add lexer and parser sources
This commit is contained in:
278
source.elna
278
source.elna
@ -1,17 +1,17 @@
|
||||
(* This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
obtain one at https://mozilla.org/MPL/2.0/. *)
|
||||
program
|
||||
program;
|
||||
|
||||
import dummy
|
||||
import dummy;
|
||||
|
||||
const
  (* NOTE(review): presumably the `whence` values passed to fseek; their use
     is not visible in this excerpt. *)
  SEEK_SET* := 0;
  SEEK_CUR* := 1;
  SEEK_END* := 2;
  (* NOTE(review): presumably the standard stream descriptor numbers. *)
  STDIN := 0;
  STDOUT := 1;
  STDERR := 2;
|
||||
|
||||
type
|
||||
TokenKind* = (
|
||||
@ -79,82 +79,82 @@ type
|
||||
_program,
|
||||
_module,
|
||||
_import
|
||||
)
|
||||
);
|
||||
(* A line/column pair. source_code_break increments `line` and resets
   `column` to 0. *)
Position* = record
  line: Word;
  column: Word
end;
|
||||
(* A span described by two positions, `first` and `last`. *)
Location* = record
  first: Position;
  last: Position
end;
|
||||
(* State of a file-backed input: a 1024-character buffer, the stream handle,
   and two counters. source_file_head reads buffer[index]; source_file_advance
   increments index; source_file_empty reports size = 0. *)
SourceFile* = record
  buffer: [1024]Char;
  handle: ^FILE;
  size: Word;
  index: Word
end;
(* Field-less record used only behind ^FILE pointers in the extern
   declarations. *)
FILE* = record end;
|
||||
(* Character buffer: `data` points at the storage, `size` is the number of
   characters currently held and `capacity` the allocated amount
   (string_buffer_new starts with a capacity of 64). *)
StringBuffer* = record
  data: Pointer;
  size: Word;
  capacity: Word
end;
|
||||
(* Input stream abstraction: the current position plus an opaque `input`
   pointer that is handed to the three procedure fields. The source_code_*
   procedures call `empty`, `advance` and `head` with `input` as argument. *)
SourceCode = record
  position: Position;

  input: Pointer;
  empty: proc(Pointer) -> Bool;
  advance: proc(Pointer);
  head: proc(Pointer) -> Char
end;
|
||||
(* A lexed token: its kind, a union holding the payload, and the source
   location. NOTE(review): which union member is valid presumably depends on
   `kind`; the code that fills it is not visible in this excerpt. *)
Token* = record
  kind: TokenKind;
  value: union
    int_value: Int;
    string: String;
    boolean_value: Bool;
    char_value: Char
  end;
  location: Location
end;
|
||||
(* Result of parse_command_line: an `input` character pointer and two flags.
   NOTE(review): `lex` and `parse` presumably select the compilation stages
   to run; the code reading them is not visible in this excerpt. *)
CommandLine* = record
  input: ^Char;
  lex: Bool;
  parse: Bool
end;
|
||||
(* Token array: `data` points at `length` tokens. lexer_add_token grows the
   array by one element per call. *)
Lexer* = record
  length: Word;
  data: ^Token
end;
|
||||
|
||||
(*
|
||||
External procedures.
|
||||
*)
|
||||
|
||||
(* Stream and descriptor I/O procedures declared as external. *)
proc fopen(pathname: ^Char, mode: ^Char) -> ^FILE; extern;
proc fclose(stream: ^FILE) -> Int; extern;
proc fseek(stream: ^FILE, off: Int, whence: Int) -> Int; extern;
proc rewind(stream: ^FILE); extern;
proc ftell(stream: ^FILE) -> Int; extern;
proc fread(ptr: Pointer, size: Word, nmemb: Word, stream: ^FILE) -> Word; extern;
(* NOTE(review): the third parameter is named `Word` and typed `Int`; this
   looks like a swapped name/type (e.g. `count: Word`) - confirm before
   changing, since callers cast the length to Int. *)
proc write(fd: Int, buf: Pointer, Word: Int) -> Int; extern;
|
||||
|
||||
(* Memory allocation procedures declared as external. *)
proc malloc(size: Word) -> Pointer; extern;
proc free(ptr: Pointer); extern;
proc calloc(nmemb: Word, size: Word) -> Pointer; extern;
proc realloc(ptr: Pointer, size: Word) -> Pointer; extern;
|
||||
|
||||
(* Memory fill procedure declared as external. *)
proc memset(ptr: ^Char, c: Int, n: Int) -> ^Char; extern;
|
||||
|
||||
(* Character-pointer string procedures declared as external. *)
proc strcmp(s1: ^Char, s2: ^Char) -> Int; extern;
proc strncmp(s1: ^Char, s2: ^Char, n: Word) -> Int; extern;
proc strncpy(dst: ^Char, src: ^Char, dsize: Word) -> ^Char; extern;
proc strcpy(dst: ^Char, src: ^Char) -> ^Char; extern;
proc strlen(ptr: ^Char) -> Word; extern;
|
||||
|
||||
(* Error reporting and process termination procedures declared as external.
   `exit` is declared with the `!` result, i.e. it does not return a value
   to the caller. *)
proc perror(s: ^Char); extern;
proc exit(code: Int) -> !; extern;
|
||||
|
||||
(*
|
||||
Standard procedures.
|
||||
@ -162,17 +162,17 @@ proc exit(code: Int) -> !; extern
|
||||
|
||||
(* Resizes the allocation at `ptr` to `n * size` by delegating to realloc and
   returns realloc's result. The product is not checked for overflow. *)
proc reallocarray(ptr: Pointer, n: Word, size: Word) -> Pointer;
var
  total_size: Word;
begin
  total_size := n * size;

  return realloc(ptr, total_size)
end;
|
||||
|
||||
(* Writes the `value.length` characters starting at `value.ptr` to the
   STDOUT descriptor. The original passed descriptor 0, which is the input
   stream; the result of `write` is ignored. *)
proc write_s(value: String);
begin
  write(STDOUT, cast(value.ptr: Pointer), cast(value.length: Int))
end;
|
||||
|
||||
(* Writes the characters at `value` to the STDOUT descriptor; the length is
   obtained with strlen, so `value` must be NUL-terminated. The original
   passed descriptor 0, which is the input stream. *)
proc write_z(value: ^Char);
begin
  write(STDOUT, cast(value: Pointer), cast(strlen(value): Int))
end;
|
||||
|
||||
proc write_b(value: Bool);
|
||||
begin
|
||||
@ -181,18 +181,18 @@ begin
|
||||
else
|
||||
write_s("false")
|
||||
end
|
||||
end
|
||||
end;
|
||||
|
||||
(* Writes the single character `value` to the STDOUT descriptor by passing
   the address of the parameter to `write`. The original passed descriptor 0,
   which is the input stream. *)
proc write_c(value: Char);
begin
  write(STDOUT, cast(@value: Pointer), 1)
end;
|
||||
|
||||
proc write_i(value: Int);
|
||||
var
|
||||
digit: Int
|
||||
n: Word
|
||||
buffer: [10]Char
|
||||
digit: Int;
|
||||
n: Word;
|
||||
buffer: [10]Char;
|
||||
begin
|
||||
n := 10u;
|
||||
|
||||
@ -210,57 +210,57 @@ begin
|
||||
n := n + 1u;
|
||||
write_c(buffer[n])
|
||||
end
|
||||
end
|
||||
end;
|
||||
|
||||
(* Prints `value` by casting it to Int and delegating to write_i. *)
proc write_u(value: Word);
var
  as_int: Int;
begin
  as_int := cast(value: Int);
  write_i(as_int)
end;
|
||||
|
||||
(* Returns true when the code of `c` lies between the codes of '0' and '9',
   inclusive. *)
proc is_digit(c: Char) -> Bool;
var
  code: Int;
begin
  code := cast(c: Int);

  return code >= cast('0': Int) & code <= cast('9': Int)
end;
|
||||
|
||||
(* Returns true when `c` is a letter, i.e. its code lies in 'A'..'Z' or in
   'a'..'z'. The two ranges are tested separately because the characters
   between 'Z' and 'a' ('[', '\', ']', '^', '_' and '`') are not letters;
   a single 'A'..'z' comparison would accept them. *)
proc is_alpha(c: Char) -> Bool;
var
  code: Int;
  is_upper, is_lower: Bool;
begin
  code := cast(c: Int);
  is_upper := code >= cast('A': Int) & code <= cast('Z': Int);
  is_lower := code >= cast('a': Int) & code <= cast('z': Int);

  return is_upper or is_lower
end;
|
||||
|
||||
(* Returns true when `c` satisfies is_digit or is_alpha. *)
proc is_alnum(c: Char) -> Bool;
var
  result: Bool;
begin
  if is_digit(c) then
    result := true
  else
    result := is_alpha(c)
  end;

  return result
end;
|
||||
|
||||
(* Returns true when `c` is a space, a newline or a tab character. *)
proc is_space(c: Char) -> Bool;
var
  result: Bool;
begin
  if c = ' ' then
    result := true
  else
    result := c = '\n' or c = '\t'
  end;

  return result
end;
|
||||
|
||||
(* Returns a String of `count` characters that begins `start` characters into
   `string`. The result points into the storage of `string`; nothing is
   copied and the bounds are not checked. *)
proc substring(string: String, start: Word, count: Word) -> String;
var
  slice: String;
begin
  slice := String(string.ptr + start, count);

  return slice
end;
|
||||
|
||||
(* Returns the part of `string` from offset `start` to its end, computed as
   substring(string, start, string.length - start). `start` is not checked
   against the length. *)
proc open_substring(string: String, start: Word) -> String;
var
  remaining: Word;
begin
  remaining := string.length - start;

  return substring(string, start, remaining)
end;
|
||||
|
||||
(* Returns a String of the same length as `origin` backed by a fresh malloc
   allocation filled with strncpy. The allocation result is not checked and
   no terminating NUL is appended.
   NOTE(review): strncpy stops copying at the first NUL in the source, so
   characters after an embedded NUL would not be duplicated - confirm
   whether that matters for callers. *)
proc string_dup(origin: String) -> String;
var
  duplicate: ^Char;
  length: Word;
begin
  length := origin.length;
  duplicate := cast(malloc(length): ^Char);
  strncpy(duplicate, origin.ptr, length);

  return String(duplicate, length)
end;
|
||||
|
||||
(* Creates an empty StringBuffer: size 0, capacity 64 and `data` obtained
   from malloc(capacity). The allocation result is not checked. *)
proc string_buffer_new() -> StringBuffer;
var
  buffer: StringBuffer;
begin
  buffer.size := 0u;
  buffer.capacity := 64u;
  buffer.data := malloc(buffer.capacity);

  return buffer
end;
|
||||
|
||||
proc string_buffer_push(buffer: ^StringBuffer, char: Char);
|
||||
begin
|
||||
@ -270,21 +270,21 @@ begin
|
||||
end;
|
||||
cast(buffer^.data + buffer^.size: ^Char)^ := cast(char: Char);
|
||||
buffer^.size := buffer^.size + 1u
|
||||
end
|
||||
end;
|
||||
|
||||
(* Drops the last `count` characters from the buffer by reducing its size.
   The storage itself is left untouched. When `count` exceeds the current
   size the buffer is emptied instead of letting the unsigned subtraction
   wrap around. *)
proc string_buffer_pop(buffer: ^StringBuffer, count: Word);
begin
  if buffer^.size < count then
    buffer^.size := 0u
  else
    buffer^.size := buffer^.size - count
  end
end;
|
||||
|
||||
(* Returns the current buffer content as a String and resets the buffer size
   to 0. The returned String wraps `buffer^.data` directly (no copy is made),
   so later pushes into the buffer write into the same storage. *)
proc string_buffer_clear(buffer: ^StringBuffer) -> String;
var
  start: ^Char;
  length: Word;
begin
  start := cast(buffer^.data: ^Char);
  length := buffer^.size;
  buffer^.size := 0u;

  return String(start, length)
end;
|
||||
|
||||
(*
|
||||
Source code stream procedures.
|
||||
@ -292,8 +292,8 @@ end
|
||||
|
||||
proc read_source(filename: ^Char) -> ^SourceFile;
|
||||
var
|
||||
result: ^SourceFile
|
||||
file_handle: ^FILE
|
||||
result: ^SourceFile;
|
||||
file_handle: ^FILE;
|
||||
begin
|
||||
file_handle := fopen(filename, "rb\0".ptr);
|
||||
|
||||
@ -304,11 +304,11 @@ begin
|
||||
result^.index := 1u
|
||||
end;
|
||||
return result
|
||||
end
|
||||
end;
|
||||
|
||||
proc source_file_empty(source_input: Pointer) -> Bool;
|
||||
var
|
||||
source_file: ^SourceFile
|
||||
source_file: ^SourceFile;
|
||||
begin
|
||||
source_file := cast(source_input: ^SourceFile);
|
||||
|
||||
@ -318,49 +318,49 @@ begin
|
||||
end;
|
||||
|
||||
return source_file^.size = 0u
|
||||
end
|
||||
end;
|
||||
|
||||
(* Returns the character at the current index of the source file's buffer.
   `source_input` is cast to ^SourceFile; the index is not range-checked. *)
proc source_file_head(source_input: Pointer) -> Char;
var
  source: ^SourceFile;
  offset: Word;
begin
  source := cast(source_input: ^SourceFile);
  offset := source^.index;

  return source^.buffer[offset]
end;
|
||||
|
||||
(* Moves the source file's buffer index forward by one. `source_input` is
   cast to ^SourceFile. *)
proc source_file_advance(source_input: Pointer);
var
  source: ^SourceFile;
  next_index: Word;
begin
  source := cast(source_input: ^SourceFile);
  next_index := source^.index + 1u;

  source^.index := next_index
end;
|
||||
|
||||
(* Returns the result of calling the stream's `empty` procedure with the
   stream's `input` pointer. *)
proc source_code_empty(source_code: ^SourceCode) -> Bool;
var
  exhausted: Bool;
begin
  exhausted := source_code^.empty(source_code^.input);

  return exhausted
end;
|
||||
|
||||
(* Returns the result of calling the stream's `head` procedure with the
   stream's `input` pointer. Unlike its siblings this procedure takes the
   SourceCode record by value. *)
proc source_code_head(source_code: SourceCode) -> Char;
var
  current: Char;
begin
  current := source_code.head(source_code.input);

  return current
end;
|
||||
|
||||
(* Advances the stream by one character: calls the stream's `advance`
   procedure with its `input` pointer and moves the tracked column forward
   by one. The original assigned the column to itself, so the column never
   changed. *)
proc source_code_advance(source_code: ^SourceCode);
begin
  source_code^.advance(source_code^.input);
  source_code^.position.column := source_code^.position.column + 1u
end;
|
||||
|
||||
(* Records a line break in the tracked position: the column is reset to 0
   and the line number is incremented. The input itself is not advanced. *)
proc source_code_break(source_code: ^SourceCode);
begin
  source_code^.position.column := 0u;
  source_code^.position.line := source_code^.position.line + 1u
end;
|
||||
|
||||
(* Returns true when the stream is not empty and its head character equals
   `expected`. The stream is not advanced. *)
proc source_code_expect(source_code: ^SourceCode, expected: Char) -> Bool;
var
  matches: Bool;
begin
  matches := ~source_code_empty(source_code) & source_code_head(source_code^) = expected;

  return matches
end;
|
||||
|
||||
(*
|
||||
Token procedures.
|
||||
@ -368,7 +368,7 @@ end
|
||||
|
||||
proc lexer_escape(escape: Char, result: ^Char) -> Bool;
|
||||
var
|
||||
successful: Bool
|
||||
successful: Bool;
|
||||
begin
|
||||
if escape = 'n' then
|
||||
result^ := '\n';
|
||||
@ -410,12 +410,12 @@ begin
|
||||
successful := false
|
||||
end;
|
||||
return successful
|
||||
end
|
||||
end;
|
||||
|
||||
(* Skip spaces. *)
|
||||
proc lexer_spaces(source_code: ^SourceCode);
|
||||
var
|
||||
current: Char
|
||||
current: Char;
|
||||
begin
|
||||
while ~source_code_empty(source_code) & is_space(source_code_head(source_code^)) do
|
||||
current := source_code_head(source_code^);
|
||||
@ -425,26 +425,26 @@ begin
|
||||
end;
|
||||
source_code_advance(source_code)
|
||||
end
|
||||
end
|
||||
end;
|
||||
|
||||
(* Check whether the character is allowed in an identifier: anything
   accepted by is_alnum, or an underscore. *)
proc lexer_is_ident(char: Char) -> Bool;
var
  allowed: Bool;
begin
  allowed := is_alnum(char) or char = '_';

  return allowed
end;
|
||||
|
||||
(* Consumes characters from the stream for as long as it is not empty and
   its head satisfies lexer_is_ident, appending each consumed character to
   `token_content`. The unused local `content_length` was removed. *)
proc lexer_identifier(source_code: ^SourceCode, token_content: ^StringBuffer);
begin
  while ~source_code_empty(source_code) & lexer_is_ident(source_code_head(source_code^)) do
    string_buffer_push(token_content, source_code_head(source_code^));
    source_code_advance(source_code)
  end
end;
|
||||
|
||||
proc lexer_comment(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool;
|
||||
var
|
||||
trailing: Word
|
||||
trailing: Word;
|
||||
begin
|
||||
trailing := 0u;
|
||||
|
||||
@ -463,11 +463,11 @@ begin
|
||||
end;
|
||||
|
||||
return trailing = 2u
|
||||
end
|
||||
end;
|
||||
|
||||
proc lexer_character(source_code: ^SourceCode, token_content: ^Char) -> Bool;
|
||||
var
|
||||
successful: Bool
|
||||
successful: Bool;
|
||||
begin
|
||||
successful := ~source_code_empty(source_code);
|
||||
|
||||
@ -485,14 +485,14 @@ begin
|
||||
source_code_advance(source_code)
|
||||
end;
|
||||
return successful
|
||||
end
|
||||
end;
|
||||
|
||||
proc lexer_string(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool;
|
||||
var
|
||||
token_end, constructed_string: ^Char
|
||||
token_length: Word
|
||||
is_valid: Bool
|
||||
next_char: Char
|
||||
token_end, constructed_string: ^Char;
|
||||
token_length: Word;
|
||||
is_valid: Bool;
|
||||
next_char: Char;
|
||||
begin
|
||||
is_valid := true;
|
||||
|
||||
@ -510,7 +510,7 @@ begin
|
||||
is_valid := false
|
||||
end;
|
||||
return is_valid
|
||||
end
|
||||
end;
|
||||
|
||||
proc lexer_number(source_code: ^SourceCode, token_content: ^Int);
|
||||
begin
|
||||
@ -521,12 +521,12 @@ begin
|
||||
|
||||
source_code_advance(source_code)
|
||||
end
|
||||
end
|
||||
end;
|
||||
|
||||
(* Categorize an identifier. *)
|
||||
proc lexer_categorize(token_content: String) -> Token;
|
||||
var
|
||||
current_token: Token
|
||||
current_token: Token;
|
||||
begin
|
||||
if token_content = "if" then
|
||||
current_token.kind := TokenKind._if
|
||||
@ -590,23 +590,23 @@ begin
|
||||
end;
|
||||
|
||||
return current_token
|
||||
end
|
||||
end;
|
||||
|
||||
(* Appends `token` to the lexer's token array. The array is resized with
   reallocarray to hold one more element, the token is stored in the new last
   slot and the length is incremented. The reallocation result is not
   checked. *)
proc lexer_add_token(lexer: ^Lexer, token: Token);
var
  slot: ^Token;
begin
  lexer^.data := cast(reallocarray(cast(lexer^.data: Pointer), lexer^.length + 1u, #size(Token)): ^Token);
  slot := lexer^.data + lexer^.length;
  slot^ := token;
  lexer^.length := lexer^.length + 1u
end;
|
||||
|
||||
(* Read the next token from the input. *)
|
||||
proc lexer_next(source_code: SourceCode, token_buffer: ^StringBuffer) -> Token;
|
||||
var
|
||||
current_token: Token
|
||||
first_char: Char
|
||||
current_token: Token;
|
||||
first_char: Char;
|
||||
begin
|
||||
current_token.kind := TokenKind.unknown;
|
||||
|
||||
@ -775,14 +775,14 @@ begin
|
||||
end;
|
||||
|
||||
return current_token
|
||||
end
|
||||
end;
|
||||
|
||||
(* Split the source text into tokens. *)
|
||||
proc lexer_text(source_code: SourceCode) -> Lexer;
|
||||
var
|
||||
current_token: Token
|
||||
token_buffer: StringBuffer
|
||||
lexer: Lexer
|
||||
current_token: Token;
|
||||
token_buffer: StringBuffer;
|
||||
lexer: Lexer;
|
||||
begin
|
||||
lexer := Lexer(0u, nil);
|
||||
token_buffer := string_buffer_new();
|
||||
@ -803,7 +803,7 @@ begin
|
||||
end;
|
||||
|
||||
return lexer
|
||||
end
|
||||
end;
|
||||
|
||||
(*
|
||||
Command line handling.
|
||||
@ -811,9 +811,9 @@ end
|
||||
|
||||
proc parse_command_line*(argc: Int, argv: ^^Char) -> ^CommandLine;
|
||||
var
|
||||
parameter: ^^Char
|
||||
i: Int
|
||||
result: ^CommandLine
|
||||
parameter: ^^Char;
|
||||
i: Int;
|
||||
result: ^CommandLine;
|
||||
begin
|
||||
i := 1;
|
||||
result := cast(malloc(#size(CommandLine)): ^CommandLine);
|
||||
@ -852,7 +852,7 @@ begin
|
||||
end;
|
||||
|
||||
return result
|
||||
end
|
||||
end;
|
||||
|
||||
(*
|
||||
Parser.
|
||||
@ -860,8 +860,8 @@ end
|
||||
|
||||
proc parse(tokens: ^Token, tokens_size: Word);
|
||||
var
|
||||
current_token: ^Token
|
||||
i: Word
|
||||
current_token: ^Token;
|
||||
i: Word;
|
||||
begin
|
||||
i := 0u;
|
||||
while i < tokens_size do
|
||||
@ -1015,7 +1015,7 @@ begin
|
||||
i := i + 1u
|
||||
end;
|
||||
write_c('\n')
|
||||
end
|
||||
end;
|
||||
|
||||
(*
|
||||
Compilation entry.
|
||||
@ -1023,8 +1023,8 @@ end
|
||||
|
||||
proc compile_in_stages(command_line: ^CommandLine, source_code: SourceCode) -> Int;
|
||||
var
|
||||
return_code: Int
|
||||
lexer: Lexer
|
||||
return_code: Int;
|
||||
lexer: Lexer;
|
||||
begin
|
||||
return_code := 0;
|
||||
|
||||
@ -1036,16 +1036,16 @@ begin
|
||||
end;
|
||||
|
||||
return return_code
|
||||
end
|
||||
end;
|
||||
|
||||
proc process(argc: Int, argv: ^^Char) -> Int;
|
||||
var
|
||||
tokens: ^Token
|
||||
tokens_size: Word
|
||||
source_code: SourceCode
|
||||
command_line: ^CommandLine
|
||||
return_code: Int
|
||||
source_file: ^SourceFile
|
||||
tokens: ^Token;
|
||||
tokens_size: Word;
|
||||
source_code: SourceCode;
|
||||
command_line: ^CommandLine;
|
||||
return_code: Int;
|
||||
source_file: ^SourceFile;
|
||||
begin
|
||||
return_code := 0;
|
||||
|
||||
@ -1077,7 +1077,7 @@ begin
|
||||
return_code := compile_in_stages(command_line, source_code)
|
||||
end;
|
||||
return return_code
|
||||
end
|
||||
end;
|
||||
|
||||
begin
|
||||
exit(process(count, parameters))
|
||||
|
Reference in New Issue
Block a user