Remove the old version code
This commit is contained in:
1174
source/Parser.elna
1174
source/Parser.elna
File diff suppressed because it is too large
Load Diff
@@ -1,14 +0,0 @@
|
|||||||
(* This Source Code Form is subject to the terms of the Mozilla Public License,
|
|
||||||
v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
|
||||||
obtain one at https://mozilla.org/MPL/2.0/. *)
|
|
||||||
module;
|
|
||||||
|
|
||||||
(*
  External character classification and case conversion routines.
  Each one takes a character code as Int and returns an Int; callers in this
  code base treat a non-zero result of the is* routines as "true".
*)
proc isdigit*(c: Int) -> Int; extern;
proc isalnum*(c: Int) -> Int; extern;
proc isalpha*(c: Int) -> Int; extern;
proc isspace*(c: Int) -> Int; extern;

(* Case conversion. *)
proc tolower*(c: Int) -> Int; extern;
proc toupper*(c: Int) -> Int; extern;
|
|
||||||
|
|
||||||
end.
|
|
||||||
@@ -1,93 +0,0 @@
|
|||||||
(* This Source Code Form is subject to the terms of the Mozilla Public License,
|
|
||||||
v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
|
||||||
obtain one at https://mozilla.org/MPL/2.0/. *)
|
|
||||||
|
|
||||||
(*
|
|
||||||
Command line handling.
|
|
||||||
*)
|
|
||||||
module;
|
|
||||||
|
|
||||||
import cstdlib, cstring, common;
|
|
||||||
|
|
||||||
type
  (* Options collected by parse_command_line. *)
  CommandLine* = record
    (* Source file name; nil until a non-option argument is seen. *)
    input: ^Char;
    (* File name given after -o; nil when -o is absent. *)
    output: ^Char;
    (* Set by --lex. *)
    lex: Bool;
    (* Set by --parse. *)
    parse: Bool
  end;
|
|
||||||
|
|
||||||
(*
  Parses the program arguments into a CommandLine record.

  argc, argv: argument count and argument vector; the element at index 0 is
  skipped.

  Recognized arguments: --lex, --parse, -o <file> and exactly one argument
  not starting with '-', which is taken as the input file.

  Returns a newly allocated record, or nil if the arguments are invalid
  (a diagnostic is written first) or the record could not be allocated.
*)
proc parse_command_line*(argc: Int, argv: ^^Char) -> ^CommandLine;
var
  parameter: ^Char;
  i: Int;
  result: ^CommandLine;
  allocated: ^CommandLine;
  parsed: Bool;
begin
  i := 1;
  allocated := cast(malloc(#size(CommandLine)): ^CommandLine);
  result := allocated;

  (* The allocation can fail; only touch the record when it exists. *)
  if result <> nil then
    result^.lex := false;
    result^.parse := false;
    result^.input := nil;
    result^.output := nil
  end;

  while i < argc & result <> nil do
    parameter := (argv + i)^;
    parsed := false;

    if strcmp(parameter, "--lex\0".ptr) = 0 then
      parsed := true;
      result^.lex := true
    end;
    if strcmp(parameter, "--parse\0".ptr) = 0 then
      parsed := true;
      result^.parse := true
    end;
    if strcmp(parameter, "-o\0".ptr) = 0 then
      (* The output name is the argument following -o. *)
      i := i + 1;

      if i = argc then
        write_s("Fatal error: expecting a file name following -o.\n");
        result := nil
      end;
      if i < argc then
        parameter := (argv + i)^;
        result^.output := parameter
      end;
      parsed := true
    end;
    if (parameter^ <> '-') & ~parsed then
      parsed := true;

      if result^.input <> nil then
        write_s("Fatal error: only one source file can be compiled at once. First given \"");
        write_z(result^.input);
        write_s("\", then \"");
        write_z(parameter);
        write_s("\".\n");
        result := nil
      end;
      if result <> nil then
        result^.input := parameter
      end
    end;
    if ~parsed then
      write_s("Fatal error: unknown command line options: ");

      write_z(parameter);
      write_s(".\n");

      result := nil
    end;

    i := i + 1
  end;
  if result <> nil then
    if result^.input = nil then
      write_s("Fatal error: no input files.\n");
      result := nil
    end
  end;
  (* On any failure the record is not handed to the caller, so release it. *)
  if result = nil then
    if allocated <> nil then
      free(cast(allocated: Pointer))
    end
  end;

  return result
end;
|
|
||||||
|
|
||||||
end.
|
|
||||||
@@ -1,72 +0,0 @@
|
|||||||
(* This Source Code Form is subject to the terms of the Mozilla Public License,
|
|
||||||
v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
|
||||||
obtain one at https://mozilla.org/MPL/2.0/. *)
|
|
||||||
module;
|
|
||||||
|
|
||||||
import cstring, cstdio;
|
|
||||||
|
|
||||||
type
  (* Fixed-size character array for identifier text. *)
  Identifier = [256]Char;

  (* A line/column pair locating a position in the source text. *)
  TextLocation* = record
    line: Word;
    column: Word
  end;
|
|
||||||
|
|
||||||
(*
  External output routine; write_s and write_z call it with descriptor 1,
  a buffer address and a byte count.
  NOTE(review): the third parameter is named "Word" although its type is Int;
  presumably it is the byte count - confirm against the C prototype.
*)
proc write*(fd: Int, buf: Pointer, Word: Int) -> Int; extern;
|
|
||||||
|
|
||||||
(* Writes a length-prefixed string to file descriptor 1. *)
proc write_s*(value: String);
var
  bytes: Pointer;
  count: Int;
begin
  bytes := cast(value.ptr: Pointer);
  count := cast(value.length: Int);

  write(1, bytes, count)
end;
|
|
||||||
|
|
||||||
(* Writes a NUL-terminated string to file descriptor 1; the length is taken with strlen. *)
proc write_z*(value: ^Char);
var
  count: Int;
begin
  count := cast(strlen(value): Int);

  write(1, cast(value: Pointer), count)
end;
|
|
||||||
|
|
||||||
(* Writes the text "true" or "false" depending on the given value. *)
proc write_b*(value: Bool);
begin
  if ~value then
    write_s("false")
  else
    write_s("true")
  end
end;
|
|
||||||
|
|
||||||
(*
  Writes a single character with putchar and then calls fflush(nil), so the
  character does not stay behind in a buffer while write_s and write_z output
  through write directly.
*)
proc write_c*(value: Char);
var
  code: Int;
begin
  code := cast(value: Int);

  putchar(code);
  fflush(nil)
end;
|
|
||||||
|
|
||||||
(*
  Writes a signed integer in decimal notation.

  Negative values are written with a leading '-'. Digits are produced least
  significant first into the buffer, filling it from its last element
  backwards, and are then written out front to back.
*)
proc write_i*(value: Int);
var
  digit: Int;
  n: Word;
  buffer: [10]Char;
begin
  n := 10u;

  if value = 0 then
    write_c('0')
  end;
  if value < 0 then
    write_c('-')
  end;
  while value <> 0 do
    digit := value % 10;
    value := value / 10;

    (* Assumes division truncates toward zero, so the remainder of a negative
       value is not positive - TODO confirm. Taking the magnitude per digit
       avoids negating the whole value, which would overflow for the smallest
       Int. *)
    if digit < 0 then
      digit := 0 - digit
    end;

    buffer[n] := cast(cast('0': Int) + digit: Char);
    n := n - 1u
  end;
  while n < 10u do
    n := n + 1u;
    write_c(buffer[n])
  end
end;
|
|
||||||
|
|
||||||
(*
  Writes an unsigned integer in decimal notation.

  The digits are computed on the Word value itself rather than by narrowing
  it to Int, so values above the Int range are not misprinted. The buffer
  holds 20 digits, enough for a 64-bit value.
*)
proc write_u*(value: Word);
var
  digit: Int;
  n: Word;
  buffer: [20]Char;
begin
  n := 20u;

  if value = 0u then
    write_c('0')
  end;
  while value <> 0u do
    digit := cast(value % 10u: Int);
    value := value / 10u;

    buffer[n] := cast(cast('0': Int) + digit: Char);
    n := n - 1u
  end;
  while n < 20u do
    n := n + 1u;
    write_c(buffer[n])
  end
end;
|
|
||||||
|
|
||||||
end.
|
|
||||||
@@ -1,29 +0,0 @@
|
|||||||
(* This Source Code Form is subject to the terms of the Mozilla Public License,
|
|
||||||
v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
|
||||||
obtain one at https://mozilla.org/MPL/2.0/. *)
|
|
||||||
module;
|
|
||||||
|
|
||||||
type
  (* Opaque stream type; only used through pointers. *)
  FILE* = record end;

var
  (* Externally defined standard streams. *)
  stdin*: ^FILE := extern;
  stdout*: ^FILE := extern;
  stderr*: ^FILE := extern;

(* Opening, closing and positioning. *)
proc fopen*(pathname: ^Char, mode: ^Char) -> ^FILE; extern;
proc fclose*(stream: ^FILE) -> Int; extern;
proc fseek*(stream: ^FILE, off: Int, whence: Int) -> Int; extern;
proc rewind*(stream: ^FILE); extern;
proc ftell*(stream: ^FILE) -> Int; extern;
proc fflush*(stream: ^FILE) -> Int; extern;

(* Block input and output. *)
proc fread*(ptr: Pointer, size: Word, nmemb: Word, stream: ^FILE) -> Word; extern;
proc fwrite*(ptr: Pointer, size: Word, nitems: Word, stream: ^FILE) -> Word; extern;

(* NOTE(review): the declarations below carry no export mark, unlike the ones above. *)
proc perror(s: ^Char); extern;

proc puts(s: ^Char) -> Int; extern;
proc putchar(c: Int) -> Int; extern;
|
|
||||||
|
|
||||||
end.
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
(* This Source Code Form is subject to the terms of the Mozilla Public License,
|
|
||||||
v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
|
||||||
obtain one at https://mozilla.org/MPL/2.0/. *)
|
|
||||||
module;
|
|
||||||
|
|
||||||
(* External memory management routines. *)
proc malloc(size: Word) -> Pointer; extern;
proc free(ptr: Pointer); extern;
proc calloc(nmemb: Word, size: Word) -> Pointer; extern;
proc realloc(ptr: Pointer, size: Word) -> Pointer; extern;

(* External string to integer conversion. *)
proc atoi(str: ^Char) -> Int; extern;

(* External program termination; declared as never returning. *)
proc exit(code: Int) -> !; extern;
|
|
||||||
|
|
||||||
end.
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
(* This Source Code Form is subject to the terms of the Mozilla Public License,
|
|
||||||
v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
|
||||||
obtain one at https://mozilla.org/MPL/2.0/. *)
|
|
||||||
module;
|
|
||||||
|
|
||||||
(* External raw memory routines. *)
proc memset(ptr: Pointer, c: Int, n: Word) -> ^Char; extern;
proc memcpy(dst: Pointer, src: Pointer, n: Word); extern;

(* External routines for NUL-terminated strings. *)
proc strcmp(s1: ^Char, s2: ^Char) -> Int; extern;
proc strncmp(s1: ^Char, s2: ^Char, n: Word) -> Int; extern;
proc strncpy(dst: ^Char, src: ^Char, dsize: Word) -> ^Char; extern;
proc strcpy(dst: ^Char, src: ^Char) -> ^Char; extern;
proc strlen(ptr: ^Char) -> Word; extern;
|
|
||||||
|
|
||||||
end.
|
|
||||||
841
source/main.elna
841
source/main.elna
@@ -1,841 +0,0 @@
|
|||||||
(* This Source Code Form is subject to the terms of the Mozilla Public License,
|
|
||||||
v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
|
||||||
obtain one at https://mozilla.org/MPL/2.0/. *)
|
|
||||||
program;
|
|
||||||
|
|
||||||
import cstdio, cctype, common, command_line_interface, lexer;
|
|
||||||
|
|
||||||
type
  (* A source file read in chunks of up to 1024 characters. *)
  SourceFile* = record
    (* The chunk most recently filled by fread. *)
    buffer: [1024]Char;
    handle: ^FILE;
    (* Number of characters in the buffer as returned by fread. *)
    size: Word;
    (* Position of the current character in the buffer; reset to 1 on refill. *)
    index: Word
  end;

  (* Growable character buffer. *)
  StringBuffer* = record
    data: Pointer;
    (* Number of characters stored. *)
    size: Word;
    (* Number of characters the allocation can hold. *)
    capacity: Word
  end;

  (* Character stream with a position; input is handed to each callback. *)
  SourceCode = record
    position: TextLocation;

    input: Pointer;
    empty: proc(Pointer) -> Bool;
    advance: proc(Pointer);
    head: proc(Pointer) -> Char
  end;

  (* A lexical token; which member of value is meaningful depends on kind. *)
  Token* = record
    kind: LexerKind;
    value: union
      int_value: Int;
      string: String;
      boolean_value: Bool;
      char_value: Char
    end
  end;

  (* Token array together with its element count. *)
  Tokenizer* = record
    length: Word;
    data: ^Token
  end;
|
|
||||||
|
|
||||||
(*
|
|
||||||
Standard procedures.
|
|
||||||
*)
|
|
||||||
(*
  Resizes an allocation to hold n elements of the given size by calling
  realloc with n * size. The product is not checked for overflow.
*)
proc reallocarray(ptr: Pointer, n: Word, size: Word) -> Pointer;
var
  total: Word;
begin
  total := n * size;

  return realloc(ptr, total)
end;
|
|
||||||
|
|
||||||
(*
  Returns a view of count characters of the string beginning at offset start.
  No characters are copied and the bounds are not checked.
*)
proc substring(string: String, start: Word, count: Word) -> String;
var
  first: ^Char;
begin
  first := string.ptr + start;

  return String(first, count)
end;
|
|
||||||
|
|
||||||
(* Returns the view of the string from offset start up to its end. *)
proc open_substring(string: String, start: Word) -> String;
var
  remaining: Word;
begin
  remaining := string.length - start;

  return substring(string, start, remaining)
end;
|
|
||||||
|
|
||||||
(*
  Returns a copy of the string in newly allocated memory.

  The characters are copied with memcpy because a String carries its own
  length and may contain NUL characters (the lexer accepts the \0 escape);
  strncpy would stop at the first NUL and zero-fill the rest.

  If the allocation fails, the result has a nil pointer and length 0.
*)
proc string_dup(origin: String) -> String;
var
  copy: ^Char;
  length: Word;
begin
  length := origin.length;
  copy := cast(malloc(length): ^Char);

  if copy = nil then
    length := 0u
  else
    memcpy(cast(copy: Pointer), cast(origin.ptr: Pointer), length)
  end;

  return String(copy, length)
end;
|
|
||||||
|
|
||||||
(* Creates an empty string buffer with room for 64 characters. *)
proc string_buffer_new() -> StringBuffer;
var
  fresh: StringBuffer;
begin
  fresh.size := 0u;
  fresh.capacity := 64u;
  fresh.data := malloc(fresh.capacity);

  return fresh
end;
|
|
||||||
|
|
||||||
(*
  Appends a character to the buffer. When the buffer is full its capacity is
  first enlarged by 1024 characters with realloc.
*)
proc string_buffer_push(buffer: ^StringBuffer, char: Char);
var
  slot: ^Char;
begin
  if buffer^.size >= buffer^.capacity then
    buffer^.capacity := buffer^.capacity + 1024u;
    buffer^.data := realloc(buffer^.data, buffer^.capacity)
  end;
  (* Address of the first unused character. *)
  slot := cast(buffer^.data + buffer^.size: ^Char);
  slot^ := char;

  buffer^.size := buffer^.size + 1u
end;
|
|
||||||
|
|
||||||
(*
  Removes the last count characters from the buffer.

  Removing more characters than the buffer holds empties it; the unsigned
  size must not wrap around.
*)
proc string_buffer_pop(buffer: ^StringBuffer, count: Word);
begin
  if count > buffer^.size then
    buffer^.size := 0u
  else
    buffer^.size := buffer^.size - count
  end
end;
|
|
||||||
|
|
||||||
(*
  Empties the buffer and returns its previous content.

  The returned string points into the buffer storage, which is kept and will
  be overwritten by later pushes; duplicate it if it has to outlive them.
*)
proc string_buffer_clear(buffer: ^StringBuffer) -> String;
var
  content: String;
begin
  content := String(cast(buffer^.data: ^Char), buffer^.size);
  buffer^.size := 0u;

  return content
end;
|
|
||||||
|
|
||||||
(*
|
|
||||||
Source code stream procedures.
|
|
||||||
*)
|
|
||||||
|
|
||||||
(*
  Opens the named file for reading and wraps it in a SourceFile.

  Returns nil if the file cannot be opened or the SourceFile cannot be
  allocated. The buffer starts out empty, so the first call to
  source_file_empty fills it.
*)
proc read_source(filename: ^Char) -> ^SourceFile;
var
  result: ^SourceFile;
  file_handle: ^FILE;
begin
  (* Must be set up front: it is returned as is when fopen fails. *)
  result := nil;
  file_handle := fopen(filename, "rb\0".ptr);

  if file_handle <> nil then
    result := cast(malloc(#size(SourceFile)): ^SourceFile);

    if result <> nil then
      result^.handle := file_handle;
      result^.size := 0u;
      result^.index := 1u
    else
      (* Nobody would own the handle, so close it again. *)
      fclose(file_handle)
    end
  end;
  return result
end;
|
|
||||||
|
|
||||||
(*
  Tells whether the source file has no more characters.

  When the current chunk is used up, the next chunk of up to 1024 characters
  is read first; the file is exhausted when that read yields nothing.
*)
proc source_file_empty(source_input: Pointer) -> Bool;
var
  source: ^SourceFile;
begin
  source := cast(source_input: ^SourceFile);

  if source^.index > source^.size then
    source^.index := 1u;
    source^.size := fread(cast(@source^.buffer: Pointer), 1u, 1024u, source^.handle)
  end;

  return source^.size = 0u
end;
|
|
||||||
|
|
||||||
(* Returns the character at the current position of the buffer without consuming it. *)
proc source_file_head(source_input: Pointer) -> Char;
var
  source: ^SourceFile;
begin
  source := cast(source_input: ^SourceFile);

  return source^.buffer[source^.index]
end;
|
|
||||||
|
|
||||||
(* Moves the current position of the buffer one character forward. *)
proc source_file_advance(source_input: Pointer);
var
  source: ^SourceFile;
begin
  source := cast(source_input: ^SourceFile);

  source^.index := source^.index + 1u
end;
|
|
||||||
|
|
||||||
(* Asks the underlying input, through its empty callback, whether it is exhausted. *)
proc source_code_empty(source_code: ^SourceCode) -> Bool;
var
  exhausted: Bool;
begin
  exhausted := source_code^.empty(source_code^.input);

  return exhausted
end;
|
|
||||||
|
|
||||||
(* Returns the current character of the underlying input through its head callback. *)
proc source_code_head(source_code: SourceCode) -> Char;
var
  current: Char;
begin
  current := source_code.head(source_code.input);

  return current
end;
|
|
||||||
|
|
||||||
(*
  Consumes the current character: advances the underlying input and moves
  the tracked position one column to the right.
*)
proc source_code_advance(source_code: ^SourceCode);
begin
  source_code^.advance(source_code^.input);
  source_code^.position.column := source_code^.position.column + 1u
end;
|
|
||||||
|
|
||||||
(* Records a line break in the tracked position: next line, column 0. *)
proc source_code_break(source_code: ^SourceCode);
begin
  source_code^.position.column := 0u;
  source_code^.position.line := source_code^.position.line + 1u
end;
|
|
||||||
|
|
||||||
(*
  Tells whether the input is not exhausted and its current character equals
  the expected one. Nothing is consumed.
*)
proc source_code_expect(source_code: ^SourceCode, expected: Char) -> Bool;
var
  matches: Bool;
begin
  matches := false;

  if ~source_code_empty(source_code) then
    matches := source_code_head(source_code^) = expected
  end;
  return matches
end;
|
|
||||||
|
|
||||||
(*
|
|
||||||
Token procedures.
|
|
||||||
*)
|
|
||||||
|
|
||||||
(*
  Translates the character following a backslash into the character it
  denotes.

  escape: the character after the backslash.
  result: receives the denoted character; left untouched for an unknown escape.

  Returns whether the escape sequence is known.
*)
proc lexer_escape(escape: Char, result: ^Char) -> Bool;
var
  successful: Bool;
begin
  (* Assume success; only the fallback branch reports a failure. *)
  successful := true;

  case escape of
    'n':
      result^ := '\n'
    | 'a':
      result^ := '\a'
    | 'b':
      result^ := '\b'
    | 't':
      result^ := '\t'
    | 'f':
      result^ := '\f'
    | 'r':
      result^ := '\r'
    | 'v':
      result^ := '\v'
    | '\\':
      result^ := '\\'
    | '\'':
      result^ := '\''
    | '"':
      result^ := '"'
    | '?':
      result^ := '\?'
    | '0':
      result^ := '\0'
  else
    successful := false
  end;
  return successful
end;
|
|
||||||
|
|
||||||
(* Skip spaces. *)
|
|
||||||
(*
  Consumes white space up to the next other character or the end of the
  input, recording a line break for every newline passed.
*)
proc lexer_spaces(source_code: ^SourceCode);
begin
  while ~source_code_empty(source_code) & isspace(cast(source_code_head(source_code^): Int)) <> 0 do
    if source_code_head(source_code^) = '\n' then
      source_code_break(source_code)
    end;
    source_code_advance(source_code)
  end
end;
|
|
||||||
|
|
||||||
(* Check whether the character is allowed in an identifier. *)
|
|
||||||
(* An identifier character is an underscore or anything isalnum accepts. *)
proc lexer_is_ident(char: Char) -> Bool;
  return char = '_' or isalnum(cast(char: Int)) <> 0
end;
|
|
||||||
|
|
||||||
(*
  Consumes identifier characters from the input and appends them to
  token_content, stopping at the first other character or the end of the
  input.
*)
proc lexer_identifier(source_code: ^SourceCode, token_content: ^StringBuffer);
begin
  while ~source_code_empty(source_code) & lexer_is_ident(source_code_head(source_code^)) do
    string_buffer_push(token_content, source_code_head(source_code^));
    source_code_advance(source_code)
  end
end;
|
|
||||||
|
|
||||||
(*
  Reads a comment body; lexer_next calls this after consuming the opening
  parenthesis and asterisk. The text is appended to token_content without
  the closing asterisk and parenthesis.

  trailing tracks the terminator: 0 - nothing matched, 1 - the previous
  character was an asterisk, 2 - the terminator is complete.

  Returns whether the terminator was found before the end of the input.
*)
proc lexer_comment(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool;
var
  trailing: Word;
  current: Char;
begin
  trailing := 0u;

  while ~source_code_empty(source_code) & trailing < 2u do
    current := source_code_head(source_code^);

    if current = '*' then
      string_buffer_push(token_content, '*');
      trailing := 1u
    elsif current = ')' & trailing = 1u then
      (* The asterisk pushed last belongs to the terminator; drop it. *)
      string_buffer_pop(token_content, 1u);
      trailing := 2u
    else
      string_buffer_push(token_content, current);
      trailing := 0u
    end;
    source_code_advance(source_code)
  end;

  return trailing = 2u
end;
|
|
||||||
|
|
||||||
(*
  Reads one character of a character or string literal, decoding a backslash
  escape sequence if there is one.

  token_content: receives the character.

  Returns false at the end of the input and for an unknown escape sequence;
  in the latter case the backslash has been consumed but the character after
  it has not.
*)
proc lexer_character(source_code: ^SourceCode, token_content: ^Char) -> Bool;
var
  successful: Bool;
begin
  successful := false;

  if ~source_code_empty(source_code) then
    if source_code_head(source_code^) <> '\\' then
      token_content^ := source_code_head(source_code^);
      successful := true
    else
      source_code_advance(source_code);

      successful := ~source_code_empty(source_code) & lexer_escape(source_code_head(source_code^), token_content)
    end
  end;
  (* Consume the character that has just been accepted. *)
  if successful then
    source_code_advance(source_code)
  end;
  return successful
end;
|
|
||||||
|
|
||||||
(*
  Reads the rest of a string literal; lexer_next calls this after consuming
  the opening quote. The decoded characters are appended to token_content
  and the closing quote is consumed.

  Returns false if an escape sequence is invalid or the input ends before
  the closing quote; token_content may then hold a part of the literal.
*)
proc lexer_string(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool;
var
  is_valid: Bool := true;
  next_char: Char;
begin
  while is_valid & ~source_code_empty(source_code) & source_code_head(source_code^) <> '"' do
    is_valid := lexer_character(source_code, @next_char);

    if is_valid then
      string_buffer_push(token_content, next_char)
    end
  end;

  if is_valid & source_code_expect(source_code, '"') then
    source_code_advance(source_code)
  else
    is_valid := false
  end;
  return is_valid
end;
|
|
||||||
|
|
||||||
(*
  Reads a run of decimal digits and stores its value in token_content.
  The value is 0 when the input does not start with a digit; overflow is
  not detected.
*)
proc lexer_number(source_code: ^SourceCode, token_content: ^Int);
var
  digit: Int;
begin
  token_content^ := 0;

  while ~source_code_empty(source_code) & isdigit(cast(source_code_head(source_code^): Int)) <> 0 do
    digit := cast(source_code_head(source_code^): Int) - cast('0': Int);
    token_content^ := token_content^ * 10 + digit;

    source_code_advance(source_code)
  end
end;
|
|
||||||
|
|
||||||
(* Categorize an identifier. *)
|
|
||||||
(*
  Turns the text of a word into a token: a keyword, a boolean literal, nil,
  or otherwise an identifier carrying a copy of the text.

  The comparisons are mutually exclusive, so they are grouped by topic.
*)
proc lexer_categorize(token_content: String) -> Token;
var
  current_token: Token;
begin
  (* Literals. *)
  if token_content = "true" then
    current_token.kind := LexerKind.boolean;
    current_token.value.boolean_value := true
  elsif token_content = "false" then
    current_token.kind := LexerKind.boolean;
    current_token.value.boolean_value := false
  elsif token_content = "nil" then
    current_token.kind := LexerKind.null

  (* Control flow. *)
  elsif token_content = "if" then
    current_token.kind := LexerKind._if
  elsif token_content = "then" then
    current_token.kind := LexerKind._then
  elsif token_content = "else" then
    current_token.kind := LexerKind._else
  elsif token_content = "elsif" then
    current_token.kind := LexerKind._elsif
  elsif token_content = "while" then
    current_token.kind := LexerKind._while
  elsif token_content = "do" then
    current_token.kind := LexerKind._do
  elsif token_content = "case" then
    current_token.kind := LexerKind._case
  elsif token_content = "of" then
    current_token.kind := LexerKind._of
  elsif token_content = "return" then
    current_token.kind := LexerKind._return
  elsif token_content = "defer" then
    current_token.kind := LexerKind._defer

  (* Blocks and declarations. *)
  elsif token_content = "proc" then
    current_token.kind := LexerKind._proc
  elsif token_content = "begin" then
    current_token.kind := LexerKind._begin
  elsif token_content = "end" then
    current_token.kind := LexerKind._end
  elsif token_content = "extern" then
    current_token.kind := LexerKind._extern
  elsif token_content = "const" then
    current_token.kind := LexerKind._const
  elsif token_content = "var" then
    current_token.kind := LexerKind._var
  elsif token_content = "type" then
    current_token.kind := LexerKind._type
  elsif token_content = "record" then
    current_token.kind := LexerKind._record
  elsif token_content = "union" then
    current_token.kind := LexerKind._union

  (* Compilation units. *)
  elsif token_content = "program" then
    current_token.kind := LexerKind._program
  elsif token_content = "module" then
    current_token.kind := LexerKind._module
  elsif token_content = "import" then
    current_token.kind := LexerKind._import

  (* Operators spelled as words. *)
  elsif token_content = "or" then
    current_token.kind := LexerKind._or
  elsif token_content = "cast" then
    current_token.kind := LexerKind._cast
  else
    (* Anything else is an identifier; it keeps its own copy of the text. *)
    current_token.kind := LexerKind.identifier;
    current_token.value.string := string_dup(token_content)
  end;

  return current_token
end;
|
|
||||||
|
|
||||||
(* Appends a token to the token array, enlarging the array by one element. *)
proc lexer_add_token(lexer: ^Tokenizer, token: Token);
var
  grown: Pointer;
begin
  grown := reallocarray(cast(lexer^.data: Pointer), lexer^.length + 1u, #size(Token));
  lexer^.data := cast(grown: ^Token);

  (* The old length is the index of the new last element. *)
  (lexer^.data + lexer^.length)^ := token;
  lexer^.length := lexer^.length + 1u
end;
|
|
||||||
|
|
||||||
(* Read the next token from the input. *)
|
|
||||||
(*
  Reads the next token from the input. The input must not be exhausted and
  should be positioned on the first character of the token.

  source_code: the input; it is received by value, so position updates made
  here are not visible to the caller, while the underlying input is advanced
  through its pointer.
  token_buffer: scratch buffer for the token text; it is left empty.

  Returns the token. Its kind is LexerKind.unknown for an unexpected
  character, an unterminated comment, or a malformed character or string
  literal.
*)
proc lexer_next(source_code: SourceCode, token_buffer: ^StringBuffer) -> Token;
var
  current_token: Token;
  first_char: Char;
begin
  current_token.kind := LexerKind.unknown;

  first_char := source_code_head(source_code);

  if isalpha(cast(first_char: Int)) <> 0 or first_char = '_' then
    lexer_identifier(@source_code, token_buffer);
    current_token := lexer_categorize(string_buffer_clear(token_buffer))
  elsif first_char = '#' then
    source_code_advance(@source_code);
    lexer_identifier(@source_code, token_buffer);

    current_token.kind := LexerKind.trait;
    current_token.value.string := string_dup(string_buffer_clear(token_buffer))
  elsif isdigit(cast(first_char: Int)) <> 0 then
    lexer_number(@source_code, @current_token.value.int_value);

    (* A trailing "u" marks an unsigned (word) literal. *)
    if source_code_expect(@source_code, 'u') then
      current_token.kind := LexerKind.word;
      source_code_advance(@source_code)
    else
      current_token.kind := LexerKind.integer
    end
  elsif first_char = '(' then
    source_code_advance(@source_code);

    if source_code_empty(@source_code) then
      current_token.kind := LexerKind.left_paren
    elsif source_code_head(source_code) = '*' then
      source_code_advance(@source_code);

      if lexer_comment(@source_code, token_buffer) then
        current_token.value.string := string_dup(string_buffer_clear(token_buffer));
        current_token.kind := LexerKind.comment
      else
        (* Drop the partial comment text so it cannot leak into the next token. *)
        token_buffer^.size := 0u;
        current_token.kind := LexerKind.unknown
      end
    else
      current_token.kind := LexerKind.left_paren
    end
  elsif first_char = ')' then
    current_token.kind := LexerKind.right_paren;
    source_code_advance(@source_code)
  elsif first_char = '\'' then
    source_code_advance(@source_code);

    if lexer_character(@source_code, @current_token.value.char_value) & source_code_expect(@source_code, '\'') then
      current_token.kind := LexerKind.character;
      source_code_advance(@source_code)
    else
      current_token.kind := LexerKind.unknown
    end
  elsif first_char = '"' then
    source_code_advance(@source_code);

    if lexer_string(@source_code, token_buffer) then
      current_token.kind := LexerKind.string;
      current_token.value.string := string_dup(string_buffer_clear(token_buffer))
    else
      (* Drop the partial literal so it cannot leak into the next token. *)
      token_buffer^.size := 0u;
      current_token.kind := LexerKind.unknown
    end
  elsif first_char = '[' then
    current_token.kind := LexerKind.left_square;
    source_code_advance(@source_code)
  elsif first_char = ']' then
    current_token.kind := LexerKind.right_square;
    source_code_advance(@source_code)
  elsif first_char = '>' then
    source_code_advance(@source_code);

    if source_code_empty(@source_code) then
      current_token.kind := LexerKind.greater_than
    elsif source_code_head(source_code) = '=' then
      current_token.kind := LexerKind.greater_equal;
      source_code_advance(@source_code)
    elsif source_code_head(source_code) = '>' then
      current_token.kind := LexerKind.shift_right;
      source_code_advance(@source_code)
    else
      current_token.kind := LexerKind.greater_than
    end
  elsif first_char = '<' then
    source_code_advance(@source_code);

    if source_code_empty(@source_code) then
      current_token.kind := LexerKind.less_than
    elsif source_code_head(source_code) = '=' then
      current_token.kind := LexerKind.less_equal;
      source_code_advance(@source_code)
    elsif source_code_head(source_code) = '<' then
      current_token.kind := LexerKind.shift_left;
      source_code_advance(@source_code)
    elsif source_code_head(source_code) = '>' then
      current_token.kind := LexerKind.not_equal;
      source_code_advance(@source_code)
    else
      current_token.kind := LexerKind.less_than
    end
  elsif first_char = '=' then
    current_token.kind := LexerKind.equal;
    source_code_advance(@source_code)
  elsif first_char = ';' then
    current_token.kind := LexerKind.semicolon;
    source_code_advance(@source_code)
  elsif first_char = '.' then
    current_token.kind := LexerKind.dot;
    source_code_advance(@source_code)
  elsif first_char = ',' then
    current_token.kind := LexerKind.comma;
    source_code_advance(@source_code)
  elsif first_char = '+' then
    current_token.kind := LexerKind.plus;
    source_code_advance(@source_code)
  elsif first_char = '-' then
    source_code_advance(@source_code);

    if source_code_empty(@source_code) then
      current_token.kind := LexerKind.minus
    elsif source_code_head(source_code) = '>' then
      current_token.kind := LexerKind.arrow;
      source_code_advance(@source_code)
    else
      current_token.kind := LexerKind.minus
    end
  elsif first_char = '*' then
    current_token.kind := LexerKind.multiplication;
    source_code_advance(@source_code)
  elsif first_char = '/' then
    current_token.kind := LexerKind.division;
    source_code_advance(@source_code)
  elsif first_char = '%' then
    current_token.kind := LexerKind.remainder;
    source_code_advance(@source_code)
  elsif first_char = ':' then
    source_code_advance(@source_code);

    if source_code_empty(@source_code) then
      current_token.kind := LexerKind.colon
    elsif source_code_head(source_code) = '=' then
      current_token.kind := LexerKind.assignment;
      source_code_advance(@source_code)
    else
      current_token.kind := LexerKind.colon
    end
  elsif first_char = '^' then
    current_token.kind := LexerKind.hat;
    source_code_advance(@source_code)
  elsif first_char = '@' then
    current_token.kind := LexerKind.at;
    source_code_advance(@source_code)
  elsif first_char = '!' then
    current_token.kind := LexerKind.exclamation;
    source_code_advance(@source_code)
  elsif first_char = '&' then
    current_token.kind := LexerKind.and;
    source_code_advance(@source_code)
  elsif first_char = '~' then
    current_token.kind := LexerKind.not;
    source_code_advance(@source_code)
  elsif first_char = '|' then
    current_token.kind := LexerKind.pipe;
    source_code_advance(@source_code)
  else
    (* Unexpected character: skip it so that the caller makes progress. *)
    current_token.kind := LexerKind.unknown;
    source_code_advance(@source_code)
  end;

  return current_token
end;
|
|
||||||
|
|
||||||
(* Split the source text into tokens. *)
|
|
||||||
(*
  Splits the source text into tokens.

  Tokens of unknown kind are not stored; a diagnostic is written for each
  and scanning goes on with the remaining input.

  Returns the tokenizer holding the token array and its length.
*)
proc lexer_text(source_code: SourceCode) -> Tokenizer;
var
  current_token: Token;
  token_buffer: StringBuffer;
  lexer: Tokenizer;
begin
  lexer := Tokenizer(0u, nil);
  token_buffer := string_buffer_new();

  lexer_spaces(@source_code);

  while ~source_code_empty(@source_code) do
    current_token := lexer_next(source_code, @token_buffer);

    if current_token.kind <> LexerKind.unknown then
      lexer_add_token(@lexer, current_token)
    elsif source_code_empty(@source_code) then
      (* There is no current character to show once the input is exhausted. *)
      write_s("Lexical analysis error at the end of the input.\n")
    else
      write_s("Lexical analysis error on \"");
      write_c(source_code_head(source_code));
      write_s("\".\n")
    end;
    (* Also after an error, otherwise the white space that follows would be
       reported as further errors. *)
    lexer_spaces(@source_code)
  end;
  (* Tokens hold copies of their text, so the scratch buffer can be released. *)
  free(token_buffer.data);

  return lexer
end;
|
|
||||||
|
|
||||||
(*
|
|
||||||
Parser.
|
|
||||||
*)
|
|
||||||
|
|
||||||
(*
  Writes the textual representation of a single token.

  Keywords are written in upper case, operators and punctuation as their
  symbol, and literals wrapped in angle brackets together with their value.
  A kind without its own branch is written as UNKNOWN followed by the
  numeric value of the kind.
*)
proc parse_write_token(token: ^Token);
begin
  case token^.kind of
    LexerKind._if:
      write_s("IF")
    | LexerKind._then:
      write_s("THEN")
    | LexerKind._else:
      write_s("ELSE")
    | LexerKind._elsif:
      write_s("ELSIF")
    | LexerKind._while:
      write_s("WHILE")
    | LexerKind._do:
      write_s("DO")
    | LexerKind._proc:
      write_s("PROC")
    | LexerKind._begin:
      write_s("BEGIN")
    | LexerKind._end:
      write_s("END")
    | LexerKind._extern:
      write_s("EXTERN")
    | LexerKind._const:
      write_s("CONST")
    | LexerKind._var:
      write_s("VAR")
    | LexerKind._case:
      write_s("CASE")
    | LexerKind._of:
      write_s("OF")
    | LexerKind._type:
      write_s("TYPE")
    | LexerKind._record:
      write_s("RECORD")
    | LexerKind._union:
      write_s("UNION")
    | LexerKind.pipe:
      write_s("|")
    | LexerKind.to:
      write_s("TO")
    | LexerKind.boolean:
      write_s("BOOLEAN<");
      write_b(token^.value.boolean_value);
      write_c('>')
    | LexerKind.null:
      write_s("NIL")
    | LexerKind.and:
      write_s("&")
    | LexerKind._or:
      write_s("OR")
    | LexerKind.not:
      write_s("~")
    | LexerKind._return:
      write_s("RETURN")
    | LexerKind._cast:
      write_s("CAST")
    | LexerKind.shift_left:
      write_s("<<")
    | LexerKind.shift_right:
      write_s(">>")
    | LexerKind.identifier:
      write_c('<');
      write_s(token^.value.string);
      write_c('>')
    | LexerKind.trait:
      write_c('#');
      write_s(token^.value.string)
    | LexerKind.left_paren:
      write_s("(")
    | LexerKind.right_paren:
      write_s(")")
    | LexerKind.left_square:
      write_s("[")
    | LexerKind.right_square:
      write_s("]")
    | LexerKind.greater_equal:
      write_s(">=")
    | LexerKind.less_equal:
      write_s("<=")
    | LexerKind.greater_than:
      write_s(">")
    | LexerKind.less_than:
      write_s("<")
    | LexerKind.equal:
      write_s("=")
    | LexerKind.not_equal:
      write_s("<>")
    | LexerKind.semicolon:
      write_c(';')
    | LexerKind.dot:
      write_c('.')
    | LexerKind.comma:
      write_c(',')
    | LexerKind.plus:
      write_c('+')
    | LexerKind.minus:
      write_c('-')
    | LexerKind.multiplication:
      write_c('*')
    | LexerKind.division:
      write_c('/')
    | LexerKind.remainder:
      write_c('%')
    | LexerKind.assignment:
      write_s(":=")
    | LexerKind.colon:
      write_c(':')
    | LexerKind.hat:
      write_c('^')
    | LexerKind.at:
      write_c('@')
    | LexerKind.comment:
      write_s("(* COMMENT *)")
    | LexerKind.integer:
      write_c('<');
      write_i(token^.value.int_value);
      write_c('>')
    | LexerKind.word:
      write_c('<');
      write_i(token^.value.int_value);
      write_s("u>")
    | LexerKind.character:
      write_c('<');
      write_i(cast(token^.value.char_value: Int));
      write_s("c>")
    | LexerKind.string:
      write_s("\"...\"")
    | LexerKind._defer:
      write_s("DEFER")
    | LexerKind.exclamation:
      write_c('!')
    | LexerKind.arrow:
      write_s("->")
    | LexerKind._program:
      write_s("PROGRAM")
    | LexerKind._module:
      write_s("MODULE")
    | LexerKind._import:
      write_s("IMPORT")
  else
    write_s("UNKNOWN<");
    write_i(cast(token^.kind: Int));
    write_c('>')
  end
end;

(*
  Writes every token of the given token array on a single line.

  Parameters:
    tokens      - pointer to the first token.
    tokens_size - number of tokens to write.

  Each token is followed by one space; the line is terminated with a
  newline after the last token.
*)
proc parse(tokens: ^Token, tokens_size: Word);
var
  index: Word;
begin
  index := 0u;

  while index < tokens_size do
    parse_write_token(tokens + index);
    write_c(' ');

    index := index + 1u
  end;
  write_c('\n')
end;
|
|
||||||
|
|
||||||
(*
|
|
||||||
Compilation entry.
|
|
||||||
*)
|
|
||||||
|
|
||||||
(*
  Runs the compilation stages selected on the command line.

  The source is tokenized when either the lex or the parse flag is set.
  When the parse flag is set the collected tokens are additionally handed
  over to parse.

  Parameters:
    command_line - parsed command line options.
    source_code  - source to compile.

  Always returns 0.
*)
proc compile_in_stages(command_line: ^CommandLine, source_code: SourceCode) -> Int;
var
  tokenizer: Tokenizer;
begin
  if command_line^.lex or command_line^.parse then
    tokenizer := lexer_text(source_code);

    (* Parsing requires the tokens, so it only happens after lexing. *)
    if command_line^.parse then
      parse(tokenizer.data, tokenizer.length)
    end
  end;

  return 0
end;
|
|
||||||
|
|
||||||
(*
  Compiler driver: parses the command line, opens the input file and runs
  the requested compilation stages on it.

  Parameters:
    argc - number of command line arguments.
    argv - command line arguments.

  Returns:
    2 if parse_command_line returned nil,
    3 if read_source returned nil (the failure is reported with perror),
    otherwise the result of compile_in_stages.
*)
proc process(argc: Int, argv: ^^Char) -> Int;
var
  source_code: SourceCode;
  command_line: ^CommandLine;
  return_code: Int := 0;
  source_file: ^SourceFile;
begin
  command_line := parse_command_line(argc, argv);
  if command_line = nil then
    return_code := 2
  end;

  (* Open the input only if the command line was accepted. *)
  if return_code = 0 then
    source_file := read_source(command_line^.input);

    if source_file = nil then
      perror(command_line^.input);
      return_code := 3
    end
  end;

  if return_code = 0 then
    (* Schedule closing of the file handle. *)
    defer
      fclose(source_file^.handle)
    end;

    (* Expose the file through the generic source interface, starting at
       location 1, 1. *)
    source_code.position := TextLocation(1u, 1u);
    source_code.input := cast(source_file: Pointer);
    source_code.empty := source_file_empty;
    source_code.head := source_file_head;
    source_code.advance := source_file_advance;

    return_code := compile_in_stages(command_line, source_code)
  end;
  return return_code
end;
|
|
||||||
|
|
||||||
(* Module body: delegate to process and return its result. *)
return process(count, parameters)
|
|
||||||
end.
|
|
||||||
Reference in New Issue
Block a user