(* elna/source/main.elna *)
(* This Source Code Form is subject to the terms of the Mozilla Public License,
v. 2.0. If a copy of the MPL was not distributed with this file, You can
obtain one at https://mozilla.org/MPL/2.0/. *)
program;
import cstdio, cctype, common, command_line_interface, lexer;
type
SourceFile* = record
buffer: [1024]Char;
handle: ^FILE;
size: Word;
index: Word
end;
StringBuffer* = record
data: Pointer;
size: Word;
capacity: Word
end;
SourceCode = record
position: TextLocation;
input: Pointer;
empty: proc(Pointer) -> Bool;
advance: proc(Pointer);
head: proc(Pointer) -> Char
end;
Token* = record
kind: LexerKind;
value: union
int_value: Int;
string: String;
boolean_value: Bool;
char_value: Char
end
end;
Tokenizer* = record
length: Word;
data: ^Token
end;
(*
Standard procedures.
*)
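(* realloc wrapper that computes the allocation size from an element count and an element size. Unlike the C library reallocarray it does not check for multiplication overflow. *)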
proc reallocarray(ptr: Pointer, n: Word, size: Word) -> Pointer;
return realloc(ptr, n * size)
end;
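(* Return a view of count characters of string starting at start. No copy is made. *)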
proc substring(string: String, start: Word, count: Word) -> String;
return String(string.ptr + start, count)
end;
proc open_substring(string: String, start: Word) -> String;
return substring(string, start, string.length - start)
end;
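(* Copy a string into freshly allocated memory. *)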
proc string_dup(origin: String) -> String;
var
copy: ^Char;
begin
copy := cast(malloc(origin.length): ^Char);
strncpy(copy, origin.ptr, origin.length);
return String(copy, origin.length)
end;
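(* Create an empty string buffer with an initial capacity of 64 bytes. *)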
proc string_buffer_new() -> StringBuffer;
var
result: StringBuffer;
begin
result.capacity := 64u;
result.data := malloc(result.capacity);
result.size := 0u;
return result
end;
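(* Append a single character to the buffer, growing it in 1024 byte steps when the capacity is exhausted. *)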
proc string_buffer_push(buffer: ^StringBuffer, char: Char);
begin
if buffer^.size >= buffer^.capacity then
buffer^.capacity := buffer^.capacity + 1024u;
buffer^.data := realloc(buffer^.data, buffer^.capacity)
end;
cast(buffer^.data + buffer^.size: ^Char)^ := char;
buffer^.size := buffer^.size + 1u
end;
proc string_buffer_pop(buffer: ^StringBuffer, count: Word);
begin
buffer^.size := buffer^.size - count
end;
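(* Hand out the accumulated contents as a String and reset the buffer. The result still points into the buffer storage, so it has to be duplicated before the buffer is reused. *)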
proc string_buffer_clear(buffer: ^StringBuffer) -> String;
var
result: String;
begin
result := String(cast(buffer^.data: ^Char), buffer^.size);
buffer^.size := 0u;
return result
end;
(*
Source code stream procedures.
*)
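(* Open the given file for reading and wrap it into a SourceFile. Returns nil if the file cannot be opened. *)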
proc read_source(filename: ^Char) -> ^SourceFile;
var
result: ^SourceFile := nil;
file_handle: ^FILE;
begin
file_handle := fopen(filename, "rb\0".ptr);
if file_handle <> nil then
result := cast(malloc(#size(SourceFile)): ^SourceFile);
result^.handle := file_handle;
result^.size := 0u;
result^.index := 1u
end;
return result
end;
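(* Refill the buffer from the file once it has been consumed and report whether the input is exhausted. *)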
proc source_file_empty(source_input: Pointer) -> Bool;
var
source_file: ^SourceFile;
begin
source_file := cast(source_input: ^SourceFile);
if source_file^.index > source_file^.size then
source_file^.size := fread(cast(@source_file^.buffer: Pointer), 1u, 1024u, source_file^.handle);
source_file^.index := 1u
end;
return source_file^.size = 0u
end;
proc source_file_head(source_input: Pointer) -> Char;
var
source_file: ^SourceFile;
begin
source_file := cast(source_input: ^SourceFile);
return source_file^.buffer[source_file^.index]
end;
proc source_file_advance(source_input: Pointer);
var
source_file: ^SourceFile;
begin
source_file := cast(source_input: ^SourceFile);
source_file^.index := source_file^.index + 1u
end;
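(* The source_code_* procedures work on the generic SourceCode stream and dispatch through its procedure pointers, so the lexer does not depend on the concrete input. *)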
proc source_code_empty(source_code: ^SourceCode) -> Bool;
return source_code^.empty(source_code^.input)
end;
proc source_code_head(source_code: SourceCode) -> Char;
return source_code.head(source_code.input)
end;
proc source_code_advance(source_code: ^SourceCode);
begin
source_code^.advance(source_code^.input);
source_code^.position.column := source_code^.position.column + 1u
end;
proc source_code_break(source_code: ^SourceCode);
begin
source_code^.position.line := source_code^.position.line + 1u;
source_code^.position.column := 0u
end;
proc source_code_expect(source_code: ^SourceCode, expected: Char) -> Bool;
return ~source_code_empty(source_code) & source_code_head(source_code^) = expected
end;
(*
Token procedures.
*)
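(* Translate the character following a backslash into the character the escape sequence stands for. Returns false for an unknown escape. *)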
proc lexer_escape(escape: Char, result: ^Char) -> Bool;
var
successful: Bool;
begin
case escape of
'n':
result^ := '\n';
successful := true
| 'a':
result^ := '\a';
successful := true
| 'b':
result^ := '\b';
successful := true
| 't':
result^ := '\t';
successful := true
| 'f':
result^ := '\f';
successful := true
| 'r':
result^ := '\r';
successful := true
| 'v':
result^ := '\v';
successful := true
| '\\':
result^ := '\\';
successful := true
| '\'':
result^ := '\'';
successful := true
| '"':
result^ := '"';
successful := true
| '?':
result^ := '\?';
successful := true
| '0':
result^ := '\0';
successful := true
else
successful := false
end;
return successful
end;
(* Skip spaces. *)
proc lexer_spaces(source_code: ^SourceCode);
var
current: Char;
begin
while ~source_code_empty(source_code) & isspace(cast(source_code_head(source_code^): Int)) <> 0 do
current := source_code_head(source_code^);
if current = '\n' then
source_code_break(source_code)
end;
source_code_advance(source_code)
end
end;
(* Check whether a character is allowed in an identifier. *)
proc lexer_is_ident(char: Char) -> Bool;
return isalnum(cast(char: Int)) <> 0 or char = '_'
end;
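(* Consume an identifier and append its characters to token_content. *)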
proc lexer_identifier(source_code: ^SourceCode, token_content: ^StringBuffer);
begin
while ~source_code_empty(source_code) & lexer_is_ident(source_code_head(source_code^)) do
string_buffer_push(token_content, source_code_head(source_code^));
source_code_advance(source_code)
end
end;
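(* Consume a comment body up to and including the closing star and parenthesis; the closing marker itself is not stored. Returns false if the input ends before the comment is closed. *)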
proc lexer_comment(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool;
var
trailing: Word;
begin
trailing := 0u;
while ~source_code_empty(source_code) & trailing < 2u do
if source_code_head(source_code^) = '*' then
string_buffer_push(token_content, '*');
trailing := 1u
elsif source_code_head(source_code^) = ')' & trailing = 1u then
string_buffer_pop(token_content, 1u);
trailing := 2u
else
string_buffer_push(token_content, source_code_head(source_code^));
trailing := 0u
end;
source_code_advance(source_code)
end;
return trailing = 2u
end;
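(* Read a single character, resolving backslash escapes, and store it through token_content. Returns whether the character was valid. *)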
proc lexer_character(source_code: ^SourceCode, token_content: ^Char) -> Bool;
var
successful: Bool;
begin
successful := ~source_code_empty(source_code);
if successful then
if source_code_head(source_code^) = '\\' then
source_code_advance(source_code);
successful := ~source_code_empty(source_code) & lexer_escape(source_code_head(source_code^), token_content)
else
token_content^ := source_code_head(source_code^);
successful := true
end
end;
if successful then
source_code_advance(source_code)
end;
return successful
end;
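(* Read a double quoted string literal after its opening quote, accumulating the characters in token_content. Returns false on an invalid escape or an unterminated string. *)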
proc lexer_string(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool;
var
is_valid: Bool := true;
next_char: Char;
begin
while is_valid & ~source_code_empty(source_code) & source_code_head(source_code^) <> '"' do
is_valid := lexer_character(source_code, @next_char);
if is_valid then
string_buffer_push(token_content, next_char)
end
end;
if is_valid & source_code_expect(source_code, '"') then
source_code_advance(source_code)
else
is_valid := false
end;
return is_valid
end;
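(* Read a decimal integer literal. *)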
proc lexer_number(source_code: ^SourceCode, token_content: ^Int);
begin
token_content^ := 0;
while ~source_code_empty(source_code) & isdigit(cast(source_code_head(source_code^): Int)) <> 0 do
token_content^ := token_content^ * 10 + (cast(source_code_head(source_code^): Int) - cast('0': Int));
source_code_advance(source_code)
end
end;
(* Categorize an identifier. *)
proc lexer_categorize(token_content: String) -> Token;
var
current_token: Token;
begin
if token_content = "if" then
current_token.kind := LexerKind._if
elsif token_content = "then" then
current_token.kind := LexerKind._then
elsif token_content = "else" then
current_token.kind := LexerKind._else
elsif token_content = "elsif" then
current_token.kind := LexerKind._elsif
elsif token_content = "while" then
current_token.kind := LexerKind._while
elsif token_content = "do" then
current_token.kind := LexerKind._do
elsif token_content = "proc" then
current_token.kind := LexerKind._proc
elsif token_content = "begin" then
current_token.kind := LexerKind._begin
elsif token_content = "end" then
current_token.kind := LexerKind._end
elsif token_content = "extern" then
current_token.kind := LexerKind._extern
elsif token_content = "const" then
current_token.kind := LexerKind._const
elsif token_content = "var" then
current_token.kind := LexerKind._var
elsif token_content = "case" then
current_token.kind := LexerKind._case
elsif token_content = "of" then
current_token.kind := LexerKind._of
elsif token_content = "type" then
current_token.kind := LexerKind._type
elsif token_content = "record" then
current_token.kind := LexerKind._record
elsif token_content = "union" then
current_token.kind := LexerKind._union
elsif token_content = "true" then
current_token.kind := LexerKind.boolean;
current_token.value.boolean_value := true
elsif token_content = "false" then
current_token.kind := LexerKind.boolean;
current_token.value.boolean_value := false
elsif token_content = "nil" then
current_token.kind := LexerKind.null
elsif token_content = "or" then
current_token.kind := LexerKind._or
elsif token_content = "return" then
current_token.kind := LexerKind._return
elsif token_content = "cast" then
current_token.kind := LexerKind._cast
elsif token_content = "defer" then
current_token.kind := LexerKind._defer
elsif token_content = "program" then
current_token.kind := LexerKind._program
elsif token_content = "module" then
current_token.kind := LexerKind._module
elsif token_content = "import" then
current_token.kind := LexerKind._import
else
current_token.kind := LexerKind.identifier;
current_token.value.string := string_dup(token_content)
end;
return current_token
end;
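(* Append a token to the tokenizer, growing its array by one element. *)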
proc lexer_add_token(lexer: ^Tokenizer, token: Token);
var
new_length: Word;
begin
new_length := lexer^.length + 1u;
lexer^.data := cast(reallocarray(cast(lexer^.data: Pointer), new_length, #size(Token)): ^Token);
(lexer^.data + lexer^.length)^ := token;
lexer^.length := new_length
end;
(* Read the next token from the input. *)
proc lexer_next(source_code: SourceCode, token_buffer: ^StringBuffer) -> Token;
var
current_token: Token;
first_char: Char;
begin
current_token.kind := LexerKind.unknown;
first_char := source_code_head(source_code);
if isalpha(cast(first_char: Int)) <> 0 or first_char = '_' then
lexer_identifier(@source_code, token_buffer);
current_token := lexer_categorize(string_buffer_clear(token_buffer))
elsif first_char = '#' then
source_code_advance(@source_code);
lexer_identifier(@source_code, token_buffer);
current_token.kind := LexerKind.trait;
current_token.value.string := string_dup(string_buffer_clear(token_buffer))
elsif isdigit(cast(first_char: Int)) <> 0 then
lexer_number(@source_code, @current_token.value.int_value);
if source_code_expect(@source_code, 'u') then
current_token.kind := LexerKind.word;
source_code_advance(@source_code)
else
current_token.kind := LexerKind.integer
end
elsif first_char = '(' then
source_code_advance(@source_code);
if source_code_empty(@source_code) then
current_token.kind := LexerKind.left_paren
elsif source_code_head(source_code) = '*' then
source_code_advance(@source_code);
if lexer_comment(@source_code, token_buffer) then
current_token.value.string := string_dup(string_buffer_clear(token_buffer));
current_token.kind := LexerKind.comment
else
current_token.kind := LexerKind.unknown
end
else
current_token.kind := LexerKind.left_paren
end
elsif first_char = ')' then
current_token.kind := LexerKind.right_paren;
source_code_advance(@source_code)
elsif first_char = '\'' then
source_code_advance(@source_code);
if lexer_character(@source_code, @current_token.value.char_value) & source_code_expect(@source_code, '\'') then
current_token.kind := LexerKind.character;
source_code_advance(@source_code)
else
current_token.kind := LexerKind.unknown
end
elsif first_char = '"' then
source_code_advance(@source_code);
if lexer_string(@source_code, token_buffer) then
current_token.kind := LexerKind.string;
current_token.value.string := string_dup(string_buffer_clear(token_buffer))
else
current_token.kind := LexerKind.unknown
end
elsif first_char = '[' then
current_token.kind := LexerKind.left_square;
source_code_advance(@source_code)
elsif first_char = ']' then
current_token.kind := LexerKind.right_square;
source_code_advance(@source_code)
elsif first_char = '>' then
source_code_advance(@source_code);
if source_code_empty(@source_code) then
current_token.kind := LexerKind.greater_than
elsif source_code_head(source_code) = '=' then
current_token.kind := LexerKind.greater_equal;
source_code_advance(@source_code)
elsif source_code_head(source_code) = '>' then
current_token.kind := LexerKind.shift_right;
source_code_advance(@source_code)
else
current_token.kind := LexerKind.greater_than
end
elsif first_char = '<' then
source_code_advance(@source_code);
if source_code_empty(@source_code) then
current_token.kind := LexerKind.less_than
elsif source_code_head(source_code) = '=' then
current_token.kind := LexerKind.less_equal;
source_code_advance(@source_code)
elsif source_code_head(source_code) = '<' then
current_token.kind := LexerKind.shift_left;
source_code_advance(@source_code)
elsif source_code_head(source_code) = '>' then
current_token.kind := LexerKind.not_equal;
source_code_advance(@source_code)
else
current_token.kind := LexerKind.less_than
end
elsif first_char = '=' then
current_token.kind := LexerKind.equal;
source_code_advance(@source_code)
elsif first_char = ';' then
current_token.kind := LexerKind.semicolon;
source_code_advance(@source_code)
elsif first_char = '.' then
current_token.kind := LexerKind.dot;
source_code_advance(@source_code)
elsif first_char = ',' then
current_token.kind := LexerKind.comma;
source_code_advance(@source_code)
elsif first_char = '+' then
current_token.kind := LexerKind.plus;
source_code_advance(@source_code)
elsif first_char = '-' then
source_code_advance(@source_code);
if source_code_empty(@source_code) then
current_token.kind := LexerKind.minus
elsif source_code_head(source_code) = '>' then
current_token.kind := LexerKind.arrow;
source_code_advance(@source_code)
else
current_token.kind := LexerKind.minus
end
elsif first_char = '*' then
current_token.kind := LexerKind.multiplication;
source_code_advance(@source_code)
elsif first_char = '/' then
current_token.kind := LexerKind.division;
source_code_advance(@source_code)
elsif first_char = '%' then
current_token.kind := LexerKind.remainder;
source_code_advance(@source_code)
elsif first_char = ':' then
source_code_advance(@source_code);
if source_code_empty(@source_code) then
current_token.kind := LexerKind.colon
elsif source_code_head(source_code) = '=' then
current_token.kind := LexerKind.assignment;
source_code_advance(@source_code)
else
current_token.kind := LexerKind.colon
end
elsif first_char = '^' then
current_token.kind := LexerKind.hat;
source_code_advance(@source_code)
elsif first_char = '@' then
current_token.kind := LexerKind.at;
source_code_advance(@source_code)
elsif first_char = '!' then
current_token.kind := LexerKind.exclamation;
source_code_advance(@source_code)
elsif first_char = '&' then
current_token.kind := LexerKind.and;
source_code_advance(@source_code)
elsif first_char = '~' then
current_token.kind := LexerKind.not;
source_code_advance(@source_code)
elsif first_char = '|' then
current_token.kind := LexerKind.pipe;
source_code_advance(@source_code)
else
current_token.kind := LexerKind.unknown;
source_code_advance(@source_code)
end;
return current_token
end;
(* Split the source text into tokens. *)
proc lexer_text(source_code: SourceCode) -> Tokenizer;
var
current_token: Token;
token_buffer: StringBuffer;
lexer: Tokenizer;
begin
lexer := Tokenizer(0u, nil);
token_buffer := string_buffer_new();
lexer_spaces(@source_code);
while ~source_code_empty(@source_code) do
current_token := lexer_next(source_code, @token_buffer);
if current_token.kind <> LexerKind.unknown then
lexer_add_token(@lexer, current_token);
lexer_spaces(@source_code)
else
write_s("Lexical analysis error on \"");
write_c(source_code_head(source_code));
write_s("\".\n")
end
end;
return lexer
end;
(*
Parser.
*)
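(* parse currently only prints a readable representation of each token, which is useful for checking the lexer output. *)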
proc parse(tokens: ^Token, tokens_size: Word);
var
current_token: ^Token;
i: Word := 0u;
begin
while i < tokens_size do
current_token := tokens + i;
case current_token^.kind of
LexerKind._if:
write_s("IF")
| LexerKind._then:
write_s("THEN")
| LexerKind._else:
write_s("ELSE")
| LexerKind._elsif:
write_s("ELSIF")
| LexerKind._while:
write_s("WHILE")
| LexerKind._do:
write_s("DO")
| LexerKind._proc:
write_s("PROC")
| LexerKind._begin:
write_s("BEGIN")
| LexerKind._end:
write_s("END")
| LexerKind._extern:
write_s("EXTERN")
| LexerKind._const:
write_s("CONST")
| LexerKind._var:
write_s("VAR")
| LexerKind._case:
write_s("CASE")
| LexerKind._of:
write_s("OF")
| LexerKind._type:
write_s("TYPE")
| LexerKind._record:
write_s("RECORD")
| LexerKind._union:
write_s("UNION")
| LexerKind.pipe:
write_s("|")
| LexerKind.to:
write_s("TO")
| LexerKind.boolean:
write_s("BOOLEAN<");
write_b(current_token^.value.boolean_value);
write_c('>')
| LexerKind.null:
write_s("NIL")
| LexerKind.and:
write_s("&")
| LexerKind._or:
write_s("OR")
| LexerKind.not:
write_s("~")
| LexerKind._return:
write_s("RETURN")
| LexerKind._cast:
write_s("CAST")
| LexerKind.shift_left:
write_s("<<")
| LexerKind.shift_right:
write_s(">>")
| LexerKind.identifier:
write_c('<');
write_s(current_token^.value.string);
write_c('>')
| LexerKind.trait:
write_c('#');
write_s(current_token^.value.string)
| LexerKind.left_paren:
write_s("(")
| LexerKind.right_paren:
write_s(")")
| LexerKind.left_square:
write_s("[")
| LexerKind.right_square:
write_s("]")
| LexerKind.greater_equal:
write_s(">=")
| LexerKind.less_equal:
write_s("<=")
| LexerKind.greater_than:
write_s(">")
| LexerKind.less_than:
write_s("<")
| LexerKind.equal:
write_s("=")
| LexerKind.not_equal:
write_s("<>")
| LexerKind.semicolon:
write_c(';')
| LexerKind.dot:
write_c('.')
| LexerKind.comma:
write_c(',')
| LexerKind.plus:
write_c('+')
| LexerKind.minus:
write_c('-')
| LexerKind.multiplication:
write_c('*')
| LexerKind.division:
write_c('/')
| LexerKind.remainder:
write_c('%')
| LexerKind.assignment:
write_s(":=")
| LexerKind.colon:
write_c(':')
| LexerKind.hat:
write_c('^')
| LexerKind.at:
write_c('@')
| LexerKind.comment:
write_s("(* COMMENT *)")
| LexerKind.integer:
write_c('<');
write_i(current_token^.value.int_value);
write_c('>')
| LexerKind.word:
write_c('<');
write_i(current_token^.value.int_value);
write_s("u>")
| LexerKind.character:
write_c('<');
write_i(cast(current_token^.value.char_value: Int));
write_s("c>")
| LexerKind.string:
write_s("\"...\"")
| LexerKind._defer:
write_s("DEFER")
| LexerKind.exclamation:
write_c('!')
| LexerKind.arrow:
write_s("->")
| LexerKind._program:
write_s("PROGRAM")
| LexerKind._module:
write_s("MODULE")
| LexerKind._import:
write_s("IMPORT")
else
write_s("UNKNOWN<");
write_i(cast(current_token^.kind: Int));
write_c('>')
end;
write_c(' ');
i := i + 1u
end;
write_c('\n')
end;
(*
Compilation entry.
*)
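(* Run the stages requested on the command line: lexical analysis and, if asked for, the token dump in parse. *)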
proc compile_in_stages(command_line: ^CommandLine, source_code: SourceCode) -> Int;
var
return_code: Int := 0;
lexer: Tokenizer;
begin
if command_line^.lex or command_line^.parse then
lexer := lexer_text(source_code)
end;
if command_line^.parse then
parse(lexer.data, lexer.length)
end;
return return_code
end;
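(* Program driver: parse the command line, read the source file and run the requested compilation stages. Returns a non zero code on failure. *)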
proc process(argc: Int, argv: ^^Char) -> Int;
var
source_code: SourceCode;
command_line: ^CommandLine;
return_code: Int := 0;
source_file: ^SourceFile;
begin
command_line := parse_command_line(argc, argv);
if command_line = nil then
return_code := 2
end;
if return_code = 0 then
source_file := read_source(command_line^.input);
if source_file = nil then
perror(command_line^.input);
return_code := 3
end
end;
if return_code = 0 then
defer
fclose(source_file^.handle)
end;
source_code.position := TextLocation(1u, 1u);
source_code.input := cast(source_file: Pointer);
source_code.empty := source_file_empty;
source_code.head := source_file_head;
source_code.advance := source_file_advance;
return_code := compile_in_stages(command_line, source_code)
end;
return return_code
end;
return process(count, parameters)
end.