Replace type expression with traits

This commit is contained in:
2025-02-25 23:39:31 +01:00
parent 85b6843ecf
commit f091344cce
7 changed files with 423 additions and 373 deletions

View File

@ -72,13 +72,28 @@ type
first: Position
last: Position
end
FILE* = record end
SourceFile* = record
buffer: [1024]Char
handle: ^FILE
size: Word
index: Word
end
StringBuffer* = record
data: ^Byte
size: Word
capacity: Word
end
SourceCode = record
position: Position
text: String
input: ^Byte
empty: proc(data: ^Byte) -> Bool
advance: proc(data: ^Byte)
head: proc(data: ^Byte) -> Char
end
TokenValue* = union
int_value: Int
string_value: ^Char
string: String
boolean_value: Bool
char_value: Char
@ -88,7 +103,6 @@ type
value: TokenValue
location: Location
end
FILE* = record end
CommandLine* = record
input: ^Char
tokenize: Bool
@ -119,8 +133,6 @@ proc strncpy(dst: ^Char, src: ^Char, dsize: Word) -> ^Char; extern
proc strcpy(dst: ^Char, src: ^Char) -> ^Char; extern
proc strlen(ptr: ^Char) -> Word; extern
proc strtol(nptr: ^Char, endptr: ^^Char, base: Int) -> Int; extern
proc perror(s: ^Char); extern
proc exit(code: Int) -> !; extern
@ -225,6 +237,41 @@ begin
return String(copy, origin.length)
end
(*
  Creates an empty string buffer with an initial capacity of 64 bytes
  obtained from malloc.
*)
proc string_buffer_new() -> StringBuffer;
var
  buffer: StringBuffer
begin
  buffer.size := 0u
  buffer.capacity := 64u
  buffer.data := malloc(buffer.capacity)

  return buffer
end
(*
  Appends a single character to the buffer.

  When the buffer is full, the capacity is enlarged by 1024 bytes through
  realloc before the character is stored.
*)
proc string_buffer_push(buffer: ^StringBuffer, char: Char);
var
  slot: ^Byte
begin
  if not (buffer^.size < buffer^.capacity) then
    buffer^.capacity := buffer^.capacity + 1024u
    buffer^.data := realloc(buffer^.data, buffer^.capacity)
  end
  (* Address of the first unused byte. *)
  slot := buffer^.data + buffer^.size
  slot^ := cast(char: Byte)
  buffer^.size := buffer^.size + 1u
end
(*
  Drops the last count characters from the buffer.

  The size is an unsigned Word, so removing more characters than are stored
  would wrap around to a huge value; in that case the buffer is simply
  emptied instead.
*)
proc string_buffer_pop(buffer: ^StringBuffer, count: Word);
begin
  if count > buffer^.size then
    buffer^.size := 0u
  else
    buffer^.size := buffer^.size - count
  end
end
(*
  Returns the accumulated characters as a String and resets the buffer size
  to zero.

  The returned String refers to buffer^.data directly; no copy is made here.
*)
proc string_buffer_clear(buffer: ^StringBuffer) -> String;
var
  used: Word
begin
  used := buffer^.size
  buffer^.size := 0u

  return String(cast(buffer^.data: ^Char), used)
end
(*
End of standard procedures.
*)
@ -234,36 +281,20 @@ begin
return Position(1u, 1u)
end
(*
  NOTE(review): this span looks like a rendered diff in which the removed
  Bool-returning read_source (whole file read with fseek/ftell/fread) is
  interleaved with the added version returning ^SourceFile - confirm before
  using anything here verbatim.

  Added version: opens filename with fopen in "rb" mode and, when that
  succeeds, allocates a SourceFile holding the handle, with size 0u and
  index 1u.

  NOTE(review): in the added version result is assigned only inside the
  "file_handle <> nil" branch, so a failed fopen seems to return an
  unassigned pointer, while the visible caller compares the returned value
  against nil. Presumably result should start out as nil - TODO confirm.
*)
proc read_source(filename: ^Char, result: ^String) -> Bool;
proc read_source(filename: ^Char) -> ^SourceFile;
var
input_file: ^FILE
source_size: Int
input: ^Byte
result: ^SourceFile
file_handle: ^FILE
begin
input_file := fopen(filename, "rb\0".ptr)
file_handle := fopen(filename, "rb\0".ptr)
if input_file = nil then
return false
if file_handle <> nil then
result := cast(malloc(#size(SourceFile)): ^SourceFile)
result^.handle := file_handle
result^.size := 0u
result^.index := 1u
end
defer
fclose(input_file)
end
if fseek(input_file, 0, SEEK_END) <> 0 then
return false
end
source_size := ftell(input_file)
if source_size < 0 then
return false
end
rewind(input_file)
input := malloc(cast(source_size: Word))
if fread(input, cast(source_size: Word), 1u, input_file) <> 1u then
return false
end
result^ := String(cast(input: ^Char), cast(source_size: Word))
return true
return result
end
proc escape_char(escape: Char, result: ^Char) -> Bool;
@ -312,118 +343,169 @@ begin
return successful
end
(*
  NOTE(review): rendered diff - lines of the removed advance_source appear
  interleaved with the added source_file_empty.

  source_file_empty: treats source_input as a ^SourceFile. Once index has
  moved past size, it refills the buffer with up to 1024 characters via
  fread and resets index to 1u. The result is true when size is 0u
  afterwards.
*)
proc advance_source(source_code: SourceCode, length: Word) -> SourceCode;
proc source_file_empty(source_input: ^Byte) -> Bool;
var
source_file: ^SourceFile
begin
source_code.text := open_substring(source_code.text, length)
source_code.position.column := source_code.position.column + length
source_file := cast(source_input: ^SourceFile)
return source_code
if source_file^.index > source_file^.size then
source_file^.size := fread(cast(@source_file^.buffer: ^Byte), 1u, 1024u, source_file^.handle)
source_file^.index := 1u
end
return source_file^.size = 0u
end
(*
  NOTE(review): rendered diff - lines of the removed value-returning
  skip_spaces appear interleaved with the added source_file_head.

  source_file_head: treats source_input as a ^SourceFile and returns
  buffer[index]. It performs no bounds check and no refill itself.
*)
proc skip_spaces(source_code: SourceCode) -> SourceCode;
proc source_file_head(source_input: ^Byte) -> Char;
var
source_file: ^SourceFile
begin
while source_code.text.length > 0u and is_space(source_code.text[1u]) do
if source_code.text[1u] = '\n' then
source_code.position.line := source_code.position.line + 1u
source_code.position.column := 1u
else
source_code.position.column := source_code.position.column + 1u
source_file := cast(source_input: ^SourceFile)
return source_file^.buffer[source_file^.index]
end
(*
  Moves the read index of the SourceFile behind source_input forward by one
  character. No refill or bounds check happens here.
*)
proc source_file_advance(source_input: ^Byte);
var
  file: ^SourceFile
begin
  file := cast(source_input: ^SourceFile)
  file^.index := file^.index + 1u
end
(*
  Asks the input behind source_code, through its empty callback, whether
  there is nothing left to read.
*)
proc source_code_empty(source_code: ^SourceCode) -> Bool;
var
  exhausted: Bool
begin
  exhausted := source_code^.empty(source_code^.input)

  return exhausted
end
(*
  Returns the current character of the input by delegating to the head
  callback. The character is not consumed.
*)
proc source_code_head(source_code: SourceCode) -> Char;
var
  current: Char
begin
  current := source_code.head(source_code.input)

  return current
end
(*
  Consumes the current character of the input through the advance callback
  and moves the tracked position one column to the right.
*)
proc source_code_advance(source_code: ^SourceCode);
begin
  source_code^.advance(source_code^.input)
  (* One character consumed means one column forward. *)
  source_code^.position.column := source_code^.position.column + 1u
end
(*
  Records a line break in the tracked position: the column is reset to 0u
  and the line number is incremented. The input itself is not advanced.
*)
proc source_code_break(source_code: ^SourceCode);
begin
  source_code^.position.column := 0u
  source_code^.position.line := source_code^.position.line + 1u
end
(*
  Checks whether the next character of the input equals expected.
  Returns false when the input is empty. Nothing is consumed.
*)
proc source_code_expect(source_code: ^SourceCode, expected: Char) -> Bool;
begin
  if source_code_empty(source_code) then
    return false
  end
  return source_code_head(source_code^) = expected
end
(*
  Advances past every character for which is_space holds, calling
  source_code_break on each '\n' before advancing.

  NOTE(review): rendered diff - the open_substring assignment and the
  "return source_code" line look like leftovers of the removed
  value-returning version.
*)
proc skip_spaces(source_code: ^SourceCode);
begin
while not source_code_empty(source_code) and is_space(source_code_head(source_code^)) do
if source_code_head(source_code^) = '\n' then
source_code_break(source_code)
end
source_code.text := open_substring(source_code.text, 1u)
source_code_advance(source_code)
end
return source_code
end
proc lex_identifier(source_code: ^SourceCode, token_content: ^String);
(*
  Tells whether char may appear inside an identifier: an underscore or any
  character accepted by is_alnum.
*)
proc is_ident(char: Char) -> Bool;
begin
  if char = '_' then
    return true
  end
  return is_alnum(char)
end
(*
  Pushes characters into token_content for as long as the input is not
  empty and is_ident accepts the head character.

  NOTE(review): rendered diff - the content_length bookkeeping, the
  text[1u] loop and the substring call look like the removed String-based
  version.
*)
proc lex_identifier(source_code: ^SourceCode, token_content: ^StringBuffer);
var
content_length: Word
begin
content_length := 0u
token_content^ := source_code^.text
while is_alnum(source_code^.text[1u]) or source_code^.text[1u] = '_' do
content_length := content_length + 1u
source_code^ := advance_source(source_code^, 1u)
while not source_code_empty(source_code) and is_ident(source_code_head(source_code^)) do
string_buffer_push(token_content, source_code_head(source_code^))
source_code_advance(source_code)
end
token_content^ := substring(token_content^, 0u, content_length)
end
(*
  Collects comment text into token_content until an asterisk directly
  followed by a closing parenthesis is seen.

  trailing is 1u right after an asterisk and 2u once the terminator is
  complete; the asterisk that turned out to belong to the terminator is
  popped from the buffer again. The result is true only when the terminator
  was found before the input ran out.

  NOTE(review): rendered diff - the content_length lines and the text[1u]
  tests look like the removed String-based version.
*)
proc lex_comment(source_code: ^SourceCode, token_content: ^String) -> Bool;
proc lex_comment(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool;
var
content_length: Word
trailing: Word
begin
content_length := 0u
token_content^ := source_code^.text
trailing := 0u
while source_code^.text.length > 0u and trailing < 2u do
if source_code^.text[1u] = '*' then
content_length := content_length + trailing
while not source_code_empty(source_code) and trailing < 2u do
if source_code_head(source_code^) = '*' then
string_buffer_push(token_content, '*')
trailing := 1u
elsif source_code^.text[1u] = ')' and trailing = 1u then
elsif source_code_head(source_code^) = ')' and trailing = 1u then
string_buffer_pop(token_content, 1u)
trailing := 2u
else
content_length := content_length + trailing + 1u
string_buffer_push(token_content, source_code_head(source_code^))
trailing := 0u
end
source_code^ := advance_source(source_code^, 1u)
source_code_advance(source_code)
end
return trailing = 2u
end
(*
  Reads one character into token_content, resolving a backslash escape
  through escape_char, and advances past it on success.

  The result is false when the input is empty or when escape_char rejects
  the escape; in that case the rejected character is not consumed.

  NOTE(review): rendered diff - the lines working on input and
  current_token look like the removed pointer-based version.
*)
proc lex_character(input: ^Char, current_token: ^Token) -> ^Char;
proc lex_character(source_code: ^SourceCode, token_content: ^Char) -> Bool;
var
successful: Bool
begin
if input^ = '\\' then
input := input + 1
if escape_char(input^, @current_token^.value.char_value) then
input := input + 1
end
elsif input^ <> '\0' then
current_token^.value.char_value := input^
input := input + 1
successful := not source_code_empty(source_code)
if successful then
if source_code_head(source_code^) = '\\' then
source_code_advance(source_code)
successful := not source_code_empty(source_code) and escape_char(source_code_head(source_code^), token_content)
else
token_content^ := source_code_head(source_code^)
successful := true
end
end
return input
if successful then
source_code_advance(source_code)
end
return successful
end
(*
  Pushes characters obtained from lex_character into token_content until a
  double quote is reached, then consumes that quote.

  The result is false when a character fails to lex or when the closing
  quote is missing.

  NOTE(review): rendered diff - the lines using token_end, token_length and
  constructed_string look like the removed pointer-based version.
*)
proc lex_string(input: ^Char, current_token: ^Token) -> ^Char;
proc lex_string(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool;
var
token_end, constructed_string: ^Char
token_length: Word
is_valid: Bool
next_char: Char
begin
token_end := input
while token_end^ <> '\0' and not ((token_end - 1)^ <> '\\' and token_end^ = '"') do
token_end := token_end + 1
end
if token_end^ <> '\"' then
return input
end
token_length := cast(token_end - input: Word)
current_token^.value.string_value := cast(calloc(token_length, 1u): ^Char)
is_valid := true
constructed_string := current_token^.value.string_value
while input < token_end and is_valid do
if input^ = '\\' then
input := input + 1
if escape_char(input^, constructed_string) then
input := input + 1
else
is_valid := false
end
elsif input^ <> '\0' then
constructed_string^ := input^
input := input + 1
while is_valid and not source_code_empty(source_code) and source_code_head(source_code^) <> '"' do
is_valid := lex_character(source_code, @next_char)
if is_valid then
string_buffer_push(token_content, next_char)
end
constructed_string := constructed_string + 1
end
return token_end
if is_valid and source_code_expect(source_code, '"') then
source_code_advance(source_code)
else
is_valid := false
end
return is_valid
end
(*
  Reads a run of decimal digits from the input and stores its value in
  token_content. The value is 0 when no digit is present. Overflow of Int is
  not checked.
*)
proc lex_number(source_code: ^SourceCode, token_content: ^Int);
var
  digit: Int
begin
  token_content^ := 0

  while not source_code_empty(source_code) and is_digit(source_code_head(source_code^)) do
    (* Numeric value of the digit character. *)
    digit := cast(source_code_head(source_code^): Int) - cast('0': Int)
    token_content^ := token_content^ * 10 + digit

    source_code_advance(source_code)
  end
end
proc print_tokens(tokens: ^Token, tokens_size: Word);
@ -647,46 +729,43 @@ end
proc tokenize(source_code: SourceCode, tokens_size: ^Word) -> ^Token;
var
token_end: ^Char
tokens, current_token: ^Token
token_length: Word
first_char: Char
token_content: String
token_buffer: StringBuffer
begin
tokens_size^ := 0u
tokens := nil
source_code := skip_spaces(source_code)
token_buffer := string_buffer_new()
while source_code.text.length <> 0u do
tokens := cast(reallocarray(cast(tokens: ^Byte), tokens_size^ + 1u, Token.size): ^Token)
skip_spaces(@source_code)
while not source_code_empty(@source_code) do
tokens := cast(reallocarray(cast(tokens: ^Byte), tokens_size^ + 1u, #size(Token)): ^Token)
current_token := tokens + tokens_size^
first_char := source_code.text[1u]
first_char := source_code_head(source_code)
if is_alpha(first_char) or first_char = '_' then
lex_identifier(@source_code, @token_content)
current_token^ := categorize_identifier(token_content)
lex_identifier(@source_code, @token_buffer)
current_token^ := categorize_identifier(string_buffer_clear(@token_buffer))
elsif is_digit(first_char) then
token_end := nil
current_token^.value.int_value := strtol(source_code.text.ptr, @token_end, 10)
token_length := cast(token_end - source_code.text.ptr: Word)
lex_number(@source_code, @current_token^.value.int_value)
if token_end^ = 'u' then
if source_code_expect(@source_code, 'u') then
current_token^.kind := TOKEN_WORD
source_code := advance_source(source_code, token_length + 1u)
source_code_advance(@source_code)
else
current_token^.kind := TOKEN_INTEGER
source_code := advance_source(source_code, token_length)
end
elsif first_char = '(' then
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
if source_code.text.length = 0u then
if source_code_empty(@source_code) then
current_token^.kind := TOKEN_LEFT_PAREN
elsif source_code.text[1u] = '*' then
source_code := advance_source(source_code, 1u)
elsif source_code_head(source_code) = '*' then
source_code_advance(@source_code)
if lex_comment(@source_code, @token_content) then
current_token^.value.string := string_dup(token_content)
if lex_comment(@source_code, @token_buffer) then
current_token^.value.string := string_dup(string_buffer_clear(@token_buffer))
current_token^.kind := TOKEN_COMMENT
else
current_token^.kind := 0
@ -696,125 +775,125 @@ begin
end
elsif first_char = ')' then
current_token^.kind := TOKEN_RIGHT_PAREN
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = '\'' then
token_end := lex_character(source_code.text.ptr + 1, current_token)
token_length := cast(token_end - source_code.text.ptr: Word)
source_code_advance(@source_code)
if token_end^ = '\'' then
if lex_character(@source_code, @current_token^.value.char_value) and source_code_expect(@source_code, '\'') then
current_token^.kind := TOKEN_CHARACTER
source_code := advance_source(source_code, token_length + 1u)
source_code_advance(@source_code)
else
source_code := advance_source(source_code, 1u)
current_token^.kind := 0
end
elsif first_char = '"' then
token_end := lex_string(source_code.text.ptr + 1, current_token)
if token_end^ = '"' then
source_code_advance(@source_code)
if lex_string(@source_code, @token_buffer) then
current_token^.kind := TOKEN_STRING
token_length := cast(token_end - source_code.text.ptr: Word)
source_code := advance_source(source_code, token_length + 1u)
current_token^.value.string := string_dup(string_buffer_clear(@token_buffer))
else
current_token^.kind := 0
end
elsif first_char = '[' then
current_token^.kind := TOKEN_LEFT_SQUARE
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = ']' then
current_token^.kind := TOKEN_RIGHT_SQUARE
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = '>' then
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
if source_code.text.length = 0u then
if source_code_empty(@source_code) then
current_token^.kind := TOKEN_GREATER_THAN
elsif source_code.text[1u] = '=' then
elsif source_code_head(source_code) = '=' then
current_token^.kind := TOKEN_GREATER_EQUAL
source_code := advance_source(source_code, 1u)
elsif source_code.text[1u] = '>' then
source_code_advance(@source_code)
elsif source_code_head(source_code) = '>' then
current_token^.kind := TOKEN_SHIFT_RIGHT
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
else
current_token^.kind := TOKEN_GREATER_THAN
end
elsif first_char = '<' then
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
if source_code.text.length = 0u then
if source_code_empty(@source_code) then
current_token^.kind := TOKEN_LESS_THAN
elsif source_code.text[1u] = '=' then
elsif source_code_head(source_code) = '=' then
current_token^.kind := TOKEN_LESS_EQUAL
source_code := advance_source(source_code, 1u)
elsif source_code.text[1u] = '<' then
source_code_advance(@source_code)
elsif source_code_head(source_code) = '<' then
current_token^.kind := TOKEN_SHIFT_LEFT
source_code := advance_source(source_code, 1u)
elsif source_code.text[1u] = '>' then
source_code_advance(@source_code)
elsif source_code_head(source_code) = '>' then
current_token^.kind := TOKEN_NOT_EQUAL
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
else
current_token^.kind := TOKEN_LESS_THAN
end
elsif first_char = '=' then
current_token^.kind := TOKEN_EQUAL
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = ';' then
current_token^.kind := TOKEN_SEMICOLON
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = '.' then
current_token^.kind := TOKEN_DOT
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = ',' then
current_token^.kind := TOKEN_COMMA
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = '+' then
current_token^.kind := TOKEN_PLUS
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = '-' then
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
if source_code.text.length = 0u then
if source_code_empty(@source_code) then
current_token^.kind := TOKEN_MINUS
elsif source_code.text[1u] = '>' then
elsif source_code_head(source_code) = '>' then
current_token^.kind := TOKEN_ARROW
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
else
current_token^.kind := TOKEN_MINUS
end
elsif first_char = '*' then
current_token^.kind := TOKEN_MULTIPLICATION
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = '/' then
current_token^.kind := TOKEN_DIVISION
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = '%' then
current_token^.kind := TOKEN_REMAINDER
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = ':' then
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
if source_code.text.length = 0u then
if source_code_empty(@source_code) then
current_token^.kind := TOKEN_COLON
elsif source_code.text[1u] = '=' then
elsif source_code_head(source_code) = '=' then
current_token^.kind := TOKEN_ASSIGNMENT
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
else
current_token^.kind := TOKEN_COLON
end
elsif first_char = '^' then
current_token^.kind := TOKEN_HAT
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = '@' then
current_token^.kind := TOKEN_AT
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = '!' then
current_token^.kind := TOKEN_EXCLAMATION
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
else
current_token^.kind := 0
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
end
if current_token^.kind <> 0 then
tokens_size^ := tokens_size^ + 1u
source_code := skip_spaces(source_code)
skip_spaces(@source_code)
else
write_s("Lexical analysis error on \"")
write_c(first_char)
@ -832,7 +911,7 @@ var
result: ^CommandLine
begin
i := 1
result := cast(malloc(CommandLine.size): ^CommandLine)
result := cast(malloc(#size(CommandLine)): ^CommandLine)
result^.tokenize := false
result^.syntax_tree := false
result^.input := nil
@ -872,23 +951,38 @@ var
tokens_size: Word
source_code: SourceCode
command_line: ^CommandLine
return_code: Int
begin
return_code := 0
command_line := parse_command_line(argc, argv)
if command_line = nil then
return 2
return_code := 2
end
source_code.position := make_position()
if not read_source(command_line^.input, @source_code.text) then
perror(command_line^.input)
return 3
end
tokens := tokenize(source_code, @tokens_size)
if return_code = 0 then
source_code.position := make_position()
if command_line^.tokenize then
print_tokens(tokens, tokens_size)
source_code.input := cast(read_source(command_line^.input): ^Byte)
source_code.empty := source_file_empty
source_code.head := source_file_head
source_code.advance := source_file_advance
if source_code.input = nil then
perror(command_line^.input)
return_code := 3
end
end
return 0
if return_code = 0 then
tokens := tokenize(source_code, @tokens_size)
fclose(cast(source_code.input: ^SourceFile)^.handle)
if command_line^.tokenize then
print_tokens(tokens, tokens_size)
end
end
return return_code
end
begin