diff options
| author | Eugen Wissner <belka@caraus.de> | 2026-02-15 13:17:33 +0100 |
|---|---|---|
| committer | Eugen Wissner <belka@caraus.de> | 2026-02-15 13:17:33 +0100 |
| commit | 80364d429e373dd1fcea7cc3cc67ff981ebdb5b6 (patch) | |
| tree | a303ed55a1fc277e6c4507e2c4d25aaa5a415dd7 /source/main.elna | |
| parent | fe055aa1ae36ee8058a2da7eabc63da913123a69 (diff) | |
| download | elna-80364d429e373dd1fcea7cc3cc67ff981ebdb5b6.tar.gz | |
Remove the old version code
Diffstat (limited to 'source/main.elna')
| -rw-r--r-- | source/main.elna | 841 |
1 files changed, 0 insertions, 841 deletions
diff --git a/source/main.elna b/source/main.elna deleted file mode 100644 index dae045b..0000000 --- a/source/main.elna +++ /dev/null @@ -1,841 +0,0 @@ -(* This Source Code Form is subject to the terms of the Mozilla Public License, - v. 2.0. If a copy of the MPL was not distributed with this file, You can - obtain one at https://mozilla.org/MPL/2.0/. *) -program; - -import cstdio, cctype, common, command_line_interface, lexer; - -type - SourceFile* = record - buffer: [1024]Char; - handle: ^FILE; - size: Word; - index: Word - end; - StringBuffer* = record - data: Pointer; - size: Word; - capacity: Word - end; - SourceCode = record - position: TextLocation; - - input: Pointer; - empty: proc(Pointer) -> Bool; - advance: proc(Pointer); - head: proc(Pointer) -> Char - end; - Token* = record - kind: LexerKind; - value: union - int_value: Int; - string: String; - boolean_value: Bool; - char_value: Char - end - end; - Tokenizer* = record - length: Word; - data: ^Token - end; - -(* - Standard procedures. -*) -proc reallocarray(ptr: Pointer, n: Word, size: Word) -> Pointer; - return realloc(ptr, n * size) -end; - -proc substring(string: String, start: Word, count: Word) -> String; - return String(string.ptr + start, count) -end; - -proc open_substring(string: String, start: Word) -> String; - return substring(string, start, string.length - start) -end; - -proc string_dup(origin: String) -> String; -var - copy: ^Char; -begin - copy := cast(malloc(origin.length): ^Char); - strncpy(copy, origin.ptr, origin.length); - - return String(copy, origin.length) -end; - -proc string_buffer_new() -> StringBuffer; -var - result: StringBuffer; -begin - result.capacity := 64u; - result.data := malloc(result.capacity); - result.size := 0u; - - return result -end; - -proc string_buffer_push(buffer: ^StringBuffer, char: Char); -begin - if buffer^.size >= buffer^.capacity then - buffer^.capacity := buffer^.capacity + 1024u; - buffer^.data := realloc(buffer^.data, buffer^.capacity) - end; - cast(buffer^.data + buffer^.size: ^Char)^ := cast(char: Char); - buffer^.size := buffer^.size + 1u -end; - -proc string_buffer_pop(buffer: ^StringBuffer, count: Word); -begin - buffer^.size := buffer^.size - count -end; - -proc string_buffer_clear(buffer: ^StringBuffer) -> String; -var - result: String; -begin - result := String(cast(buffer^.data: ^Char), buffer^.size); - buffer^.size := 0u; - return result -end; - -(* - Source code stream procedures. -*) - -proc read_source(filename: ^Char) -> ^SourceFile; -var - result: ^SourceFile; - file_handle: ^FILE; -begin - file_handle := fopen(filename, "rb\0".ptr); - - if file_handle <> nil then - result := cast(malloc(#size(SourceFile)): ^SourceFile); - result^.handle := file_handle; - result^.size := 0u; - result^.index := 1u - end; - return result -end; - -proc source_file_empty(source_input: Pointer) -> Bool; -var - source_file: ^SourceFile; -begin - source_file := cast(source_input: ^SourceFile); - - if source_file^.index > source_file^.size then - source_file^.size := fread(cast(@source_file^.buffer: Pointer), 1u, 1024u, source_file^.handle); - source_file^.index := 1u - end; - - return source_file^.size = 0u -end; - -proc source_file_head(source_input: Pointer) -> Char; -var - source_file: ^SourceFile; -begin - source_file := cast(source_input: ^SourceFile); - - return source_file^.buffer[source_file^.index] -end; - -proc source_file_advance(source_input: Pointer); -var - source_file: ^SourceFile; -begin - source_file := cast(source_input: ^SourceFile); - - source_file^.index := source_file^.index + 1u -end; - -proc source_code_empty(source_code: ^SourceCode) -> Bool; - return source_code^.empty(source_code^.input) -end; - -proc source_code_head(source_code: SourceCode) -> Char; - return source_code.head(source_code.input) -end; - -proc source_code_advance(source_code: ^SourceCode); -begin - source_code^.advance(source_code^.input); - source_code^.position.column := source_code^.position.column -end; - -proc source_code_break(source_code: ^SourceCode); -begin - source_code^.position.line := source_code^.position.line + 1u; - source_code^.position.column := 0u -end; - -proc source_code_expect(source_code: ^SourceCode, expected: Char) -> Bool; - return ~source_code_empty(source_code) & source_code_head(source_code^) = expected -end; - -(* - Token procedures. -*) - -proc lexer_escape(escape: Char, result: ^Char) -> Bool; -var - successful: Bool; -begin - case escape of - 'n': - result^ := '\n'; - successful := true - | 'a': - result^ := '\a'; - successful := true - | 'b': - result^ := '\b'; - successful := true - | 't': - result^ := '\t'; - successful := true - | 'f': - result^ := '\f'; - successful := true - | 'r': - result^ := '\r'; - successful := true - | 'v': - result^ := '\v'; - successful := true - | '\\': - result^ := '\\'; - successful := true - | '\'': - result^ := '\''; - successful := true - | '"': - result^ := '"'; - successful := true - | '?': - result^ := '\?'; - successful := true - | '0': - result^ := '\0'; - successful := true - else - successful := false - end; - return successful -end; - -(* Skip spaces. *) -proc lexer_spaces(source_code: ^SourceCode); -var - current: Char; -begin - while ~source_code_empty(source_code) & isspace(cast(source_code_head(source_code^): Int)) <> 0 do - current := source_code_head(source_code^); - - if current = '\n' then - source_code_break(source_code) - end; - source_code_advance(source_code) - end -end; - -(* Checker whether the character is allowed in an identificator. *) -proc lexer_is_ident(char: Char) -> Bool; - return isalnum(cast(char: Int)) <> 0 or char = '_' -end; - -proc lexer_identifier(source_code: ^SourceCode, token_content: ^StringBuffer); -var - content_length: Word; -begin - while ~source_code_empty(source_code) & lexer_is_ident(source_code_head(source_code^)) do - string_buffer_push(token_content, source_code_head(source_code^)); - source_code_advance(source_code) - end -end; - -proc lexer_comment(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool; -var - trailing: Word; -begin - trailing := 0u; - - while ~source_code_empty(source_code) & trailing < 2u do - if source_code_head(source_code^) = '*' then - string_buffer_push(token_content, '*'); - trailing := 1u - elsif source_code_head(source_code^) = ')' & trailing = 1u then - string_buffer_pop(token_content, 1u); - trailing := 2u - else - string_buffer_push(token_content, source_code_head(source_code^)); - trailing := 0u - end; - source_code_advance(source_code) - end; - - return trailing = 2u -end; - -proc lexer_character(source_code: ^SourceCode, token_content: ^Char) -> Bool; -var - successful: Bool; -begin - successful := ~source_code_empty(source_code); - - if successful then - if source_code_head(source_code^) = '\\' then - source_code_advance(source_code); - - successful := ~source_code_empty(source_code) & lexer_escape(source_code_head(source_code^), token_content) - else - token_content^ := source_code_head(source_code^); - successful := true - end - end; - if successful then - source_code_advance(source_code) - end; - return successful -end; - -proc lexer_string(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool; -var - token_end, constructed_string: ^Char; - token_length: Word; - is_valid: Bool := true; - next_char: Char; -begin - while is_valid & ~source_code_empty(source_code) & source_code_head(source_code^) <> '"' do - is_valid := lexer_character(source_code, @next_char); - - if is_valid then - string_buffer_push(token_content, next_char) - end - end; - - if is_valid & source_code_expect(source_code, '"') then - source_code_advance(source_code) - else - is_valid := false - end; - return is_valid -end; - -proc lexer_number(source_code: ^SourceCode, token_content: ^Int); -begin - token_content^ := 0; - - while ~source_code_empty(source_code) & isdigit(cast(source_code_head(source_code^): Int)) <> 0 do - token_content^ := token_content^ * 10 + (cast(source_code_head(source_code^): Int) - cast('0': Int)); - - source_code_advance(source_code) - end -end; - -(* Categorize an identifier. *) -proc lexer_categorize(token_content: String) -> Token; -var - current_token: Token; -begin - if token_content = "if" then - current_token.kind := LexerKind._if - elsif token_content = "then" then - current_token.kind := LexerKind._then - elsif token_content = "else" then - current_token.kind := LexerKind._else - elsif token_content = "elsif" then - current_token.kind := LexerKind._elsif - elsif token_content = "while" then - current_token.kind := LexerKind._while - elsif token_content = "do" then - current_token.kind := LexerKind._do - elsif token_content = "proc" then - current_token.kind := LexerKind._proc - elsif token_content = "begin" then - current_token.kind := LexerKind._begin - elsif token_content = "end" then - current_token.kind := LexerKind._end - elsif token_content = "extern" then - current_token.kind := LexerKind._extern - elsif token_content = "const" then - current_token.kind := LexerKind._const - elsif token_content = "var" then - current_token.kind := LexerKind._var - elsif token_content = "case" then - current_token.kind := LexerKind._case - elsif token_content = "of" then - current_token.kind := LexerKind._of - elsif token_content = "type" then - current_token.kind := LexerKind._type - elsif token_content = "record" then - current_token.kind := LexerKind._record - elsif token_content = "union" then - current_token.kind := LexerKind._union - elsif token_content = "true" then - current_token.kind := LexerKind.boolean; - current_token.value.boolean_value := true - elsif token_content = "false" then - current_token.kind := LexerKind.boolean; - current_token.value.boolean_value := false - elsif token_content = "nil" then - current_token.kind := LexerKind.null - elsif token_content = "or" then - current_token.kind := LexerKind._or - elsif token_content = "return" then - current_token.kind := LexerKind._return - elsif token_content = "cast" then - current_token.kind := LexerKind._cast - elsif token_content = "defer" then - current_token.kind := LexerKind._defer - elsif token_content = "program" then - current_token.kind := LexerKind._program - elsif token_content = "module" then - current_token.kind := LexerKind._module - elsif token_content = "import" then - current_token.kind := LexerKind._import - else - current_token.kind := LexerKind.identifier; - current_token.value.string := string_dup(token_content) - end; - - return current_token -end; - -proc lexer_add_token(lexer: ^Tokenizer, token: Token); -var - new_length: Word; -begin - new_length := lexer^.length + 1u; - lexer^.data := cast(reallocarray(cast(lexer^.data: Pointer), new_length, #size(Token)): ^Token); - (lexer^.data + lexer^.length)^ := token; - lexer^.length := new_length -end; - -(* Read the next token from the input. *) -proc lexer_next(source_code: SourceCode, token_buffer: ^StringBuffer) -> Token; -var - current_token: Token; - first_char: Char; -begin - current_token.kind := LexerKind.unknown; - - first_char := source_code_head(source_code); - - if isalpha(cast(first_char: Int)) <> 0 or first_char = '_' then - lexer_identifier(@source_code, token_buffer); - current_token := lexer_categorize(string_buffer_clear(token_buffer)) - elsif first_char = '#' then - source_code_advance(@source_code); - lexer_identifier(@source_code, token_buffer); - - current_token.kind := LexerKind.trait; - current_token.value.string := string_dup(string_buffer_clear(token_buffer)) - elsif isdigit(cast(first_char: Int)) <> 0 then - lexer_number(@source_code, @current_token.value.int_value); - - if source_code_expect(@source_code, 'u') then - current_token.kind := LexerKind.word; - source_code_advance(@source_code) - else - current_token.kind := LexerKind.integer - end - elsif first_char = '(' then - source_code_advance(@source_code); - - if source_code_empty(@source_code) then - current_token.kind := LexerKind.left_paren - elsif source_code_head(source_code) = '*' then - source_code_advance(@source_code); - - if lexer_comment(@source_code, token_buffer) then - current_token.value.string := string_dup(string_buffer_clear(token_buffer)); - current_token.kind := LexerKind.comment - else - current_token.kind := LexerKind.unknown - end - else - current_token.kind := LexerKind.left_paren - end - elsif first_char = ')' then - current_token.kind := LexerKind.right_paren; - source_code_advance(@source_code) - elsif first_char = '\'' then - source_code_advance(@source_code); - - if lexer_character(@source_code, @current_token.value.char_value) & source_code_expect(@source_code, '\'') then - current_token.kind := LexerKind.character; - source_code_advance(@source_code) - else - current_token.kind := LexerKind.unknown - end - elsif first_char = '"' then - source_code_advance(@source_code); - - if lexer_string(@source_code, token_buffer) then - current_token.kind := LexerKind.string; - current_token.value.string := string_dup(string_buffer_clear(token_buffer)) - else - current_token.kind := LexerKind.unknown - end - elsif first_char = '[' then - current_token.kind := LexerKind.left_square; - source_code_advance(@source_code) - elsif first_char = ']' then - current_token.kind := LexerKind.right_square; - source_code_advance(@source_code) - elsif first_char = '>' then - source_code_advance(@source_code); - - if source_code_empty(@source_code) then - current_token.kind := LexerKind.greater_than - elsif source_code_head(source_code) = '=' then - current_token.kind := LexerKind.greater_equal; - source_code_advance(@source_code) - elsif source_code_head(source_code) = '>' then - current_token.kind := LexerKind.shift_right; - source_code_advance(@source_code) - else - current_token.kind := LexerKind.greater_than - end - elsif first_char = '<' then - source_code_advance(@source_code); - - if source_code_empty(@source_code) then - current_token.kind := LexerKind.less_than - elsif source_code_head(source_code) = '=' then - current_token.kind := LexerKind.less_equal; - source_code_advance(@source_code) - elsif source_code_head(source_code) = '<' then - current_token.kind := LexerKind.shift_left; - source_code_advance(@source_code) - elsif source_code_head(source_code) = '>' then - current_token.kind := LexerKind.not_equal; - source_code_advance(@source_code) - else - current_token.kind := LexerKind.less_than - end - elsif first_char = '=' then - current_token.kind := LexerKind.equal; - source_code_advance(@source_code) - elsif first_char = ';' then - current_token.kind := LexerKind.semicolon; - source_code_advance(@source_code) - elsif first_char = '.' then - current_token.kind := LexerKind.dot; - source_code_advance(@source_code) - elsif first_char = ',' then - current_token.kind := LexerKind.comma; - source_code_advance(@source_code) - elsif first_char = '+' then - current_token.kind := LexerKind.plus; - source_code_advance(@source_code) - elsif first_char = '-' then - source_code_advance(@source_code); - - if source_code_empty(@source_code) then - current_token.kind := LexerKind.minus - elsif source_code_head(source_code) = '>' then - current_token.kind := LexerKind.arrow; - source_code_advance(@source_code) - else - current_token.kind := LexerKind.minus - end - elsif first_char = '*' then - current_token.kind := LexerKind.multiplication; - source_code_advance(@source_code) - elsif first_char = '/' then - current_token.kind := LexerKind.division; - source_code_advance(@source_code) - elsif first_char = '%' then - current_token.kind := LexerKind.remainder; - source_code_advance(@source_code) - elsif first_char = ':' then - source_code_advance(@source_code); - - if source_code_empty(@source_code) then - current_token.kind := LexerKind.colon - elsif source_code_head(source_code) = '=' then - current_token.kind := LexerKind.assignment; - source_code_advance(@source_code) - else - current_token.kind := LexerKind.colon - end - elsif first_char = '^' then - current_token.kind := LexerKind.hat; - source_code_advance(@source_code) - elsif first_char = '@' then - current_token.kind := LexerKind.at; - source_code_advance(@source_code) - elsif first_char = '!' then - current_token.kind := LexerKind.exclamation; - source_code_advance(@source_code) - elsif first_char = '&' then - current_token.kind := LexerKind.and; - source_code_advance(@source_code) - elsif first_char = '~' then - current_token.kind := LexerKind.not; - source_code_advance(@source_code) - elsif first_char = '|' then - current_token.kind := LexerKind.pipe; - source_code_advance(@source_code) - else - current_token.kind := LexerKind.unknown; - source_code_advance(@source_code) - end; - - return current_token -end; - -(* Split the source text into tokens. *) -proc lexer_text(source_code: SourceCode) -> Tokenizer; -var - current_token: Token; - token_buffer: StringBuffer; - lexer: Tokenizer; -begin - lexer := Tokenizer(0u, nil); - token_buffer := string_buffer_new(); - - lexer_spaces(@source_code); - - while ~source_code_empty(@source_code) do - current_token := lexer_next(source_code, @token_buffer); - - if current_token.kind <> LexerKind.unknown then - lexer_add_token(@lexer, current_token); - lexer_spaces(@source_code) - else - write_s("Lexical analysis error on \""); - write_c(source_code_head(source_code)); - write_s("\".\n") - end - end; - - return lexer -end; - -(* - Parser. -*) - -proc parse(tokens: ^Token, tokens_size: Word); -var - current_token: ^Token; - i: Word := 0u; -begin - while i < tokens_size do - current_token := tokens + i; - - case current_token^.kind of - LexerKind._if: - write_s("IF") - | LexerKind._then: - write_s("THEN") - | LexerKind._else: - write_s("ELSE") - | LexerKind._elsif: - write_s("ELSIF") - | LexerKind._while: - write_s("WHILE") - | LexerKind._do: - write_s("DO") - | LexerKind._proc: - write_s("PROC") - | LexerKind._begin: - write_s("BEGIN") - | LexerKind._end: - write_s("END") - | LexerKind._extern: - write_s("EXTERN") - | LexerKind._const: - write_s("CONST") - | LexerKind._var: - write_s("VAR") - | LexerKind._case: - write_s("CASE") - | LexerKind._of: - write_s("OF") - | LexerKind._type: - write_s("TYPE") - | LexerKind._record: - write_s("RECORD") - | LexerKind._union: - write_s("UNION") - | LexerKind.pipe: - write_s("|") - | LexerKind.to: - write_s("TO") - | LexerKind.boolean: - write_s("BOOLEAN<"); - write_b(current_token^.value.boolean_value); - write_c('>') - | LexerKind.null: - write_s("NIL") - | LexerKind.and: - write_s("&") - | LexerKind._or: - write_s("OR") - | LexerKind.not: - write_s("~") - | LexerKind._return: - write_s("RETURN") - | LexerKind._cast: - write_s("CAST") - | LexerKind.shift_left: - write_s("<<") - | LexerKind.shift_right: - write_s(">>") - | LexerKind.identifier: - write_c('<'); - write_s(current_token^.value.string); - write_c('>') - | LexerKind.trait: - write_c('#'); - write_s(current_token^.value.string) - | LexerKind.left_paren: - write_s("(") - | LexerKind.right_paren: - write_s(")") - | LexerKind.left_square: - write_s("[") - | LexerKind.right_square: - write_s("]") - | LexerKind.greater_equal: - write_s(">=") - | LexerKind.less_equal: - write_s("<=") - | LexerKind.greater_than: - write_s(">") - | LexerKind.less_than: - write_s("<") - | LexerKind.equal: - write_s("=") - | LexerKind.not_equal: - write_s("<>") - | LexerKind.semicolon: - write_c(';') - | LexerKind.dot: - write_c('.') - | LexerKind.comma: - write_c(',') - | LexerKind.plus: - write_c('+') - | LexerKind.minus: - write_c('-') - | LexerKind.multiplication: - write_c('*') - | LexerKind.division: - write_c('/') - | LexerKind.remainder: - write_c('%') - | LexerKind.assignment: - write_s(":=") - | LexerKind.colon: - write_c(':') - | LexerKind.hat: - write_c('^') - | LexerKind.at: - write_c('@') - | LexerKind.comment: - write_s("(* COMMENT *)") - | LexerKind.integer: - write_c('<'); - write_i(current_token^.value.int_value); - write_c('>') - | LexerKind.word: - write_c('<'); - write_i(current_token^.value.int_value); - write_s("u>") - | LexerKind.character: - write_c('<'); - write_i(cast(current_token^.value.char_value: Int)); - write_s("c>") - | LexerKind.string: - write_s("\"...\"") - | LexerKind._defer: - write_s("DEFER") - | LexerKind.exclamation: - write_c('!') - | LexerKind.arrow: - write_s("->") - | LexerKind._program: - write_s("PROGRAM") - | LexerKind._module: - write_s("MODULE") - | LexerKind._import: - write_s("IMPORT") - else - write_s("UNKNOWN<"); - write_i(cast(current_token^.kind: Int)); - write_c('>') - end; - write_c(' '); - - i := i + 1u - end; - write_c('\n') -end; - -(* - Compilation entry. -*) - -proc compile_in_stages(command_line: ^CommandLine, source_code: SourceCode) -> Int; -var - return_code: Int := 0; - lexer: Tokenizer; -begin - if command_line^.lex or command_line^.parse then - lexer := lexer_text(source_code) - end; - if command_line^.parse then - parse(lexer.data, lexer.length) - end; - - return return_code -end; - -proc process(argc: Int, argv: ^^Char) -> Int; -var - tokens: ^Token; - tokens_size: Word; - source_code: SourceCode; - command_line: ^CommandLine; - return_code: Int := 0; - source_file: ^SourceFile; -begin - command_line := parse_command_line(argc, argv); - if command_line = nil then - return_code := 2 - end; - - if return_code = 0 then - source_file := read_source(command_line^.input); - - if source_file = nil then - perror(command_line^.input); - return_code := 3 - end - end; - - if return_code = 0 then - defer - fclose(source_file^.handle) - end; - - source_code.position := TextLocation(1u, 1u); - source_code.input := cast(source_file: Pointer); - source_code.empty := source_file_empty; - source_code.head := source_file_head; - source_code.advance := source_file_advance; - - return_code := compile_in_stages(command_line, source_code) - end; - return return_code -end; - - return process(count, parameters) -end. |
