From 34abb6b4f5c52b44cea11c6a412f799815c488d9 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Thu, 10 Jul 2025 00:43:17 +0200 Subject: [PATCH] Support one hardcoded import --- boot/dependency.cc | 65 ++++++---- boot/parser.yy | 2 +- boot/result.cc | 5 + boot/semantic.cc | 36 +++--- boot/symbol.cc | 61 ++++++++- gcc/elna-generic.cc | 12 +- gcc/elna1.cc | 73 ++++++----- include/elna/boot/dependency.h | 39 ++++-- include/elna/boot/driver.h | 2 +- include/elna/boot/result.h | 2 + include/elna/boot/semantic.h | 6 +- include/elna/boot/symbol.h | 25 +++- include/elna/gcc/elna-generic.h | 4 +- source/Common.def | 12 -- source/Common.elna | 10 ++ source/Lexer.def | 107 ---------------- source/Lexer.elna | 220 ++++++++++++++++++++++---------- source/main.elna | 27 ++-- 18 files changed, 396 insertions(+), 312 deletions(-) delete mode 100644 source/Common.def delete mode 100644 source/Lexer.def diff --git a/boot/dependency.cc b/boot/dependency.cc index 9811422..a598068 100644 --- a/boot/dependency.cc +++ b/boot/dependency.cc @@ -17,46 +17,67 @@ along with GCC; see the file COPYING3. 
If not see #include "elna/boot/dependency.h" +#include +#include +#include + #include "elna/boot/driver.h" +#include "elna/boot/semantic.h" #include "parser.hh" namespace elna::boot { - dependency_graph::dependency_graph() + dependency::dependency(const char *path) + : error_container(path) { } - dependency_graph::dependency_graph(error_list&& errors) - : m_errors(std::move(errors)) - { - } - - bool dependency_graph::has_errors() const - { - return !errors().empty(); - } - - const error_list& dependency_graph::errors() const - { - return m_errors; - } - - dependency_graph read_sources(std::istream& entry_point, const char *entry_path) + dependency read_sources(std::istream& entry_point, const char *entry_path) { driver parse_driver{ entry_path }; lexer tokenizer(entry_point); yy::parser parser(tokenizer, parse_driver); + dependency outcome{ entry_path }; if (parser()) { - return dependency_graph(std::move(parse_driver.errors())); + std::swap(outcome.errors(), parse_driver.errors()); + return outcome; } else { - dependency_graph outcome; - outcome.modules.emplace_back(std::move(parse_driver.tree)); - - return outcome; + std::swap(outcome.tree, parse_driver.tree); } + declaration_visitor declaration_visitor(entry_path); + outcome.tree->accept(&declaration_visitor); + + if (!declaration_visitor.errors().empty()) + { + std::swap(outcome.errors(), declaration_visitor.errors()); /* The declaration errors are held by the visitor, not by the parse driver. */ + } + outcome.unresolved = std::move(declaration_visitor.unresolved); /* The visitor is not used after this point. */ + + return outcome; + } + + std::filesystem::path build_path(const std::vector& segments) + { + std::filesystem::path result; + std::vector::const_iterator segment_iterator = std::cbegin(segments); + + if (segment_iterator == std::cend(segments)) + { + return result; + } + result = *segment_iterator; + + ++segment_iterator; + for (; segment_iterator != std::cend(segments); ++segment_iterator) + { + result /= *segment_iterator; + } + result.replace_extension(".elna"); + + return result; } } diff --git a/boot/parser.yy b/boot/parser.yy index 
ab17bc9..af3f7a3 100644 --- a/boot/parser.yy +++ b/boot/parser.yy @@ -182,7 +182,7 @@ program: } | "module" ";" import_part constant_part type_part variable_part procedure_part "end" "." { - auto tree = new boot::program(boot::make_position(@1)); + auto tree = new boot::unit(boot::make_position(@1)); std::swap(tree->imports, $3); std::swap(tree->constants, $4); diff --git a/boot/result.cc b/boot/result.cc index 41a7d51..fe55410 100644 --- a/boot/result.cc +++ b/boot/result.cc @@ -43,4 +43,9 @@ namespace elna::boot { return m_errors; } + + bool error_container::has_errors() const + { + return !m_errors.empty(); + } } diff --git a/boot/semantic.cc b/boot/semantic.cc index 3ebccde..5489959 100644 --- a/boot/semantic.cc +++ b/boot/semantic.cc @@ -74,9 +74,8 @@ namespace elna::boot return message; } - name_analysis_visitor::name_analysis_visitor(const char *path, std::shared_ptr symbols, - std::unordered_map>&& unresolved) - : error_container(path), symbols(symbols), unresolved(std::move(unresolved)) + name_analysis_visitor::name_analysis_visitor(const char *path, symbol_bag bag) + : error_container(path), bag(bag) { } @@ -120,20 +119,20 @@ namespace elna::boot void name_analysis_visitor::visit(type_declaration *definition) { definition->body().accept(this); - auto unresolved_declaration = this->unresolved.at(definition->identifier.identifier); + auto unresolved_declaration = this->bag.unresolved.at(definition->identifier.identifier); unresolved_declaration->reference = this->current_type; } void name_analysis_visitor::visit(named_type_expression *type_expression) { - auto unresolved_alias = this->unresolved.find(type_expression->name); + auto unresolved_alias = this->bag.unresolved.find(type_expression->name); - if (unresolved_alias != this->unresolved.end()) + if (unresolved_alias != this->bag.unresolved.end()) { this->current_type = type(unresolved_alias->second); } - else if (auto from_symbol_table = this->symbols->lookup(type_expression->name)) + else if (auto 
from_symbol_table = this->bag.lookup(type_expression->name)) { this->current_type = from_symbol_table->is_type()->symbol; } @@ -214,28 +213,24 @@ namespace elna::boot { declaration->variable_type().accept(this); - this->symbols->enter(declaration->identifier.identifier, - std::make_shared(this->current_type)); + this->bag.enter(declaration->identifier.identifier, std::make_shared(this->current_type)); } void name_analysis_visitor::visit(constant_declaration *definition) { definition->body().accept(this); - this->symbols->enter(definition->identifier.identifier, - std::make_shared(this->current_literal)); + this->bag.enter(definition->identifier.identifier, std::make_shared(this->current_literal)); } void name_analysis_visitor::visit(procedure_declaration *definition) { std::shared_ptr info; + auto heading = build_procedure(definition->heading()); if (definition->body.has_value()) { - info = std::make_shared(build_procedure(definition->heading()), - definition->parameter_names, this->symbols); - - this->symbols = info->symbols; + info = std::make_shared(heading, definition->parameter_names, this->bag.enter()); for (constant_declaration *const constant : definition->body.value().constants()) { @@ -249,14 +244,13 @@ namespace elna::boot { statement->accept(this); } - this->symbols = this->symbols->scope(); + this->bag.leave(); } else { - info = std::make_shared(build_procedure(definition->heading()), - definition->parameter_names); + info = std::make_shared(heading, definition->parameter_names); } - this->symbols->enter(definition->identifier.identifier, info); + this->bag.enter(definition->identifier.identifier, info); } void name_analysis_visitor::visit(assign_statement *statement) @@ -379,14 +373,14 @@ namespace elna::boot { type->accept(this); } - for (auto& unresolved : this->unresolved) + for (auto& unresolved : this->bag.unresolved) { std::vector path; if (check_unresolved_symbol(unresolved.second, path)) { auto info = 
std::make_shared(type_info(type(unresolved.second))); - this->symbols->enter(std::move(unresolved.first), info); + this->bag.enter(unresolved.first, info); } else { diff --git a/boot/symbol.cc b/boot/symbol.cc index a943774..1651cbb 100644 --- a/boot/symbol.cc +++ b/boot/symbol.cc @@ -313,13 +313,9 @@ namespace elna::boot } procedure_info::procedure_info(const procedure_type symbol, const std::vector names, - std::shared_ptr parent_table) - : symbol(symbol), names(names) + std::shared_ptr scope) + : symbol(symbol), names(names), symbols(scope) { - if (parent_table != nullptr) - { - this->symbols = std::make_shared(parent_table); - } } std::shared_ptr procedure_info::is_procedure() @@ -361,4 +357,57 @@ namespace elna::boot return result; } + + symbol_bag::symbol_bag() + { + this->symbols = std::make_shared(); + } + + symbol_bag::symbol_bag(forward_table&& unresolved, std::shared_ptr symbols) + : symbols(symbols), unresolved(unresolved) + { + } + + std::shared_ptr symbol_bag::lookup(const std::string& name) + { + for (auto import_bag : this->imports) + { + if (auto result = import_bag->lookup(name)) + { + return result; + } + } + return this->symbols->lookup(name); + } + + bool symbol_bag::enter(const std::string& name, std::shared_ptr entry) + { + return this->symbols->enter(name, entry); + } + + std::shared_ptr symbol_bag::enter() + { + this->symbols = std::make_shared(this->symbols); + return this->symbols; + } + + void symbol_bag::enter(std::shared_ptr child) + { + this->symbols = child; + } + + void symbol_bag::leave() + { + this->symbols = this->symbols->scope(); + } + + void symbol_bag::add_import(std::shared_ptr table) + { + this->imports.push_front(table); + } + + void symbol_bag::add_import(const symbol_bag& bag) + { + add_import(bag.symbols); + } } diff --git a/gcc/elna-generic.cc b/gcc/elna-generic.cc index 5deffde..6626adb 100644 --- a/gcc/elna-generic.cc +++ b/gcc/elna-generic.cc @@ -29,16 +29,14 @@ along with GCC; see the file COPYING3. 
If not see #include "stringpool.h" #include "diagnostic.h" #include "realmpfr.h" -#include "stor-layout.h" #include "varasm.h" #include "fold-const.h" #include "langhooks.h" namespace elna::gcc { - generic_visitor::generic_visitor(std::shared_ptr symbol_table, - std::shared_ptr info_table) - : symbols(symbol_table), info_table(info_table) + generic_visitor::generic_visitor(std::shared_ptr symbol_table, elna::boot::symbol_bag bag) + : bag(bag), symbols(symbol_table) { } @@ -321,7 +319,7 @@ namespace elna::gcc DECL_STRUCT_FUNCTION(fndecl)->language = ggc_cleared_alloc(); enter_scope(); - this->info_table = this->info_table->lookup(definition->identifier.identifier)->is_procedure()->symbols; + this->bag.enter(this->bag.lookup(definition->identifier.identifier)->is_procedure()->symbols); tree argument_chain = DECL_ARGUMENTS(fndecl); for (; argument_chain != NULL_TREE; argument_chain = TREE_CHAIN(argument_chain)) @@ -339,7 +337,7 @@ namespace elna::gcc visit_statements(definition->body.value().body()); tree mapping = leave_scope(); - this->info_table = this->info_table->scope(); + this->bag.leave(); BLOCK_SUPERCONTEXT(BIND_EXPR_BLOCK(mapping)) = fndecl; DECL_INITIAL(fndecl) = BIND_EXPR_BLOCK(mapping); @@ -746,7 +744,7 @@ namespace elna::gcc void generic_visitor::visit(boot::variable_declaration *declaration) { this->current_expression = get_inner_alias( - this->info_table->lookup(declaration->identifier.identifier)->is_variable()->symbol, + this->bag.lookup(declaration->identifier.identifier)->is_variable()->symbol, this->symbols); location_t declaration_location = get_location(&declaration->position()); diff --git a/gcc/elna1.cc b/gcc/elna1.cc index ef13c57..c267c32 100644 --- a/gcc/elna1.cc +++ b/gcc/elna1.cc @@ -62,63 +62,62 @@ static bool elna_langhook_init(void) return true; } -static void elna_parse_file(const char *filename) +using dependency_state = elna::boot::dependency_state>; + +static elna::boot::dependency elna_parse_file(dependency_state& state, const 
char *filename) { - std::ifstream file{ filename, std::ios::in }; + auto module_table = std::make_shared(state.globals); + std::ifstream entry_point{ filename, std::ios::in }; - if (!file) + if (!entry_point) { - fatal_error(UNKNOWN_LOCATION, "cannot open filename %s: %m", filename); + fatal_error(UNKNOWN_LOCATION, "Cannot open filename %s: %m", filename); } - elna::boot::dependency_graph outcome = elna::boot::read_sources(file, filename); - - std::shared_ptr info_table = elna::boot::builtin_symbol_table(); - std::shared_ptr symbol_table = elna::gcc::builtin_symbol_table(); - linemap_add(line_table, LC_ENTER, 0, filename, 1); + elna::boot::dependency outcome = elna::boot::read_sources(entry_point, filename); + if (outcome.has_errors()) { elna::gcc::report_errors(outcome.errors()); } - else + elna::boot::symbol_bag outcome_bag = elna::boot::symbol_bag{ std::move(outcome.unresolved), module_table }; + + for (const auto& sub_tree : outcome.tree->imports) { - for (const std::unique_ptr& module_tree : outcome.modules) + std::filesystem::path sub_path = "source" / elna::boot::build_path(sub_tree->segments); + + if (state.cache.find(sub_path) == state.cache.end()) { - elna::boot::declaration_visitor declaration_visitor(filename); - declaration_visitor.visit(module_tree.get()); - - if (declaration_visitor.errors().empty()) - { - elna::boot::name_analysis_visitor name_analysis_visitor(filename, info_table, - std::move(declaration_visitor.unresolved)); - name_analysis_visitor.visit(module_tree.get()); - - if (name_analysis_visitor.errors().empty()) - { - elna::gcc::rewrite_symbol_table(info_table, symbol_table); - - elna::gcc::generic_visitor generic_visitor{ symbol_table, info_table }; - generic_visitor.visit(module_tree.get()); - } - else - { - elna::gcc::report_errors(name_analysis_visitor.errors()); - } - } - else - { - elna::gcc::report_errors(declaration_visitor.errors()); - } + elna_parse_file(state, state.allocate_path(sub_path)); } + 
outcome_bag.add_import(state.cache.find(sub_path)->second); } + elna::boot::name_analysis_visitor name_analysis_visitor(filename, outcome_bag); + outcome.tree->accept(&name_analysis_visitor); + + if (name_analysis_visitor.has_errors()) + { + elna::gcc::report_errors(name_analysis_visitor.errors()); + } + state.cache.insert({ filename, outcome_bag }); + elna::gcc::rewrite_symbol_table(module_table, state.custom); linemap_add(line_table, LC_LEAVE, 0, NULL, 0); + + return outcome; } static void elna_langhook_parse_file(void) { + dependency_state state{ elna::gcc::builtin_symbol_table() }; + for (unsigned int i = 0; i < num_in_fnames; i++) { - elna_parse_file(in_fnames[i]); + elna::boot::dependency outcome = elna_parse_file(state, in_fnames[i]); + + linemap_add(line_table, LC_ENTER, 0, in_fnames[i], 1); + elna::gcc::generic_visitor generic_visitor{ state.custom, state.cache.find(in_fnames[i])->second }; + outcome.tree->accept(&generic_visitor); + linemap_add(line_table, LC_LEAVE, 0, NULL, 0); } } diff --git a/include/elna/boot/dependency.h b/include/elna/boot/dependency.h index 64ed7dd..b2deb86 100644 --- a/include/elna/boot/dependency.h +++ b/include/elna/boot/dependency.h @@ -17,25 +17,48 @@ along with GCC; see the file COPYING3. 
If not see #pragma once +#include #include #include "elna/boot/result.h" #include "elna/boot/ast.h" namespace elna::boot { - class dependency_graph + class dependency : public error_container { error_list m_errors; public: - std::vector> modules; + std::unique_ptr tree; + std::unordered_map> unresolved; - bool has_errors() const; - const error_list& errors() const; - - dependency_graph(); - explicit dependency_graph(error_list&& errors); + explicit dependency(const char *path); }; - dependency_graph read_sources(std::istream& entry_point, const char *entry_path); + dependency read_sources(std::istream& entry_point, const char *entry_path); + std::filesystem::path build_path(const std::vector& segments); + + template + struct dependency_state + { + const std::shared_ptr globals; + T custom; + std::unordered_map cache; + + explicit dependency_state(T custom) + : globals(elna::boot::builtin_symbol_table()), custom(custom) + { + } + + const char *allocate_path(const std::filesystem::path path) + { + std::size_t current_size = this->allocated_paths.size(); + this->allocated_paths += path.native() + '\0'; + + return this->allocated_paths.data() + current_size; + } + + private: + std::string allocated_paths; + }; } diff --git a/include/elna/boot/driver.h b/include/elna/boot/driver.h index c9706bd..288aa0c 100644 --- a/include/elna/boot/driver.h +++ b/include/elna/boot/driver.h @@ -39,7 +39,7 @@ namespace elna::boot class driver : public error_container { public: - std::unique_ptr tree; + std::unique_ptr tree; driver(const char *input_file); }; diff --git a/include/elna/boot/result.h b/include/elna/boot/result.h index acf4a4f..3b2c26d 100644 --- a/include/elna/boot/result.h +++ b/include/elna/boot/result.h @@ -81,6 +81,8 @@ namespace elna::boot auto new_error = std::make_unique(arguments...); m_errors.emplace_back(std::move(new_error)); } + + bool has_errors() const; }; /** diff --git a/include/elna/boot/semantic.h b/include/elna/boot/semantic.h index d8d87d6..a510c3c 
100644 --- a/include/elna/boot/semantic.h +++ b/include/elna/boot/semantic.h @@ -77,8 +77,7 @@ namespace elna::boot type current_type; constant_info::variant current_literal; - std::shared_ptr symbols; - std::unordered_map> unresolved; + symbol_bag bag; procedure_type build_procedure(procedure_type_expression& type_expression); std::vector build_composite_type(const std::vector& fields); @@ -87,8 +86,7 @@ namespace elna::boot std::vector& path); public: - explicit name_analysis_visitor(const char *path, std::shared_ptr symbols, - std::unordered_map>&& unresolved); + name_analysis_visitor(const char *path, symbol_bag bag); void visit(named_type_expression *type_expression) override; void visit(array_type_expression *type_expression) override; diff --git a/include/elna/boot/symbol.h b/include/elna/boot/symbol.h index d28c656..9a7b24c 100644 --- a/include/elna/boot/symbol.h +++ b/include/elna/boot/symbol.h @@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see #include #include #include +#include #include "elna/boot/result.h" @@ -281,6 +282,7 @@ namespace elna::boot }; using symbol_table = symbol_map, std::nullptr_t, nullptr>; + using forward_table = std::unordered_map>; class type_info : public info { @@ -299,7 +301,7 @@ namespace elna::boot std::shared_ptr symbols; procedure_info(const procedure_type symbol, const std::vector names, - std::shared_ptr parent_table = nullptr); + std::shared_ptr scope = nullptr); std::shared_ptr is_procedure() override; }; @@ -325,4 +327,25 @@ namespace elna::boot }; std::shared_ptr builtin_symbol_table(); + + class symbol_bag + { + std::shared_ptr symbols; + std::forward_list> imports; + + public: + forward_table unresolved; + + symbol_bag(); + symbol_bag(forward_table&& unresolved, std::shared_ptr symbols); + + std::shared_ptr lookup(const std::string& name); + bool enter(const std::string& name, std::shared_ptr entry); + std::shared_ptr enter(); + void enter(std::shared_ptr child); + void leave(); + + void 
add_import(std::shared_ptr table); + void add_import(const symbol_bag& bag); + }; } diff --git a/include/elna/gcc/elna-generic.h b/include/elna/gcc/elna-generic.h index aabfce2..54cad5d 100644 --- a/include/elna/gcc/elna-generic.h +++ b/include/elna/gcc/elna-generic.h @@ -36,8 +36,8 @@ namespace elna::gcc class generic_visitor final : public boot::parser_visitor { tree current_expression{ NULL_TREE }; + elna::boot::symbol_bag bag; std::shared_ptr symbols; - std::shared_ptr info_table; void enter_scope(); tree leave_scope(); @@ -63,7 +63,7 @@ namespace elna::gcc bool assert_constant(location_t expression_location); public: - generic_visitor(std::shared_ptr symbol_table, std::shared_ptr info_table); + generic_visitor(std::shared_ptr symbol_table, elna::boot::symbol_bag bag); void visit(boot::program *program) override; void visit(boot::procedure_declaration *definition) override; diff --git a/source/Common.def b/source/Common.def deleted file mode 100644 index 9520230..0000000 --- a/source/Common.def +++ /dev/null @@ -1,12 +0,0 @@ -DEFINITION MODULE Common; - -TYPE - ShortString = ARRAY[1..256] OF CHAR; - Identifier = ARRAY[1..256] OF CHAR; - PIdentifier = POINTER TO Identifier; - TextLocation = RECORD - line: CARDINAL; - column: CARDINAL - END; - -END Common. diff --git a/source/Common.elna b/source/Common.elna index fa158b7..a3ab8cd 100644 --- a/source/Common.elna +++ b/source/Common.elna @@ -1,3 +1,13 @@ module; +type + ShortString = [256]Char; + Identifier = [256]Char; + PIdentifier = ^Identifier; + TextLocation* = record + line: Word; + column: Word + end; + FILE* = record end; + end. 
diff --git a/source/Lexer.def b/source/Lexer.def deleted file mode 100644 index 883c604..0000000 --- a/source/Lexer.def +++ /dev/null @@ -1,107 +0,0 @@ -DEFINITION MODULE Lexer; - -FROM FIO IMPORT File; - -FROM Common IMPORT Identifier, ShortString, TextLocation; - -TYPE - PLexerBuffer = POINTER TO CHAR; - BufferPosition = RECORD - iterator: PLexerBuffer; - location: TextLocation - END; - PBufferPosition = POINTER TO BufferPosition; - Lexer = RECORD - input: File; - buffer: PLexerBuffer; - size: CARDINAL; - length: CARDINAL; - start: BufferPosition; - current: BufferPosition - END; - PLexer = POINTER TO Lexer; - LexerKind = ( - lexerKindEof, - lexerKindIdentifier, - lexerKindIf, - lexerKindThen, - lexerKindElse, - lexerKindElsif, - lexerKindWhile, - lexerKindDo, - lexerKindProc, - lexerKindBegin, - lexerKindEnd, - lexerKindXor, - lexerKindConst, - lexerKindVar, - lexerKindCase, - lexerKindOf, - lexerKindType, - lexerKindRecord, - lexerKindUnion, - lexerKindPipe, - lexerKindTo, - lexerKindBoolean, - lexerKindNull, - lexerKindAnd, - lexerKindOr, - lexerKindTilde, - lexerKindReturn, - lexerKindDefer, - lexerKindRange, - lexerKindLeftParen, - lexerKindRightParen, - lexerKindLeftSquare, - lexerKindRightSquare, - lexerKindGreaterEqual, - lexerKindLessEqual, - lexerKindGreaterThan, - lexerKindLessThan, - lexerKindNotEqual, - lexerKindEqual, - lexerKindSemicolon, - lexerKindDot, - lexerKindComma, - lexerKindPlus, - lexerKindMinus, - lexerKindAsterisk, - lexerKindDivision, - lexerKindRemainder, - lexerKindAssignment, - lexerKindColon, - lexerKindHat, - lexerKindAt, - lexerKindComment, - lexerKindInteger, - lexerKindWord, - lexerKindCharacter, - lexerKindString, - lexerKindFrom, - lexerKindPointer, - lexerKindArray, - lexerKindArrow, - lexerKindProgram, - lexerKindModule, - lexerKindImport - ); - LexerToken = RECORD - CASE kind: LexerKind OF - lexerKindBoolean: booleanKind: BOOLEAN | - lexerKindIdentifier: identifierKind: Identifier | - lexerKindInteger: integerKind: INTEGER 
| - lexerKindString: stringKind: ShortString - END; - start_location: TextLocation; - end_location: TextLocation - END; - PLexerToken = POINTER TO LexerToken; - -PROCEDURE lexer_initialize(lexer: PLexer; input: File); -PROCEDURE lexer_destroy(lexer: PLexer); -(* Returns the last read token. *) -PROCEDURE lexer_current(lexer: PLexer): LexerToken; -(* Read and return the next token. *) -PROCEDURE lexer_lex(lexer: PLexer): LexerToken; - -END Lexer. diff --git a/source/Lexer.elna b/source/Lexer.elna index 8566d4c..11df504 100644 --- a/source/Lexer.elna +++ b/source/Lexer.elna @@ -1,17 +1,9 @@ module; -from FIO import ReadNBytes; -from SYSTEM import ADR, TSIZE; - -from DynamicStrings import String, InitStringCharStar, KillString; -from StringConvert import StringToInteger; -from Storage import DEALLOCATE, ALLOCATE; -from Strings import Length; -from MemUtils import MemCopy, MemZero; -from StrCase import Lower; +import Common; const - CHUNK_SIZE = 85536; + CHUNK_SIZE := 85536; type (* @@ -62,20 +54,109 @@ type transitionStateDecimalSuffix, transitionStateEnd ); - TransitionAction = proc(PLexer, PLexerToken); + LexerToken = record + kind: LexerKind; + value: union + booleanKind: Bool; + identifierKind: Identifier; + integerKind: Int; + stringKind: ShortString + end; + start_location: TextLocation; + end_location: TextLocation + end; + TransitionAction = proc(^Lexer, ^LexerToken); Transition = record action: TransitionAction; next_state: TransitionState end; TransitionClasses = [22]Transition; + BufferPosition* = record + iterator: ^Char; + location: TextLocation + end; + Lexer* = record + input: ^FILE; + buffer: ^Char; + size: Word; + length: Word; + start: BufferPosition; + current: BufferPosition + end; + LexerKind* = ( + lexerKindEof, + lexerKindIdentifier, + lexerKindIf, + lexerKindThen, + lexerKindElse, + lexerKindElsif, + lexerKindWhile, + lexerKindDo, + lexerKindProc, + lexerKindBegin, + lexerKindEnd, + lexerKindXor, + lexerKindConst, + lexerKindVar, + 
lexerKindCase, + lexerKindOf, + lexerKindType, + lexerKindRecord, + lexerKindUnion, + lexerKindPipe, + lexerKindTo, + lexerKindBoolean, + lexerKindNull, + lexerKindAnd, + lexerKindOr, + lexerKindTilde, + lexerKindReturn, + lexerKindDefer, + lexerKindRange, + lexerKindLeftParen, + lexerKindRightParen, + lexerKindLeftSquare, + lexerKindRightSquare, + lexerKindGreaterEqual, + lexerKindLessEqual, + lexerKindGreaterThan, + lexerKindLessThan, + lexerKindNotEqual, + lexerKindEqual, + lexerKindSemicolon, + lexerKindDot, + lexerKindComma, + lexerKindPlus, + lexerKindMinus, + lexerKindAsterisk, + lexerKindDivision, + lexerKindRemainder, + lexerKindAssignment, + lexerKindColon, + lexerKindHat, + lexerKindAt, + lexerKindComment, + lexerKindInteger, + lexerKindWord, + lexerKindCharacter, + lexerKindString, + lexerKindFrom, + lexerKindPointer, + lexerKindArray, + lexerKindArrow, + lexerKindProgram, + lexerKindModule, + lexerKindImport + ); + var classification: [128]TransitionClass; transitions: [16]TransitionClasses; proc initialize_classification(); var - i: CARDINAL; + i: Word; begin classification[1] := transitionClassEof; (* NUL *) classification[2] := transitionClassInvalid; (* SOH *) @@ -213,12 +294,12 @@ begin end end; -proc compare_keyword(keyword: ARRAY OF CHAR, token_start: BufferPosition, token_end: PLexerBuffer) -> BOOLEAN; +proc compare_keyword(keyword: String, token_start: BufferPosition, token_end: ^Char) -> Bool; var - result: BOOLEAN; - index: CARDINAL; - keyword_length: CARDINAL; - continue: BOOLEAN; + result: Bool; + index: Word; + keyword_length: Word; + continue: Bool; begin index := 0; result := true; @@ -237,25 +318,25 @@ begin end; (* Reached the end of file. 
*) -proc transition_action_eof(lexer: PLexer, token: PLexerToken); +proc transition_action_eof(lexer: ^Lexer, token: ^LexerToken); begin token^.kind := lexerKindEof end; -proc increment(position: PBufferPosition); +proc increment(position: ^BufferPosition); begin INC(position^.iterator) end; (* Add the character to the token currently read and advance to the next character. *) -proc transition_action_accumulate(lexer: PLexer, token: PLexerToken); +proc transition_action_accumulate(lexer: ^Lexer, token: ^LexerToken); begin increment(ADR(lexer^.current)) end; (* The current character is not a part of the token. Finish the token already * read. Don't advance to the next character. *) -proc transition_action_finalize(lexer: PLexer, token: PLexerToken); +proc transition_action_finalize(lexer: ^Lexer, token: ^LexerToken); begin if lexer^.start.iterator^ = ':' then token^.kind := lexerKindColon @@ -278,7 +359,7 @@ begin end; (* An action for tokens containing multiple characters. *) -proc transition_action_composite(lexer: PLexer, token: PLexerToken); +proc transition_action_composite(lexer: ^Lexer, token: ^LexerToken); begin if lexer^.start.iterator^ = '<' then if lexer^.current.iterator^ = '>' then @@ -304,7 +385,7 @@ begin end; (* Skip a space. *) -proc transition_action_skip(lexer: PLexer, token: PLexerToken); +proc transition_action_skip(lexer: ^Lexer, token: ^LexerToken); begin increment(ADR(lexer^.start)); @@ -316,9 +397,9 @@ begin end; (* Delimited string action. *) -proc transition_action_delimited(lexer: PLexer, token: PLexerToken); +proc transition_action_delimited(lexer: ^Lexer, token: ^LexerToken); var - text_length: CARDINAL; + text_length: Word; begin if lexer^.start.iterator^ = '(' then token^.kind := lexerKindComment @@ -347,7 +428,7 @@ begin end; (* Finalize keyword or identifier. 
*) -proc transition_action_key_id(lexer: PLexer, token: PLexerToken); +proc transition_action_key_id(lexer: ^Lexer, token: ^LexerToken); begin token^.kind := lexerKindIdentifier; @@ -355,95 +436,95 @@ begin DEC(token^.identifierKind[1], lexer^.start.iterator); MemCopy(lexer^.start.iterator, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2])); - if compare_keyword('program', lexer^.start, lexer^.current.iterator) then + if compare_keyword("program", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindProgram end; - if compare_keyword('import', lexer^.start, lexer^.current.iterator) then + if compare_keyword("import", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindImport end; - if compare_keyword('const', lexer^.start, lexer^.current.iterator) then + if compare_keyword("const", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindConst end; - if compare_keyword('var', lexer^.start, lexer^.current.iterator) then + if compare_keyword("var", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindVar end; - if compare_keyword('if', lexer^.start, lexer^.current.iterator) then + if compare_keyword("if", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindIf end; - if compare_keyword('then', lexer^.start, lexer^.current.iterator) then + if compare_keyword("then", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindThen end; - if compare_keyword('elsif', lexer^.start, lexer^.current.iterator) then + if compare_keyword("elsif", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindElsif end; - if compare_keyword('else', lexer^.start, lexer^.current.iterator) then + if compare_keyword("else", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindElse end; - if compare_keyword('while', lexer^.start, lexer^.current.iterator) then + if compare_keyword("while", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindWhile end; - if 
compare_keyword('do', lexer^.start, lexer^.current.iterator) then + if compare_keyword("do", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindDo end; - if compare_keyword('proc', lexer^.start, lexer^.current.iterator) then + if compare_keyword("proc", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindProc end; - if compare_keyword('begin', lexer^.start, lexer^.current.iterator) then + if compare_keyword("begin", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindBegin end; - if compare_keyword('end', lexer^.start, lexer^.current.iterator) then + if compare_keyword("end", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindEnd end; - if compare_keyword('type', lexer^.start, lexer^.current.iterator) then + if compare_keyword("type", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindType end; - if compare_keyword('record', lexer^.start, lexer^.current.iterator) then + if compare_keyword("record", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindRecord end; - if compare_keyword('union', lexer^.start, lexer^.current.iterator) then + if compare_keyword("union", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindUnion end; - if compare_keyword('NIL', lexer^.start, lexer^.current.iterator) then + if compare_keyword("NIL", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindNull end; - if compare_keyword('or', lexer^.start, lexer^.current.iterator) then + if compare_keyword("or", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindOr end; - if compare_keyword('return', lexer^.start, lexer^.current.iterator) then + if compare_keyword("return", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindReturn end; - if compare_keyword('defer', lexer^.start, lexer^.current.iterator) then + if compare_keyword("defer", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindDefer end; - if 
compare_keyword('TO', lexer^.start, lexer^.current.iterator) then + if compare_keyword("TO", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindTo end; - if compare_keyword('CASE', lexer^.start, lexer^.current.iterator) then + if compare_keyword("CASE", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindCase end; - if compare_keyword('OF', lexer^.start, lexer^.current.iterator) then + if compare_keyword("OF", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindOf end; - if compare_keyword('FROM', lexer^.start, lexer^.current.iterator) then + if compare_keyword("FROM", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindFrom end; - if compare_keyword('module', lexer^.start, lexer^.current.iterator) then + if compare_keyword("module", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindModule end; - if compare_keyword('xor', lexer^.start, lexer^.current.iterator) then + if compare_keyword("xor", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindXor end; - if compare_keyword('POINTER', lexer^.start, lexer^.current.iterator) then + if compare_keyword("POINTER", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindPointer end; - if compare_keyword('ARRAY', lexer^.start, lexer^.current.iterator) then + if compare_keyword("ARRAY", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindArray end; - if compare_keyword('TRUE', lexer^.start, lexer^.current.iterator) then + if compare_keyword("TRUE", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindBoolean; token^.booleanKind := true end; - if compare_keyword('FALSE', lexer^.start, lexer^.current.iterator) then + if compare_keyword("FALSE", lexer^.start, lexer^.current.iterator) then token^.kind := lexerKindBoolean; token^.booleanKind := false end @@ -451,7 +532,7 @@ end; (* Action for tokens containing only one character. 
The character cannot be * followed by other characters forming a composite token. *) -proc transition_action_single(lexer: PLexer, token: PLexerToken); +proc transition_action_single(lexer: ^Lexer, token: ^LexerToken); begin if lexer^.current.iterator^ = '&' then token^.kind := lexerKindAnd @@ -502,11 +583,11 @@ begin end; (* Handle an integer literal. *) -proc transition_action_integer(lexer: PLexer, token: PLexerToken); +proc transition_action_integer(lexer: ^Lexer, token: ^LexerToken); var buffer: String; - integer_length: CARDINAL; - found: BOOLEAN; + integer_length: Word; + found: Bool; begin token^.kind := lexerKindInteger; @@ -805,7 +886,7 @@ begin transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].next_state := transitionStateEnd end; -proc lexer_initialize(lexer: PLexer, input: File); +proc lexer_make*(lexer: ^Lexer, input: ^FILE); begin lexer^.input := input; lexer^.length := 0; @@ -815,19 +896,20 @@ begin lexer^.size := CHUNK_SIZE end; -proc lexer_current(lexer: PLexer) -> LexerToken; +(* Returns the last read token. *) +proc lexer_current*(lexer: ^Lexer) -> LexerToken; var current_class: TransitionClass; current_state: TransitionState; current_transition: Transition; result: LexerToken; - index1: CARDINAL; - index2: CARDINAL; + index1: Word; + index2: Word; begin lexer^.current := lexer^.start; current_state := transitionStateStart; - while current_state <> transitionStateEnd DO + while current_state <> transitionStateEnd do index1 := ORD(lexer^.current.iterator^); INC(index1); current_class := classification[index1]; @@ -849,7 +931,8 @@ begin return result end; -proc lexer_lex(lexer: PLexer) -> LexerToken; +(* Read and return the next token. 
*) +proc lexer_lex*(lexer: ^Lexer) -> LexerToken; var result: LexerToken; begin @@ -865,12 +948,15 @@ begin return result end; -proc lexer_destroy(lexer: PLexer); +proc lexer_destroy*(lexer: ^Lexer); begin DEALLOCATE(lexer^.buffer, lexer^.size) end; +proc lexer_initialize(); begin initialize_classification(); initialize_transitions() +end; + end. diff --git a/source/main.elna b/source/main.elna index 946e32e..f6c7ce7 100644 --- a/source/main.elna +++ b/source/main.elna @@ -3,7 +3,7 @@ obtain one at https://mozilla.org/MPL/2.0/. *) program; -import dummy; +import Common, Lexer; const SEEK_SET* := 0; @@ -80,13 +80,9 @@ type _module, _import ); - Position* = record - line: Word; - column: Word - end; Location* = record - first: Position; - last: Position + first: TextLocation; + last: TextLocation end; SourceFile* = record buffer: [1024]Char; @@ -94,14 +90,13 @@ type size: Word; index: Word end; - FILE* = record end; StringBuffer* = record data: Pointer; size: Word; capacity: Word end; SourceCode = record - position: Position; + position: TextLocation; input: Pointer; empty: proc(Pointer) -> Bool; @@ -123,7 +118,7 @@ type lex: Bool; parse: Bool end; - Lexer* = record + Tokenizer* = record length: Word; data: ^Token end; @@ -592,7 +587,7 @@ begin return current_token end; -proc lexer_add_token(lexer: ^Lexer, token: Token); +proc lexer_add_token(lexer: ^Tokenizer, token: Token); var new_length: Word; begin @@ -778,13 +773,13 @@ begin end; (* Split the source text into tokens. 
*) -proc lexer_text(source_code: SourceCode) -> Lexer; +proc lexer_text(source_code: SourceCode) -> Tokenizer; var current_token: Token; token_buffer: StringBuffer; - lexer: Lexer; + lexer: Tokenizer; begin - lexer := Lexer(0u, nil); + lexer := Tokenizer(0u, nil); token_buffer := string_buffer_new(); lexer_spaces(@source_code); @@ -1024,7 +1019,7 @@ end; proc compile_in_stages(command_line: ^CommandLine, source_code: SourceCode) -> Int; var return_code: Int; - lexer: Lexer; + lexer: Tokenizer; begin return_code := 0; @@ -1068,7 +1063,7 @@ begin fclose(source_file^.handle) end; - source_code.position := Position(1u, 1u); + source_code.position := TextLocation(1u, 1u); source_code.input := cast(source_file: Pointer); source_code.empty := source_file_empty; source_code.head := source_file_head;