From 8e89d33c214e3dad03a86057c2c45a0dc7608ad7 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Mon, 2 Feb 2026 12:10:40 +0100 Subject: [PATCH] Split up the rakefile --- Rakefile | 68 --- boot/stage19/cl.elna | 73 +-- frontend/dependency.cc | 102 ---- frontend/semantic.cc | 644 ------------------- include/elna/frontend/dependency.h | 55 -- include/elna/frontend/semantic.h | 190 ------ rakelib/ninja.rake | 75 +++ source/Transpiler.elna | 631 ------------------- source/lexer.elna | 952 ----------------------------- 9 files changed, 92 insertions(+), 2698 deletions(-) delete mode 100644 frontend/dependency.cc delete mode 100644 frontend/semantic.cc delete mode 100644 include/elna/frontend/dependency.h delete mode 100644 include/elna/frontend/semantic.h create mode 100644 rakelib/ninja.rake delete mode 100644 source/Transpiler.elna delete mode 100644 source/lexer.elna diff --git a/Rakefile b/Rakefile index 46703ca..d12b9a7 100644 --- a/Rakefile +++ b/Rakefile @@ -13,7 +13,6 @@ STAGES = Dir.glob('boot/stage*') .sort { |a, b| a.delete_prefix('stage').to_i <=> b.delete_prefix('stage').to_i } .drop(1) # First assembly stage does not count. -CLEAN.include 'build/boot', 'build/valid' CLEAN.include 'doc/*.pdf' CLOBBER.include 'build' @@ -51,73 +50,6 @@ task :convert do end end -file "build/valid/#{STAGES.last}/cl" => 'build/build.ninja' do |t| - sh 'ninja', '-f', t.prerequisites.first -end - -file 'build/build.ninja' => ['build'] do |t| - File.open t.name, 'w' do |f| - f << <<~NINJA - builddir = build - cflags = -fpie -g - - rule cc - command = gcc $cflags -nostdlib -o $out $in - - rule as - command = gcc $cflags -c -o $out $in - - rule link1 - command = ld -o $out $in - - rule link2 - command = ld -o $out --dynamic-linker /lib32/ld-linux-riscv32-ilp32d.so.1 /usr/lib/crt1.o /usr/lib/crti.o -lc $in /usr/lib/crtn.o - - rule bootstrap - command = $bootstrap < \$in > \$out - NINJA - f << <<~NINJA - - build build/boot/stage1/cl: cc boot/stage1.s - build build/valid/stage1/cl.s: bootstrap boot/stage1.s | build/boot/stage1/cl - bootstrap = build/boot/stage1/cl - - build build/valid/stage1/cl.o: as build/valid/stage1/cl.s - build build/valid/stage1/cl: link1 build/valid/stage1/cl.o - NINJA - STAGES.each do |stage| - stage_number = stage.delete_prefix('stage').to_i - - arguments_path = Pathname.new('boot') + stage + 'linker.arg' - if arguments_path.exist? - link = 'link2' - else - link = 'link1' - end - boot_stage = "build/boot/stage#{stage_number}" - valid_stage = "build/valid/stage#{stage_number}" - f << <<~NINJA - - build #{boot_stage}/cl.s: bootstrap boot/stage#{stage_number}/cl.elna | build/valid/stage#{stage_number.pred}/cl - bootstrap = build/valid/stage#{stage_number.pred}/cl - - build #{boot_stage}/cl.o: as #{boot_stage}/cl.s - build #{boot_stage}/cl: #{link} #{boot_stage}/cl.o - - build #{valid_stage}/cl.s: bootstrap boot/stage#{stage_number}/cl.elna | #{boot_stage}/cl - bootstrap = build/boot/stage#{stage_number}/cl - - build #{valid_stage}/cl.o: as #{valid_stage}/cl.s - build #{valid_stage}/cl: #{link} #{valid_stage}/cl.o - NINJA - end - f << <<~NINJA - - default build/valid/#{STAGES.last}/cl - NINJA - end -end - rule '.pdf' => '.adoc' do |t| Asciidoctor.convert_file t.source, backend: 'pdf', safe: :safe end diff --git a/boot/stage19/cl.elna b/boot/stage19/cl.elna index 49fbbed..f721aef 100644 --- a/boot/stage19/cl.elna +++ b/boot/stage19/cl.elna @@ -316,14 +316,13 @@ type ElnaLexerAction = (none, accumulate, skip, single, eof, finalize, composite, key_id, integer, delimited); + ElnaLexerTransition = record + action: ElnaLexerAction; + next_state: Word + end; (** * Classification table assigns each possible character to a group (class). All * characters of the same group a handled equivalently. - * - * Transition = record - * action: TransitionAction; - * next_state: TransitionState - * end; *) ElnaLexerClass = ( invalid, @@ -586,7 +585,7 @@ var * Each transition table entry is 8 bytes long. The table has 19 rows (transition states) * and 23 columns (character classes), so 3496 = 8 * 19 * 23. *) - transition_table: [874]Word; + transition_table: [19][23]ElnaLexerTransition; lexer_state: ElnaLexerCursor; source_code: Word; @@ -4745,20 +4744,15 @@ end; proc _elna_lexer_get_transition(current_state: Word, character_class: Word); var - row_position: Word; column_position: Word; target: Word; begin (* Each state is 8 bytes long (2 words: action and next state). There are 23 character classes, so a transition row 8 * 23 = 184 bytes long. *) - row_position := current_state - 1; - row_position := row_position * 184; - column_position := character_class - 1; column_position := column_position * 8; - target := @transition_table; - target := target + row_position; + target := @transition_table[current_state]; return target + column_position end; @@ -4772,12 +4766,12 @@ end; *) proc _elna_lexer_set_transition(current_state: Word, character_class: Word, action: Word, next_state: Word); var - transition: Word; + transition: ^ElnaLexerTransition; begin transition := _elna_lexer_get_transition(current_state, character_class); - _elna_lexer_transition_set_action(transition, action); - _elna_lexer_transition_set_state(transition, next_state) + transition^.action := action; + transition^.next_state := next_state end; (* Sets same action and state transition for all character classes in one transition row. *) @@ -4950,37 +4944,6 @@ begin _elna_lexer_set_transition(ElnaLexerState.trait, ElnaLexerClass.x, ElnaLexerAction.accumulate, ElnaLexerState.trait) end; -proc _elna_lexer_transition_get_action(this: Word); - return this^ -end; - -proc _elna_lexer_transition_set_action(this: Word, value: Word); -begin - this^ := value -end; - -proc _elna_lexer_transition_get_state(this: Word); -begin - this := this + 4; - return this^ -end; - -proc _elna_lexer_transition_set_state(this: Word, value: Word); -begin - this := this + 4; - this^ := value -end; - -(** - * Resets the lexer state for reading the next token. - *) -proc _elna_lexer_reset(); -begin - (* Transition start state is 1. *) - lexer_state.state := ElnaLexerState.start; - lexer_state.finish := lexer_state.start -end; - (** * One time lexer initialization. *) @@ -5228,21 +5191,16 @@ end; proc _elna_lexer_execute_transition(kind: Word); var - next_transition: Word; - next_state: Word; + next_transition: ^ElnaLexerTransition; global_state: Word; - action_to_perform: Word; begin next_transition := _elna_lexer_next_transition(); - next_state := _elna_lexer_transition_get_state(next_transition); - action_to_perform := _elna_lexer_transition_get_action(next_transition); - global_state := @lexer_state; - global_state^ := next_state; - _elna_lexer_execute_action(action_to_perform, kind); + global_state^ := next_transition^.next_state; + _elna_lexer_execute_action(next_transition^.action, kind); - return next_state + return next_transition^.next_state end; proc _elna_lexer_advance_token(kind: Word); @@ -5257,10 +5215,13 @@ end; (** * Reads the next token and writes its type into the address in the kind parameter. + * Resets the lexer state for reading the next token. *) proc _elna_lexer_read_token(kind: Word); begin - _elna_lexer_reset(); + lexer_state.state := ElnaLexerState.start; + lexer_state.finish := lexer_state.start; + _elna_lexer_advance_token(kind) end; diff --git a/frontend/dependency.cc b/frontend/dependency.cc deleted file mode 100644 index 25658f8..0000000 --- a/frontend/dependency.cc +++ /dev/null @@ -1,102 +0,0 @@ -/* Dependency graph analysis. - Copyright (C) 2025 Free Software Foundation, Inc. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see -. */ - -#include "elna/frontend/dependency.h" - -#include -#include -#include - -#include "elna/frontend/driver.h" -#include "elna/frontend/semantic.h" -#include "parser.hh" - -namespace elna::frontend -{ - dependency::dependency(const char *path) - : error_container(path) - { - } - - dependency read_source(std::istream& entry_point, const char *entry_path) - { - driver parse_driver{ entry_path }; - lexer tokenizer(entry_point); - yy::parser parser(tokenizer, parse_driver); - - dependency outcome{ entry_path }; - if (parser()) - { - std::swap(outcome.errors(), parse_driver.errors()); - return outcome; - } - else - { - std::swap(outcome.tree, parse_driver.tree); - } - declaration_visitor declaration_visitor(entry_path); - outcome.tree->accept(&declaration_visitor); - - if (!declaration_visitor.errors().empty()) - { - std::swap(outcome.errors(), declaration_visitor.errors()); - } - outcome.unresolved = declaration_visitor.unresolved; - - return outcome; - } - - error_list analyze_semantics(const char *path, std::unique_ptr& tree, symbol_bag bag) - { - name_analysis_visitor name_analyser(path, bag); - tree->accept(&name_analyser); - - if (name_analyser.has_errors()) - { - return std::move(name_analyser.errors()); - } - type_analysis_visitor type_analyzer(path, bag); - tree->accept(&type_analyzer); - - if (type_analyzer.has_errors()) - { - return std::move(type_analyzer.errors()); - } - return error_list{}; - } - - std::filesystem::path build_path(const std::vector& segments) - { - std::filesystem::path result; - std::vector::const_iterator segment_iterator = std::cbegin(segments); - - if (segment_iterator == std::cend(segments)) - { - return result; - } - result = *segment_iterator; - - ++segment_iterator; - for (; segment_iterator != std::cend(segments); ++segment_iterator) - { - result /= *segment_iterator; - } - result.replace_extension(".elna"); - - return result; - } -} diff --git a/frontend/semantic.cc b/frontend/semantic.cc deleted file mode 100644 index 36c75b8..0000000 --- a/frontend/semantic.cc +++ /dev/null @@ -1,644 +0,0 @@ -/* Name analysis. - Copyright (C) 2025 Free Software Foundation, Inc. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see -. */ - -#include "elna/frontend/semantic.h" - -#include -#include - -namespace elna::frontend -{ - undeclared_error::undeclared_error(const std::string& identifier, const char *path, const struct position position) - : error(path, position), identifier(identifier) - { - } - - std::string undeclared_error::what() const - { - return "Type '" + identifier + "' not declared"; - } - - already_declared_error::already_declared_error(const std::string& identifier, - const char *path, const struct position position) - : error(path, position), identifier(identifier) - { - } - - std::string already_declared_error::what() const - { - return "Symbol '" + identifier + "' has been already declared"; - } - - field_duplication_error::field_duplication_error(const std::string& field_name, - const char *path, const struct position position) - : error(path, position), field_name(field_name) - { - } - - std::string field_duplication_error::what() const - { - return "Repeated field name '" + field_name + "'"; - } - - cyclic_declaration_error::cyclic_declaration_error(const std::vector& cycle, - const char *path, const struct position position) - : error(path, position), cycle(cycle) - { - } - - std::string cyclic_declaration_error::what() const - { - auto segment = std::cbegin(this->cycle); - std::string message = "Type declaration forms a cycle: " + *segment; - - ++segment; - for (; segment != std::cend(this->cycle); ++segment) - { - message += " -> " + *segment; - } - return message; - } - - return_error::return_error(const std::string& identifier, const char *path, const struct position position) - : error(path, position), identifier(identifier) - { - } - - std::string return_error::what() const - { - return "Procedure '" + identifier + "' is expected to return, but does not have a return statement"; - } - - variable_initializer_error::variable_initializer_error(const char *path, const struct position position) - : error(path, position) - { - } - - std::string variable_initializer_error::what() const - { - return "Only one variable can be initialized"; - } - - type_analysis_visitor::type_analysis_visitor(const char *path, symbol_bag bag) - : error_container(path), bag(bag) - { - } - - void type_analysis_visitor::visit(program *program) - { - visit(static_cast(program)); - } - - void type_analysis_visitor::visit(procedure_declaration *definition) - { - if (definition->body.has_value() && definition->heading().return_type.proper_type != nullptr) - { - for (statement *const statement : definition->body.value().body()) - { - statement->accept(this); - } - if (!this->returns) - { - add_error(definition->identifier.name, this->input_file, definition->position()); - } - } - } - - void type_analysis_visitor::visit(assign_statement *) - { - } - - void type_analysis_visitor::visit(if_statement *) - { - } - - void type_analysis_visitor::visit(while_statement *) - { - } - - void type_analysis_visitor::visit(return_statement *) - { - this->returns = true; - } - - void type_analysis_visitor::visit(defer_statement *) - { - } - - void type_analysis_visitor::visit(case_statement *) - { - } - - void type_analysis_visitor::visit(procedure_call *) - { - } - - bool type_analysis_visitor::check_unresolved_symbol(std::shared_ptr alias, - std::vector& alias_path) - { - if (std::find(std::cbegin(alias_path), std::cend(alias_path), alias->name) != std::cend(alias_path)) - { - return false; - } - alias_path.push_back(alias->name); - - if (auto another_alias = alias->reference.get()) - { - return check_unresolved_symbol(another_alias, alias_path); - } - return true; - } - - void type_analysis_visitor::visit(unit *unit) - { - for (type_declaration *const type : unit->types) - { - type->accept(this); - } - for (procedure_declaration *const procedure : unit->procedures) - { - this->returns = false; - procedure->accept(this); - } - } - - void type_analysis_visitor::visit(type_declaration *definition) - { - std::vector alias_path; - auto unresolved_type = this->bag.lookup(definition->identifier.name)->is_type()->symbol.get(); - - if (!check_unresolved_symbol(unresolved_type, alias_path)) - { - add_error(alias_path, this->input_file, definition->position()); - } - } - - name_analysis_visitor::name_analysis_visitor(const char *path, symbol_bag bag) - : error_container(path), bag(bag) - { - } - - procedure_type name_analysis_visitor::build_procedure(procedure_type_expression& type_expression) - { - procedure_type::return_t result_return; - - if (type_expression.return_type.no_return) - { - result_return = procedure_type::return_t(std::monostate{}); - } - else if (type_expression.return_type.proper_type != nullptr) - { - type_expression.return_type.proper_type->accept(this); - result_return = procedure_type::return_t(this->current_type); - } - else - { - result_return = procedure_type::return_t(); - } - procedure_type result_type = procedure_type(result_return); - - for (struct type_expression *parameter : type_expression.parameters) - { - parameter->accept(this); - result_type.parameters.push_back(this->current_type); - } - return result_type; - } - - void name_analysis_visitor::visit(program *program) - { - visit(static_cast(program)); - - for (statement *const statement : program->body) - { - statement->accept(this); - } - } - - void name_analysis_visitor::visit(type_declaration *definition) - { - definition->body().accept(this); - auto resolved = this->bag.resolve(definition->identifier.name, this->current_type); - auto info = std::make_shared(type(resolved)); - - info->exported = definition->identifier.exported; - this->bag.enter(definition->identifier.name, info); - } - - void name_analysis_visitor::visit(named_type_expression *type_expression) - { - auto unresolved_alias = this->bag.declared(type_expression->name); - - if (unresolved_alias != nullptr) - { - this->current_type = type(unresolved_alias); - } - else if (auto from_symbol_table = this->bag.lookup(type_expression->name)) - { - this->current_type = from_symbol_table->is_type()->symbol; - } - else - { - add_error(type_expression->name, this->input_file, type_expression->position()); - this->current_type = type(); - } - } - - void name_analysis_visitor::visit(pointer_type_expression *type_expression) - { - type_expression->base().accept(this); - this->current_type = type(std::make_shared(this->current_type)); - } - - void name_analysis_visitor::visit(array_type_expression *type_expression) - { - type_expression->base().accept(this); - this->current_type = type(std::make_shared(this->current_type, type_expression->size)); - } - - std::vector name_analysis_visitor::build_composite_type(const std::vector& fields) - { - std::vector result; - std::set field_names; - - for (auto& field : fields) - { - if (field_names.find(field.first) != field_names.cend()) - { - add_error(field.first, this->input_file, field.second->position()); - } - else - { - field_names.insert(field.first); - field.second->accept(this); - result.push_back(std::make_pair(field.first, this->current_type)); - } - } - return result; - } - - void name_analysis_visitor::visit(record_type_expression *type_expression) - { - auto result_type = std::make_shared(); - - result_type->fields = build_composite_type(type_expression->fields); - - this->current_type = type(result_type); - } - - void name_analysis_visitor::visit(union_type_expression *type_expression) - { - auto result_type = std::make_shared(); - - result_type->fields = build_composite_type(type_expression->fields); - - this->current_type = type(result_type); - } - - void name_analysis_visitor::visit(procedure_type_expression *type_expression) - { - std::shared_ptr result_type = - std::make_shared(std::move(build_procedure(*type_expression))); - - this->current_type = type(result_type); - } - - void name_analysis_visitor::visit(enumeration_type_expression *type_expression) - { - std::shared_ptr result_type = std::make_shared(type_expression->members); - - this->current_type = type(result_type); - } - - void name_analysis_visitor::visit(variable_declaration *declaration) - { - declaration->variable_type().accept(this); - - for (const auto& variable_identifier : declaration->identifiers) - { - auto variable_symbol = std::make_shared(this->current_type, declaration->is_extern); - - variable_symbol->exported = variable_identifier.exported; - if (!this->bag.enter(variable_identifier.name, variable_symbol)) - { - add_error(variable_identifier.name, this->input_file, - declaration->position()); - } - } - } - - void name_analysis_visitor::visit(constant_declaration *definition) - { - definition->body().accept(this); - auto constant_symbol = std::make_shared(this->current_literal); - - constant_symbol->exported = definition->identifier.exported; - this->bag.enter(definition->identifier.name, constant_symbol); - } - - void name_analysis_visitor::visit(procedure_declaration *definition) - { - std::shared_ptr info; - auto heading = build_procedure(definition->heading()); - - if (definition->body.has_value()) - { - info = std::make_shared(heading, definition->parameter_names, this->bag.enter()); - - for (constant_declaration *const constant : definition->body.value().constants()) - { - constant->accept(this); - } - for (variable_declaration *const variable : definition->body.value().variables()) - { - variable->accept(this); - } - for (statement *const statement : definition->body.value().body()) - { - statement->accept(this); - } - this->bag.leave(); - } - else - { - info = std::make_shared(heading, definition->parameter_names); - } - info->exported = definition->identifier.exported; - this->bag.enter(definition->identifier.name, info); - } - - void name_analysis_visitor::visit(assign_statement *statement) - { - statement->lvalue().accept(this); - statement->rvalue().accept(this); - } - - void name_analysis_visitor::visit(if_statement *statement) - { - statement->body().prerequisite().accept(this); - for (struct statement *const statement : statement->body().statements) - { - statement->accept(this); - } - for (const auto branch : statement->branches) - { - branch->prerequisite().accept(this); - - for (struct statement *const statement : branch->statements) - { - statement->accept(this); - } - } - if (statement->alternative != nullptr) - { - for (struct statement *const statement : *statement->alternative) - { - statement->accept(this); - } - } - } - - void name_analysis_visitor::visit(import_declaration *) - { - } - - void name_analysis_visitor::visit(while_statement *statement) - { - statement->body().prerequisite().accept(this); - for (struct statement *const statement : statement->body().statements) - { - statement->accept(this); - } - for (const auto branch : statement->branches) - { - branch->prerequisite().accept(this); - - for (struct statement *const statement : branch->statements) - { - statement->accept(this); - } - } - } - - void name_analysis_visitor::visit(return_statement *statement) - { - statement->return_expression().accept(this); - } - - void name_analysis_visitor::visit(defer_statement *statement) - { - for (struct statement *const statement : statement->statements) - { - statement->accept(this); - } - } - - void name_analysis_visitor::visit(case_statement *statement) - { - statement->condition().accept(this); - for (const switch_case& case_block : statement->cases) - { - for (expression *const case_label : case_block.labels) - { - case_label->accept(this); - } - for (struct statement *const statement : case_block.statements) - { - statement->accept(this); - } - } - if (statement->alternative != nullptr) - { - for (struct statement *const statement : *statement->alternative) - { - statement->accept(this); - } - } - } - - void name_analysis_visitor::visit(procedure_call *call) - { - call->callable().accept(this); - for (expression *const argument: call->arguments) - { - argument->accept(this); - } - } - - void name_analysis_visitor::visit(unit *unit) - { - for (type_declaration *const type : unit->types) - { - type->accept(this); - } - for (variable_declaration *const variable : unit->variables) - { - variable->accept(this); - } - for (procedure_declaration *const procedure : unit->procedures) - { - procedure->accept(this); - } - } - - void name_analysis_visitor::visit(traits_expression *trait) - { - if (!trait->parameters.empty()) - { - trait->parameters.front()->accept(this); - trait->types.push_back(this->current_type); - } - } - - void name_analysis_visitor::visit(cast_expression *expression) - { - expression->value().accept(this); - expression->target().accept(this); - expression->expression_type = this->current_type; - } - - void name_analysis_visitor::visit(binary_expression *expression) - { - expression->lhs().accept(this); - expression->rhs().accept(this); - } - - void name_analysis_visitor::visit(unary_expression *expression) - { - expression->operand().accept(this); - } - - void name_analysis_visitor::visit(variable_expression *) - { - } - - void name_analysis_visitor::visit(array_access_expression *expression) - { - expression->base().accept(this); - expression->index().accept(this); - } - - void name_analysis_visitor::visit(field_access_expression *expression) - { - expression->base().accept(this); - } - - void name_analysis_visitor::visit(dereference_expression *expression) - { - expression->base().accept(this); - } - - void name_analysis_visitor::visit(literal *literal) - { - this->current_literal = literal->value; - } - - void name_analysis_visitor::visit(literal *literal) - { - this->current_literal = literal->value; - } - - void name_analysis_visitor::visit(literal *literal) - { - this->current_literal = literal->value; - } - - void name_analysis_visitor::visit(literal *literal) - { - this->current_literal = literal->value; - } - - void name_analysis_visitor::visit(literal *literal) - { - this->current_literal = literal->value; - } - - void name_analysis_visitor::visit(literal *literal) - { - this->current_literal = literal->value; - } - - void name_analysis_visitor::visit(literal *literal) - { - this->current_literal = literal->value; - } - - declaration_visitor::declaration_visitor(const char *path) - : error_container(path) - { - } - - void declaration_visitor::visit(program *program) - { - visit(static_cast(program)); - } - - void declaration_visitor::visit(import_declaration *) - { - } - - void declaration_visitor::visit(unit *unit) - { - for (import_declaration *const _import : unit->imports) - { - _import->accept(this); - } - for (type_declaration *const type : unit->types) - { - type->accept(this); - } - for (variable_declaration *const variable : unit->variables) - { - variable->accept(this); - } - for (procedure_declaration *const procedure : unit->procedures) - { - procedure->accept(this); - } - } - - void declaration_visitor::visit(type_declaration *definition) - { - const std::string& type_identifier = definition->identifier.name; - - if (!this->unresolved.insert({ type_identifier, std::make_shared(type_identifier) }).second) - { - add_error(definition->identifier.name, this->input_file, - definition->position()); - } - } - - void declaration_visitor::visit(variable_declaration *declaration) - { - if (declaration->has_initializer() && declaration->identifiers.size() > 1) - { - add_error(this->input_file, declaration->position()); - } - } - - void declaration_visitor::visit(procedure_declaration *definition) - { - if (!definition->body.has_value()) - { - return; - } - for (variable_declaration *const variable : definition->body.value().variables()) - { - variable->accept(this); - } - } -} diff --git a/include/elna/frontend/dependency.h b/include/elna/frontend/dependency.h deleted file mode 100644 index f1502d1..0000000 --- a/include/elna/frontend/dependency.h +++ /dev/null @@ -1,55 +0,0 @@ -/* Dependency graph analysis. - Copyright (C) 2025 Free Software Foundation, Inc. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see -. */ - -#pragma once - -#include -#include -#include "elna/frontend/result.h" -#include "elna/frontend/ast.h" -#include "elna/frontend/symbol.h" - -namespace elna::frontend -{ - class dependency : public error_container - { - error_list m_errors; - - public: - std::unique_ptr tree; - forward_table unresolved; - - explicit dependency(const char *path); - }; - - dependency read_source(std::istream& entry_point, const char *entry_path); - std::filesystem::path build_path(const std::vector& segments); - error_list analyze_semantics(const char *path, std::unique_ptr& tree, symbol_bag bag); - - template - struct dependency_state - { - const std::shared_ptr globals; - T custom; - std::unordered_map cache; - - explicit dependency_state(T custom) - : globals(builtin_symbol_table()), custom(custom) - { - } - }; -} diff --git a/include/elna/frontend/semantic.h b/include/elna/frontend/semantic.h deleted file mode 100644 index 8a295e4..0000000 --- a/include/elna/frontend/semantic.h +++ /dev/null @@ -1,190 +0,0 @@ -/* Name analysis. - Copyright (C) 2025 Free Software Foundation, Inc. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see -. */ - -#pragma once - -#include -#include -#include -#include - -#include "elna/frontend/ast.h" -#include "elna/frontend/result.h" -#include "elna/frontend/symbol.h" - -namespace elna::frontend -{ - class undeclared_error : public error - { - const std::string identifier; - - public: - undeclared_error(const std::string& identifier, const char *path, const struct position position); - - std::string what() const override; - }; - - class already_declared_error : public error - { - const std::string identifier; - - public: - already_declared_error(const std::string& identifier, const char *path, const struct position position); - - std::string what() const override; - }; - - class field_duplication_error : public error - { - const std::string field_name; - - public: - field_duplication_error(const std::string& field_name, const char *path, const struct position position); - - std::string what() const override; - }; - - class cyclic_declaration_error : public error - { - const std::vector cycle; - - public: - cyclic_declaration_error(const std::vector& cycle, - const char *path, const struct position position); - - std::string what() const override; - }; - - class return_error : public error - { - const std::string identifier; - - public: - return_error(const std::string& identifier, const char *path, const struct position position); - - std::string what() const override; - }; - - class variable_initializer_error : public error - { - public: - variable_initializer_error(const char *path, const struct position position); - - std::string what() const override; - }; - - /** - * Checks types. - */ - class type_analysis_visitor final : public empty_visitor, public error_container - { - bool returns; - symbol_bag bag; - - bool check_unresolved_symbol(std::shared_ptr alias, - std::vector& path); - - public: - explicit type_analysis_visitor(const char *path, symbol_bag bag); - - void visit(program *program) override; - - void visit(procedure_declaration *definition) override; - void visit(assign_statement *) override; - void visit(if_statement *) override; - void visit(while_statement *) override; - void visit(return_statement *) override; - void visit(defer_statement *) override; - void visit(case_statement *) override; - void visit(procedure_call *) override; - void visit(unit *unit) override; - void visit(type_declaration *definition) override; - }; - - /** - * Performs name analysis. - */ - class name_analysis_visitor final : public parser_visitor, public error_container - { - type current_type; - constant_info::variant current_literal; - - symbol_bag bag; - - procedure_type build_procedure(procedure_type_expression& type_expression); - std::vector build_composite_type(const std::vector& fields); - - public: - name_analysis_visitor(const char *path, symbol_bag bag); - - void visit(named_type_expression *type_expression) override; - void visit(array_type_expression *type_expression) override; - void visit(pointer_type_expression *type_expression) override; - void visit(program *program) override; - void visit(type_declaration *definition) override; - void visit(record_type_expression *type_expression) override; - void visit(union_type_expression *type_expression) override; - void visit(procedure_type_expression *type_expression) override; - void visit(enumeration_type_expression *type_expression) override; - - void visit(variable_declaration *declaration) override; - void visit(constant_declaration *definition) override; - void visit(procedure_declaration *definition) override; - void visit(assign_statement *statement) override; - void visit(if_statement *statement) override; - void visit(import_declaration *) override; - void visit(while_statement *statement) override; - void visit(return_statement *statement) override; - void visit(defer_statement *statement) override; - void visit(case_statement *statement) override; - void visit(procedure_call *call) override; - void visit(unit *unit) override; - void visit(cast_expression *expression) override; - void visit(traits_expression *trait) override; - void visit(binary_expression *expression) override; - void visit(unary_expression *expression) override; - void visit(variable_expression *) override; - void visit(array_access_expression *expression) override; - void visit(field_access_expression *expression) override; - void visit(dereference_expression *expression) override; - void visit(literal *literal) override; - void visit(literal *literal) override; - void visit(literal *literal) override; - void visit(literal *literal) override; - void visit(literal *literal) override; - void visit(literal *literal) override; - void visit(literal *literal) override; - }; - - /** - * Collects global declarations without resolving any symbols. - */ - class declaration_visitor final : public empty_visitor, public error_container - { - public: - forward_table unresolved; - - explicit declaration_visitor(const char *path); - - void visit(program *program) override; - void visit(import_declaration *) override; - void visit(unit *unit) override; - void visit(type_declaration *definition) override; - void visit(variable_declaration *declaration) override; - void visit(procedure_declaration *definition) override; - }; -} diff --git a/rakelib/ninja.rake b/rakelib/ninja.rake new file mode 100644 index 0000000..702f9b5 --- /dev/null +++ b/rakelib/ninja.rake @@ -0,0 +1,75 @@ +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. +# frozen_string_literal: true + +require 'rake/clean' + +CLEAN.include 'build/boot', 'build/valid' + +file 'build/build.ninja' => ['build'] do |t| + File.open t.name, 'w' do |f| + f << <<~NINJA + builddir = build + cflags = -fpie -g + + rule cc + command = gcc $cflags -nostdlib -o $out $in + + rule as + command = gcc $cflags -c -o $out $in + + rule link1 + command = ld -o $out $in + + rule link2 + command = ld -o $out --dynamic-linker /lib32/ld-linux-riscv32-ilp32d.so.1 /usr/lib/crt1.o /usr/lib/crti.o -lc $in /usr/lib/crtn.o + + rule bootstrap + command = $bootstrap < \$in > \$out + NINJA + f << <<~NINJA + + build build/boot/stage1/cl: cc boot/stage1.s + build build/valid/stage1/cl.s: bootstrap boot/stage1.s | build/boot/stage1/cl + bootstrap = build/boot/stage1/cl + + build build/valid/stage1/cl.o: as build/valid/stage1/cl.s + build build/valid/stage1/cl: link1 build/valid/stage1/cl.o + NINJA + STAGES.each do |stage| + stage_number = stage.delete_prefix('stage').to_i + + arguments_path = Pathname.new('boot') + stage + 'linker.arg' + if arguments_path.exist? + link = 'link2' + else + link = 'link1' + end + boot_stage = "build/boot/stage#{stage_number}" + valid_stage = "build/valid/stage#{stage_number}" + f << <<~NINJA + + build #{boot_stage}/cl.s: bootstrap boot/stage#{stage_number}/cl.elna | build/valid/stage#{stage_number.pred}/cl + bootstrap = build/valid/stage#{stage_number.pred}/cl + + build #{boot_stage}/cl.o: as #{boot_stage}/cl.s + build #{boot_stage}/cl: #{link} #{boot_stage}/cl.o + + build #{valid_stage}/cl.s: bootstrap boot/stage#{stage_number}/cl.elna | #{boot_stage}/cl + bootstrap = build/boot/stage#{stage_number}/cl + + build #{valid_stage}/cl.o: as #{valid_stage}/cl.s + build #{valid_stage}/cl: #{link} #{valid_stage}/cl.o + NINJA + end + f << <<~NINJA + + default build/valid/#{STAGES.last}/cl + NINJA + end +end + +file "build/valid/#{STAGES.last}/cl" => 'build/build.ninja' do |t| + sh 'ninja', '-f', t.prerequisites.first +end diff --git a/source/Transpiler.elna b/source/Transpiler.elna deleted file mode 100644 index 5a65036..0000000 --- a/source/Transpiler.elna +++ /dev/null @@ -1,631 +0,0 @@ -(* This Source Code Form is subject to the terms of the Mozilla Public License, - v. 2.0. If a copy of the MPL was not distributed with this file, You can - obtain one at https://mozilla.org/MPL/2.0/. *) -module; - -from FIO import File, WriteNBytes, WriteLine, WriteChar, WriteString; -from NumberIO import IntToStr; - -import common, Parser; - -type - TranspilerContext* = record - input_name: String; - output: File; - definition: File; - indentation: Word - end; - -proc indent(context: ^TranspilerContext); -var - count: Word; -begin - count := 0; - - while count < context^.indentation do - WriteString(context^.output, " "); - count := count + 1u - end -end; - -(* Write a semicolon followed by a newline. *) -proc write_semicolon(output: File); -begin - WriteChar(output, ';'); - WriteLine(output) -end; - -proc transpile_import_statement(context: ^TranspilerContext, import_statement: ^AstImportStatement); -var - current_symbol: ^Identifier; -begin - WriteString(context^.output, "FROM "); - transpile_identifier(context, import_statement^.package); - - WriteString(context^.output, " IMPORT "); - - current_symbol := import_statement^.symbols; - transpile_identifier(context, current_symbol^); - current_symbol := current_symbol + 1; - - while current_symbol^[1] <> '\0' do - WriteString(context^.output, ", "); - transpile_identifier(context, current_symbol^); - current_symbol := current_symbol + 1; - end; - write_semicolon(context^.output) -end; - -proc transpile_import_part(context: ^TranspilerContext, imports: ^^AstImportStatement); -var - import_statement: ^AstImportStatement; -begin - while imports^ <> nil do - transpile_import_statement(context, imports^); - imports := imports + 1 - end; - WriteLine(context^.output) -end; - -proc transpile_constant_declaration(context: ^TranspilerContext, declaration: ^AstConstantDeclaration); -var - buffer: [20]Char; -begin - WriteString(context^.output, " "); - transpile_identifier(context, declaration^.constant_name); - - WriteString(context^.output, " = "); - - IntToStr(declaration^.constant_value, 0, buffer); - WriteString(context^.output, buffer); - - write_semicolon(context^.output) -end; - -proc transpile_constant_part(context: ^TranspilerContext, declarations: ^^AstConstantDeclaration, extra_newline: Bool); -var - current_declaration: ^^AstConstantDeclaration; -begin - if declarations^ <> nil then - WriteString(context^.output, "CONST"); - WriteLine(context^.output); - - current_declaration := declarations; - while current_declaration^ <> nil do - transpile_constant_declaration(context, current_declaration^); - - current_declaration := current_declaration + 1 - end; - if extra_newline then - WriteLine(context^.output) - end - end -end; - -proc transpile_module(context: ^TranspilerContext, result: ^AstModule); -begin - if result^.main = false then - WriteString(context^.output, "IMPLEMENTATION ") - end; - WriteString(context^.output, "MODULE "); - - (* Write the module name and end the line with a semicolon and newline. *) - transpile_module_name(context); - - write_semicolon(context^.output); - WriteLine(context^.output); - - (* Write the module body. *) - - transpile_import_part(context, result^.imports); - transpile_constant_part(context, result^.constants, true); - transpile_type_part(context, result^.types); - transpile_variable_part(context, result^.variables, true); - transpile_procedure_part(context, result^.procedures); - transpile_statement_part(context, result^.statements); - - WriteString(context^.output, "END "); - transpile_module_name(context); - - WriteChar(context^.output, "."); - WriteLine(context^.output) -end; - -proc transpile_type_fields(context: ^TranspilerContext, fields: ^AstFieldDeclaration); -var - current_field: ^AstFieldDeclaration; -begin - current_field := fields; - - while current_field^.field_name[1] <> '\0' do - WriteString(context^.output, " "); - transpile_identifier(context, current_field^.field_name); - - WriteString(context^.output, ": "); - transpile_type_expression(context, current_field^.field_type); - - current_field := current_field + 1; - - if current_field^.field_name[1] <> '\0' then - WriteChar(context^.output, ';') - end; - WriteLine(context^.output) - end -end; - -proc transpile_record_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression); -begin - WriteString(context^.output, "RECORD"); - WriteLine(context^.output); - transpile_type_fields(context, type_expression^.fields); - WriteString(context^.output, " END") -end; - -proc transpile_pointer_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression); -begin - WriteString(context^.output, "POINTER TO "); - - transpile_type_expression(context, type_expression^.target) -end; - -proc transpile_array_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression); -var - buffer: [20]Char; -begin - WriteString(context^.output, "ARRAY"); - - if type_expression^.length <> 0 then - WriteString(context^.output, "[1.."); - - IntToStr(type_expression^.length, 0, buffer); - WriteString(context^.output, buffer); - - WriteChar(context^.output, ']') - end; - WriteString(context^.output, " OF "); - - transpile_type_expression(context, type_expression^.base) -end; - -proc transpile_enumeration_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression); -var - current_case: ^Identifier; -begin - current_case := type_expression^.cases; - - WriteString(context^.output, "("); - WriteLine(context^.output); - WriteString(context^.output, " "); - transpile_identifier(context, current_case^); - current_case := current_case + 1; - - while current_case^[1] <> '\0' do - WriteChar(context^.output, ','); - WriteLine(context^.output); - WriteString(context^.output, " "); - transpile_identifier(context, current_case^); - - current_case := current_case + 1 - end; - WriteLine(context^.output); - WriteString(context^.output, " )") -end; - -proc transpile_identifier(context: ^TranspilerContext, identifier: Identifier); -var - written_bytes: Word; -begin - written_bytes := WriteNBytes(context^.output, cast(identifier[1]: Word), @identifier[2]) -end; - -proc transpile_procedure_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression); -var - result: ^AstTypeExpression; - current_parameter: ^^AstTypeExpression; - parameter_count: Word; -begin - WriteString(context^.output, "PROCEDURE("); - current_parameter := type_expression^.parameters; - - while current_parameter^ <> nil do - transpile_type_expression(context, current_parameter^); - - current_parameter := current_parameter + 1; - - if current_parameter^ <> nil then - WriteString(context^.output, ", ") - end - end; - WriteChar(context^.output, ')') -end; - -proc transpile_type_expression(context: ^TranspilerContext, type_expression: ^AstTypeExpression); -begin - if type_expression^.kind = astTypeExpressionKindRecord then - transpile_record_type(context, type_expression) - end; - if type_expression^.kind = astTypeExpressionKindEnumeration then - transpile_enumeration_type(context, type_expression) - end; - if type_expression^.kind = astTypeExpressionKindArray then - transpile_array_type(context, type_expression) - end; - if type_expression^.kind = astTypeExpressionKindPointer then - transpile_pointer_type(context, type_expression) - end; - if type_expression^.kind = astTypeExpressionKindProcedure then - transpile_procedure_type(context, type_expression) - end; - if type_expression^.kind = astTypeExpressionKindNamed then - transpile_identifier(context, type_expression^.name) - end -end; - -proc transpile_type_declaration(context: ^TranspilerContext, declaration: ^AstTypedDeclaration); -var - written_bytes: Word; -begin - WriteString(context^.output, " "); - - transpile_identifier(context^.output, declaration^.identifier); - WriteString(context^.output, " = "); - - transpile_type_expression(context, declaration^.type_expression); - write_semicolon(context^.output) -end; - -proc transpile_type_part(context: ^TranspilerContext, declarations: ^^AstTypedDeclaration); -var - current_declaration: ^^AstTypedDeclaration; -begin - if declarations^ <> nil then - WriteString(context^.output, "TYPE"); - WriteLine(context^.output); - - current_declaration := declarations; - while current_declaration^ <> nil do - transpile_type_declaration(context, current_declaration^); - - current_declaration := current_declaration + 1 - end; - WriteLine(context^.output) - end -end; - -proc transpile_variable_declaration(context: ^TranspilerContext, declaration: ^AstVariableDeclaration); -begin - WriteString(context^.output, " "); - transpile_identifier(context, declaration^.variable_name); - - WriteString(context^.output, ": "); - - transpile_type_expression(context, declaration^.variable_type); - write_semicolon(context^.output) -end; - -proc transpile_variable_part(context: ^TranspilerContext, declarations: ^^AstVariableDeclaration, extra_newline: Bool); -var - current_declaration: ^^AstVariableDeclaration; -begin - if declarations^ <> nil then - WriteString(context^.output, "VAR"); - WriteLine(context^.output); - - current_declaration := declarations; - while current_declaration^ <> nil do - transpile_variable_declaration(context, current_declaration^); - - current_declaration := current_declaration + 1 - end; - if extra_newline then - WriteLine(context^.output) - end - end -end; - -proc transpile_procedure_heading(context: ^TranspilerContext, declaration: ^AstProcedureDeclaration); -var - parameter_index: Word; - current_parameter: ^AstTypedDeclaration; -begin - WriteString(context^.output, "PROCEDURE "); - transpile_identifier(context, declaration^.name); - WriteChar(context^.output, '('); - - parameter_index := 0; - current_parameter := declaration^.parameters; - - while parameter_index < declaration^.parameter_count do - transpile_identifier(context, current_parameter^.identifier); - WriteString(context^.output, ": "); - transpile_type_expression(context, current_parameter^.type_expression); - - parameter_index := parameter_index + 1u; - current_parameter := current_parameter + 1; - - if parameter_index <> declaration^.parameter_count then - WriteString(context^.output, "; ") - end - end; - - WriteString(context^.output, ")"); - - (* Check for the return type and write it. *) - if declaration^.return_type <> nil then - WriteString(context^.output, ": "); - transpile_type_expression(context, declaration^.return_type) - end; - write_semicolon(context^.output) -end; - -proc transpile_unary_operator(context: ^TranspilerContext, operator: AstUnaryOperator); -begin - if operator = AstUnaryOperator.minus then - WriteChar(context^.output, '-') - end; - if operator = AstUnaryOperator.not then - WriteChar(context^.output, '~') - end -end; - -proc transpile_binary_operator(context: ^TranspilerContext, operator: AstBinaryOperator); -begin - case operator of - AstBinaryOperator.sum: WriteChar(context^.output, '+') - | AstBinaryOperator.subtraction: WriteChar(context^.output, '-') - | AstBinaryOperator.multiplication: WriteChar(context^.output, '*') - | AstBinaryOperator.equals: WriteChar(context^.output, '=') - | AstBinaryOperator.not_equals: WriteChar(context^.output, '#') - | AstBinaryOperator.less: WriteChar(context^.output, '<') - | AstBinaryOperator.greater: WriteChar(context^.output, '>') - | AstBinaryOperator.less_equal: WriteString(context^.output, "<=") - | AstBinaryOperator.greater_equal: WriteString(context^.output, ">=") - | AstBinaryOperator.disjunction: WriteString(context^.output, "OR") - | AstBinaryOperatorConjunction: WriteString(context^.output, "AND") - end -end; - -proc transpile_expression(context: ^TranspilerContext, expression: ^AstExpression); -var - literal: ^AstLiteral; - buffer: [20]Char; - argument_index: Word; - current_argument: ^^AstExpression; -begin - if expression^.kind = astExpressionKindLiteral then - literal := expression^.literal; - - if literal^.kind = AstLiteralKind.integer then - IntToStr(literal^.integer, 0, buffer); - WriteString(context^.output, buffer) - end; - if literal^.kind = AstLiteralKind.string then - WriteString(context^.output, literal^.string) - end; - if literal^.kind = AstLiteralKind.null then - WriteString(context^.output, "NIL") - end; - if (literal^.kind = AstLiteralKind.boolean) & literal^.boolean then - WriteString(context^.output, "TRUE") - end; - if (literal^.kind = AstLiteralKind.boolean) & (literal^.boolean = false) then - WriteString(context^.output, "FALSE") - end - end; - if expression^.kind = astExpressionKindIdentifier then - transpile_identifier(context, expression^.identifier) - end; - if expression^.kind = astExpressionKindDereference then - transpile_expression(context, expression^.reference); - WriteChar(context^.output, '^') - end; - if expression^.kind = astExpressionKindArrayAccess then - transpile_expression(context, expression^.array); - WriteChar(context^.output, '['); - transpile_expression(context, expression^.index); - WriteChar(context^.output, ']') - end; - if expression^.kind = astExpressionKindFieldAccess then - transpile_expression(context, expression^.aggregate); - WriteChar(context^.output, '.'); - transpile_identifier(contextexpression^.field) - end; - if expression^.kind = astExpressionKindUnary then - transpile_unary_operator(context, expression^.unary_operator); - transpile_expression(context, expression^.unary_operand) - end; - if expression^.kind = astExpressionKindBinary then - WriteChar(context^.output, '('); - transpile_expression(context, expression^.lhs); - WriteChar(context^.output, ' '); - transpile_binary_operator(context, expression^.binary_operator); - WriteChar(context^.output, ' '); - transpile_expression(context, expression^.rhs); - WriteChar(context^.output, ')') - end; - if expression^.kind = astExpressionKindCall then - transpile_expression(context, expression^.callable); - WriteChar(context^.output, '('); - - current_argument := expression^.arguments; - if expression^.argument_count > 0 then - transpile_expression(context, current_argument^); - - argument_index := 1u; - current_argument := current_argument + 1; - - while argument_index < expression^.argument_count do - WriteString(context^.output, ", "); - - transpile_expression(context, current_argument^); - - current_argument := current_argument + 1; - argument_index := argument_index + 1u - end - end; - WriteChar(context^.output, ')') - end -end; - -proc transpile_if_statement(context: ^TranspilerContext, statement: ^AstStatement); -begin - WriteString(context^.output, "IF "); - transpile_expression(context, statement^.if_condition); - - WriteString(context^.output, " THEN"); - WriteLine(context^.output); - context^.indentation := context^.indentation + 1u; - - transpile_compound_statement(context, statement^.if_branch); - context^.indentation := context^.indentation - 1u; - indent(context); - WriteString(context^.output, "END") -end; - -proc transpile_while_statement(context: ^TranspilerContext, statement: ^AstStatement); -begin - WriteString(context^.output, "WHILE "); - transpile_expression(context, statement^.while_condition); - - WriteString(context^.output, " DO"); - WriteLine(context^.output); - context^.indentation := context^.indentation + 1u; - - transpile_compound_statement(context, statement^.while_body); - context^.indentation := context^.indentation - 1u; - indent(context); - WriteString(context^.output, "END") -end; - -proc transpile_assignment_statement(context: ^TranspilerContext, statement: ^AstStatement); -begin - transpile_expression(context, statement^.assignee); - WriteString(context^.output, " := "); - transpile_expression(context, statement^.assignment) -end; - -proc transpile_return_statement(context: ^TranspilerContext, statement: ^AstStatement); -begin - WriteString(context^.output, "RETURN "); - - transpile_expression(context, statement^.returned) -end; - -proc transpile_compound_statement(context: ^TranspilerContext, statement: AstCompoundStatement); -var - current_statement: ^^AstStatement; - index: Word; -begin - index := 0; - current_statement := statement.statements; - - while index < statement.count do - transpile_statement(context, current_statement^); - - current_statement := current_statement + 1; - index := index + 1u; - - if index <> statement.count then - WriteChar(context^.output, ';') - end; - WriteLine(context^.output) - end -end; - -proc transpile_statement(context: ^TranspilerContext, statement: ^AstStatement); -begin - indent(context); - - if statement^.kind = astStatementKindIf then - transpile_if_statement(context, statement) - end; - if statement^.kind = astStatementKindWhile then - transpile_while_statement(context, statement) - end; - if statement^.kind = astStatementKindReturn then - transpile_return_statement(context, statement) - end; - if statement^.kind = astStatementKindAssignment then - transpile_assignment_statement(context, statement) - end; - if statement^.kind = astStatementKindCall then - transpile_expression(context, statement^.call) - end -end; - -proc transpile_statement_part(context: ^TranspilerContext, compound: AstCompoundStatement); -begin - if compound.count > 0 then - WriteString(context^.output, "BEGIN"); - WriteLine(context^.output); - - context^.indentation := context^.indentation + 1u; - transpile_compound_statement(context, compound); - context^.indentation := context^.indentation - 1u; - end -end; - -proc transpile_procedure_declaration(context: ^TranspilerContext, declaration: ^AstProcedureDeclaration); -begin - transpile_procedure_heading(context, declaration); - - transpile_constant_part(context, declaration^.constants, false); - transpile_variable_part(context, declaration^.variables, false); - transpile_statement_part(context, declaration^.statements); - - WriteString(context^.output, "END "); - transpile_identifier(context^.output, declaration^.name); - - write_semicolon(context^.output) -end; - -proc transpile_procedure_part(context: ^TranspilerContext, declaration: ^^AstProcedureDeclaration); -begin - while declaration^ <> nil do - transpile_procedure_declaration(context, declaration^); - WriteLine(context^.output); - - declaration := declaration + 1 - end -end; - -proc transpile_module_name(context: ^TranspilerContext); -var - counter: Word; - last_slash: Word; -begin - counter := 1u; - last_slash := 0u; - - while context^.input_name[counter] <> '.' & context^.input_name[counter] <> '\0' do - if context^.input_name[counter] = '/' then - last_slash := counter - end; - counter := counter + 1u - end; - - if last_slash = 0u then - counter := 1u - end; - if last_slash <> 0u then - counter := last_slash + 1u - end; - while context^.input_name[counter] <> '.' & context^.input_name[counter] <> '\0' do - WriteChar(context^.output, context^.input_name[counter]); - counter := counter + 1u - end -end; - -proc transpile*(ast_module: ^AstModule, output: File, definition: File, input_name: String); -var - context: TranspilerContext; -begin - context.input_name := input_name; - context.output := output; - context.definition := definition; - context.indentation := 0u; - - transpile_module(@context, ast_module) -end; - -end. diff --git a/source/lexer.elna b/source/lexer.elna deleted file mode 100644 index d5f529b..0000000 --- a/source/lexer.elna +++ /dev/null @@ -1,952 +0,0 @@ -(* This Source Code Form is subject to the terms of the Mozilla Public License, - v. 2.0. If a copy of the MPL was not distributed with this file, You can - obtain one at https://mozilla.org/MPL/2.0/. *) -module; - -import cstdio, cstring, cctype, cstdlib, common; - -const - CHUNK_SIZE := 85536u; - -type - (* - * Classification table assigns each possible character to a group (class). All - * characters of the same group are handled equivalently. - * - * Classification: - *) - TransitionClass = ( - invalid, - digit, - alpha, - space, - colon, - equals, - left_paren, - right_paren, - asterisk, - underscore, - single, - hex, - zero, - x, - eof, - dot, - minus, - single_quote, - double_quote, - greater, - less, - other - ); - TransitionState = ( - start, - colon, - identifier, - decimal, - greater, - minus, - left_paren, - less, - dot, - comment, - closing_comment, - character, - string, - leading_zero, - decimal_suffix, - finish - ); - LexerToken = record - kind: LexerKind; - value: union - booleanKind: Bool; - identifierKind: Identifier; - integerKind: Int; - stringKind: String - end; - start_location: TextLocation; - end_location: TextLocation - end; - TransitionAction = proc(^Lexer, ^LexerToken); - Transition = record - action: TransitionAction; - next_state: TransitionState - end; - TransitionClasses = [22]Transition; - - BufferPosition* = record - iterator: ^Char; - location: TextLocation - end; - Lexer* = record - input: ^FILE; - buffer: ^Char; - size: Word; - length: Word; - start: BufferPosition; - current: BufferPosition - end; - LexerKind* = ( - unknown, - identifier, - _if, - _then, - _else, - _elsif, - _while, - _do, - _proc, - _begin, - _end, - _extern, - _const, - _var, - _case, - _of, - _type, - _record, - _union, - pipe, - to, - boolean, - null, - and, - _or, - _xor, - not, - _return, - _cast, - shift_left, - shift_right, - left_paren, - right_paren, - left_square, - right_square, - greater_equal, - less_equal, - greater_than, - less_than, - not_equal, - equal, - semicolon, - dot, - comma, - plus, - minus, - multiplication, - division, - remainder, - assignment, - colon, - hat, - at, - comment, - integer, - word, - character, - string, - _defer, - exclamation, - arrow, - trait, - _program, - _module, - _import - ); - -var - classification: [128]TransitionClass; - transitions: [16]TransitionClasses; - -proc initialize_classification(); -var - i: Word; -begin - classification[1] := TransitionClass.eof; (* NUL *) - classification[2] := TransitionClass.invalid; (* SOH *) - classification[3] := TransitionClass.invalid; (* STX *) - classification[4] := TransitionClass.invalid; (* ETX *) - classification[5] := TransitionClass.invalid; (* EOT *) - classification[6] := TransitionClass.invalid; (* EMQ *) - classification[7] := TransitionClass.invalid; (* ACK *) - classification[8] := TransitionClass.invalid; (* BEL *) - classification[9] := TransitionClass.invalid; (* BS *) - classification[10] := TransitionClass.space; (* HT *) - classification[11] := TransitionClass.space; (* LF *) - classification[12] := TransitionClass.invalid; (* VT *) - classification[13] := TransitionClass.invalid; (* FF *) - classification[14] := TransitionClass.space; (* CR *) - classification[15] := TransitionClass.invalid; (* SO *) - classification[16] := TransitionClass.invalid; (* SI *) - classification[17] := TransitionClass.invalid; (* DLE *) - classification[18] := TransitionClass.invalid; (* DC1 *) - classification[19] := TransitionClass.invalid; (* DC2 *) - classification[20] := TransitionClass.invalid; (* DC3 *) - classification[21] := TransitionClass.invalid; (* DC4 *) - classification[22] := TransitionClass.invalid; (* NAK *) - classification[23] := TransitionClass.invalid; (* SYN *) - classification[24] := TransitionClass.invalid; (* ETB *) - classification[25] := TransitionClass.invalid; (* CAN *) - classification[26] := TransitionClass.invalid; (* EM *) - classification[27] := TransitionClass.invalid; (* SUB *) - classification[28] := TransitionClass.invalid; (* ESC *) - classification[29] := TransitionClass.invalid; (* FS *) - classification[30] := TransitionClass.invalid; (* GS *) - classification[31] := TransitionClass.invalid; (* RS *) - classification[32] := TransitionClass.invalid; (* US *) - classification[33] := TransitionClass.space; (* Space *) - classification[34] := TransitionClass.single; (* ! *) - classification[35] := TransitionClass.double_quote; (* " *) - classification[36] := TransitionClass.other; (* # *) - classification[37] := TransitionClass.other; (* $ *) - classification[38] := TransitionClass.single; (* % *) - classification[39] := TransitionClass.single; (* & *) - classification[40] := TransitionClass.single_quote; (* ' *) - classification[41] := TransitionClass.left_paren; (* ( *) - classification[42] := TransitionClass.right_paren; (* ) *) - classification[43] := TransitionClass.asterisk; (* * *) - classification[44] := TransitionClass.single; (* + *) - classification[45] := TransitionClass.single; (* , *) - classification[46] := TransitionClass.minus; (* - *) - classification[47] := TransitionClass.dot; (* . *) - classification[48] := TransitionClass.single; (* / *) - classification[49] := TransitionClass.zero; (* 0 *) - classification[50] := TransitionClass.digit; (* 1 *) - classification[51] := TransitionClass.digit; (* 2 *) - classification[52] := TransitionClass.digit; (* 3 *) - classification[53] := TransitionClass.digit; (* 4 *) - classification[54] := TransitionClass.digit; (* 5 *) - classification[55] := TransitionClass.digit; (* 6 *) - classification[56] := TransitionClass.digit; (* 7 *) - classification[57] := TransitionClass.digit; (* 8 *) - classification[58] := TransitionClass.digit; (* 9 *) - classification[59] := TransitionClass.colon; (* : *) - classification[60] := TransitionClass.single; (* ; *) - classification[61] := TransitionClass.less; (* < *) - classification[62] := TransitionClass.equals; (* = *) - classification[63] := TransitionClass.greater; (* > *) - classification[64] := TransitionClass.other; (* ? *) - classification[65] := TransitionClass.single; (* @ *) - classification[66] := TransitionClass.alpha; (* A *) - classification[67] := TransitionClass.alpha; (* B *) - classification[68] := TransitionClass.alpha; (* C *) - classification[69] := TransitionClass.alpha; (* D *) - classification[70] := TransitionClass.alpha; (* E *) - classification[71] := TransitionClass.alpha; (* F *) - classification[72] := TransitionClass.alpha; (* G *) - classification[73] := TransitionClass.alpha; (* H *) - classification[74] := TransitionClass.alpha; (* I *) - classification[75] := TransitionClass.alpha; (* J *) - classification[76] := TransitionClass.alpha; (* K *) - classification[77] := TransitionClass.alpha; (* L *) - classification[78] := TransitionClass.alpha; (* M *) - classification[79] := TransitionClass.alpha; (* N *) - classification[80] := TransitionClass.alpha; (* O *) - classification[81] := TransitionClass.alpha; (* P *) - classification[82] := TransitionClass.alpha; (* Q *) - classification[83] := TransitionClass.alpha; (* R *) - classification[84] := TransitionClass.alpha; (* S *) - classification[85] := TransitionClass.alpha; (* T *) - classification[86] := TransitionClass.alpha; (* U *) - classification[87] := TransitionClass.alpha; (* V *) - classification[88] := TransitionClass.alpha; (* W *) - classification[89] := TransitionClass.alpha; (* X *) - classification[90] := TransitionClass.alpha; (* Y *) - classification[91] := TransitionClass.alpha; (* Z *) - classification[92] := TransitionClass.single; (* [ *) - classification[93] := TransitionClass.other; (* \ *) - classification[94] := TransitionClass.single; (* ] *) - classification[95] := TransitionClass.single; (* ^ *) - classification[96] := TransitionClass.underscore; (* _ *) - classification[97] := TransitionClass.other; (* ` *) - classification[98] := TransitionClass.hex; (* a *) - classification[99] := TransitionClass.hex; (* b *) - classification[100] := TransitionClass.hex; (* c *) - classification[101] := TransitionClass.hex; (* d *) - classification[102] := TransitionClass.hex; (* e *) - classification[103] := TransitionClass.hex; (* f *) - classification[104] := TransitionClass.alpha; (* g *) - classification[105] := TransitionClass.alpha; (* h *) - classification[106] := TransitionClass.alpha; (* i *) - classification[107] := TransitionClass.alpha; (* j *) - classification[108] := TransitionClass.alpha; (* k *) - classification[109] := TransitionClass.alpha; (* l *) - classification[110] := TransitionClass.alpha; (* m *) - classification[111] := TransitionClass.alpha; (* n *) - classification[112] := TransitionClass.alpha; (* o *) - classification[113] := TransitionClass.alpha; (* p *) - classification[114] := TransitionClass.alpha; (* q *) - classification[115] := TransitionClass.alpha; (* r *) - classification[116] := TransitionClass.alpha; (* s *) - classification[117] := TransitionClass.alpha; (* t *) - classification[118] := TransitionClass.alpha; (* u *) - classification[119] := TransitionClass.alpha; (* v *) - classification[120] := TransitionClass.alpha; (* w *) - classification[121] := TransitionClass.x; (* x *) - classification[122] := TransitionClass.alpha; (* y *) - classification[123] := TransitionClass.alpha; (* z *) - classification[124] := TransitionClass.other; (* { *) - classification[125] := TransitionClass.single; (* | *) - classification[126] := TransitionClass.other; (* } *) - classification[127] := TransitionClass.single; (* ~ *) - classification[128] := TransitionClass.invalid; (* DEL *) - - i := 129u; - while i <= 256u do - classification[i] := TransitionClass.other; - i := i + 1u - end -end; - -proc compare_keyword(keyword: String, token_start: BufferPosition, token_end: ^Char) -> Bool; -var - result: Bool; - index: Word; - continue: Bool; -begin - index := 0u; - result := true; - continue := (index < keyword.length) & (token_start.iterator <> token_end); - - while continue & result do - result := keyword[index] = token_start.iterator^ - or cast(tolower(cast(keyword[index]: Int)): Char) = token_start.iterator^; - token_start.iterator := token_start.iterator + 1; - index := index + 1u; - continue := (index < keyword.length) & (token_start.iterator <> token_end) - end; - result := result & index = keyword.length; - - return result & (token_start.iterator = token_end) -end; - -(* Reached the end of file. *) -proc transition_action_eof(lexer: ^Lexer, token: ^LexerToken); -begin - token^.kind := LexerKind.unknown -end; - -proc increment(position: ^BufferPosition); -begin - position^.iterator := position^.iterator + 1 -end; - -(* Add the character to the token currently read and advance to the next character. *) -proc transition_action_accumulate(lexer: ^Lexer, token: ^LexerToken); -begin - increment(@lexer^.current) -end; - -(* The current character is not a part of the token. Finish the token already - * read. Don't advance to the next character. *) -proc transition_action_finalize(lexer: ^Lexer, token: ^LexerToken); -begin - if lexer^.start.iterator^ = ':' then - token^.kind := LexerKind.colon - end; - if lexer^.start.iterator^ = '>' then - token^.kind := LexerKind.greater_than - end; - if lexer^.start.iterator^ = '<' then - token^.kind := LexerKind.less_than - end; - if lexer^.start.iterator^ = '(' then - token^.kind := LexerKind.left_paren - end; - if lexer^.start.iterator^ = '-' then - token^.kind := LexerKind.minus - end; - if lexer^.start.iterator^ = '.' then - token^.kind := LexerKind.dot - end -end; - -(* An action for tokens containing multiple characters. *) -proc transition_action_composite(lexer: ^Lexer, token: ^LexerToken); -begin - if lexer^.start.iterator^ = '<' then - if lexer^.current.iterator^ = '>' then - token^.kind := LexerKind.not_equal - end; - if lexer^.current.iterator^ = '=' then - token^.kind := LexerKind.less_equal - end - end; - if (lexer^.start.iterator^ = '>') & (lexer^.current.iterator^ = '=') then - token^.kind := LexerKind.greater_equal - end; - if (lexer^.start.iterator^ = ':') & (lexer^.current.iterator^ = '=') then - token^.kind := LexerKind.assignment - end; - if (lexer^.start.iterator^ = '-') & (lexer^.current.iterator^ = '>') then - token^.kind := LexerKind.arrow - end; - increment(@lexer^.current) -end; - -(* Skip a space. *) -proc transition_action_skip(lexer: ^Lexer, token: ^LexerToken); -begin - increment(@lexer^.start); - - if lexer^.start.iterator^ = '\n' then - lexer^.start.location.line := lexer^.start.location.line + 1u; - lexer^.start.location.column := 1u - end; - lexer^.current := lexer^.start -end; - -(* Delimited string action. *) -proc transition_action_delimited(lexer: ^Lexer, token: ^LexerToken); -var - text_length: Word; -begin - if lexer^.start.iterator^ = '(' then - token^.kind := LexerKind.comment - end; - if lexer^.start.iterator^ = '"' then - text_length := cast(lexer^.current.iterator - lexer^.start.iterator + 1: Word); - - token^.value.stringKind := String(cast(malloc(text_length): ^Char), text_length); - memcpy(cast(token^.value.stringKind.ptr: Pointer), cast(lexer^.start.iterator: Pointer), text_length); - - token^.kind := LexerKind.character - end; - if lexer^.start.iterator^ = '\'' then - text_length := cast(lexer^.current.iterator - lexer^.start.iterator + 1: Word); - - token^.value.stringKind := String(cast(malloc(text_length): ^Char), text_length); - memcpy(cast(token^.value.stringKind.ptr: Pointer), cast(lexer^.start.iterator: Pointer), text_length); - - token^.kind := LexerKind.string - end; - increment(@lexer^.current) -end; - -(* Finalize keyword or identifier. *) -proc transition_action_key_id(lexer: ^Lexer, token: ^LexerToken); -begin - token^.kind := LexerKind.identifier; - - token^.value.identifierKind[1] := cast(lexer^.current.iterator - lexer^.start.iterator: Char); - memcpy(cast(@token^.value.identifierKind[2]: Pointer), cast(lexer^.start.iterator: Pointer), cast(token^.value.identifierKind[1]: Word)); - - if compare_keyword("program", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._program - end; - if compare_keyword("import", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._import - end; - if compare_keyword("const", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._const - end; - if compare_keyword("var", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._var - end; - if compare_keyword("if", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._if - end; - if compare_keyword("then", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._then - end; - if compare_keyword("elsif", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._elsif - end; - if compare_keyword("else", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._else - end; - if compare_keyword("while", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._while - end; - if compare_keyword("do", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._do - end; - if compare_keyword("proc", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._proc - end; - if compare_keyword("begin", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._begin - end; - if compare_keyword("end", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._end - end; - if compare_keyword("type", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._type - end; - if compare_keyword("record", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._record - end; - if compare_keyword("union", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._union - end; - if compare_keyword("NIL", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind.null - end; - if compare_keyword("or", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._or - end; - if compare_keyword("return", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._return - end; - if compare_keyword("defer", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._defer - end; - if compare_keyword("TO", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind.to - end; - if compare_keyword("CASE", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._case - end; - if compare_keyword("OF", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._of - end; - if compare_keyword("module", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._module - end; - if compare_keyword("xor", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind._xor - end; - if compare_keyword("TRUE", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind.boolean; - token^.value.booleanKind := true - end; - if compare_keyword("FALSE", lexer^.start, lexer^.current.iterator) then - token^.kind := LexerKind.boolean; - token^.value.booleanKind := false - end -end; - -(* Action for tokens containing only one character. The character cannot be - * followed by other characters forming a composite token. *) -proc transition_action_single(lexer: ^Lexer, token: ^LexerToken); -begin - if lexer^.current.iterator^ = '&' then - token^.kind := LexerKind.and - end; - if lexer^.current.iterator^ = ';' then - token^.kind := LexerKind.semicolon - end; - if lexer^.current.iterator^ = ',' then - token^.kind := LexerKind.comma - end; - if lexer^.current.iterator^ = '~' then - token^.kind := LexerKind.not - end; - if lexer^.current.iterator^ = ')' then - token^.kind := LexerKind.right_paren - end; - if lexer^.current.iterator^ = '[' then - token^.kind := LexerKind.left_square - end; - if lexer^.current.iterator^ = ']' then - token^.kind := LexerKind.right_square - end; - if lexer^.current.iterator^ = '^' then - token^.kind := LexerKind.hat - end; - if lexer^.current.iterator^ = '=' then - token^.kind := LexerKind.equal - end; - if lexer^.current.iterator^ = '+' then - token^.kind := LexerKind.plus - end; - if lexer^.current.iterator^ = '*' then - token^.kind := LexerKind.multiplication - end; - if lexer^.current.iterator^ = '/' then - token^.kind := LexerKind.division - end; - if lexer^.current.iterator^ = '%' then - token^.kind := LexerKind.remainder - end; - if lexer^.current.iterator^ = '@' then - token^.kind := LexerKind.at - end; - if lexer^.current.iterator^ = '|' then - token^.kind := LexerKind.pipe - end; - increment(@lexer^.current) -end; - -(* Handle an integer literal. *) -proc transition_action_integer(lexer: ^Lexer, token: ^LexerToken); -var - buffer: String; - integer_length: Word; - found: Bool; -begin - token^.kind := LexerKind.integer; - - integer_length := cast(lexer^.current.iterator - lexer^.start.iterator: Word); - memset(cast(token^.value.identifierKind.ptr: Pointer), 0, #size(Identifier)); - memcpy(cast(@token^.value.identifierKind[1]: Pointer), cast(lexer^.start.iterator: Pointer), integer_length); - - token^.value.identifierKind[cast(token^.value.identifierKind[1]: Int) + 2] := '\0'; - token^.value.integerKind := atoi(@token^.value.identifierKind[2]) -end; - -proc set_default_transition(current_state: TransitionState, default_action: TransitionAction, next_state: TransitionState) -> Int; -var - default_transition: Transition; - state_index: Int; -begin - default_transition.action := default_action; - default_transition.next_state := next_state; - state_index := cast(current_state: Int) + 1; - - transitions[state_index][cast(TransitionClass.invalid: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.digit: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.alpha: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.space: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.colon: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.equals: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.left_paren: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.right_paren: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.asterisk: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.underscore: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.single: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.hex: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.zero: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.x: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.eof: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.dot: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.minus: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.single_quote: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.double_quote: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.greater: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.less: Int) + 1] := default_transition; - transitions[state_index][cast(TransitionClass.other: Int) + 1] := default_transition; - - return state_index -end; - -(* - * The transition table describes transitions from one state to another, given - * a symbol (character class). - * - * The table has m rows and n columns, where m is the amount of states and n is - * the amount of classes. So given the current state and a classified character - * the table can be used to look up the next state. - * - * Each cell is a word long. - * - The least significant byte of the word is a row number (beginning with 0). - * It specifies the target state. "ff" means that this is an end state and no - * transition is possible. - * - The next byte is the action that should be performed when transitioning. - * For the meaning of actions see labels in the lex_next function, which - * handles each action. - *) -proc initialize_transitions(); -var - state_index: Int; -begin - (* Start state. *) - state_index := cast(TransitionState.start: Int) + 1; - - transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.decimal; - - transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.identifier; - - transitions[state_index][cast(TransitionClass.space: Int) + 1].action := transition_action_skip; - transitions[state_index][cast(TransitionClass.space: Int) + 1].next_state := TransitionState.start; - - transitions[state_index][cast(TransitionClass.colon: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.colon: Int) + 1].next_state := TransitionState.colon; - - transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_single; - transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.left_paren: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.left_paren: Int) + 1].next_state := TransitionState.left_paren; - - transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].action := transition_action_single; - transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_single; - transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.identifier; - - transitions[state_index][cast(TransitionClass.single: Int) + 1].action := transition_action_single; - transitions[state_index][cast(TransitionClass.single: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.identifier; - - transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.leading_zero; - - transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.identifier; - - transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := transition_action_eof; - transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.dot: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.dot: Int) + 1].next_state := TransitionState.dot; - - transitions[state_index][cast(TransitionClass.minus: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.minus: Int) + 1].next_state := TransitionState.minus; - - transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].next_state := TransitionState.character; - - transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].next_state := TransitionState.string; - - transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.greater; - - transitions[state_index][cast(TransitionClass.less: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.less: Int) + 1].next_state := TransitionState.less; - - transitions[state_index][cast(TransitionClass.other: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.other: Int) + 1].next_state := TransitionState.finish; - - (* Colon state. *) - state_index := set_default_transition(TransitionState.colon, transition_action_finalize, TransitionState.finish); - - transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite; - transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish; - - (* Identifier state. *) - state_index := set_default_transition(TransitionState.identifier, transition_action_key_id, TransitionState.finish); - - transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.identifier; - - transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.identifier; - - transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.identifier; - - transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.identifier; - - transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.identifier; - - transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.identifier; - - (* Decimal state. *) - state_index := set_default_transition(TransitionState.decimal, transition_action_integer, TransitionState.finish); - - transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.decimal; - - transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.decimal_suffix; - - transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.decimal_suffix; - - transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.decimal; - - transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.decimal_suffix; - - (* Greater state. *) - state_index := set_default_transition(TransitionState.greater, transition_action_finalize, TransitionState.finish); - - transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite; - transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish; - - (* Minus state. *) - state_index := set_default_transition(TransitionState.minus, transition_action_finalize, TransitionState.finish); - - transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_composite; - transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.finish; - - (* Left paren state. *) - state_index := set_default_transition(TransitionState.left_paren, transition_action_finalize, TransitionState.finish); - - transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.comment; - - (* Less state. *) - state_index := set_default_transition(TransitionState.less, transition_action_finalize, TransitionState.finish); - - transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite; - transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_composite; - transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.finish; - - (* Hexadecimal after 0x. *) - state_index := set_default_transition(TransitionState.dot, transition_action_finalize, TransitionState.finish); - - transitions[state_index][cast(TransitionClass.dot: Int) + 1].action := transition_action_composite; - transitions[state_index][cast(TransitionClass.dot: Int) + 1].next_state := TransitionState.finish; - - (* Comment. *) - state_index := set_default_transition(TransitionState.comment, transition_action_accumulate, TransitionState.comment); - - transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.closing_comment; - - transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; - - (* Closing comment. *) - state_index := set_default_transition(TransitionState.closing_comment, transition_action_accumulate, TransitionState.comment); - - transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].action := transition_action_delimited; - transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate; - transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.closing_comment; - - transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; - - (* Character. *) - state_index := set_default_transition(TransitionState.character, transition_action_accumulate, TransitionState.character); - - transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].action := transition_action_delimited; - transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].next_state := TransitionState.finish; - - (* String. *) - state_index := set_default_transition(TransitionState.string, transition_action_accumulate, TransitionState.string); - - transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].action := transition_action_delimited; - transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].next_state := TransitionState.finish; - - (* Leading zero. *) - state_index := set_default_transition(TransitionState.leading_zero, transition_action_integer, TransitionState.finish); - - transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.x: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.finish; - - (* Digit with a character suffix. *) - state_index := set_default_transition(TransitionState.decimal_suffix, transition_action_integer, TransitionState.finish); - - transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.finish; - - transitions[state_index][cast(TransitionClass.x: Int) + 1].action := nil; - transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.finish -end; - -proc lexer_make*(lexer: ^Lexer, input: ^FILE); -begin - lexer^.input := input; - lexer^.length := 0u; - - lexer^.buffer := cast(malloc(CHUNK_SIZE): ^Char); - memset(cast(lexer^.buffer: Pointer), 0, CHUNK_SIZE); - lexer^.size := CHUNK_SIZE -end; - -(* Returns the last read token. *) -proc lexer_current*(lexer: ^Lexer) -> LexerToken; -var - current_class: TransitionClass; - current_state: TransitionState; - current_transition: Transition; - result: LexerToken; - index1: Word; - index2: Word; -begin - lexer^.current := lexer^.start; - current_state := TransitionState.start; - - while current_state <> TransitionState.finish do - index1 := cast(lexer^.current.iterator^: Word) + 1u; - current_class := classification[index1]; - - index1 := cast(current_state: Word) + 1u; - index2 := cast(current_class: Word) + 1u; - - current_transition := transitions[index1][index2]; - if current_transition.action <> nil then - current_transition.action(lexer, @result) - end; - current_state := current_transition.next_state - end; - result.start_location := lexer^.start.location; - result.end_location := lexer^.current.location; - - return result -end; - -(* Read and return the next token. *) -proc lexer_lex*(lexer: ^Lexer) -> LexerToken; -var - result: LexerToken; -begin - if lexer^.length = 0u then - lexer^.length := fread(cast(lexer^.buffer: Pointer), CHUNK_SIZE, 1u, lexer^.input); - lexer^.current.location.column := 1u; - lexer^.current.location.line := 1u; - lexer^.current.iterator := lexer^.buffer - end; - lexer^.start := lexer^.current; - - result := lexer_current(lexer); - return result -end; - -proc lexer_destroy*(lexer: ^Lexer); -begin - free(cast(lexer^.buffer: Pointer)) -end; - -proc lexer_initialize(); -begin - initialize_classification(); - initialize_transitions() -end; - -end.