diff --git a/Rakefile b/Rakefile index 8f20428..43eae73 100644 --- a/Rakefile +++ b/Rakefile @@ -3,74 +3,39 @@ # obtain one at https://mozilla.org/MPL/2.0/. require 'pathname' -require 'open3' require 'rake/clean' -require_relative 'tools/support' -# Dependencies. -GCC_VERSION = "15.1.0" - -# Paths. -HOST_GCC = TMP + 'host/gcc' +TMP = Pathname.new('./build') HOST_INSTALL = TMP + 'host/install' CLOBBER.include TMP +CLEAN.include(TMP + 'boot') -directory(TMP + 'tools') -directory HOST_GCC directory HOST_INSTALL -task default: [TMP + 'elna'] do - sh (TMP + 'elna').to_path, '--parse', 'source.elna' +task default: ['source/main.elna', TMP + 'boot/elna'] do |t| + sources, compiler = t.prerequisites.partition { |f| f.end_with? '.elna' } + + sh *compiler, '--parse', *sources end -namespace :boot do - desc 'Download and configure the bootstrap compiler' - task configure: [TMP + 'tools', HOST_GCC, HOST_INSTALL] do - url = URI.parse "https://gcc.gnu.org/pub/gcc/releases/gcc-#{GCC_VERSION}/gcc-#{GCC_VERSION}.tar.xz" - options = find_build_target GCC_VERSION - source_directory = TMP + "tools/gcc-#{GCC_VERSION}" - frontend_link = source_directory + 'gcc' +rule(/boot\/.+\.o$/ => ->(file) { + Pathname.new('source') + + Pathname.new(file).relative_path_from(TMP + 'boot').sub_ext('.elna') +}) do |t| + Pathname.new(t.name).dirname.mkpath + compiler = HOST_INSTALL + 'bin/gelna' - download_and_pipe url, source_directory.dirname, ['tar', '-Jxv'] - - sh 'contrib/download_prerequisites', chdir: source_directory.to_path - File.symlink Pathname.new('.').relative_path_from(frontend_link), (frontend_link + 'elna') - - configure_options = [ - "--prefix=#{HOST_INSTALL.realpath}", - "--with-sysroot=#{options.sysroot.realpath}", - '--enable-languages=c,c++,elna', - '--disable-bootstrap', - '--disable-multilib', - "--target=#{options.build}", - "--build=#{options.build}", - "--host=#{options.build}" - ] - flags = '-O0 -g -fPIC -I/opt/homebrew/Cellar/flex/2.6.4_2/include' - env = { - 'CC' => options.gcc, - 'CXX' => options.gxx, - 'CFLAGS' => flags, - 'CXXFLAGS' => flags, - } - configure = source_directory.relative_path_from(HOST_GCC) + 'configure' - sh env, configure.to_path, *configure_options, chdir: HOST_GCC.to_path - end - - desc 'Make and install the bootstrap compiler' - task :make do - cwd = HOST_GCC.to_path - - sh 'make', '-j', Etc.nprocessors.to_s, chdir: cwd - sh 'make', 'install', chdir: cwd - end + sh compiler.to_path, '-c', '-o', t.name, *t.prerequisites end -desc 'Build the bootstrap compiler' -task boot: %w[boot:configure boot:make] +file TMP + 'boot/elna' => FileList['source/**/*.elna'].reject { |file| + file != file.downcase +}.map { |file| + TMP + 'boot' + + Pathname.new(file).relative_path_from('source').sub_ext('.o') +} do |t| + compiler = HOST_INSTALL + 'bin/gcc' -file (TMP + 'elna').to_path => ['source.elna'] -file (TMP + 'elna').to_path => [(HOST_INSTALL + 'bin/gelna').to_path] do |task| - sh (HOST_INSTALL + 'bin/gelna').to_path, '-o', task.name, task.prerequisites.first + sh compiler.to_path, '-o', t.name, *t.prerequisites end diff --git a/boot/ast.cc b/boot/ast.cc index ad25adb..1707960 100644 --- a/boot/ast.cc +++ b/boot/ast.cc @@ -225,7 +225,7 @@ namespace elna::boot variable_declaration::variable_declaration(const struct position position, identifier_definition identifier, std::shared_ptr variable_type) - : definition(position, identifier), m_variable_type(variable_type) + : declaration(position, identifier), m_variable_type(variable_type) { } @@ -239,28 +239,28 @@ namespace elna::boot return *m_variable_type; } - definition::definition(const struct position position, identifier_definition identifier) + declaration::declaration(const struct position position, identifier_definition identifier) : node(position), identifier(identifier) { } - constant_definition::constant_definition(const struct position position, identifier_definition identifier, + constant_declaration::constant_declaration(const struct position position, identifier_definition identifier, expression *body) - : definition(position, identifier), m_body(body) + : declaration(position, identifier), m_body(body) { } - void constant_definition::accept(parser_visitor *visitor) + void constant_declaration::accept(parser_visitor *visitor) { visitor->visit(this); } - expression& constant_definition::body() + expression& constant_declaration::body() { return *m_body; } - constant_definition::~constant_definition() + constant_declaration::~constant_declaration() { delete m_body; } @@ -307,55 +307,55 @@ namespace elna::boot return this; } - procedure_definition::procedure_definition(const struct position position, identifier_definition identifier, + procedure_declaration::procedure_declaration(const struct position position, identifier_definition identifier, procedure_type_expression *heading, block&& body) - : definition(position, identifier), m_heading(heading), body(std::move(body)) + : declaration(position, identifier), m_heading(heading), body(std::move(body)) { } - procedure_definition::procedure_definition(const struct position position, identifier_definition identifier, + procedure_declaration::procedure_declaration(const struct position position, identifier_definition identifier, procedure_type_expression *heading) - : definition(position, identifier), m_heading(heading), body(std::nullopt) + : declaration(position, identifier), m_heading(heading), body(std::nullopt) { } - void procedure_definition::accept(parser_visitor *visitor) + void procedure_declaration::accept(parser_visitor *visitor) { visitor->visit(this); } - procedure_type_expression& procedure_definition::heading() + procedure_type_expression& procedure_declaration::heading() { return *m_heading; } - procedure_definition::~procedure_definition() + procedure_declaration::~procedure_declaration() { delete m_heading; } - type_definition::type_definition(const struct position position, identifier_definition identifier, + type_declaration::type_declaration(const struct position position, identifier_definition identifier, type_expression *body) - : definition(position, identifier), m_body(body) + : declaration(position, identifier), m_body(body) { } - type_definition::~type_definition() + type_declaration::~type_declaration() { delete m_body; } - void type_definition::accept(parser_visitor *visitor) + void type_declaration::accept(parser_visitor *visitor) { visitor->visit(this); } - type_expression& type_definition::body() + type_expression& type_declaration::body() { return *m_body; } - block::block(std::vector&& constants, std::vector&& variables, + block::block(std::vector&& constants, std::vector&& variables, std::vector&& body) : m_variables(std::move(variables)), m_constants(std::move(constants)), m_body(std::move(body)) { @@ -381,7 +381,7 @@ namespace elna::boot return m_variables; } - const std::vector& block::constants() + const std::vector& block::constants() { return m_constants; } @@ -401,7 +401,7 @@ namespace elna::boot { delete variable; } - for (constant_definition *constant : this->constants()) + for (constant_declaration *constant : this->constants()) { delete constant; } @@ -419,7 +419,7 @@ namespace elna::boot unit::~unit() { - for (procedure_definition *procedure : this->procedures) + for (procedure_declaration *procedure : this->procedures) { delete procedure; } @@ -427,11 +427,11 @@ namespace elna::boot { delete variable; } - for (type_definition *type : this->types) + for (type_declaration *type : this->types) { delete type; } - for (constant_definition *constant : this->constants) + for (constant_declaration *constant : this->constants) { delete constant; } diff --git a/boot/parser.yy b/boot/parser.yy index 28c5df9..3971b9b 100644 --- a/boot/parser.yy +++ b/boot/parser.yy @@ -133,8 +133,8 @@ along with GCC; see the file COPYING3. If not see %type > case_labels; %type switch_case; %type > switch_cases; -%type constant_definition; -%type > constant_part constant_definitions; +%type constant_declaration; +%type > constant_part constant_declarations; %type > variable_declarations variable_part variable_declaration; %type type_expression; %type > type_expressions; @@ -148,12 +148,12 @@ along with GCC; see the file COPYING3. If not see %type return_statement; %type statement; %type > required_statements optional_statements statement_part; -%type procedure_definition; +%type procedure_declaration; %type , elna::boot::procedure_type_expression *>> procedure_heading; %type return_declaration; -%type > procedure_definitions procedure_part; -%type type_definition; -%type > type_definitions type_part; +%type > procedure_declarations procedure_part; +%type type_declaration; +%type > type_declarations type_part; %type > block; %type field_declaration formal_parameter; %type >> @@ -167,28 +167,28 @@ along with GCC; see the file COPYING3. If not see %type > import_declarations import_part; %% program: - "program" import_part constant_part type_part variable_part procedure_part "begin" optional_statements "end" "." + "program" ";" import_part constant_part type_part variable_part procedure_part statement_part "end" "." { - auto tree = new boot::program(boot::make_position(@7)); + auto tree = new boot::program(boot::make_position(@1)); - std::swap(tree->imports, $2); - std::swap(tree->constants, $3); - std::swap(tree->types , $4); - std::swap(tree->variables, $5); - std::swap(tree->procedures, $6); + std::swap(tree->imports, $3); + std::swap(tree->constants, $4); + std::swap(tree->types , $5); + std::swap(tree->variables, $6); + std::swap(tree->procedures, $7); std::swap(tree->body, $8); driver.tree.reset(tree); } - | "module" import_part constant_part type_part variable_part procedure_part "end" "." + | "module" ";" import_part constant_part type_part variable_part procedure_part "end" "." { - auto tree = new boot::program(boot::make_position(@7)); + auto tree = new boot::program(boot::make_position(@1)); - std::swap(tree->imports, $2); - std::swap(tree->constants, $3); - std::swap(tree->types , $4); - std::swap(tree->variables, $5); - std::swap(tree->procedures, $6); + std::swap(tree->imports, $3); + std::swap(tree->constants, $4); + std::swap(tree->types , $5); + std::swap(tree->variables, $6); + std::swap(tree->procedures, $7); driver.tree.reset(tree); } @@ -228,27 +228,27 @@ procedure_heading: formal_parameter_list return_declaration $$.second->parameters.push_back(type); } } -procedure_definition: - "proc" identifier_definition procedure_heading ";" block +procedure_declaration: + "proc" identifier_definition procedure_heading ";" block ";" { - $$ = new boot::procedure_definition(boot::make_position(@1), std::move($2), $3.second, std::move(*$5)); + $$ = new boot::procedure_declaration(boot::make_position(@1), std::move($2), $3.second, std::move(*$5)); std::swap($3.first, $$->parameter_names); } - | "proc" identifier_definition procedure_heading ";" "extern" + | "proc" identifier_definition procedure_heading ";" "extern" ";" { - $$ = new boot::procedure_definition(boot::make_position(@1), std::move($2), $3.second); + $$ = new boot::procedure_declaration(boot::make_position(@1), std::move($2), $3.second); std::swap($3.first, $$->parameter_names); } -procedure_definitions: - procedure_definition procedure_definitions +procedure_declarations: + procedure_declaration procedure_declarations { std::swap($$, $2); $$.emplace($$.cbegin(), std::move($1)); } - | procedure_definition { $$.emplace_back(std::move($1)); } + | procedure_declaration { $$.emplace_back(std::move($1)); } procedure_part: /* no procedure definitions */ {} - | procedure_definitions { std::swap($$, $1); } + | procedure_declarations { std::swap($$, $1); } call_expression: designator_expression actual_parameter_list { $$ = new boot::procedure_call(boot::make_position(@1), $1); @@ -454,9 +454,9 @@ optional_statements: field_declaration: IDENTIFIER ":" type_expression { $$ = std::make_pair($1, $3); } required_fields: - field_declaration required_fields + field_declaration ";" required_fields { - std::swap($$, $2); + std::swap($$, $3); $$.emplace($$.cbegin(), $1); } | field_declaration { $$.emplace_back($1); } @@ -501,7 +501,7 @@ identifiers: $$.emplace($$.cbegin(), std::move($1)); } | IDENTIFIER { $$.emplace_back(std::move($1)); } -variable_declaration: identifier_definitions ":" type_expression +variable_declaration: identifier_definitions ":" type_expression ";" { std::shared_ptr shared_type{ $3 }; @@ -523,12 +523,12 @@ variable_declarations: variable_part: /* no variable declarations */ {} | "var" variable_declarations { std::swap($$, $2); } -constant_definition: identifier_definition ":=" expression +constant_declaration: identifier_definition ":=" expression ";" { - $$ = new boot::constant_definition(boot::make_position(@1), std::move($1), $3); + $$ = new boot::constant_declaration(boot::make_position(@1), std::move($1), $3); } -constant_definitions: - constant_definition constant_definitions +constant_declarations: + constant_declaration constant_declarations { std::swap($$, $2); $$.insert($$.cbegin(), $1); @@ -536,7 +536,7 @@ constant_definitions: | /* no constant definitions */ {} constant_part: /* no constant definitions */ {} - | "const" constant_definitions { std::swap($$, $2); } + | "const" constant_declarations { std::swap($$, $2); } import_declaration: IDENTIFIER "." import_declaration { @@ -545,8 +545,7 @@ import_declaration: } | IDENTIFIER { $$.emplace_back(std::move($1)); } import_declarations: - /* no import declarations */ {} - | import_declaration "," import_declarations + import_declaration "," import_declarations { std::swap($$, $3); $$.emplace($$.cbegin(), new boot::import_declaration(boot::make_position(@1), std::move($1))); @@ -557,13 +556,13 @@ import_declarations: } import_part: /* no import declarations */ {} - | "import" import_declarations { std::swap($$, $2); } -type_definition: identifier_definition "=" type_expression + | "import" import_declarations ";" { std::swap($$, $2); } +type_declaration: identifier_definition "=" type_expression ";" { - $$ = new boot::type_definition(boot::make_position(@1), std::move($1), $3); + $$ = new boot::type_declaration(boot::make_position(@1), std::move($1), $3); } -type_definitions: - type_definition type_definitions +type_declarations: + type_declaration type_declarations { std::swap($$, $2); $$.insert($$.cbegin(), $1); @@ -571,7 +570,7 @@ type_definitions: | /* no type definitions */ {} type_part: /* no type definitions */ {} - | "type" type_definitions { std::swap($$, $2); } + | "type" type_declarations { std::swap($$, $2); } formal_parameter: IDENTIFIER ":" type_expression { $$ = std::make_pair($1, $3); } formal_parameter_list: diff --git a/boot/semantic.cc b/boot/semantic.cc index a8bcbe9..02caae7 100644 --- a/boot/semantic.cc +++ b/boot/semantic.cc @@ -83,7 +83,7 @@ namespace elna::boot } } - void declaration_visitor::visit(type_definition *definition) + void declaration_visitor::visit(type_declaration *definition) { definition->body().accept(this); auto unresolved_declaration = this->unresolved.at(definition->identifier.identifier); @@ -164,9 +164,12 @@ namespace elna::boot void declaration_visitor::visit(variable_declaration *declaration) { declaration->variable_type().accept(this); + + this->symbols->enter(declaration->identifier.identifier, + std::make_shared(this->current_type)); } - void declaration_visitor::visit(constant_definition *definition) + void declaration_visitor::visit(constant_declaration *definition) { definition->body().accept(this); @@ -174,7 +177,7 @@ namespace elna::boot std::make_shared(this->current_literal)); } - void declaration_visitor::visit(procedure_definition *definition) + void declaration_visitor::visit(procedure_declaration *definition) { std::shared_ptr info = std::make_shared( build_procedure(definition->heading()), definition->parameter_names); @@ -184,7 +187,7 @@ namespace elna::boot if (definition->body.has_value()) { - for (constant_definition *const constant : definition->body.value().constants()) + for (constant_declaration *const constant : definition->body.value().constants()) { constant->accept(this); } @@ -293,7 +296,7 @@ namespace elna::boot void declaration_visitor::visit(unit *unit) { - for (type_definition *const type : unit->types) + for (type_declaration *const type : unit->types) { const std::string& type_identifier = type->identifier.identifier; @@ -303,7 +306,7 @@ namespace elna::boot add_error(type->identifier.identifier, this->input_file, type->position()); } } - for (type_definition *const type : unit->types) + for (type_declaration *const type : unit->types) { type->accept(this); } @@ -316,7 +319,7 @@ namespace elna::boot { variable->accept(this); } - for (procedure_definition *const procedure : unit->procedures) + for (procedure_declaration *const procedure : unit->procedures) { procedure->accept(this); } diff --git a/boot/symbol.cc b/boot/symbol.cc index 58a0752..af03159 100644 --- a/boot/symbol.cc +++ b/boot/symbol.cc @@ -332,8 +332,8 @@ namespace elna::boot return std::static_pointer_cast(shared_from_this()); } - variable_info::variable_info(const std::string& name, const type symbol) - : name(name), symbol(symbol) + variable_info::variable_info(const type symbol) + : symbol(symbol) { } diff --git a/gcc/elna-generic.cc b/gcc/elna-generic.cc index b6b2354..1e45b7f 100644 --- a/gcc/elna-generic.cc +++ b/gcc/elna-generic.cc @@ -99,23 +99,25 @@ namespace elna::gcc } - std::deque> do_semantic_analysis(const char *path, - const std::unique_ptr& ast, std::shared_ptr info_table, - std::shared_ptr symbols, std::unordered_map& unresolved) + std::unordered_map do_semantic_analysis(std::shared_ptr info_table, + std::shared_ptr symbols) { - boot::declaration_visitor declaration_visitor(path, info_table); + std::unordered_map unresolved; - declaration_visitor.visit(ast.get()); - - if (declaration_visitor.errors().empty()) + for (auto& [symbol_name, symbol_info] : *info_table) { - for (auto& [symbol_name, symbol_info] : declaration_visitor.unresolved) + std::vector type_path; + + // The top level symbol table has basic (builtin) types in it which are not aliases. + if (auto type_info = symbol_info->is_type()) { - std::vector type_path; - handle_symbol(symbol_name, symbol_info, symbols, unresolved, type_path); + if (auto alias_type = type_info->symbol.get()) + { + handle_symbol(symbol_name, alias_type, symbols, unresolved, type_path); + } } } - return std::move(declaration_visitor.errors()); + return unresolved; } generic_visitor::generic_visitor(std::shared_ptr symbol_table, @@ -318,7 +320,7 @@ namespace elna::gcc } } - void generic_visitor::declare_procedure(boot::procedure_definition *const definition) + void generic_visitor::declare_procedure(boot::procedure_declaration *const definition) { tree declaration_type = build_procedure_type(definition->heading()); tree fndecl = build_fn_decl(definition->identifier.identifier.c_str(), declaration_type); @@ -409,11 +411,11 @@ namespace elna::gcc { declaration->accept(this); } - for (boot::constant_definition *const constant : unit->constants) + for (boot::constant_declaration *const constant : unit->constants) { constant->accept(this); } - for (boot::type_definition *const type : unit->types) + for (boot::type_declaration *const type : unit->types) { type->accept(this); } @@ -421,17 +423,17 @@ namespace elna::gcc { variable->accept(this); } - for (boot::procedure_definition *const procedure : unit->procedures) + for (boot::procedure_declaration *const procedure : unit->procedures) { declare_procedure(procedure); } - for (boot::procedure_definition *const procedure : unit->procedures) + for (boot::procedure_declaration *const procedure : unit->procedures) { procedure->accept(this); } } - void generic_visitor::visit(boot::procedure_definition *definition) + void generic_visitor::visit(boot::procedure_declaration *definition) { if (!definition->body.has_value()) { @@ -449,7 +451,7 @@ namespace elna::gcc { this->symbols->enter(IDENTIFIER_POINTER(DECL_NAME(argument_chain)), argument_chain); } - for (boot::constant_definition *const constant : definition->body.value().constants()) + for (boot::constant_declaration *const constant : definition->body.value().constants()) { constant->accept(this); } @@ -818,7 +820,7 @@ namespace elna::gcc } } - void generic_visitor::visit(boot::constant_definition *definition) + void generic_visitor::visit(boot::constant_declaration *definition) { location_t definition_location = get_location(&definition->position()); definition->body().accept(this); @@ -858,7 +860,7 @@ namespace elna::gcc this->current_expression = NULL_TREE; } - void generic_visitor::visit(boot::type_definition *definition) + void generic_visitor::visit(boot::type_declaration *definition) { location_t definition_location = get_location(&definition->position()); this->current_expression = this->unresolved.at(definition->identifier.identifier); diff --git a/gcc/elna1.cc b/gcc/elna1.cc index bcd3df5..d5997bc 100644 --- a/gcc/elna1.cc +++ b/gcc/elna1.cc @@ -84,19 +84,21 @@ static void elna_parse_file(const char *filename) { for (const std::unique_ptr& module_tree : outcome.modules) { - std::unordered_map unresolved; + elna::boot::declaration_visitor declaration_visitor(filename, info_table); - auto semantic_errors = elna::gcc::do_semantic_analysis(filename, module_tree, - info_table, symbol_table, unresolved); + declaration_visitor.visit(module_tree.get()); - if (semantic_errors.empty()) + if (declaration_visitor.errors().empty()) { + std::unordered_map unresolved = elna::gcc::do_semantic_analysis( + info_table, symbol_table); + elna::gcc::generic_visitor generic_visitor{ symbol_table, std::move(unresolved) }; generic_visitor.visit(module_tree.get()); } else { - elna::gcc::report_errors(semantic_errors); + elna::gcc::report_errors(declaration_visitor.errors()); } } } diff --git a/include/elna/boot/ast.h b/include/elna/boot/ast.h index 6ba29f7..1e4f57e 100644 --- a/include/elna/boot/ast.h +++ b/include/elna/boot/ast.h @@ -54,9 +54,9 @@ namespace elna::boot }; class variable_declaration; - class constant_definition; - class procedure_definition; - class type_definition; + class constant_declaration; + class procedure_declaration; + class type_declaration; class procedure_call; class cast_expression; class assign_statement; @@ -93,9 +93,9 @@ namespace elna::boot struct parser_visitor { virtual void visit(variable_declaration *) = 0; - virtual void visit(constant_definition *) = 0; - virtual void visit(procedure_definition *) = 0; - virtual void visit(type_definition *) = 0; + virtual void visit(constant_declaration *) = 0; + virtual void visit(procedure_declaration *) = 0; + virtual void visit(type_declaration *) = 0; virtual void visit(procedure_call *) = 0; virtual void visit(cast_expression *) = 0; virtual void visit(traits_expression *) = 0; @@ -178,10 +178,10 @@ namespace elna::boot /** * Symbol definition. */ - class definition : public node + class declaration : public node { protected: - definition(const struct position position, identifier_definition identifier); + declaration(const struct position position, identifier_definition identifier); public: const identifier_definition identifier; @@ -292,7 +292,7 @@ namespace elna::boot /** * Variable declaration. */ - class variable_declaration : public definition + class variable_declaration : public declaration { std::shared_ptr m_variable_type; @@ -320,18 +320,18 @@ namespace elna::boot /** * Constant definition. */ - class constant_definition : public definition + class constant_declaration : public declaration { expression *m_body; public: - constant_definition(const struct position position, identifier_definition identifier, + constant_declaration(const struct position position, identifier_definition identifier, expression *body); void accept(parser_visitor *visitor) override; expression& body(); - virtual ~constant_definition() override; + virtual ~constant_declaration() override; }; /** @@ -354,7 +354,7 @@ namespace elna::boot struct block { - block(std::vector&& constants, std::vector&& variables, + block(std::vector&& constants, std::vector&& variables, std::vector&& body); block(const block&) = delete; block(block&& that); @@ -363,14 +363,14 @@ namespace elna::boot block& operator=(block&& that); const std::vector& variables(); - const std::vector& constants(); + const std::vector& constants(); const std::vector& body(); virtual ~block(); private: std::vector m_variables; - std::vector m_constants; + std::vector m_constants; std::vector m_body; }; @@ -378,7 +378,7 @@ namespace elna::boot /** * Procedure definition. */ - class procedure_definition : public definition + class procedure_declaration : public declaration { procedure_type_expression *m_heading; @@ -386,28 +386,28 @@ namespace elna::boot std::optional body; std::vector parameter_names; - procedure_definition(const struct position position, identifier_definition identifier, + procedure_declaration(const struct position position, identifier_definition identifier, procedure_type_expression *heading, block&& body); - procedure_definition(const struct position position, identifier_definition identifier, + procedure_declaration(const struct position position, identifier_definition identifier, procedure_type_expression *heading); void accept(parser_visitor *visitor) override; procedure_type_expression& heading(); - virtual ~procedure_definition() override; + virtual ~procedure_declaration() override; }; /** * Type definition. */ - class type_definition : public definition + class type_declaration : public declaration { type_expression *m_body; public: - type_definition(const struct position position, identifier_definition identifier, + type_declaration(const struct position position, identifier_definition identifier, type_expression *expression); - ~type_definition(); + ~type_declaration(); void accept(parser_visitor *visitor) override; @@ -670,10 +670,10 @@ namespace elna::boot { public: std::vector imports; - std::vector constants; - std::vector types; + std::vector constants; + std::vector types; std::vector variables; - std::vector procedures; + std::vector procedures; unit(const struct position position); virtual void accept(parser_visitor *visitor) override; diff --git a/include/elna/boot/semantic.h b/include/elna/boot/semantic.h index 5594d76..ac96667 100644 --- a/include/elna/boot/semantic.h +++ b/include/elna/boot/semantic.h @@ -54,27 +54,26 @@ namespace elna::boot constant_info::variant current_literal; std::shared_ptr symbols; + std::unordered_map> unresolved; procedure_type build_procedure(procedure_type_expression& type_expression); public: - std::unordered_map> unresolved; - explicit declaration_visitor(const char *path, std::shared_ptr symbols); void visit(named_type_expression *type_expression) override; void visit(array_type_expression *type_expression) override; void visit(pointer_type_expression *type_expression) override; void visit(program *program) override; - void visit(type_definition *definition) override; + void visit(type_declaration *definition) override; void visit(record_type_expression *type_expression) override; void visit(union_type_expression *type_expression) override; void visit(procedure_type_expression *type_expression) override; void visit(enumeration_type_expression *type_expression) override; void visit(variable_declaration *declaration) override; - void visit(constant_definition *definition) override; - void visit(procedure_definition *definition) override; + void visit(constant_declaration *definition) override; + void visit(procedure_declaration *definition) override; void visit(assign_statement *statement) override; void visit(if_statement *statement) override; void visit(import_declaration *) override; diff --git a/include/elna/boot/symbol.h b/include/elna/boot/symbol.h index 698b585..b87f012 100644 --- a/include/elna/boot/symbol.h +++ b/include/elna/boot/symbol.h @@ -202,10 +202,9 @@ namespace elna::boot class variable_info : public info { public: - const std::string name; const type symbol; - variable_info(const std::string& name, const type symbol); + variable_info(const type symbol); std::shared_ptr is_variable() override; }; diff --git a/include/elna/gcc/elna-generic.h b/include/elna/gcc/elna-generic.h index fe241ff..94f0845 100644 --- a/include/elna/gcc/elna-generic.h +++ b/include/elna/gcc/elna-generic.h @@ -33,9 +33,8 @@ along with GCC; see the file COPYING3. If not see namespace elna::gcc { - std::deque> do_semantic_analysis(const char *path, - const std::unique_ptr& ast, std::shared_ptr info_table, - std::shared_ptr symbols, std::unordered_map& unresolved); + std::unordered_map do_semantic_analysis(std::shared_ptr info_table, + std::shared_ptr symbols); tree handle_symbol(const std::string& symbol_name, std::shared_ptr reference, std::shared_ptr symbols, std::unordered_map& unresolved, std::vector& path); @@ -46,7 +45,7 @@ namespace elna::gcc std::shared_ptr symbols; std::unordered_map unresolved; - void declare_procedure(boot::procedure_definition *const definition); + void declare_procedure(boot::procedure_declaration *const definition); tree build_procedure_type(boot::procedure_type_expression& type); void build_composite_type(const std::vector& fields, tree composite_type_node); @@ -79,7 +78,7 @@ namespace elna::gcc std::unordered_map&& unresolved); void visit(boot::program *program) override; - void visit(boot::procedure_definition *definition) override; + void visit(boot::procedure_declaration *definition) override; void visit(boot::procedure_call *call) override; void visit(boot::cast_expression *expression) override; void visit(boot::traits_expression *trait) override; @@ -92,8 +91,8 @@ namespace elna::gcc void visit(boot::literal *string) override; void visit(boot::binary_expression *expression) override; void visit(boot::unary_expression *expression) override; - void visit(boot::constant_definition *definition) override; - void visit(boot::type_definition *definition) override; + void visit(boot::constant_declaration *definition) override; + void visit(boot::type_declaration *definition) override; void visit(boot::variable_declaration *declaration) override; void visit(boot::variable_expression *expression) override; void visit(boot::array_access_expression *expression) override; diff --git a/rakelib/boot.rake b/rakelib/boot.rake new file mode 100644 index 0000000..46ba65a --- /dev/null +++ b/rakelib/boot.rake @@ -0,0 +1,107 @@ +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. -} +# frozen_string_literal: true + +require 'uri' +require 'net/http' +require 'open3' + +def gcc_verbose(gcc_binary) + read, write = IO.pipe + sh({'LANG' => 'C'}, gcc_binary, '--verbose', err: write) + write.close + output = read.read + read.close + output +end + +def find_build_target(gcc_version) + gcc_verbose('gcc') + .lines + .find { |line| line.start_with? 'Target: ' } + .split(' ') + .last + .strip +end + +def download_and_pipe(url, target, command) + target.mkpath + + Net::HTTP.start(url.host, url.port, use_ssl: url.scheme == 'https') do |http| + request = Net::HTTP::Get.new url.request_uri + + http.request request do |response| + case response + when Net::HTTPRedirection + download_and_pipe URI.parse(response['location']), target, command + when Net::HTTPSuccess + Dir.chdir target.to_path do + Open3.popen2(*command) do |stdin, stdout, wait_thread| + Thread.new do + stdout.each { |line| puts line } + end + + response.read_body do |chunk| + stdin.write chunk + end + stdin.close + + wait_thread.value + end + end + else + response.error! + end + end + end +end + +namespace :boot do + # Dependencies. + GCC_VERSION = "15.1.0" + HOST_GCC = TMP + 'host/gcc' + + directory HOST_GCC + directory(TMP + 'tools') + + desc 'Download and configure the bootstrap compiler' + task configure: [TMP + 'tools', HOST_GCC, HOST_INSTALL] do + url = URI.parse "https://gcc.gnu.org/pub/gcc/releases/gcc-#{GCC_VERSION}/gcc-#{GCC_VERSION}.tar.xz" + build_target = find_build_target GCC_VERSION + source_directory = TMP + "tools/gcc-#{GCC_VERSION}" + frontend_link = source_directory + 'gcc' + + download_and_pipe url, source_directory.dirname, ['tar', '-Jxv'] + + sh 'contrib/download_prerequisites', chdir: source_directory.to_path + File.symlink Pathname.new('.').relative_path_from(frontend_link), (frontend_link + 'elna') + + configure_options = [ + "--prefix=#{HOST_INSTALL.realpath}", + '--with-sysroot=/', + '--enable-languages=c,c++,m2,elna', + '--disable-bootstrap', + '--disable-multilib', + "--target=#{build_target}", + "--build=#{build_target}", + "--host=#{build_target}" + ] + flags = '-O0 -g -fPIC' + env = { + 'CFLAGS' => flags, + 'CXXFLAGS' => flags, + } + configure = source_directory.relative_path_from(HOST_GCC) + 'configure' + sh env, configure.to_path, *configure_options, chdir: HOST_GCC + end + + desc 'Make and install the bootstrap compiler' + task :make do + sh 'make', '-j', Etc.nprocessors.to_s, chdir: HOST_GCC + sh 'make', 'install', chdir: HOST_GCC + end +end + +desc 'Build the bootstrap compiler' +task boot: %w[boot:configure boot:make] diff --git a/source/CommandLineInterface.def b/source/CommandLineInterface.def new file mode 100644 index 0000000..e4688c4 --- /dev/null +++ b/source/CommandLineInterface.def @@ -0,0 +1,16 @@ +DEFINITION MODULE CommandLineInterface; + +FROM Common IMPORT ShortString; + +TYPE + CommandLine = RECORD + input: ShortString; + output: ShortString; + lex: BOOLEAN; + parse: BOOLEAN + END; + PCommandLine = POINTER TO CommandLine; + +PROCEDURE parse_command_line(): PCommandLine; + +END CommandLineInterface. diff --git a/source/CommandLineInterface.elna b/source/CommandLineInterface.elna new file mode 100644 index 0000000..22389dc --- /dev/null +++ b/source/CommandLineInterface.elna @@ -0,0 +1,89 @@ +module; + +from SYSTEM import ADR, TSIZE; + +from Args import GetArg, Narg; +from FIO import WriteString, WriteChar, WriteLine, StdErr; +from Storage import ALLOCATE; +from Strings import CompareStr, Length; +from MemUtils import MemZero; + +from Common import ShortString; + +proc parse_command_line() -> PCommandLine; +var + parameter: ShortString; + i: CARDINAL; + result: PCommandLine; + parsed: BOOLEAN; +begin + i := 1; + NEW(result); + result^.lex := false; + result^.parse := false; + MemZero(ADR(result^.input), 256); + result^.output[1] := CHAR(0); + + while (i < Narg()) & (result <> nil) do + parsed := GetArg(parameter, i); + parsed := false; + + if CompareStr(parameter, '--lex') = 0 then + parsed := true; + result^.lex := true + end; + if CompareStr(parameter, '--parse') = 0 then + parsed := true; + result^.parse := true + end; + if CompareStr(parameter, '-o') = 0 then + INC(i); + + if i = Narg() then + WriteString(StdErr, 'Fatal error: expecting a file name following -o.'); + result := nil + end; + if i < Narg() then + parsed := GetArg(parameter, i); + result^.output := parameter + end; + parsed := true + end; + if (parameter[1] <> '-') & (parsed = false) then + parsed := true; + + if Length(result^.input) > 0 then + WriteString(StdErr, 'Fatal error: only one source file can be compiled at once. First given "'); + WriteString(StdErr, result^.input); + WriteString(StdErr, '", then "'); + WriteString(StdErr, parameter); + WriteString(StdErr, '".'); + WriteLine(StdErr); + result := nil + end; + if result <> nil then + result^.input := parameter + end + end; + if parsed = false then + WriteString(StdErr, 'Fatal error: unknown command line options: '); + + WriteString(StdErr, parameter); + WriteChar(StdErr, '.'); + WriteLine(StdErr); + + result := nil + end; + + i := i + 1 + end; + if (result <> nil) & (Length(result^.input) = 0) then + WriteString(StdErr, 'Fatal error: no input files.'); + WriteLine(StdErr); + result := nil + end; + + return result +end; + +end. diff --git a/source/Common.def b/source/Common.def new file mode 100644 index 0000000..9520230 --- /dev/null +++ b/source/Common.def @@ -0,0 +1,12 @@ +DEFINITION MODULE Common; + +TYPE + ShortString = ARRAY[1..256] OF CHAR; + Identifier = ARRAY[1..256] OF CHAR; + PIdentifier = POINTER TO Identifier; + TextLocation = RECORD + line: CARDINAL; + column: CARDINAL + END; + +END Common. diff --git a/source/Common.elna b/source/Common.elna new file mode 100644 index 0000000..fa158b7 --- /dev/null +++ b/source/Common.elna @@ -0,0 +1,3 @@ +module; + +end. diff --git a/source/Compiler.elna b/source/Compiler.elna new file mode 100644 index 0000000..f72f091 --- /dev/null +++ b/source/Compiler.elna @@ -0,0 +1,73 @@ +program; + +from FIO import Close, IsNoError, File, OpenToRead, OpenToWrite, StdErr, StdOut, WriteLine, WriteString; +from SYSTEM import ADR; +from M2RTS import HALT, ExitOnHalt; + +from Lexer import Lexer, lexer_destroy, lexer_initialize; +from Parser import Parser; +from Transpiler import transpile; +from CommandLineInterface import PCommandLine, parse_command_line; +from Parser import PAstModule, parse; +from Strings import Length; + +var + command_line: PCommandLine; + +proc compile_from_stream(); +var + lexer: Lexer; + source_input: File; + source_output: File; + ast_module: PAstModule; +begin + source_input := OpenToRead(command_line^.input); + + if IsNoError(source_input) = false then + WriteString(StdErr, 'Fatal error: failed to read the input file "'); + WriteString(StdErr, command_line^.input); + WriteString(StdErr, '".'); + WriteLine(StdErr); + + ExitOnHalt(2) + end; + source_output := nil; + + if Length(command_line^.output) > 0 then + source_output := OpenToWrite(command_line^.output); + + if IsNoError(source_output) = false then + WriteString(StdErr, 'Fatal error: failed to create the output file "'); + WriteString(StdErr, command_line^.output); + WriteString(StdErr, '".'); + WriteLine(StdErr); + + ExitOnHalt(2) + end + end; + + if IsNoError(source_input) then + lexer_initialize(ADR(lexer), source_input); + + ast_module := parse(ADR(lexer)); + transpile(ast_module, StdOut, source_output, command_line^.input); + + lexer_destroy(ADR(lexer)); + + Close(source_output); + Close(source_input) + end +end; + +begin + ExitOnHalt(0); + command_line := parse_command_line(); + + if command_line <> nil then + compile_from_stream() + end; + if command_line = nil then + ExitOnHalt(1) + end; + HALT() +end. diff --git a/source/Lexer.def b/source/Lexer.def new file mode 100644 index 0000000..883c604 --- /dev/null +++ b/source/Lexer.def @@ -0,0 +1,107 @@ +DEFINITION MODULE Lexer; + +FROM FIO IMPORT File; + +FROM Common IMPORT Identifier, ShortString, TextLocation; + +TYPE + PLexerBuffer = POINTER TO CHAR; + BufferPosition = RECORD + iterator: PLexerBuffer; + location: TextLocation + END; + PBufferPosition = POINTER TO BufferPosition; + Lexer = RECORD + input: File; + buffer: PLexerBuffer; + size: CARDINAL; + length: CARDINAL; + start: BufferPosition; + current: BufferPosition + END; + PLexer = POINTER TO Lexer; + LexerKind = ( + lexerKindEof, + lexerKindIdentifier, + lexerKindIf, + lexerKindThen, + lexerKindElse, + lexerKindElsif, + lexerKindWhile, + lexerKindDo, + lexerKindProc, + lexerKindBegin, + lexerKindEnd, + lexerKindXor, + lexerKindConst, + lexerKindVar, + lexerKindCase, + lexerKindOf, + lexerKindType, + lexerKindRecord, + lexerKindUnion, + lexerKindPipe, + lexerKindTo, + lexerKindBoolean, + lexerKindNull, + lexerKindAnd, + lexerKindOr, + lexerKindTilde, + lexerKindReturn, + lexerKindDefer, + lexerKindRange, + lexerKindLeftParen, + lexerKindRightParen, + lexerKindLeftSquare, + lexerKindRightSquare, + lexerKindGreaterEqual, + lexerKindLessEqual, + lexerKindGreaterThan, + lexerKindLessThan, + lexerKindNotEqual, + lexerKindEqual, + lexerKindSemicolon, + lexerKindDot, + lexerKindComma, + lexerKindPlus, + lexerKindMinus, + lexerKindAsterisk, + lexerKindDivision, + lexerKindRemainder, + lexerKindAssignment, + lexerKindColon, + lexerKindHat, + lexerKindAt, + lexerKindComment, + lexerKindInteger, + lexerKindWord, + lexerKindCharacter, + lexerKindString, + lexerKindFrom, + lexerKindPointer, + lexerKindArray, + lexerKindArrow, + lexerKindProgram, + lexerKindModule, + lexerKindImport + ); + LexerToken = RECORD + CASE kind: LexerKind OF + lexerKindBoolean: booleanKind: BOOLEAN | + lexerKindIdentifier: identifierKind: Identifier | + lexerKindInteger: integerKind: INTEGER | + lexerKindString: stringKind: ShortString + END; + start_location: TextLocation; + end_location: TextLocation + END; + PLexerToken = POINTER TO LexerToken; + +PROCEDURE lexer_initialize(lexer: PLexer; input: File); +PROCEDURE lexer_destroy(lexer: PLexer); +(* Returns the last read token. *) +PROCEDURE lexer_current(lexer: PLexer): LexerToken; +(* Read and return the next token. *) +PROCEDURE lexer_lex(lexer: PLexer): LexerToken; + +END Lexer. diff --git a/source/Lexer.elna b/source/Lexer.elna new file mode 100644 index 0000000..8566d4c --- /dev/null +++ b/source/Lexer.elna @@ -0,0 +1,876 @@ +module; + +from FIO import ReadNBytes; +from SYSTEM import ADR, TSIZE; + +from DynamicStrings import String, InitStringCharStar, KillString; +from StringConvert import StringToInteger; +from Storage import DEALLOCATE, ALLOCATE; +from Strings import Length; +from MemUtils import MemCopy, MemZero; +from StrCase import Lower; + +const + CHUNK_SIZE = 85536; + +type + (* + * Classification table assigns each possible character to a group (class). All + * characters of the same group a handled equivalently. + * + * Classification: + *) + TransitionClass = ( + transitionClassInvalid, + transitionClassDigit, + transitionClassAlpha, + transitionClassSpace, + transitionClassColon, + transitionClassEquals, + transitionClassLeftParen, + transitionClassRightParen, + transitionClassAsterisk, + transitionClassUnderscore, + transitionClassSingle, + transitionClassHex, + transitionClassZero, + transitionClassX, + transitionClassEof, + transitionClassDot, + transitionClassMinus, + transitionClassSingleQuote, + transitionClassDoubleQuote, + transitionClassGreater, + transitionClassLess, + transitionClassOther + ); + TransitionState = ( + transitionStateStart, + transitionStateColon, + transitionStateIdentifier, + transitionStateDecimal, + transitionStateGreater, + transitionStateMinus, + transitionStateLeftParen, + transitionStateLess, + transitionStateDot, + transitionStateComment, + transitionStateClosingComment, + transitionStateCharacter, + transitionStateString, + transitionStateLeadingZero, + transitionStateDecimalSuffix, + transitionStateEnd + ); + TransitionAction = proc(PLexer, PLexerToken); + Transition = record + action: TransitionAction; + next_state: TransitionState + end; + TransitionClasses = [22]Transition; + +var + classification: [128]TransitionClass; + transitions: [16]TransitionClasses; + +proc initialize_classification(); +var + i: CARDINAL; +begin + classification[1] := transitionClassEof; (* NUL *) + classification[2] := transitionClassInvalid; (* SOH *) + classification[3] := transitionClassInvalid; (* STX *) + classification[4] := transitionClassInvalid; (* ETX *) + classification[5] := transitionClassInvalid; (* EOT *) + classification[6] := transitionClassInvalid; (* EMQ *) + classification[7] := transitionClassInvalid; (* ACK *) + classification[8] := transitionClassInvalid; (* BEL *) + classification[9] := transitionClassInvalid; (* BS *) + classification[10] := transitionClassSpace; (* HT *) + classification[11] := transitionClassSpace; (* LF *) + classification[12] := transitionClassInvalid; (* VT *) + classification[13] := transitionClassInvalid; (* FF *) + classification[14] := transitionClassSpace; (* CR *) + classification[15] := transitionClassInvalid; (* SO *) + classification[16] := transitionClassInvalid; (* SI *) + classification[17] := transitionClassInvalid; (* DLE *) + classification[18] := transitionClassInvalid; (* DC1 *) + classification[19] := transitionClassInvalid; (* DC2 *) + classification[20] := transitionClassInvalid; (* DC3 *) + classification[21] := transitionClassInvalid; (* DC4 *) + classification[22] := transitionClassInvalid; (* NAK *) + classification[23] := transitionClassInvalid; (* SYN *) + classification[24] := transitionClassInvalid; (* ETB *) + classification[25] := transitionClassInvalid; (* CAN *) + classification[26] := transitionClassInvalid; (* EM *) + classification[27] := transitionClassInvalid; (* SUB *) + classification[28] := transitionClassInvalid; (* ESC *) + classification[29] := transitionClassInvalid; (* FS *) + classification[30] := transitionClassInvalid; (* GS *) + classification[31] := transitionClassInvalid; (* RS *) + classification[32] := transitionClassInvalid; (* US *) + classification[33] := transitionClassSpace; (* Space *) + classification[34] := transitionClassSingle; (* ! *) + classification[35] := transitionClassDoubleQuote; (* " *) + classification[36] := transitionClassOther; (* # *) + classification[37] := transitionClassOther; (* $ *) + classification[38] := transitionClassSingle; (* % *) + classification[39] := transitionClassSingle; (* & *) + classification[40] := transitionClassSingleQuote; (* ' *) + classification[41] := transitionClassLeftParen; (* ( *) + classification[42] := transitionClassRightParen; (* ) *) + classification[43] := transitionClassAsterisk; (* * *) + classification[44] := transitionClassSingle; (* + *) + classification[45] := transitionClassSingle; (* , *) + classification[46] := transitionClassMinus; (* - *) + classification[47] := transitionClassDot; (* . *) + classification[48] := transitionClassSingle; (* / *) + classification[49] := transitionClassZero; (* 0 *) + classification[50] := transitionClassDigit; (* 1 *) + classification[51] := transitionClassDigit; (* 2 *) + classification[52] := transitionClassDigit; (* 3 *) + classification[53] := transitionClassDigit; (* 4 *) + classification[54] := transitionClassDigit; (* 5 *) + classification[55] := transitionClassDigit; (* 6 *) + classification[56] := transitionClassDigit; (* 7 *) + classification[57] := transitionClassDigit; (* 8 *) + classification[58] := transitionClassDigit; (* 9 *) + classification[59] := transitionClassColon; (* : *) + classification[60] := transitionClassSingle; (* ; *) + classification[61] := transitionClassLess; (* < *) + classification[62] := transitionClassEquals; (* = *) + classification[63] := transitionClassGreater; (* > *) + classification[64] := transitionClassOther; (* ? *) + classification[65] := transitionClassSingle; (* @ *) + classification[66] := transitionClassAlpha; (* A *) + classification[67] := transitionClassAlpha; (* B *) + classification[68] := transitionClassAlpha; (* C *) + classification[69] := transitionClassAlpha; (* D *) + classification[70] := transitionClassAlpha; (* E *) + classification[71] := transitionClassAlpha; (* F *) + classification[72] := transitionClassAlpha; (* G *) + classification[73] := transitionClassAlpha; (* H *) + classification[74] := transitionClassAlpha; (* I *) + classification[75] := transitionClassAlpha; (* J *) + classification[76] := transitionClassAlpha; (* K *) + classification[77] := transitionClassAlpha; (* L *) + classification[78] := transitionClassAlpha; (* M *) + classification[79] := transitionClassAlpha; (* N *) + classification[80] := transitionClassAlpha; (* O *) + classification[81] := transitionClassAlpha; (* P *) + classification[82] := transitionClassAlpha; (* Q *) + classification[83] := transitionClassAlpha; (* R *) + classification[84] := transitionClassAlpha; (* S *) + classification[85] := transitionClassAlpha; (* T *) + classification[86] := transitionClassAlpha; (* U *) + classification[87] := transitionClassAlpha; (* V *) + classification[88] := transitionClassAlpha; (* W *) + classification[89] := transitionClassAlpha; (* X *) + classification[90] := transitionClassAlpha; (* Y *) + classification[91] := transitionClassAlpha; (* Z *) + classification[92] := transitionClassSingle; (* [ *) + classification[93] := transitionClassOther; (* \ *) + classification[94] := transitionClassSingle; (* ] *) + classification[95] := transitionClassSingle; (* ^ *) + classification[96] := transitionClassUnderscore; (* _ *) + classification[97] := transitionClassOther; (* ` *) + classification[98] := transitionClassHex; (* a *) + classification[99] := transitionClassHex; (* b *) + classification[100] := transitionClassHex; (* c *) + classification[101] := transitionClassHex; (* d *) + classification[102] := transitionClassHex; (* e *) + classification[103] := transitionClassHex; (* f *) + classification[104] := transitionClassAlpha; (* g *) + classification[105] := transitionClassAlpha; (* h *) + classification[106] := transitionClassAlpha; (* i *) + classification[107] := transitionClassAlpha; (* j *) + classification[108] := transitionClassAlpha; (* k *) + classification[109] := transitionClassAlpha; (* l *) + classification[110] := transitionClassAlpha; (* m *) + classification[111] := transitionClassAlpha; (* n *) + classification[112] := transitionClassAlpha; (* o *) + classification[113] := transitionClassAlpha; (* p *) + classification[114] := transitionClassAlpha; (* q *) + classification[115] := transitionClassAlpha; (* r *) + classification[116] := transitionClassAlpha; (* s *) + classification[117] := transitionClassAlpha; (* t *) + classification[118] := transitionClassAlpha; (* u *) + classification[119] := transitionClassAlpha; (* v *) + classification[120] := transitionClassAlpha; (* w *) + classification[121] := transitionClassX; (* x *) + classification[122] := transitionClassAlpha; (* y *) + classification[123] := transitionClassAlpha; (* z *) + classification[124] := transitionClassOther; (* { *) + classification[125] := transitionClassSingle; (* | *) + classification[126] := transitionClassOther; (* } *) + classification[127] := transitionClassSingle; (* ~ *) + classification[128] := transitionClassInvalid; (* DEL *) + + i := 129; + while i <= 256 do + classification[i] := transitionClassOther; + INC(i) + end +end; + +proc compare_keyword(keyword: ARRAY OF CHAR, token_start: BufferPosition, token_end: PLexerBuffer) -> BOOLEAN; +var + result: BOOLEAN; + index: CARDINAL; + keyword_length: CARDINAL; + continue: BOOLEAN; +begin + index := 0; + result := true; + keyword_length := Length(keyword); + continue := (index < keyword_length) & (token_start.iterator <> token_end); + + while continue & result do + result := (keyword[index] = token_start.iterator^) or (Lower(keyword[index]) = token_start.iterator^); + INC(token_start.iterator); + INC(index); + continue := (index < keyword_length) & (token_start.iterator <> token_end) + end; + result := result & (index = Length(keyword)); + + return result & (token_start.iterator = token_end) +end; + +(* Reached the end of file. *) +proc transition_action_eof(lexer: PLexer, token: PLexerToken); +begin + token^.kind := lexerKindEof +end; + +proc increment(position: PBufferPosition); +begin + INC(position^.iterator) +end; + +(* Add the character to the token currently read and advance to the next character. *) +proc transition_action_accumulate(lexer: PLexer, token: PLexerToken); +begin + increment(ADR(lexer^.current)) +end; + +(* The current character is not a part of the token. Finish the token already + * read. Don't advance to the next character. *) +proc transition_action_finalize(lexer: PLexer, token: PLexerToken); +begin + if lexer^.start.iterator^ = ':' then + token^.kind := lexerKindColon + end; + if lexer^.start.iterator^ = '>' then + token^.kind := lexerKindGreaterThan + end; + if lexer^.start.iterator^ = '<' then + token^.kind := lexerKindLessThan + end; + if lexer^.start.iterator^ = '(' then + token^.kind := lexerKindLeftParen + end; + if lexer^.start.iterator^ = '-' then + token^.kind := lexerKindMinus + end; + if lexer^.start.iterator^ = '.' then + token^.kind := lexerKindDot + end +end; + +(* An action for tokens containing multiple characters. *) +proc transition_action_composite(lexer: PLexer, token: PLexerToken); +begin + if lexer^.start.iterator^ = '<' then + if lexer^.current.iterator^ = '>' then + token^.kind := lexerKindNotEqual + end; + if lexer^.current.iterator^ = '=' then + token^.kind := lexerKindLessEqual + end + end; + if (lexer^.start.iterator^ = '>') & (lexer^.current.iterator^ = '=') then + token^.kind := lexerKindGreaterEqual + end; + if (lexer^.start.iterator^ = '.') & (lexer^.current.iterator^ = '.') then + token^.kind := lexerKindRange + end; + if (lexer^.start.iterator^ = ':') & (lexer^.current.iterator^ = '=') then + token^.kind := lexerKindAssignment + end; + if (lexer^.start.iterator^ = '-') & (lexer^.current.iterator^ = '>') then + token^.kind := lexerKindArrow + end; + increment(ADR(lexer^.current)) +end; + +(* Skip a space. *) +proc transition_action_skip(lexer: PLexer, token: PLexerToken); +begin + increment(ADR(lexer^.start)); + + if ORD(lexer^.start.iterator^) = 10 then + INC(lexer^.start.location.line); + lexer^.start.location.column := 1 + end; + lexer^.current := lexer^.start +end; + +(* Delimited string action. *) +proc transition_action_delimited(lexer: PLexer, token: PLexerToken); +var + text_length: CARDINAL; +begin + if lexer^.start.iterator^ = '(' then + token^.kind := lexerKindComment + end; + if lexer^.start.iterator^ = '"' then + text_length := lexer^.current.iterator; + DEC(text_length, lexer^.start.iterator); + INC(text_length); + + MemZero(ADR(token^.stringKind), TSIZE(ShortString)); + MemCopy(lexer^.start.iterator, text_length, ADR(token^.stringKind)); + + token^.kind := lexerKindCharacter + end; + if lexer^.start.iterator^ = "'" then + text_length := lexer^.current.iterator; + DEC(text_length, lexer^.start.iterator); + INC(text_length); + + MemZero(ADR(token^.stringKind), TSIZE(ShortString)); + MemCopy(lexer^.start.iterator, text_length, ADR(token^.stringKind)); + + token^.kind := lexerKindString + end; + increment(ADR(lexer^.current)) +end; + +(* Finalize keyword or identifier. *) +proc transition_action_key_id(lexer: PLexer, token: PLexerToken); +begin + token^.kind := lexerKindIdentifier; + + token^.identifierKind[1] := lexer^.current.iterator; + DEC(token^.identifierKind[1], lexer^.start.iterator); + MemCopy(lexer^.start.iterator, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2])); + + if compare_keyword('program', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindProgram + end; + if compare_keyword('import', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindImport + end; + if compare_keyword('const', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindConst + end; + if compare_keyword('var', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindVar + end; + if compare_keyword('if', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindIf + end; + if compare_keyword('then', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindThen + end; + if compare_keyword('elsif', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindElsif + end; + if compare_keyword('else', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindElse + end; + if compare_keyword('while', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindWhile + end; + if compare_keyword('do', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindDo + end; + if compare_keyword('proc', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindProc + end; + if compare_keyword('begin', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindBegin + end; + if compare_keyword('end', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindEnd + end; + if compare_keyword('type', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindType + end; + if compare_keyword('record', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindRecord + end; + if compare_keyword('union', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindUnion + end; + if compare_keyword('NIL', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindNull + end; + if compare_keyword('or', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindOr + end; + if compare_keyword('return', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindReturn + end; + if compare_keyword('defer', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindDefer + end; + if compare_keyword('TO', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindTo + end; + if compare_keyword('CASE', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindCase + end; + if compare_keyword('OF', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindOf + end; + if compare_keyword('FROM', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindFrom + end; + if compare_keyword('module', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindModule + end; + if compare_keyword('xor', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindXor + end; + if compare_keyword('POINTER', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindPointer + end; + if compare_keyword('ARRAY', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindArray + end; + if compare_keyword('TRUE', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindBoolean; + token^.booleanKind := true + end; + if compare_keyword('FALSE', lexer^.start, lexer^.current.iterator) then + token^.kind := lexerKindBoolean; + token^.booleanKind := false + end +end; + +(* Action for tokens containing only one character. The character cannot be + * followed by other characters forming a composite token. *) +proc transition_action_single(lexer: PLexer, token: PLexerToken); +begin + if lexer^.current.iterator^ = '&' then + token^.kind := lexerKindAnd + end; + if lexer^.current.iterator^ = ';' then + token^.kind := lexerKindSemicolon + end; + if lexer^.current.iterator^ = ',' then + token^.kind := lexerKindComma + end; + if lexer^.current.iterator^ = '~' then + token^.kind := lexerKindTilde + end; + if lexer^.current.iterator^ = ')' then + token^.kind := lexerKindRightParen + end; + if lexer^.current.iterator^ = '[' then + token^.kind := lexerKindLeftSquare + end; + if lexer^.current.iterator^ = ']' then + token^.kind := lexerKindRightSquare + end; + if lexer^.current.iterator^ = '^' then + token^.kind := lexerKindHat + end; + if lexer^.current.iterator^ = '=' then + token^.kind := lexerKindEqual + end; + if lexer^.current.iterator^ = '+' then + token^.kind := lexerKindPlus + end; + if lexer^.current.iterator^ = '*' then + token^.kind := lexerKindAsterisk + end; + if lexer^.current.iterator^ = '/' then + token^.kind := lexerKindDivision + end; + if lexer^.current.iterator^ = '%' then + token^.kind := lexerKindRemainder + end; + if lexer^.current.iterator^ = '@' then + token^.kind := lexerKindAt + end; + if lexer^.current.iterator^ = '|' then + token^.kind := lexerKindPipe + end; + increment(ADR(lexer^.current.iterator)) +end; + +(* Handle an integer literal. *) +proc transition_action_integer(lexer: PLexer, token: PLexerToken); +var + buffer: String; + integer_length: CARDINAL; + found: BOOLEAN; +begin + token^.kind := lexerKindInteger; + + integer_length := lexer^.current.iterator; + DEC(integer_length, lexer^.start.iterator); + MemZero(ADR(token^.identifierKind), TSIZE(Identifier)); + MemCopy(lexer^.start.iterator, integer_length, ADR(token^.identifierKind[1])); + + buffer := InitStringCharStar(ADR(token^.identifierKind[1])); + token^.integerKind := StringToInteger(buffer, 10, found); + buffer := KillString(buffer) +end; + +proc set_default_transition(current_state: TransitionState, default_action: TransitionAction, next_state: TransitionState); +var + default_transition: Transition; +begin + default_transition.action := default_action; + default_transition.next_state := next_state; + + transitions[ORD(current_state) + 1][ORD(transitionClassInvalid) + 1] := default_transition; + transitions[ORD(current_state) + 1][ORD(transitionClassDigit) + 1] := default_transition; + transitions[ORD(current_state) + 1][ORD(transitionClassAlpha) + 1] := default_transition; + transitions[ORD(current_state) + 1][ORD(transitionClassSpace) + 1] := default_transition; + transitions[ORD(current_state) + 1][ORD(transitionClassColon) + 1] := default_transition; + transitions[ORD(current_state) + 1][ORD(transitionClassEquals) + 1] := default_transition; + transitions[ORD(current_state) + 1][ORD(transitionClassLeftParen) + 1] := default_transition; + transitions[ORD(current_state) + 1][ORD(transitionClassRightParen) + 1] := default_transition; + transitions[ORD(current_state) + 1][ORD(transitionClassAsterisk) + 1] := default_transition; + transitions[ORD(current_state) + 1][ORD(transitionClassUnderscore) + 1] := default_transition; + transitions[ORD(current_state) + 1][ORD(transitionClassSingle) + 1] := default_transition; + transitions[ORD(current_state) + 1][ORD(transitionClassHex) + 1] := default_transition; + transitions[ORD(current_state) + 1][ORD(transitionClassZero) + 1] := default_transition; + transitions[ORD(current_state) + 1][ORD(transitionClassX) + 1] := default_transition; + transitions[ORD(current_state) + 1][ORD(transitionClassEof) + 1] := default_transition; + transitions[ORD(current_state) + 1][ORD(transitionClassDot) + 1] := default_transition; + transitions[ORD(current_state) + 1][ORD(transitionClassMinus) + 1] := default_transition; + transitions[ORD(current_state) + 1][ORD(transitionClassSingleQuote) + 1] := default_transition; + transitions[ORD(current_state) + 1][ORD(transitionClassDoubleQuote) + 1] := default_transition; + transitions[ORD(current_state) + 1][ORD(transitionClassGreater) + 1] := default_transition; + transitions[ORD(current_state) + 1][ORD(transitionClassLess) + 1] := default_transition; + transitions[ORD(current_state) + 1][ORD(transitionClassOther) + 1] := default_transition +end; + +(* + * The transition table describes transitions from one state to another, given + * a symbol (character class). + * + * The table has m rows and n columns, where m is the amount of states and n is + * the amount of classes. So given the current state and a classified character + * the table can be used to look up the next state. + * + * Each cell is a word long. + * - The least significant byte of the word is a row number (beginning with 0). + * It specifies the target state. "ff" means that this is an end state and no + * transition is possible. + * - The next byte is the action that should be performed when transitioning. + * For the meaning of actions see labels in the lex_next function, which + * handles each action. + *) +proc initialize_transitions(); +begin + (* Start state. *) + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassInvalid) + 1].action := nil; + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassInvalid) + 1].next_state := transitionStateEnd; + + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDigit) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDigit) + 1].next_state := transitionStateDecimal; + + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAlpha) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAlpha) + 1].next_state := transitionStateIdentifier; + + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSpace) + 1].action := transition_action_skip; + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSpace) + 1].next_state := transitionStateStart; + + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassColon) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassColon) + 1].next_state := transitionStateColon; + + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEquals) + 1].action := transition_action_single; + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEquals) + 1].next_state := transitionStateEnd; + + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLeftParen) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLeftParen) + 1].next_state := transitionStateLeftParen; + + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassRightParen) + 1].action := transition_action_single; + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassRightParen) + 1].next_state := transitionStateEnd; + + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAsterisk) + 1].action := transition_action_single; + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAsterisk) + 1].next_state := transitionStateEnd; + + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassUnderscore) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassUnderscore) + 1].next_state := transitionStateIdentifier; + + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingle) + 1].action := transition_action_single; + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingle) + 1].next_state := transitionStateEnd; + + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassHex) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassHex) + 1].next_state := transitionStateIdentifier; + + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassZero) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassZero) + 1].next_state := transitionStateLeadingZero; + + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassX) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassX) + 1].next_state := transitionStateIdentifier; + + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEof) + 1].action := transition_action_eof; + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEof) + 1].next_state := transitionStateEnd; + + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDot) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDot) + 1].next_state := transitionStateDot; + + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassMinus) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassMinus) + 1].next_state := transitionStateMinus; + + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingleQuote) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingleQuote) + 1].next_state := transitionStateCharacter; + + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDoubleQuote) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDoubleQuote) + 1].next_state := transitionStateString; + + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassGreater) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassGreater) + 1].next_state := transitionStateGreater; + + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLess) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLess) + 1].next_state := transitionStateLess; + + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassOther) + 1].action := nil; + transitions[ORD(transitionStateStart) + 1][ORD(transitionClassOther) + 1].next_state := transitionStateEnd; + + (* Colon state. *) + set_default_transition(transitionStateColon, transition_action_finalize, transitionStateEnd); + + transitions[ORD(transitionStateColon) + 1][ORD(transitionClassEquals) + 1].action := transition_action_composite; + transitions[ORD(transitionStateColon) + 1][ORD(transitionClassEquals) + 1].next_state := transitionStateEnd; + + (* Identifier state. *) + set_default_transition(transitionStateIdentifier, transition_action_key_id, transitionStateEnd); + + transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassDigit) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassDigit) + 1].next_state := transitionStateIdentifier; + + transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassAlpha) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassAlpha) + 1].next_state := transitionStateIdentifier; + + transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassUnderscore) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassUnderscore) + 1].next_state := transitionStateIdentifier; + + transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassHex) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassHex) + 1].next_state := transitionStateIdentifier; + + transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassZero) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassZero) + 1].next_state := transitionStateIdentifier; + + transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassX) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassX) + 1].next_state := transitionStateIdentifier; + + (* Decimal state. *) + set_default_transition(transitionStateDecimal, transition_action_integer, transitionStateEnd); + + transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassDigit) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassDigit) + 1].next_state := transitionStateDecimal; + + transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassAlpha) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassAlpha) + 1].next_state := transitionStateDecimalSuffix; + + transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassUnderscore) + 1].action := nil; + transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassUnderscore) + 1].next_state := transitionStateEnd; + + transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassHex) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassHex) + 1].next_state := transitionStateDecimalSuffix; + + transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassZero) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassZero) + 1].next_state := transitionStateDecimal; + + transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassX) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassX) + 1].next_state := transitionStateDecimalSuffix; + + (* Greater state. *) + set_default_transition(transitionStateGreater, transition_action_finalize, transitionStateEnd); + + transitions[ORD(transitionStateGreater) + 1][ORD(transitionClassEquals) + 1].action := transition_action_composite; + transitions[ORD(transitionStateGreater) + 1][ORD(transitionClassEquals) + 1].next_state := transitionStateEnd; + + (* Minus state. *) + set_default_transition(transitionStateMinus, transition_action_finalize, transitionStateEnd); + + transitions[ORD(transitionStateMinus) + 1][ORD(transitionClassGreater) + 1].action := transition_action_composite; + transitions[ORD(transitionStateMinus) + 1][ORD(transitionClassGreater) + 1].next_state := transitionStateEnd; + + (* Left paren state. *) + set_default_transition(transitionStateLeftParen, transition_action_finalize, transitionStateEnd); + + transitions[ORD(transitionStateLeftParen) + 1][ORD(transitionClassAsterisk) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateLeftParen) + 1][ORD(transitionClassAsterisk) + 1].next_state := transitionStateComment; + + (* Less state. *) + set_default_transition(transitionStateLess, transition_action_finalize, transitionStateEnd); + + transitions[ORD(transitionStateLess) + 1][ORD(transitionClassEquals) + 1].action := transition_action_composite; + transitions[ORD(transitionStateLess) + 1][ORD(transitionClassEquals) + 1].next_state := transitionStateEnd; + + transitions[ORD(transitionStateLess) + 1][ORD(transitionClassGreater) + 1].action := transition_action_composite; + transitions[ORD(transitionStateLess) + 1][ORD(transitionClassGreater) + 1].next_state := transitionStateEnd; + + (* Hexadecimal after 0x. *) + set_default_transition(transitionStateDot, transition_action_finalize, transitionStateEnd); + + transitions[ORD(transitionStateDot) + 1][ORD(transitionClassDot) + 1].action := transition_action_composite; + transitions[ORD(transitionStateDot) + 1][ORD(transitionClassDot) + 1].next_state := transitionStateEnd; + + (* Comment. *) + set_default_transition(transitionStateComment, transition_action_accumulate, transitionStateComment); + + transitions[ORD(transitionStateComment) + 1][ORD(transitionClassAsterisk) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateComment) + 1][ORD(transitionClassAsterisk) + 1].next_state := transitionStateClosingComment; + + transitions[ORD(transitionStateComment) + 1][ORD(transitionClassEof) + 1].action := nil; + transitions[ORD(transitionStateComment) + 1][ORD(transitionClassEof) + 1].next_state := transitionStateEnd; + + (* Closing comment. *) + set_default_transition(transitionStateClosingComment, transition_action_accumulate, transitionStateComment); + + transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassInvalid) + 1].action := nil; + transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassInvalid) + 1].next_state := transitionStateEnd; + + transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassRightParen) + 1].action := transition_action_delimited; + transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassRightParen) + 1].next_state := transitionStateEnd; + + transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassAsterisk) + 1].action := transition_action_accumulate; + transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassAsterisk) + 1].next_state := transitionStateClosingComment; + + transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassEof) + 1].action := nil; + transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassEof) + 1].next_state := transitionStateEnd; + + (* Character. *) + set_default_transition(transitionStateCharacter, transition_action_accumulate, transitionStateCharacter); + + transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassInvalid) + 1].action := nil; + transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassInvalid) + 1].next_state := transitionStateEnd; + + transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassEof) + 1].action := nil; + transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassEof) + 1].next_state := transitionStateEnd; + + transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassSingleQuote) + 1].action := transition_action_delimited; + transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassSingleQuote) + 1].next_state := transitionStateEnd; + + (* String. *) + set_default_transition(transitionStateString, transition_action_accumulate, transitionStateString); + + transitions[ORD(transitionStateString) + 1][ORD(transitionClassInvalid) + 1].action := nil; + transitions[ORD(transitionStateString) + 1][ORD(transitionClassInvalid) + 1].next_state := transitionStateEnd; + + transitions[ORD(transitionStateString) + 1][ORD(transitionClassEof) + 1].action := nil; + transitions[ORD(transitionStateString) + 1][ORD(transitionClassEof) + 1].next_state := transitionStateEnd; + + transitions[ORD(transitionStateString) + 1][ORD(transitionClassDoubleQuote) + 1].action := transition_action_delimited; + transitions[ORD(transitionStateString) + 1][ORD(transitionClassDoubleQuote) + 1].next_state := transitionStateEnd; + + (* Leading zero. *) + set_default_transition(transitionStateLeadingZero, transition_action_integer, transitionStateEnd); + + transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassDigit) + 1].action := nil; + transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassDigit) + 1].next_state := transitionStateEnd; + + transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassAlpha) + 1].action := nil; + transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassAlpha) + 1].next_state := transitionStateEnd; + + transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassUnderscore) + 1].action := nil; + transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassUnderscore) + 1].next_state := transitionStateEnd; + + transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassHex) + 1].action := nil; + transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassHex) + 1].next_state := transitionStateEnd; + + transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassZero) + 1].action := nil; + transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassZero) + 1].next_state := transitionStateEnd; + + transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassX) + 1].action := nil; + transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassX) + 1].next_state := transitionStateEnd; + + (* Digit with a character suffix. *) + set_default_transition(transitionStateDecimalSuffix, transition_action_integer, transitionStateEnd); + + transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassAlpha) + 1].action := nil; + transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassAlpha) + 1].next_state := transitionStateEnd; + + transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassDigit) + 1].action := nil; + transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassDigit) + 1].next_state := transitionStateEnd; + + transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassHex) + 1].action := nil; + transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassHex) + 1].next_state := transitionStateEnd; + + transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassZero) + 1].action := nil; + transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassZero) + 1].next_state := transitionStateEnd; + + transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].action := nil; + transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].next_state := transitionStateEnd +end; + +proc lexer_initialize(lexer: PLexer, input: File); +begin + lexer^.input := input; + lexer^.length := 0; + + ALLOCATE(lexer^.buffer, CHUNK_SIZE); + MemZero(lexer^.buffer, CHUNK_SIZE); + lexer^.size := CHUNK_SIZE +end; + +proc lexer_current(lexer: PLexer) -> LexerToken; +var + current_class: TransitionClass; + current_state: TransitionState; + current_transition: Transition; + result: LexerToken; + index1: CARDINAL; + index2: CARDINAL; +begin + lexer^.current := lexer^.start; + current_state := transitionStateStart; + + while current_state <> transitionStateEnd DO + index1 := ORD(lexer^.current.iterator^); + INC(index1); + current_class := classification[index1]; + + index1 := ORD(current_state); + INC(index1); + index2 := ORD(current_class); + INC(index2); + + current_transition := transitions[index1][index2]; + if current_transition.action <> nil then + current_transition.action(lexer, ADR(result)) + end; + current_state := current_transition.next_state + end; + result.start_location := lexer^.start.location; + result.end_location := lexer^.current.location; + + return result +end; + +proc lexer_lex(lexer: PLexer) -> LexerToken; +var + result: LexerToken; +begin + if lexer^.length = 0 then + lexer^.length := ReadNBytes(lexer^.input, CHUNK_SIZE, lexer^.buffer); + lexer^.current.location.column := 1; + lexer^.current.location.line := 1; + lexer^.current.iterator := lexer^.buffer + end; + lexer^.start := lexer^.current; + + result := lexer_current(lexer); + return result +end; + +proc lexer_destroy(lexer: PLexer); +begin + DEALLOCATE(lexer^.buffer, lexer^.size) +end; + +begin + initialize_classification(); + initialize_transitions() +end. diff --git a/source/Parser.def b/source/Parser.def new file mode 100644 index 0000000..a766e8e --- /dev/null +++ b/source/Parser.def @@ -0,0 +1,200 @@ +DEFINITION MODULE Parser; + +FROM Common IMPORT Identifier, PIdentifier, ShortString; +FROM Lexer IMPORT PLexer; + +TYPE + Parser = RECORD + lexer: PLexer + END; + PParser = POINTER TO Parser; + + AstLiteralKind = ( + astLiteralKindInteger, + astLiteralKindString, + astLiteralKindNull, + astLiteralKindBoolean + ); + AstLiteral = RECORD + CASE kind: AstLiteralKind OF + astLiteralKindInteger: integer: INTEGER | + astLiteralKindString: string: ShortString | + astLiteralKindNull: | + astLiteralKindBoolean: boolean: BOOLEAN + END + END; + PAstLiteral = POINTER TO AstLiteral; + + AstUnaryOperator = ( + astUnaryOperatorReference, + astUnaryOperatorNot, + astUnaryOperatorMinus + ); + AstBinaryOperator = ( + astBinaryOperatorSum, + astBinaryOperatorSubtraction, + astBinaryOperatorMultiplication, + astBinaryOperatorDivision, + astBinaryOperatorRemainder, + astBinaryOperatorEquals, + astBinaryOperatorNotEquals, + astBinaryOperatorLess, + astBinaryOperatorGreater, + astBinaryOperatorLessEqual, + astBinaryOperatorGreaterEqual, + astBinaryOperatorDisjunction, + astBinaryOperatorConjunction, + astBinaryOperatorExclusiveDisjunction, + astBinaryOperatorShiftLeft, + astBinaryOperatorShiftRight + ); + + AstExpressionKind = ( + astExpressionKindLiteral, + astExpressionKindIdentifier, + astExpressionKindArrayAccess, + astExpressionKindDereference, + astExpressionKindFieldAccess, + astExpressionKindUnary, + astExpressionKindBinary, + astExpressionKindCall + ); + AstExpression = RECORD + CASE kind: AstExpressionKind OF + astExpressionKindLiteral: literal: PAstLiteral | + astExpressionKindIdentifier: identifier: Identifier | + astExpressionKindDereference: reference: PAstExpression | + astExpressionKindArrayAccess: + array: PAstExpression; + index: PAstExpression | + astExpressionKindFieldAccess: + aggregate: PAstExpression; + field: Identifier | + astExpressionKindUnary: + unary_operator: AstUnaryOperator; + unary_operand: PAstExpression | + astExpressionKindBinary: + binary_operator: AstBinaryOperator; + lhs: PAstExpression; + rhs: PAstExpression | + astExpressionKindCall: + callable: PAstExpression; + argument_count: CARDINAL; + arguments: PPAstExpression + END + END; + PAstExpression = POINTER TO AstExpression; + PPAstExpression = POINTER TO PAstExpression; + + AstStatementKind = ( + astStatementKindIf, + astStatementKindWhile, + astStatementKindAssignment, + astStatementKindReturn, + astStatementKindCall + ); + AstStatement = RECORD + CASE kind: AstStatementKind OF + astStatementKindIf: + if_condition: PAstExpression; + if_branch: AstCompoundStatement | + astStatementKindWhile: + while_condition: PAstExpression; + while_body: AstCompoundStatement | + astStatementKindAssignment: + assignee: PAstExpression; + assignment: PAstExpression | + astStatementKindReturn: returned: PAstExpression | + astStatementKindCall: call: PAstExpression + END + END; + PAstStatement = POINTER TO AstStatement; + PPAstStatement = POINTER TO PAstStatement; + AstCompoundStatement = RECORD + count: CARDINAL; + statements: PPAstStatement + END; + + AstImportStatement = RECORD + package: Identifier; + symbols: PIdentifier + END; + PAstImportStatement = POINTER TO AstImportStatement; + PPAstImportStatement = POINTER TO PAstImportStatement; + + AstConstantDeclaration = RECORD + constant_name: Identifier; + constant_value: INTEGER + END; + PAstConstantDeclaration = POINTER TO AstConstantDeclaration; + PPAstConstantDeclaration = POINTER TO PAstConstantDeclaration; + + AstFieldDeclaration = RECORD + field_name: Identifier; + field_type: PAstTypeExpression + END; + PAstFieldDeclaration = POINTER TO AstFieldDeclaration; + + AstTypeExpressionKind = ( + astTypeExpressionKindNamed, + astTypeExpressionKindRecord, + astTypeExpressionKindEnumeration, + astTypeExpressionKindArray, + astTypeExpressionKindPointer, + astTypeExpressionKindProcedure + ); + AstTypeExpression = RECORD + CASE kind: AstTypeExpressionKind OF + astTypeExpressionKindNamed: name: Identifier | + astTypeExpressionKindEnumeration: cases: PIdentifier | + astTypeExpressionKindPointer: target: PAstTypeExpression | + astTypeExpressionKindRecord: fields: PAstFieldDeclaration | + astTypeExpressionKindArray: + base: PAstTypeExpression; + length: CARDINAL | + astTypeExpressionKindProcedure: parameters: PPAstTypeExpression + END + END; + PAstTypeExpression = POINTER TO AstTypeExpression; + PPAstTypeExpression = POINTER TO PAstTypeExpression; + + AstTypedDeclaration = RECORD + identifier: Identifier; + type_expression: PAstTypeExpression + END; + PAstTypedDeclaration = POINTER TO AstTypedDeclaration; + PPAstTypedDeclaration = POINTER TO PAstTypedDeclaration; + + AstVariableDeclaration = RECORD + variable_name: Identifier; + variable_type: PAstTypeExpression + END; + PAstVariableDeclaration = POINTER TO AstVariableDeclaration; + PPAstVariableDeclaration = POINTER TO PAstVariableDeclaration; + + AstProcedureDeclaration = RECORD + name: Identifier; + parameter_count: CARDINAL; + parameters: PAstTypedDeclaration; + return_type: PAstTypeExpression; + constants: PPAstConstantDeclaration; + variables: PPAstVariableDeclaration; + statements: AstCompoundStatement + END; + PAstProcedureDeclaration = POINTER TO AstProcedureDeclaration; + PPAstProcedureDeclaration = POINTER TO PAstProcedureDeclaration; + + AstModule = RECORD + main: BOOLEAN; + imports: PPAstImportStatement; + constants: PPAstConstantDeclaration; + types: PPAstTypedDeclaration; + variables: PPAstVariableDeclaration; + procedures: PPAstProcedureDeclaration; + statements: AstCompoundStatement + END; + PAstModule = POINTER TO AstModule; + +PROCEDURE parse(lexer: PLexer): PAstModule; + +END Parser. diff --git a/source/Parser.elna b/source/Parser.elna new file mode 100644 index 0000000..d870036 --- /dev/null +++ b/source/Parser.elna @@ -0,0 +1,1008 @@ +module; + +from FIO import ReadNBytes; +from SYSTEM import TSIZE, ADR; + +from MemUtils import MemZero; +from Storage import ALLOCATE, REALLOCATE; + +from Lexer import Lexer, LexerKind, LexerToken, lexer_current, lexer_lex; + +(* Calls lexer_lex() but skips the comments. *) +proc parser_lex(lexer: PLexer) -> LexerToken; +var + result: LexerToken; +begin + result := lexer_lex(lexer); + + while result.kind = lexerKindComment do + result := lexer_lex(lexer) + end; + + return result +end; + +proc parse_type_fields(parser: PParser) -> PAstFieldDeclaration; +var + token: LexerToken; + field_declarations: PAstFieldDeclaration; + field_count: CARDINAL; + current_field: PAstFieldDeclaration; +begin + ALLOCATE(field_declarations, TSIZE(AstFieldDeclaration)); + token := parser_lex(parser^.lexer); + field_count := 0; + + while token.kind <> lexerKindEnd do + INC(field_count); + INC(field_count); + REALLOCATE(field_declarations, TSIZE(AstFieldDeclaration) * field_count); + DEC(field_count); + current_field := field_declarations; + INC(current_field , TSIZE(AstFieldDeclaration) * (field_count - 1)); + + token := parser_lex(parser^.lexer); + + current_field^.field_name := token.identifierKind; + + token := parser_lex(parser^.lexer); + current_field^.field_type := parse_type_expression(parser); + token := parser_lex(parser^.lexer); + + if token.kind = lexerKindSemicolon then + token := parser_lex(parser^.lexer) + end + end; + INC(current_field, TSIZE(AstFieldDeclaration)); + MemZero(current_field, TSIZE(AstFieldDeclaration)); + + return field_declarations +end; + +proc parse_record_type(parser: PParser) -> PAstTypeExpression; +var + result: PAstTypeExpression; +begin + NEW(result); + result^.kind := astTypeExpressionKindRecord; + result^.fields := parse_type_fields(parser); + + return result +end; + +proc parse_pointer_type(parser: PParser) -> PAstTypeExpression; +var + token: LexerToken; + result: PAstTypeExpression; +begin + NEW(result); + result^.kind := astTypeExpressionKindPointer; + + token := lexer_current(parser^.lexer); + + if token.kind = lexerKindPointer then + token := parser_lex(parser^.lexer) + end; + token := lexer_current(parser^.lexer); + result^.target := parse_type_expression(parser); + + return result +end; + +proc parse_array_type(parser: PParser) -> PAstTypeExpression; +var + token: LexerToken; + buffer: [20]CHAR; + result: PAstTypeExpression; +begin + NEW(result); + result^.kind := astTypeExpressionKindArray; + result^.length := 0; + + token := lexer_current(parser^.lexer); + + if token.kind = lexerKindArray then + token := parser_lex(parser^.lexer) + end; + if token.kind <> lexerKindOf then + token := parser_lex(parser^.lexer); + + result^.length := token.integerKind; + + token := parser_lex(parser^.lexer) + end; + token := parser_lex(parser^.lexer); + result^.base := parse_type_expression(parser); + + return result +end; + +proc parse_enumeration_type(parser: PParser) -> PAstTypeExpression; +var + token: LexerToken; + result: PAstTypeExpression; + current_case: PIdentifier; + case_count: CARDINAL; +begin + NEW(result); + result^.kind := astTypeExpressionKindEnumeration; + + case_count := 1; + ALLOCATE(result^.cases, TSIZE(Identifier) * 2); + token := parser_lex(parser^.lexer); + current_case := result^.cases; + current_case^ := token.identifierKind; + + token := parser_lex(parser^.lexer); + + while token.kind = lexerKindComma do + token := parser_lex(parser^.lexer); + + INC(case_count); + INC(case_count); + REALLOCATE(result^.cases, TSIZE(Identifier) * case_count); + DEC(case_count); + current_case := result^.cases; + INC(current_case, TSIZE(Identifier) * (case_count - 1)); + current_case^ := token.identifierKind; + + token := parser_lex(parser^.lexer) + end; + INC(current_case, TSIZE(Identifier)); + MemZero(current_case, TSIZE(Identifier)); + + return result +end; + +proc parse_named_type(parser: PParser) -> PAstTypeExpression; +var + token: LexerToken; + result: PAstTypeExpression; +begin + token := lexer_current(parser^.lexer); + NEW(result); + + result^.kind := astTypeExpressionKindNamed; + result^.name := token.identifierKind; + + return result +end; + +proc parse_procedure_type(parser: PParser) -> PAstTypeExpression; +var + token: LexerToken; + result: PAstTypeExpression; + current_parameter: PPAstTypeExpression; + parameter_count: CARDINAL; +begin + parameter_count := 0; + NEW(result); + result^.kind := astTypeExpressionKindProcedure; + + ALLOCATE(result^.parameters, 1); + + token := parser_lex(parser^.lexer); + token := parser_lex(parser^.lexer); + + while token.kind <> lexerKindRightParen do + INC(parameter_count); + INC(parameter_count); + REALLOCATE(result^.parameters, TSIZE(PAstTypeExpression) * parameter_count); + DEC(parameter_count); + current_parameter := result^.parameters; + INC(current_parameter, TSIZE(PAstTypeExpression) * (parameter_count - 1)); + + current_parameter^ := parse_type_expression(parser); + + token := parser_lex(parser^.lexer); + if token.kind = lexerKindComma then + token := parser_lex(parser^.lexer) + end + end; + current_parameter := result^.parameters; + INC(current_parameter, TSIZE(PAstTypeExpression) * parameter_count); + current_parameter^ := nil; + + return result +end; + +proc parse_type_expression(parser: PParser) -> PAstTypeExpression; +var + token: LexerToken; + result: PAstTypeExpression; +begin + result := nil; + token := lexer_current(parser^.lexer); + + if token.kind = lexerKindRecord then + result := parse_record_type(parser) + end; + if token.kind = lexerKindLeftParen then + result := parse_enumeration_type(parser) + end; + if (token.kind = lexerKindArray) or (token.kind = lexerKindLeftSquare) then + result := parse_array_type(parser) + end; + if token.kind = lexerKindHat then + result := parse_pointer_type(parser) + end; + if token.kind = lexerKindProc then + result := parse_procedure_type(parser) + end; + if token.kind = lexerKindIdentifier then + result := parse_named_type(parser) + end; + return result +end; + +proc parse_type_declaration(parser: PParser) -> PAstTypedDeclaration; +var + token: LexerToken; + result: PAstTypedDeclaration; +begin + token := lexer_current(parser^.lexer); + + NEW(result); + result^.identifier := token.identifierKind; + + token := parser_lex(parser^.lexer); + token := parser_lex(parser^.lexer); + + result^.type_expression := parse_type_expression(parser); + token := parser_lex(parser^.lexer); + + return result +end; + +proc parse_type_part(parser: PParser) -> PPAstTypedDeclaration; +var + token: LexerToken; + result: PPAstTypedDeclaration; + current_declaration: PPAstTypedDeclaration; + declaration_count: CARDINAL; +begin + token := lexer_current(parser^.lexer); + + ALLOCATE(result, TSIZE(PAstTypedDeclaration)); + current_declaration := result; + declaration_count := 0; + + if token.kind = lexerKindType then + token := parser_lex(parser^.lexer); + + while token.kind = lexerKindIdentifier do + INC(declaration_count); + + REALLOCATE(result, TSIZE(PAstTypedDeclaration) * (declaration_count + 1)); + current_declaration := result; + INC(current_declaration, TSIZE(PAstTypedDeclaration) * (declaration_count - 1)); + + current_declaration^ := parse_type_declaration(parser); + token := parser_lex(parser^.lexer) + end + end; + if declaration_count <> 0 then + INC(current_declaration, TSIZE(PAstTypedDeclaration)) + end; + current_declaration^ := nil; + + return result +end; + +proc parse_variable_declaration(parser: PParser) -> PAstVariableDeclaration; +var + token: LexerToken; + result: PAstVariableDeclaration; +begin + NEW(result); + + token := lexer_current(parser^.lexer); + result^.variable_name := token.identifierKind; + + token := parser_lex(parser^.lexer); + + token := parser_lex(parser^.lexer); + result^.variable_type := parse_type_expression(parser); + + token := parser_lex(parser^.lexer); + return result +end; + +proc parse_variable_part(parser: PParser) -> PPAstVariableDeclaration; +var + token: LexerToken; + result: PPAstVariableDeclaration; + current_declaration: PPAstVariableDeclaration; + declaration_count: CARDINAL; +begin + token := lexer_current(parser^.lexer); + + ALLOCATE(result, TSIZE(PAstVariableDeclaration)); + current_declaration := result; + declaration_count := 0; + + if token.kind = lexerKindVar then + token := parser_lex(parser^.lexer); + + while token.kind = lexerKindIdentifier do + INC(declaration_count); + + REALLOCATE(result, TSIZE(PAstVariableDeclaration) * (declaration_count + 1)); + current_declaration := result; + INC(current_declaration, TSIZE(PAstVariableDeclaration) * (declaration_count - 1)); + + current_declaration^ := parse_variable_declaration(parser); + token := parser_lex(parser^.lexer) + end + end; + if declaration_count <> 0 then + INC(current_declaration, TSIZE(PAstVariableDeclaration)) + end; + current_declaration^ := nil; + + return result +end; + +proc parse_constant_declaration(parser: PParser) -> PAstConstantDeclaration; +var + token: LexerToken; + result: PAstConstantDeclaration; +begin + NEW(result); + + token := lexer_current(parser^.lexer); + result^.constant_name := token.identifierKind; + + token := parser_lex(parser^.lexer); + + token := parser_lex(parser^.lexer); + result^.constant_value := token.integerKind; + + token := parser_lex(parser^.lexer); + + return result +end; + +proc parse_constant_part(parser: PParser) -> PPAstConstantDeclaration; +var + token: LexerToken; + result: PPAstConstantDeclaration; + current_declaration: PPAstConstantDeclaration; + declaration_count: CARDINAL; +begin + token := lexer_current(parser^.lexer); + + ALLOCATE(result, TSIZE(PAstConstantDeclaration)); + current_declaration := result; + declaration_count := 0; + + if token.kind = lexerKindConst then + token := parser_lex(parser^.lexer); + + while token.kind = lexerKindIdentifier do + INC(declaration_count); + + REALLOCATE(result, TSIZE(PAstConstantDeclaration) * (declaration_count + 1)); + current_declaration := result; + INC(current_declaration, TSIZE(PAstConstantDeclaration) * (declaration_count - 1)); + + current_declaration^ := parse_constant_declaration(parser); + token := parser_lex(parser^.lexer) + end + end; + if declaration_count <> 0 then + INC(current_declaration, TSIZE(PAstConstantDeclaration)) + end; + current_declaration^ := nil; + + return result +end; + +proc parse_import_statement(parser: PParser) -> PAstImportStatement; +var + result: PAstImportStatement; + token: LexerToken; + symbol_count: CARDINAL; + current_symbol: PIdentifier; +begin + NEW(result); + symbol_count := 1; + + token := parser_lex(parser^.lexer); + result^.package := token.identifierKind; + + token := parser_lex(parser^.lexer); + ALLOCATE(result^.symbols, TSIZE(Identifier) * 2); + + current_symbol := result^.symbols; + + token := parser_lex(parser^.lexer); + current_symbol^ := token.identifierKind; + + token := parser_lex(parser^.lexer); + while token.kind <> lexerKindSemicolon do + token := parser_lex(parser^.lexer); + INC(symbol_count); + + REALLOCATE(result^.symbols, TSIZE(Identifier) * (symbol_count + 1)); + current_symbol := result^.symbols; + INC(current_symbol, TSIZE(Identifier) * (symbol_count - 1)); + + current_symbol^ := token.identifierKind; + token := parser_lex(parser^.lexer) + end; + INC(current_symbol, TSIZE(Identifier)); + MemZero(current_symbol, TSIZE(Identifier)); + + token := parser_lex(parser^.lexer); + + return result +end; + +proc parse_import_part(parser: PParser) -> PPAstImportStatement; +var + token: LexerToken; + import_statement: PPAstImportStatement; + result: PPAstImportStatement; + import_count: CARDINAL; +begin + token := lexer_current(parser^.lexer); + ALLOCATE(result, TSIZE(PAstImportStatement)); + import_statement := result; + import_count := 0; + + while token.kind = lexerKindFrom do + INC(import_count); + + REALLOCATE(result, TSIZE(PAstImportStatement) * (import_count + 1)); + import_statement := result; + INC(import_statement, TSIZE(PAstImportStatement) * (import_count - 1)); + + import_statement^ := parse_import_statement(parser); + token := lexer_current(parser^.lexer) + end; + if import_count > 0 then + INC(import_statement, TSIZE(PAstImportStatement)) + end; + import_statement^ := nil; + + return result +end; + +proc parse_literal(parser: PParser) -> PAstLiteral; +var + literal: PAstLiteral; + token: LexerToken; +begin + literal := nil; + token := lexer_current(parser^.lexer); + + if token.kind = lexerKindInteger then + NEW(literal); + + literal^.kind := astLiteralKindInteger; + literal^.integer := token.integerKind + end; + if (token.kind = lexerKindCharacter) or (token.kind = lexerKindString) then + NEW(literal); + + literal^.kind := astLiteralKindString; + literal^.string := token.stringKind + end; + if token.kind = lexerKindNull then + NEW(literal); + + literal^.kind := astLiteralKindNull + end; + if token.kind = lexerKindBoolean then + NEW(literal); + + literal^.kind := astLiteralKindBoolean; + literal^.boolean := token.booleanKind + end; + if literal <> nil then + token := parser_lex(parser^.lexer) + end; + + return literal +end; + +proc parse_factor(parser: PParser) -> PAstExpression; +var + next_token: LexerToken; + result: PAstExpression; + literal: PAstLiteral; +begin + result := nil; + next_token := lexer_current(parser^.lexer); + + literal := parse_literal(parser); + + if (result = nil) & (literal <> nil) then + NEW(result); + + result^.kind := astExpressionKindLiteral; + result^.literal := literal + end; + if (result = nil) & (next_token.kind = lexerKindMinus) then + NEW(result); + next_token := parser_lex(parser^.lexer); + + result^.kind := astExpressionKindUnary; + result^.unary_operator := astUnaryOperatorMinus; + result^.unary_operand := parse_factor(parser) + end; + if (result = nil) & (next_token.kind = lexerKindTilde) then + NEW(result); + next_token := parser_lex(parser^.lexer); + + result^.kind := astExpressionKindUnary; + result^.unary_operator := astUnaryOperatorNot; + result^.unary_operand := parse_factor(parser) + end; + if (result = nil) & (next_token.kind = lexerKindLeftParen) then + next_token := parser_lex(parser^.lexer); + result := parse_expression(parser); + if result <> nil then + next_token := parser_lex(parser^.lexer) + end + end; + if (result = nil) & (next_token.kind = lexerKindIdentifier) then + NEW(result); + + result^.kind := astExpressionKindIdentifier; + result^.identifier := next_token.identifierKind; + + next_token := parser_lex(parser^.lexer) + end; + + return result +end; + +proc parse_designator(parser: PParser) -> PAstExpression; +var + next_token: LexerToken; + inner_expression: PAstExpression; + designator: PAstExpression; + arguments: PPAstExpression; + handled: BOOLEAN; +begin + designator := parse_factor(parser); + handled := designator <> nil; + next_token := lexer_current(parser^.lexer); + + while handled do + inner_expression := designator; + handled := false; + + if ~handled & (next_token.kind = lexerKindHat) then + NEW(designator); + + designator^.kind := astExpressionKindDereference; + designator^.reference := inner_expression; + + next_token := parser_lex(parser^.lexer); + handled := true + end; + if ~handled & (next_token.kind = lexerKindLeftSquare) then + NEW(designator); + next_token := parser_lex(parser^.lexer); + + designator^.kind := astExpressionKindArrayAccess; + designator^.array := inner_expression; + designator^.index := parse_expression(parser); + + next_token := parser_lex(parser^.lexer); + handled := true + end; + if ~handled & (next_token.kind = lexerKindDot) then + NEW(designator); + next_token := parser_lex(parser^.lexer); + + designator^.kind := astExpressionKindFieldAccess; + designator^.aggregate := inner_expression; + designator^.field := next_token.identifierKind; + + next_token := parser_lex(parser^.lexer); + handled := true + end; + if ~handled & (next_token.kind = lexerKindLeftParen) then + NEW(designator); + next_token := parser_lex(parser^.lexer); + + designator^.kind := astExpressionKindCall; + designator^.callable := inner_expression; + designator^.argument_count := 0; + designator^.arguments := nil; + + if next_token.kind <> lexerKindRightParen then + ALLOCATE(designator^.arguments, TSIZE(PAstExpression)); + designator^.argument_count := 1; + designator^.arguments^ := parse_expression(parser); + + next_token := lexer_current(parser^.lexer); + + while next_token.kind = lexerKindComma do + next_token := parser_lex(parser^.lexer); + + designator^.argument_count := designator^.argument_count + 1; + REALLOCATE(designator^.arguments, TSIZE(PAstExpression) * designator^.argument_count); + arguments := designator^.arguments; + INC(arguments, TSIZE(PAstExpression) * (designator^.argument_count - 1)); + arguments^ := parse_expression(parser); + + next_token := lexer_current(parser^.lexer) + end + end; + + next_token := parser_lex(parser^.lexer); + handled := true + end + end; + + return designator +end; + +proc parse_binary_expression(parser: PParser, left: PAstExpression, operator: AstBinaryOperator) -> PAstExpression; +var + next_token: LexerToken; + result: PAstExpression; + right: PAstExpression; +begin + next_token := parser_lex(parser^.lexer); + right := parse_designator(parser); + result := nil; + + if right <> nil then + NEW(result); + result^.kind := astExpressionKindBinary; + result^.binary_operator := operator; + result^.lhs := left; + result^.rhs := right + end; + + return result +end; + +proc parse_expression(parser: PParser) -> PAstExpression; +var + next_token: LexerToken; + left: PAstExpression; + result: PAstExpression; + written_bytes: CARDINAL; +begin + left := parse_designator(parser); + result := nil; + next_token := lexer_current(parser^.lexer); + + if left <> nil then + if (result = nil) & (next_token.kind = lexerKindNotEqual) then + result := parse_binary_expression(parser, left, astBinaryOperatorNotEquals) + end; + if (result = nil) & (next_token.kind = lexerKindEqual) then + result := parse_binary_expression(parser, left, astBinaryOperatorEquals) + end; + if (result = nil) & (next_token.kind = lexerKindGreaterThan) then + result := parse_binary_expression(parser, left, astBinaryOperatorGreater) + end; + if (result = nil) & (next_token.kind = lexerKindLessThan) then + result := parse_binary_expression(parser, left, astBinaryOperatorLess) + end; + if (result = nil) & (next_token.kind = lexerKindGreaterEqual) then + result := parse_binary_expression(parser, left, astBinaryOperatorGreaterEqual) + end; + if (result = nil) & (next_token.kind = lexerKindLessEqual) then + result := parse_binary_expression(parser, left, astBinaryOperatorLessEqual) + end; + if (result = nil) & (next_token.kind = lexerKindAnd) then + result := parse_binary_expression(parser, left, astBinaryOperatorConjunction) + end; + if (result = nil) & (next_token.kind = lexerKindOr) then + result := parse_binary_expression(parser, left, astBinaryOperatorDisjunction) + end; + if (result = nil) & (next_token.kind = lexerKindMinus) then + result := parse_binary_expression(parser, left, astBinaryOperatorSubtraction) + end; + if (result = nil) & (next_token.kind = lexerKindPlus) then + result := parse_binary_expression(parser, left, astBinaryOperatorSum) + end; + if (result = nil) & (next_token.kind = lexerKindAsterisk) then + result := parse_binary_expression(parser, left, astBinaryOperatorMultiplication) + end + end; + if (result = nil) & (left <> nil) then + result := left + end; + + return result +end; + +proc parse_return_statement(parser: PParser) -> PAstStatement; +var + token: LexerToken; + result: PAstStatement; +begin + NEW(result); + result^.kind := astStatementKindReturn; + + token := parser_lex(parser^.lexer); + result^.returned := parse_expression(parser); + + return result +end; + +proc parse_assignment_statement(parser: PParser, assignee: PAstExpression) -> PAstStatement; +var + token: LexerToken; + result: PAstStatement; +begin + NEW(result); + result^.kind := astStatementKindAssignment; + result^.assignee := assignee; + + token := parser_lex(parser^.lexer); + result^.assignment := parse_expression(parser); + + return result +end; + +proc parse_call_statement(parser: PParser, call: PAstExpression) -> PAstStatement; +var + result: PAstStatement; +begin + NEW(result); + result^.kind := astStatementKindCall; + result^.call := call; + + return result +end; + +proc parse_compound_statement(parser: PParser) -> AstCompoundStatement; +var + result: AstCompoundStatement; + token: LexerToken; + current_statement: PPAstStatement; + old_count: CARDINAL; +begin + result.count := 0; + result.statements := nil; + + token := lexer_current(parser^.lexer); + + while token.kind <> lexerKindEnd do + old_count := result.count; + INC(result.count); + + REALLOCATE(result.statements, TSIZE(PAstStatement) * result.count); + current_statement := result.statements; + + INC(current_statement, TSIZE(PAstStatement) * old_count); + current_statement^ := parse_statement(parser); + + token := lexer_current(parser^.lexer) + end; + + return result +end; + +proc parse_statement(parser: PParser) -> PAstStatement; +var + token: LexerToken; + statement: PAstStatement; + designator: PAstExpression; +begin + statement := nil; + token := parser_lex(parser^.lexer); + + if token.kind = lexerKindIf then + statement := parse_if_statement(parser) + end; + if token.kind = lexerKindWhile then + statement := parse_while_statement(parser) + end; + if token.kind = lexerKindReturn then + statement := parse_return_statement(parser) + end; + if token.kind = lexerKindIdentifier then + designator := parse_designator(parser); + token := lexer_current(parser^.lexer); + + if token.kind = lexerKindAssignment then + statement := parse_assignment_statement(parser, designator) + end; + if token.kind <> lexerKindAssignment then + statement := parse_call_statement(parser, designator) + end + end; + return statement +end; + +proc parse_if_statement(parser: PParser) -> PAstStatement; +var + token: LexerToken; + result: PAstStatement; +begin + NEW(result); + result^.kind := astStatementKindIf; + + token := parser_lex(parser^.lexer); + result^.if_condition := parse_expression(parser); + result^.if_branch := parse_compound_statement(parser); + + token := parser_lex(parser^.lexer); + return result +end; + +proc parse_while_statement(parser: PParser) -> PAstStatement; +var + token: LexerToken; + result: PAstStatement; +begin + NEW(result); + result^.kind := astStatementKindWhile; + + token := parser_lex(parser^.lexer); + result^.while_condition := parse_expression(parser); + result^.while_body := parse_compound_statement(parser); + + token := parser_lex(parser^.lexer); + return result +end; + +proc parse_statement_part(parser: PParser) -> AstCompoundStatement; +var + token: LexerToken; + compound: AstCompoundStatement; +begin + compound.count := 0; + compound.statements := nil; + token := lexer_current(parser^.lexer); + + if token.kind = lexerKindBegin then + compound := parse_compound_statement(parser) + end; + + return compound +end; + +proc parse_procedure_heading(parser: PParser) -> PAstProcedureDeclaration; +var + token: LexerToken; + declaration: PAstProcedureDeclaration; + parameter_index: CARDINAL; + current_parameter: PAstTypedDeclaration; +begin + NEW(declaration); + + token := parser_lex(parser^.lexer); + declaration^.name := token.identifierKind; + + token := parser_lex(parser^.lexer); + + declaration^.parameters := nil; + declaration^.parameter_count := 0; + + token := parser_lex(parser^.lexer); + while token.kind <> lexerKindRightParen do + parameter_index := declaration^.parameter_count; + INC(declaration^.parameter_count); + REALLOCATE(declaration^.parameters, TSIZE(AstTypedDeclaration) * declaration^.parameter_count); + + current_parameter := declaration^.parameters; + INC(current_parameter, TSIZE(AstTypedDeclaration) * parameter_index); + + current_parameter^.identifier := token.identifierKind; + + token := parser_lex(parser^.lexer); + token := parser_lex(parser^.lexer); + + current_parameter^.type_expression := parse_type_expression(parser); + + token := parser_lex(parser^.lexer); + if token.kind = lexerKindComma then + token := parser_lex(parser^.lexer) + end + end; + token := parser_lex(parser^.lexer); + declaration^.return_type := nil; + + (* Check for the return type and write it. *) + if token.kind = lexerKindArrow then + token := parser_lex(parser^.lexer); + declaration^.return_type := parse_type_expression(parser); + token := parser_lex(parser^.lexer) + end; + token := parser_lex(parser^.lexer); + + return declaration +end; + +proc parse_procedure_declaration(parser: PParser) -> PAstProcedureDeclaration; +var + token: LexerToken; + declaration: PAstProcedureDeclaration; +begin + declaration := parse_procedure_heading(parser); + + declaration^.constants := parse_constant_part(parser); + declaration^.variables := parse_variable_part(parser); + declaration^.statements := parse_statement_part(parser); + + token := parser_lex(parser^.lexer); + token := parser_lex(parser^.lexer); + + return declaration +end; + +proc parse_procedure_part(parser: PParser) -> PPAstProcedureDeclaration; +var + token: LexerToken; + current_declaration: PPAstProcedureDeclaration; + result: PPAstProcedureDeclaration; + declaration_count: CARDINAL; + declaration_index: CARDINAL; +begin + token := lexer_current(parser^.lexer); + declaration_count := 0; + declaration_index := 0; + + ALLOCATE(result, TSIZE(PAstProcedureDeclaration)); + + while token.kind = lexerKindProc do + INC(declaration_count); + REALLOCATE(result, TSIZE(PAstProcedureDeclaration) * (declaration_count + 1)); + current_declaration := result; + INC(current_declaration, TSIZE(PAstProcedureDeclaration) * declaration_index); + + current_declaration^ := parse_procedure_declaration(parser); + token := lexer_current(parser^.lexer); + declaration_index := declaration_count + end; + current_declaration := result; + INC(current_declaration, TSIZE(PAstProcedureDeclaration) * declaration_index); + current_declaration^ := nil; + + return result +end; + +proc parse_module(parser: PParser) -> PAstModule; +var + token: LexerToken; + result: PAstModule; +begin + NEW(result); + token := parser_lex(parser^.lexer); + result^.main := true; + + if token.kind = lexerKindModule then + result^.main := false + end; + token := parser_lex(parser^.lexer); + + (* Write the module body. *) + token := parser_lex(parser^.lexer); + + result^.imports := parse_import_part(parser); + result^.constants := parse_constant_part(parser); + result^.types := parse_type_part(parser); + + result^.variables := parse_variable_part(parser); + result^.procedures := parse_procedure_part(parser); + result^.statements := parse_statement_part(parser); + + token := parser_lex(parser^.lexer); + token := parser_lex(parser^.lexer); + + return result +end; + +proc parse(lexer: PLexer) -> PAstModule; +var + parser: Parser; +begin + parser.lexer := lexer; + + return parse_module(ADR(parser)) +end; + +end. diff --git a/source/Transpiler.def b/source/Transpiler.def new file mode 100644 index 0000000..5f8c219 --- /dev/null +++ b/source/Transpiler.def @@ -0,0 +1,20 @@ +DEFINITION MODULE Transpiler; + +FROM FIO IMPORT File; + +FROM Common IMPORT ShortString; +FROM Lexer IMPORT PLexer, Lexer; +FROM Parser IMPORT PAstModule; + +TYPE + TranspilerContext = RECORD + input_name: ShortString; + output: File; + definition: File; + indentation: CARDINAL + END; + PTranspilerContext = POINTER TO TranspilerContext; + +PROCEDURE transpile(ast_module: PAstModule; output: File; definition: File; input_name: ShortString); + +END Transpiler. diff --git a/source/Transpiler.elna b/source/Transpiler.elna new file mode 100644 index 0000000..44b3e10 --- /dev/null +++ b/source/Transpiler.elna @@ -0,0 +1,658 @@ +module; + +from FIO import WriteNBytes, WriteLine, WriteChar, WriteString; +from SYSTEM import ADR, TSIZE; + +from NumberIO import IntToStr; + +from Common import Identifier, PIdentifier, ShortString; +from Parser import AstTypeExpressionKind, AstExpressionKind, AstLiteralKind, AstUnaryOperator, AstBinaryOperator, + PAstModule, PPAstExpression, PAstExpression, PAstLiteral, PPAstProcedureDeclaration, + PAstConstantDeclaration, PPAstConstantDeclaration, PPAstStatement, PAstStatement, AstStatementKind, + AstTypedDeclaration, PAstTypedDeclaration, PPAstTypedDeclaration, AstCompoundStatement, PAstProcedureDeclaration, + PAstVariableDeclaration, PPAstVariableDeclaration, PAstImportStatement, PPAstImportStatement, + PAstTypeExpression, PPAstTypeExpression, AstFieldDeclaration, PAstFieldDeclaration; + +proc indent(context: PTranspilerContext); +var + count: CARDINAL; +begin + count := 0; + + while count < context^.indentation do + WriteString(context^.output, ' '); + INC(count) + end +end; + +(* Write a semicolon followed by a newline. *) +proc write_semicolon(output: File); +begin + WriteChar(output, ';'); + WriteLine(output) +end; + +proc transpile_import_statement(context: PTranspilerContext, import_statement: PAstImportStatement); +var + written_bytes: CARDINAL; + current_symbol: PIdentifier; +begin + WriteString(context^.output, 'FROM '); + written_bytes := WriteNBytes(context^.output, ORD(import_statement^.package[1]), ADR(import_statement^.package[2])); + + WriteString(context^.output, ' IMPORT '); + + current_symbol := import_statement^.symbols; + written_bytes := WriteNBytes(context^.output, ORD(current_symbol^[1]), ADR(current_symbol^[2])); + INC(current_symbol, TSIZE(Identifier)); + + while ORD(current_symbol^[1]) <> 0 do + WriteString(context^.output, ', '); + written_bytes := WriteNBytes(context^.output, ORD(current_symbol^[1]), ADR(current_symbol^[2])); + INC(current_symbol, TSIZE(Identifier)) + end; + write_semicolon(context^.output) +end; + +proc transpile_import_part(context: PTranspilerContext, imports: PPAstImportStatement); +var + import_statement: PAstImportStatement; +begin + while imports^ <> nil do + transpile_import_statement(context, imports^); + INC(imports, TSIZE(PAstImportStatement)) + end; + WriteLine(context^.output) +end; + +proc transpile_constant_declaration(context: PTranspilerContext, declaration: PAstConstantDeclaration); +var + buffer: [20]CHAR; + written_bytes: CARDINAL; +begin + WriteString(context^.output, ' '); + written_bytes := WriteNBytes(context^.output, ORD(declaration^.constant_name[1]), ADR(declaration^.constant_name[2])); + + WriteString(context^.output, ' = '); + + IntToStr(declaration^.constant_value, 0, buffer); + WriteString(context^.output, buffer); + + write_semicolon(context^.output) +end; + +proc transpile_constant_part(context: PTranspilerContext, declarations: PPAstConstantDeclaration, extra_newline: BOOLEAN); +var + current_declaration: PPAstConstantDeclaration; +begin + if declarations^ <> nil then + WriteString(context^.output, 'CONST'); + WriteLine(context^.output); + + current_declaration := declarations; + while current_declaration^ <> nil do + transpile_constant_declaration(context, current_declaration^); + + INC(current_declaration, TSIZE(PAstConstantDeclaration)) + end; + if extra_newline then + WriteLine(context^.output) + end + end +end; + +proc transpile_module(context: PTranspilerContext, result: PAstModule); +begin + if result^.main = false then + WriteString(context^.output, 'IMPLEMENTATION ') + end; + WriteString(context^.output, 'MODULE '); + + (* Write the module name and end the line with a semicolon and newline. *) + transpile_module_name(context); + + write_semicolon(context^.output); + WriteLine(context^.output); + + (* Write the module body. *) + + transpile_import_part(context, result^.imports); + transpile_constant_part(context, result^.constants, true); + transpile_type_part(context, result^.types); + transpile_variable_part(context, result^.variables, true); + transpile_procedure_part(context, result^.procedures); + transpile_statement_part(context, result^.statements); + + WriteString(context^.output, 'END '); + transpile_module_name(context); + + WriteChar(context^.output, '.'); + WriteLine(context^.output) +end; + +proc transpile_type_fields(context: PTranspilerContext, fields: PAstFieldDeclaration); +var + written_bytes: CARDINAL; + current_field: PAstFieldDeclaration; +begin + current_field := fields; + + while ORD(current_field^.field_name[1]) <> 0 do + WriteString(context^.output, ' '); + written_bytes := WriteNBytes(context^.output, ORD(current_field^.field_name[1]), ADR(current_field^.field_name[2])); + + WriteString(context^.output, ': '); + transpile_type_expression(context, current_field^.field_type); + + INC(current_field , TSIZE(AstFieldDeclaration)); + + if ORD(current_field^.field_name[1]) <> 0 then + WriteChar(context^.output, ';') + end; + WriteLine(context^.output) + end +end; + +proc transpile_record_type(context: PTranspilerContext, type_expression: PAstTypeExpression); +begin + WriteString(context^.output, 'RECORD'); + WriteLine(context^.output); + transpile_type_fields(context, type_expression^.fields); + WriteString(context^.output, ' END') +end; + +proc transpile_pointer_type(context: PTranspilerContext, type_expression: PAstTypeExpression); +begin + WriteString(context^.output, 'POINTER TO '); + + transpile_type_expression(context, type_expression^.target) +end; + +proc transpile_array_type(context: PTranspilerContext, type_expression: PAstTypeExpression); +var + buffer: [20]CHAR; +begin + WriteString(context^.output, 'ARRAY'); + + if type_expression^.length <> 0 then + WriteString(context^.output, '[1..'); + + IntToStr(type_expression^.length, 0, buffer); + WriteString(context^.output, buffer); + + WriteChar(context^.output, ']') + end; + WriteString(context^.output, ' OF '); + + transpile_type_expression(context, type_expression^.base) +end; + +proc transpile_enumeration_type(context: PTranspilerContext, type_expression: PAstTypeExpression); +var + current_case: PIdentifier; + written_bytes: CARDINAL; +begin + current_case := type_expression^.cases; + + WriteString(context^.output, '('); + WriteLine(context^.output); + WriteString(context^.output, ' '); + written_bytes := WriteNBytes(context^.output, ORD(current_case^[1]), ADR(current_case^[2])); + INC(current_case, TSIZE(Identifier)); + + while ORD(current_case^[1]) <> 0 do + WriteChar(context^.output, ','); + WriteLine(context^.output); + WriteString(context^.output, ' '); + written_bytes := WriteNBytes(context^.output, ORD(current_case^[1]), ADR(current_case^[2])); + + INC(current_case, TSIZE(Identifier)) + end; + WriteLine(context^.output); + WriteString(context^.output, ' )') +end; + +proc transpile_named_type(context: PTranspilerContext, type_expression: PAstTypeExpression); +var + written_bytes: CARDINAL; +begin + written_bytes := WriteNBytes(context^.output, ORD(type_expression^.name[1]), ADR(type_expression^.name[2])) +end; + +proc transpile_procedure_type(context: PTranspilerContext, type_expression: PAstTypeExpression); +var + result: PAstTypeExpression; + current_parameter: PPAstTypeExpression; + parameter_count: CARDINAL; +begin + WriteString(context^.output, 'PROCEDURE('); + current_parameter := type_expression^.parameters; + + while current_parameter^ <> nil do + transpile_type_expression(context, current_parameter^); + + INC(current_parameter, TSIZE(PAstTypeExpression)); + + if current_parameter^ <> nil then + WriteString(context^.output, ', ') + end + end; + WriteChar(context^.output, ')') +end; + +proc transpile_type_expression(context: PTranspilerContext, type_expression: PAstTypeExpression); +begin + if type_expression^.kind = astTypeExpressionKindRecord then + transpile_record_type(context, type_expression) + end; + if type_expression^.kind = astTypeExpressionKindEnumeration then + transpile_enumeration_type(context, type_expression) + end; + if type_expression^.kind = astTypeExpressionKindArray then + transpile_array_type(context, type_expression) + end; + if type_expression^.kind = astTypeExpressionKindPointer then + transpile_pointer_type(context, type_expression) + end; + if type_expression^.kind = astTypeExpressionKindProcedure then + transpile_procedure_type(context, type_expression) + end; + if type_expression^.kind = astTypeExpressionKindNamed then + transpile_named_type(context, type_expression) + end +end; + +proc transpile_type_declaration(context: PTranspilerContext, declaration: PAstTypedDeclaration); +var + written_bytes: CARDINAL; +begin + WriteString(context^.output, ' '); + + written_bytes := WriteNBytes(context^.output, ORD(declaration^.identifier[1]), ADR(declaration^.identifier[2])); + WriteString(context^.output, ' = '); + + transpile_type_expression(context, declaration^.type_expression); + write_semicolon(context^.output) +end; + +proc transpile_type_part(context: PTranspilerContext, declarations: PPAstTypedDeclaration); +var + current_declaration: PPAstTypedDeclaration; +begin + if declarations^ <> nil then + WriteString(context^.output, 'TYPE'); + WriteLine(context^.output); + + current_declaration := declarations; + while current_declaration^ <> nil do + transpile_type_declaration(context, current_declaration^); + + INC(current_declaration, TSIZE(PAstTypedDeclaration)) + end; + WriteLine(context^.output) + end +end; + +proc transpile_variable_declaration(context: PTranspilerContext, declaration: PAstVariableDeclaration); +var + written_bytes: CARDINAL; +begin + WriteString(context^.output, ' '); + written_bytes := WriteNBytes(context^.output, ORD(declaration^.variable_name[1]), ADR(declaration^.variable_name[2])); + + WriteString(context^.output, ': '); + + transpile_type_expression(context, declaration^.variable_type); + write_semicolon(context^.output) +end; + +proc transpile_variable_part(context: PTranspilerContext, declarations: PPAstVariableDeclaration, extra_newline: BOOLEAN); +var + current_declaration: PPAstVariableDeclaration; +begin + if declarations^ <> nil then + WriteString(context^.output, 'VAR'); + WriteLine(context^.output); + + current_declaration := declarations; + while current_declaration^ <> nil do + transpile_variable_declaration(context, current_declaration^); + + INC(current_declaration, TSIZE(PAstVariableDeclaration)) + end; + if extra_newline then + WriteLine(context^.output) + end + end +end; + +proc transpile_procedure_heading(context: PTranspilerContext, declaration: PAstProcedureDeclaration); +var + written_bytes: CARDINAL; + parameter_index: CARDINAL; + current_parameter: PAstTypedDeclaration; +begin + WriteString(context^.output, 'PROCEDURE '); + written_bytes := WriteNBytes(context^.output, ORD(declaration^.name[1]), ADR(declaration^.name[2])); + WriteChar(context^.output, '('); + + parameter_index := 0; + current_parameter := declaration^.parameters; + + while parameter_index < declaration^.parameter_count do + written_bytes := WriteNBytes(context^.output, ORD(current_parameter^.identifier[1]), ADR(current_parameter^.identifier[2])); + WriteString(context^.output, ': '); + transpile_type_expression(context, current_parameter^.type_expression); + + INC(parameter_index); + INC(current_parameter, TSIZE(AstTypedDeclaration)); + + if parameter_index <> declaration^.parameter_count then + WriteString(context^.output, '; ') + end + end; + + WriteString(context^.output, ')'); + + (* Check for the return type and write it. *) + if declaration^.return_type <> nil then + WriteString(context^.output, ': '); + transpile_type_expression(context, declaration^.return_type) + end; + write_semicolon(context^.output) +end; + +proc transpile_unary_operator(context: PTranspilerContext, operator: AstUnaryOperator); +begin + if operator = astUnaryOperatorMinus then + WriteChar(context^.output, '-') + end; + if operator = astUnaryOperatorNot then + WriteChar(context^.output, '~') + end +end; + +proc transpile_binary_operator(context: PTranspilerContext, operator: AstBinaryOperator); +begin + if operator = astBinaryOperatorSum then + WriteChar(context^.output, '+') + end; + if operator = astBinaryOperatorSubtraction then + WriteChar(context^.output, '-') + end; + if operator = astBinaryOperatorMultiplication then + WriteChar(context^.output, '*') + end; + if operator = astBinaryOperatorEquals then + WriteChar(context^.output, '=') + end; + if operator = astBinaryOperatorNotEquals then + WriteChar(context^.output, '#') + end; + if operator = astBinaryOperatorLess then + WriteChar(context^.output, '<') + end; + if operator = astBinaryOperatorGreater then + WriteChar(context^.output, '>') + end; + if operator = astBinaryOperatorLessEqual then + WriteString(context^.output, '<=') + end; + if operator = astBinaryOperatorGreaterEqual then + WriteString(context^.output, '>=') + end; + if operator = astBinaryOperatorDisjunction then + WriteString(context^.output, 'OR') + end; + if operator = astBinaryOperatorConjunction then + WriteString(context^.output, 'AND') + end +end; + +proc transpile_expression(context: PTranspilerContext, expression: PAstExpression); +var + literal: PAstLiteral; + buffer: [20]CHAR; + written_bytes: CARDINAL; + argument_index: CARDINAL; + current_argument: PPAstExpression; +begin + if expression^.kind = astExpressionKindLiteral then + literal := expression^.literal; + + if literal^.kind = astLiteralKindInteger then + IntToStr(literal^.integer, 0, buffer); + WriteString(context^.output, buffer) + end; + if literal^.kind = astLiteralKindString then + WriteString(context^.output, literal^.string) + end; + if literal^.kind = astLiteralKindNull then + WriteString(context^.output, 'NIL') + end; + if (literal^.kind = astLiteralKindBoolean) & literal^.boolean then + WriteString(context^.output, 'TRUE') + end; + if (literal^.kind = astLiteralKindBoolean) & (literal^.boolean = false) then + WriteString(context^.output, 'FALSE') + end + end; + if expression^.kind = astExpressionKindIdentifier then + written_bytes := WriteNBytes(context^.output, ORD(expression^.identifier[1]), ADR(expression^.identifier[2])) + end; + if expression^.kind = astExpressionKindDereference then + transpile_expression(context, expression^.reference); + WriteChar(context^.output, '^') + end; + if expression^.kind = astExpressionKindArrayAccess then + transpile_expression(context, expression^.array); + WriteChar(context^.output, '['); + transpile_expression(context, expression^.index); + WriteChar(context^.output, ']') + end; + if expression^.kind = astExpressionKindFieldAccess then + transpile_expression(context, expression^.aggregate); + WriteChar(context^.output, '.'); + written_bytes := WriteNBytes(context^.output, ORD(expression^.field[1]), ADR(expression^.field[2])) + end; + if expression^.kind = astExpressionKindUnary then + transpile_unary_operator(context, expression^.unary_operator); + transpile_expression(context, expression^.unary_operand) + end; + if expression^.kind = astExpressionKindBinary then + WriteChar(context^.output, '('); + transpile_expression(context, expression^.lhs); + WriteChar(context^.output, ' '); + transpile_binary_operator(context, expression^.binary_operator); + WriteChar(context^.output, ' '); + transpile_expression(context, expression^.rhs); + WriteChar(context^.output, ')') + end; + if expression^.kind = astExpressionKindCall then + transpile_expression(context, expression^.callable); + WriteChar(context^.output, '('); + + current_argument := expression^.arguments; + if expression^.argument_count > 0 then + transpile_expression(context, current_argument^); + + argument_index := 1; + INC(current_argument, TSIZE(PAstExpression)); + + while argument_index < expression^.argument_count do + WriteString(context^.output, ', '); + + transpile_expression(context, current_argument^); + + INC(current_argument, TSIZE(PAstExpression)); + INC(argument_index) + end + end; + WriteChar(context^.output, ')') + end +end; + +proc transpile_if_statement(context: PTranspilerContext, statement: PAstStatement); +begin + WriteString(context^.output, 'IF '); + transpile_expression(context, statement^.if_condition); + + WriteString(context^.output, ' THEN'); + WriteLine(context^.output); + INC(context^.indentation); + + transpile_compound_statement(context, statement^.if_branch); + DEC(context^.indentation); + indent(context); + WriteString(context^.output, 'END') +end; + +proc transpile_while_statement(context: PTranspilerContext, statement: PAstStatement); +begin + WriteString(context^.output, 'WHILE '); + transpile_expression(context, statement^.while_condition); + + WriteString(context^.output, ' DO'); + WriteLine(context^.output); + INC(context^.indentation); + + transpile_compound_statement(context, statement^.while_body); + DEC(context^.indentation); + indent(context); + WriteString(context^.output, 'END') +end; + +proc transpile_assignment_statement(context: PTranspilerContext, statement: PAstStatement); +begin + transpile_expression(context, statement^.assignee); + WriteString(context^.output, ' := '); + transpile_expression(context, statement^.assignment) +end; + +proc transpile_return_statement(context: PTranspilerContext, statement: PAstStatement); +begin + WriteString(context^.output, 'RETURN '); + + transpile_expression(context, statement^.returned) +end; + +proc transpile_compound_statement(context: PTranspilerContext, statement: AstCompoundStatement); +var + current_statement: PPAstStatement; + index: CARDINAL; +begin + index := 0; + current_statement := statement.statements; + + while index < statement.count do + transpile_statement(context, current_statement^); + + INC(current_statement, TSIZE(PAstStatement)); + INC(index); + + if index <> statement.count then + WriteChar(context^.output, ';') + end; + WriteLine(context^.output) + end +end; + +proc transpile_statement(context: PTranspilerContext, statement: PAstStatement); +begin + indent(context); + + if statement^.kind = astStatementKindIf then + transpile_if_statement(context, statement) + end; + if statement^.kind = astStatementKindWhile then + transpile_while_statement(context, statement) + end; + if statement^.kind = astStatementKindReturn then + transpile_return_statement(context, statement) + end; + if statement^.kind = astStatementKindAssignment then + transpile_assignment_statement(context, statement) + end; + if statement^.kind = astStatementKindCall then + transpile_expression(context, statement^.call) + end +end; + +proc transpile_statement_part(context: PTranspilerContext, compound: AstCompoundStatement); +begin + if compound.count > 0 then + WriteString(context^.output, 'BEGIN'); + WriteLine(context^.output); + + INC(context^.indentation); + transpile_compound_statement(context, compound); + DEC(context^.indentation) + end +end; + +proc transpile_procedure_declaration(context: PTranspilerContext, declaration: PAstProcedureDeclaration); +var + written_bytes: CARDINAL; +begin + transpile_procedure_heading(context, declaration); + + transpile_constant_part(context, declaration^.constants, false); + transpile_variable_part(context, declaration^.variables, false); + transpile_statement_part(context, declaration^.statements); + + WriteString(context^.output, 'END '); + written_bytes := WriteNBytes(context^.output, ORD(declaration^.name[1]), ADR(declaration^.name[2])); + + write_semicolon(context^.output) +end; + +proc transpile_procedure_part(context: PTranspilerContext, declaration: PPAstProcedureDeclaration); +begin + while declaration^ <> nil do + transpile_procedure_declaration(context, declaration^); + WriteLine(context^.output); + + INC(declaration, TSIZE(PAstProcedureDeclaration)) + end +end; + +proc transpile_module_name(context: PTranspilerContext); +var + counter: CARDINAL; + last_slash: CARDINAL; +begin + counter := 1; + last_slash := 0; + + while (context^.input_name[counter] <> '.') & (ORD(context^.input_name[counter]) <> 0) do + if context^.input_name[counter] = '/' then + last_slash := counter + end; + INC(counter) + end; + + if last_slash = 0 then + counter := 1 + end; + if last_slash <> 0 then + counter := last_slash + 1 + end; + while (context^.input_name[counter] <> '.') & (ORD(context^.input_name[counter]) <> 0) do + WriteChar(context^.output, context^.input_name[counter]); + INC(counter) + end +end; + +proc transpile(ast_module: PAstModule, output: File, definition: File, input_name: ShortString); +var + context: TranspilerContext; +begin + context.input_name := input_name; + context.output := output; + context.definition := definition; + context.indentation := 0; + + transpile_module(ADR(context), ast_module) +end; + +end. diff --git a/source.elna b/source/main.elna similarity index 90% rename from source.elna rename to source/main.elna index 2555e1f..d29c813 100644 --- a/source.elna +++ b/source/main.elna @@ -1,17 +1,17 @@ (* This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. *) -program +program; -import dummy +import dummy; const - SEEK_SET* := 0 - SEEK_CUR* := 1 - SEEK_END* := 2 - STDIN := 0 - STDOUT := 1 - STDERR := 2 + SEEK_SET* := 0; + SEEK_CUR* := 1; + SEEK_END* := 2; + STDIN := 0; + STDOUT := 1; + STDERR := 2; type TokenKind* = ( @@ -79,82 +79,82 @@ type _program, _module, _import - ) + ); Position* = record - line: Word + line: Word; column: Word - end + end; Location* = record - first: Position + first: Position; last: Position - end + end; SourceFile* = record - buffer: [1024]Char - handle: ^FILE - size: Word + buffer: [1024]Char; + handle: ^FILE; + size: Word; index: Word - end - FILE* = record end + end; + FILE* = record end; StringBuffer* = record - data: Pointer - size: Word + data: Pointer; + size: Word; capacity: Word - end + end; SourceCode = record - position: Position + position: Position; - input: Pointer - empty: proc(Pointer) -> Bool - advance: proc(Pointer) + input: Pointer; + empty: proc(Pointer) -> Bool; + advance: proc(Pointer); head: proc(Pointer) -> Char - end + end; Token* = record - kind: TokenKind + kind: TokenKind; value: union - int_value: Int - string: String - boolean_value: Bool + int_value: Int; + string: String; + boolean_value: Bool; char_value: Char - end + end; location: Location - end + end; CommandLine* = record - input: ^Char - lex: Bool + input: ^Char; + lex: Bool; parse: Bool - end + end; Lexer* = record - length: Word + length: Word; data: ^Token - end + end; (* External procedures. *) -proc fopen(pathname: ^Char, mode: ^Char) -> ^FILE; extern -proc fclose(stream: ^FILE) -> Int; extern -proc fseek(stream: ^FILE, off: Int, whence: Int) -> Int; extern -proc rewind(stream: ^FILE); extern -proc ftell(stream: ^FILE) -> Int; extern -proc fread(ptr: Pointer, size: Word, nmemb: Word, stream: ^FILE) -> Word; extern -proc write(fd: Int, buf: Pointer, Word: Int) -> Int; extern +proc fopen(pathname: ^Char, mode: ^Char) -> ^FILE; extern; +proc fclose(stream: ^FILE) -> Int; extern; +proc fseek(stream: ^FILE, off: Int, whence: Int) -> Int; extern; +proc rewind(stream: ^FILE); extern; +proc ftell(stream: ^FILE) -> Int; extern; +proc fread(ptr: Pointer, size: Word, nmemb: Word, stream: ^FILE) -> Word; extern; +proc write(fd: Int, buf: Pointer, Word: Int) -> Int; extern; -proc malloc(size: Word) -> Pointer; extern -proc free(ptr: Pointer); extern -proc calloc(nmemb: Word, size: Word) -> Pointer; extern -proc realloc(ptr: Pointer, size: Word) -> Pointer; extern +proc malloc(size: Word) -> Pointer; extern; +proc free(ptr: Pointer); extern; +proc calloc(nmemb: Word, size: Word) -> Pointer; extern; +proc realloc(ptr: Pointer, size: Word) -> Pointer; extern; -proc memset(ptr: ^Char, c: Int, n: Int) -> ^Char; extern +proc memset(ptr: ^Char, c: Int, n: Int) -> ^Char; extern; -proc strcmp(s1: ^Char, s2: ^Char) -> Int; extern -proc strncmp(s1: ^Char, s2: ^Char, n: Word) -> Int; extern -proc strncpy(dst: ^Char, src: ^Char, dsize: Word) -> ^Char; extern -proc strcpy(dst: ^Char, src: ^Char) -> ^Char; extern -proc strlen(ptr: ^Char) -> Word; extern +proc strcmp(s1: ^Char, s2: ^Char) -> Int; extern; +proc strncmp(s1: ^Char, s2: ^Char, n: Word) -> Int; extern; +proc strncpy(dst: ^Char, src: ^Char, dsize: Word) -> ^Char; extern; +proc strcpy(dst: ^Char, src: ^Char) -> ^Char; extern; +proc strlen(ptr: ^Char) -> Word; extern; -proc perror(s: ^Char); extern -proc exit(code: Int) -> !; extern +proc perror(s: ^Char); extern; +proc exit(code: Int) -> !; extern; (* Standard procedures. @@ -162,17 +162,17 @@ proc exit(code: Int) -> !; extern proc reallocarray(ptr: Pointer, n: Word, size: Word) -> Pointer; return realloc(ptr, n * size) -end +end; proc write_s(value: String); begin write(0, cast(value.ptr: Pointer), cast(value.length: Int)) -end +end; proc write_z(value: ^Char); begin write(0, cast(value: Pointer), cast(strlen(value): Int)) -end +end; proc write_b(value: Bool); begin @@ -181,18 +181,18 @@ begin else write_s("false") end -end +end; proc write_c(value: Char); begin write(0, cast(@value: Pointer), 1) -end +end; proc write_i(value: Int); var - digit: Int - n: Word - buffer: [10]Char + digit: Int; + n: Word; + buffer: [10]Char; begin n := 10u; @@ -210,57 +210,57 @@ begin n := n + 1u; write_c(buffer[n]) end -end +end; proc write_u(value: Word); begin write_i(cast(value: Int)) -end +end; proc is_digit(c: Char) -> Bool; return cast(c: Int) >= cast('0': Int) & cast(c: Int) <= cast('9': Int) -end +end; proc is_alpha(c: Char) -> Bool; return cast(c: Int) >= cast('A': Int) & cast(c: Int) <= cast('z': Int) -end +end; proc is_alnum(c: Char) -> Bool; return is_digit(c) or is_alpha(c) -end +end; proc is_space(c: Char) -> Bool; return c = ' ' or c = '\n' or c = '\t' -end +end; proc substring(string: String, start: Word, count: Word) -> String; return String(string.ptr + start, count) -end +end; proc open_substring(string: String, start: Word) -> String; return substring(string, start, string.length - start) -end +end; proc string_dup(origin: String) -> String; var - copy: ^Char + copy: ^Char; begin copy := cast(malloc(origin.length): ^Char); strncpy(copy, origin.ptr, origin.length); return String(copy, origin.length) -end +end; proc string_buffer_new() -> StringBuffer; var - result: StringBuffer + result: StringBuffer; begin result.capacity := 64u; result.data := malloc(result.capacity); result.size := 0u; return result -end +end; proc string_buffer_push(buffer: ^StringBuffer, char: Char); begin @@ -270,21 +270,21 @@ begin end; cast(buffer^.data + buffer^.size: ^Char)^ := cast(char: Char); buffer^.size := buffer^.size + 1u -end +end; proc string_buffer_pop(buffer: ^StringBuffer, count: Word); begin buffer^.size := buffer^.size - count -end +end; proc string_buffer_clear(buffer: ^StringBuffer) -> String; var - result: String + result: String; begin result := String(cast(buffer^.data: ^Char), buffer^.size); buffer^.size := 0u; return result -end +end; (* Source code stream procedures. @@ -292,8 +292,8 @@ end proc read_source(filename: ^Char) -> ^SourceFile; var - result: ^SourceFile - file_handle: ^FILE + result: ^SourceFile; + file_handle: ^FILE; begin file_handle := fopen(filename, "rb\0".ptr); @@ -304,11 +304,11 @@ begin result^.index := 1u end; return result -end +end; proc source_file_empty(source_input: Pointer) -> Bool; var - source_file: ^SourceFile + source_file: ^SourceFile; begin source_file := cast(source_input: ^SourceFile); @@ -318,49 +318,49 @@ begin end; return source_file^.size = 0u -end +end; proc source_file_head(source_input: Pointer) -> Char; var - source_file: ^SourceFile + source_file: ^SourceFile; begin source_file := cast(source_input: ^SourceFile); return source_file^.buffer[source_file^.index] -end +end; proc source_file_advance(source_input: Pointer); var - source_file: ^SourceFile + source_file: ^SourceFile; begin source_file := cast(source_input: ^SourceFile); source_file^.index := source_file^.index + 1u -end +end; proc source_code_empty(source_code: ^SourceCode) -> Bool; return source_code^.empty(source_code^.input) -end +end; proc source_code_head(source_code: SourceCode) -> Char; return source_code.head(source_code.input) -end +end; proc source_code_advance(source_code: ^SourceCode); begin source_code^.advance(source_code^.input); source_code^.position.column := source_code^.position.column -end +end; proc source_code_break(source_code: ^SourceCode); begin source_code^.position.line := source_code^.position.line + 1u; source_code^.position.column := 0u -end +end; proc source_code_expect(source_code: ^SourceCode, expected: Char) -> Bool; return ~source_code_empty(source_code) & source_code_head(source_code^) = expected -end +end; (* Token procedures. @@ -368,7 +368,7 @@ end proc lexer_escape(escape: Char, result: ^Char) -> Bool; var - successful: Bool + successful: Bool; begin if escape = 'n' then result^ := '\n'; @@ -410,12 +410,12 @@ begin successful := false end; return successful -end +end; (* Skip spaces. *) proc lexer_spaces(source_code: ^SourceCode); var - current: Char + current: Char; begin while ~source_code_empty(source_code) & is_space(source_code_head(source_code^)) do current := source_code_head(source_code^); @@ -425,26 +425,26 @@ begin end; source_code_advance(source_code) end -end +end; (* Checker whether the character is allowed in an identificator. *) proc lexer_is_ident(char: Char) -> Bool; return is_alnum(char) or char = '_' -end +end; proc lexer_identifier(source_code: ^SourceCode, token_content: ^StringBuffer); var - content_length: Word + content_length: Word; begin while ~source_code_empty(source_code) & lexer_is_ident(source_code_head(source_code^)) do string_buffer_push(token_content, source_code_head(source_code^)); source_code_advance(source_code) end -end +end; proc lexer_comment(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool; var - trailing: Word + trailing: Word; begin trailing := 0u; @@ -463,11 +463,11 @@ begin end; return trailing = 2u -end +end; proc lexer_character(source_code: ^SourceCode, token_content: ^Char) -> Bool; var - successful: Bool + successful: Bool; begin successful := ~source_code_empty(source_code); @@ -485,14 +485,14 @@ begin source_code_advance(source_code) end; return successful -end +end; proc lexer_string(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool; var - token_end, constructed_string: ^Char - token_length: Word - is_valid: Bool - next_char: Char + token_end, constructed_string: ^Char; + token_length: Word; + is_valid: Bool; + next_char: Char; begin is_valid := true; @@ -510,7 +510,7 @@ begin is_valid := false end; return is_valid -end +end; proc lexer_number(source_code: ^SourceCode, token_content: ^Int); begin @@ -521,12 +521,12 @@ begin source_code_advance(source_code) end -end +end; (* Categorize an identifier. *) proc lexer_categorize(token_content: String) -> Token; var - current_token: Token + current_token: Token; begin if token_content = "if" then current_token.kind := TokenKind._if @@ -590,23 +590,23 @@ begin end; return current_token -end +end; proc lexer_add_token(lexer: ^Lexer, token: Token); var - new_length: Word + new_length: Word; begin new_length := lexer^.length + 1u; lexer^.data := cast(reallocarray(cast(lexer^.data: Pointer), new_length, #size(Token)): ^Token); (lexer^.data + lexer^.length)^ := token; lexer^.length := new_length -end +end; (* Read the next token from the input. *) proc lexer_next(source_code: SourceCode, token_buffer: ^StringBuffer) -> Token; var - current_token: Token - first_char: Char + current_token: Token; + first_char: Char; begin current_token.kind := TokenKind.unknown; @@ -775,14 +775,14 @@ begin end; return current_token -end +end; (* Split the source text into tokens. *) proc lexer_text(source_code: SourceCode) -> Lexer; var - current_token: Token - token_buffer: StringBuffer - lexer: Lexer + current_token: Token; + token_buffer: StringBuffer; + lexer: Lexer; begin lexer := Lexer(0u, nil); token_buffer := string_buffer_new(); @@ -803,7 +803,7 @@ begin end; return lexer -end +end; (* Command line handling. @@ -811,9 +811,9 @@ end proc parse_command_line*(argc: Int, argv: ^^Char) -> ^CommandLine; var - parameter: ^^Char - i: Int - result: ^CommandLine + parameter: ^^Char; + i: Int; + result: ^CommandLine; begin i := 1; result := cast(malloc(#size(CommandLine)): ^CommandLine); @@ -852,7 +852,7 @@ begin end; return result -end +end; (* Parser. @@ -860,8 +860,8 @@ end proc parse(tokens: ^Token, tokens_size: Word); var - current_token: ^Token - i: Word + current_token: ^Token; + i: Word; begin i := 0u; while i < tokens_size do @@ -1015,7 +1015,7 @@ begin i := i + 1u end; write_c('\n') -end +end; (* Compilation entry. @@ -1023,8 +1023,8 @@ end proc compile_in_stages(command_line: ^CommandLine, source_code: SourceCode) -> Int; var - return_code: Int - lexer: Lexer + return_code: Int; + lexer: Lexer; begin return_code := 0; @@ -1036,16 +1036,16 @@ begin end; return return_code -end +end; proc process(argc: Int, argv: ^^Char) -> Int; var - tokens: ^Token - tokens_size: Word - source_code: SourceCode - command_line: ^CommandLine - return_code: Int - source_file: ^SourceFile + tokens: ^Token; + tokens_size: Word; + source_code: SourceCode; + command_line: ^CommandLine; + return_code: Int; + source_file: ^SourceFile; begin return_code := 0; @@ -1077,7 +1077,7 @@ begin return_code := compile_in_stages(command_line, source_code) end; return return_code -end +end; begin exit(process(count, parameters)) diff --git a/tools/support.rb b/tools/support.rb deleted file mode 100644 index 4774b48..0000000 --- a/tools/support.rb +++ /dev/null @@ -1,89 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public License, -# v. 2.0. If a copy of the MPL was not distributed with this file, You can -# obtain one at https://mozilla.org/MPL/2.0/. -} - -require 'uri' -require 'net/http' -require 'open3' - -TMP = Pathname.new('./build') - -class BuildTarget - attr_accessor(:build, :gcc, :sysroot, :tmp) - - def initialize - @sysroot = Pathname.new '/' - end - - def gxx - @gcc.gsub 'c', '+' - end - - def rootfs - tmp + 'rootfs' - end -end - -def gcc_verbose(gcc_binary) - read, write = IO.pipe - sh({'LANG' => 'C'}, gcc_binary, '--verbose', err: write) - write.close - output = read.read - read.close - output -end - -def find_build_target(gcc_version) - gcc_binary = 'gcc' - output = gcc_verbose gcc_binary - - if output.start_with? 'Apple clang' - gcc_binary = "gcc-#{gcc_version.split('.').first}" - output = gcc_verbose gcc_binary - sdk = Pathname.new '/Library/Developer/CommandLineTools/SDKs/MacOSX15.sdk' - end - result = output - .lines - .each_with_object(BuildTarget.new) do |line, accumulator| - if line.start_with? 'Target: ' - accumulator.build = line.split(' ').last.strip - elsif line.start_with? 'COLLECT_GCC' - accumulator.gcc = line.split('=').last.strip - end - end - result.tmp = TMP - result.sysroot = sdk unless sdk.nil? - result -end - -def download_and_pipe(url, target, command) - target.mkpath - - Net::HTTP.start(url.host, url.port, use_ssl: url.scheme == 'https') do |http| - request = Net::HTTP::Get.new url.request_uri - - http.request request do |response| - case response - when Net::HTTPRedirection - download_and_pipe URI.parse(response['location']), target, command - when Net::HTTPSuccess - Dir.chdir target.to_path do - Open3.popen2(*command) do |stdin, stdout, wait_thread| - Thread.new do - stdout.each { |line| puts line } - end - - response.read_body do |chunk| - stdin.write chunk - end - stdin.close - - wait_thread.value - end - end - else - response.error! - end - end - end -end