From 42d2038c4d08f6b2fe3ead22ebe82b923a182110 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Mon, 11 Mar 2024 10:43:26 +0100 Subject: [PATCH] Support compound statements --- backend/riscv.cpp | 100 ++++++++++++++---------- backend/target.cpp | 47 ++++++----- include/elna/backend/riscv.hpp | 10 ++- include/elna/source/lexer.hpp | 41 +++++++--- include/elna/source/parser.hpp | 61 ++++++++++++++- include/elna/source/symboltable.hpp | 19 +++++ source/lexer.cpp | 11 +++ source/parser.cpp | 28 +++++-- source/symboltable.cpp | 4 + tests/declare_variable.eln | 1 + tests/expectations/multiline_output.txt | 2 + tests/multiline_output.eln | 4 + 12 files changed, 242 insertions(+), 86 deletions(-) create mode 100644 tests/expectations/multiline_output.txt create mode 100644 tests/multiline_output.eln diff --git a/backend/riscv.cpp b/backend/riscv.cpp index 005bb3f..ca29b83 100644 --- a/backend/riscv.cpp +++ b/backend/riscv.cpp @@ -57,6 +57,10 @@ namespace elna::riscv return reinterpret_cast(&this->representation) + sizeof(this->representation); } + void visitor::visit(source::declaration *declaration) + { + } + void visitor::visit(source::definition *definition) { constants[definition->identifier()] = definition->body().number(); @@ -64,60 +68,40 @@ namespace elna::riscv void visitor::visit(source::block *block) { + this->instructions.push_back(instruction(base_opcode::opImm)); + this->instructions.push_back(instruction(base_opcode::store)); + this->instructions.push_back(instruction(base_opcode::store)); + this->instructions.push_back(instruction(base_opcode::opImm)); + for (const auto& block_definition : block->definitions()) { block_definition->accept(this); } + for (const auto& block_declaration : block->declarations()) + { + block_declaration->accept(this); + } block->body().accept(this); - // Prologue. - const uint stackSize = static_cast(variable_counter * 4 + 12); - - std::vector prologue{ - instruction(base_opcode::opImm) - .i(x_register::sp, funct3_t::addi, x_register::sp, -stackSize), - instruction(base_opcode::store) - .s(stackSize - 4, funct3_t::sw, x_register::sp, x_register::s0), - instruction(base_opcode::store) - .s(stackSize - 8, funct3_t::sw, x_register::sp, x_register::ra), - instruction(base_opcode::opImm) - .i(x_register::s0, funct3_t::addi, x_register::sp, stackSize) - }; - this->instructions.insert(this->instructions.cbegin(), prologue.begin(), prologue.end()); - - // Print the result. - this->instructions.push_back(instruction(base_opcode::opImm) - .i(x_register::a1, funct3_t::addi, x_register::a0, 0)); - this->references[0] = reference(); - this->references[0].name = ".CL0"; - this->references[0].offset = instructions.size() * 4; - this->references[0].target = address_t::high20; - this->instructions.push_back(instruction(base_opcode::lui).u(x_register::a5, 0)); - this->references[1] = reference(); - this->references[1].name = ".CL0"; - this->references[1].offset = instructions.size() * 4; - this->references[1].target = address_t::lower12i; - - this->instructions.push_back(instruction(base_opcode::opImm) - .i(x_register::a0, funct3_t::addi, x_register::a5, 0)); - this->references[2] = reference(); - this->references[2].name = "printf"; - this->references[2].offset = instructions.size() * 4; - this->references[2].target = address_t::text; - this->instructions.push_back(instruction(base_opcode::auipc).u(x_register::ra, 0)); - this->instructions.push_back(instruction(base_opcode::jalr) - .i(x_register::ra, funct3_t::jalr, x_register::ra, 0)); // Set the return value (0). this->instructions.push_back(instruction(base_opcode::op) .r(x_register::a0, funct3_t::_and, x_register::zero, x_register::zero)); + // Prologue. + const uint stack_size = static_cast(variable_counter * 4 + 12); + + this->instructions[0].i(x_register::sp, funct3_t::addi, x_register::sp, -stack_size); + this->instructions[1].s(stack_size - 4, funct3_t::sw, x_register::sp, x_register::s0); + this->instructions[2].s(stack_size - 8, funct3_t::sw, x_register::sp, x_register::ra); + this->instructions[3].i(x_register::s0, funct3_t::addi, x_register::sp, stack_size); + // Epilogue. this->instructions.push_back(instruction(base_opcode::load) - .i(x_register::s0, funct3_t::lw, x_register::sp, stackSize - 4)); + .i(x_register::s0, funct3_t::lw, x_register::sp, stack_size - 4)); this->instructions.push_back(instruction(base_opcode::load) - .i(x_register::ra, funct3_t::lw, x_register::sp, stackSize - 8)); + .i(x_register::ra, funct3_t::lw, x_register::sp, stack_size - 8)); this->instructions.push_back(instruction(base_opcode::opImm) - .i(x_register::sp, funct3_t::addi, x_register::sp, stackSize)); + .i(x_register::sp, funct3_t::addi, x_register::sp, stack_size)); this->instructions.push_back(instruction(base_opcode::jalr) .i(x_register::zero, funct3_t::jalr, x_register::ra, 0)); } @@ -125,6 +109,42 @@ namespace elna::riscv void visitor::visit(source::bang_statement *statement) { statement->body().accept(this); + + // Print the result. + this->instructions.push_back(instruction(base_opcode::opImm) + .i(x_register::a1, funct3_t::addi, x_register::a0, 0)); + + this->references.push_back(reference()); + this->references.back().name = ".CL0"; + this->references.back().offset = instructions.size() * 4; + this->references.back().target = address_t::high20; + this->instructions.push_back(instruction(base_opcode::lui).u(x_register::a5, 0)); + this->references.push_back(reference()); + this->references.back().name = ".CL0"; + this->references.back().offset = instructions.size() * 4; + this->references.back().target = address_t::lower12i; + + this->instructions.push_back(instruction(base_opcode::opImm) + .i(x_register::a0, funct3_t::addi, x_register::a5, 0)); + this->references.push_back(reference()); + this->references.back().name = "printf"; + this->references.back().offset = instructions.size() * 4; + this->references.back().target = address_t::text; + this->instructions.push_back(instruction(base_opcode::auipc).u(x_register::ra, 0)); + this->instructions.push_back(instruction(base_opcode::jalr) + .i(x_register::ra, funct3_t::jalr, x_register::ra, 0)); + } + + void visitor::visit(source::compound_statement *statement) + { + for (auto& nested_statement : statement->statements()) + { + nested_statement->accept(this); + } + } + + void visitor::visit(source::assignment_statement *statement) + { } void visitor::visit(source::variable_expression *variable) diff --git a/backend/target.cpp b/backend/target.cpp index 666a806..b329a88 100644 --- a/backend/target.cpp +++ b/backend/target.cpp @@ -32,8 +32,6 @@ namespace elna::riscv // Create string table writer ELFIO::string_section_accessor stra(str_sec); - // Add label name - ELFIO::Elf32_Word str_index = stra.add_string("msg"); // Create read only data section ELFIO::section* ro_sec = writer.sections.add(".rodata"); @@ -50,15 +48,6 @@ namespace elna::riscv sym_sec->set_entry_size(writer.get_default_entry_size(ELFIO::SHT_SYMTAB)); sym_sec->set_link(str_sec->get_index()); - // Create symbol table writer - ELFIO::symbol_section_accessor syma(writer, sym_sec); - auto label_sym = syma.add_symbol(stra, ".CL0", 0x00000000, strlen("%d\n") + 1, - ELFIO::STB_LOCAL, ELFIO::STT_NOTYPE, 0, ro_sec->get_index()); - syma.add_symbol(stra, "main", 0x00000000, instructions_size, - ELFIO::STB_GLOBAL, ELFIO::STT_FUNC, 0, text_sec->get_index()); - auto printf_sym = syma.add_symbol(stra, "printf", 0x00000000, 0, - ELFIO::STB_GLOBAL, ELFIO::STT_NOTYPE, 0, ELFIO::SHN_UNDEF); - // Create relocation table section ELFIO::section* rel_sec = writer.sections.add(".rel.text"); rel_sec->set_type(ELFIO::SHT_REL); @@ -68,15 +57,35 @@ namespace elna::riscv rel_sec->set_link(sym_sec->get_index()); rel_sec->set_flags(ELFIO::SHF_ALLOC); - // Create relocation table writer + // Create symbol relocation table writers + ELFIO::symbol_section_accessor syma(writer, sym_sec); ELFIO::relocation_section_accessor rela(writer, rel_sec); - // Add relocation entry (adjust address at offset 11) - rela.add_entry(_visitor->references[0].offset, label_sym, 26 /* ELFIO::R_RISCV_HI20 */); - rela.add_entry(_visitor->references[0].offset, label_sym, 51 /* ELFIO::R_RISCV_RELAX */); - rela.add_entry(_visitor->references[1].offset, label_sym, 27 /* ELFIO::R_RISCV_LO12_I */); - rela.add_entry(_visitor->references[1].offset, label_sym, 51 /* ELFIO::R_RISCV_RELAX */); - rela.add_entry(_visitor->references[2].offset, printf_sym, 18 /* ELFIO::R_RISCV_CALL */); - rela.add_entry(_visitor->references[2].offset, printf_sym, 51 /* ELFIO::R_RISCV_RELAX */); + ELFIO::Elf_Word digit_symbol = syma.add_symbol(stra, ".CL0", 0x00000000, strlen("%d\n") + 1, + ELFIO::STB_LOCAL, ELFIO::STT_NOTYPE, 0, ro_sec->get_index()); + ELFIO::Elf_Word printf_symbol = syma.add_symbol(stra, "printf", 0x00000000, 0, + ELFIO::STB_GLOBAL, ELFIO::STT_NOTYPE, 0, ELFIO::SHN_UNDEF); + + for (auto& reference : _visitor->references) + { + // The loop assumes that address_t::lower12i always follows address_t::high20. + switch (reference.target) + { + case address_t::high20: + rela.add_entry(reference.offset, digit_symbol, 26 /* ELFIO::R_RISCV_HI20 */); + rela.add_entry(reference.offset, digit_symbol, 51 /* ELFIO::R_RISCV_RELAX */); + break; + case address_t::lower12i: + rela.add_entry(reference.offset, digit_symbol, 27 /* ELFIO::R_RISCV_LO12_I */); + rela.add_entry(reference.offset, digit_symbol, 51 /* ELFIO::R_RISCV_RELAX */); + break; + case address_t::text: + rela.add_entry(reference.offset, printf_symbol, 18 /* ELFIO::R_RISCV_CALL */); + rela.add_entry(reference.offset, printf_symbol, 51 /* ELFIO::R_RISCV_RELAX */); + break; + } + } + syma.add_symbol(stra, "main", 0x00000000, instructions_size, + ELFIO::STB_GLOBAL, ELFIO::STT_FUNC, 0, text_sec->get_index()); // Create ELF object file writer.save(out_file); diff --git a/include/elna/backend/riscv.hpp b/include/elna/backend/riscv.hpp index a61eb5e..a5b33b8 100644 --- a/include/elna/backend/riscv.hpp +++ b/include/elna/backend/riscv.hpp @@ -15,7 +15,7 @@ namespace elna::riscv struct reference { - const char* name; + std::string name; std::size_t offset; address_t target; }; @@ -141,6 +141,7 @@ namespace elna::riscv struct instruction { + instruction() = default; // NOP = addi x0, x0, 0. instruction(base_opcode opcode); instruction& i(x_register rd, funct3_t funct3, x_register rs1, std::uint32_t immediate); @@ -155,17 +156,20 @@ namespace elna::riscv std::uint32_t representation{ 0 }; }; - class visitor : public source::parser_visitor + class visitor final : public source::parser_visitor { public: std::vector instructions; bool register_in_use{ true }; std::uint32_t variable_counter = 1; - reference references[3]; + std::vector references; std::unordered_map constants; + virtual void visit(source::declaration *declaration) override; virtual void visit(source::definition *definition) override; virtual void visit(source::bang_statement *statement) override; + virtual void visit(source::compound_statement *statement) override; + virtual void visit(source::assignment_statement *statement) override; virtual void visit(source::block *block) override; virtual void visit(source::variable_expression *variable) override; virtual void visit(source::integer_literal *number) override; diff --git a/include/elna/source/lexer.hpp b/include/elna/source/lexer.hpp index 4395bfe..4a0da00 100644 --- a/include/elna/source/lexer.hpp +++ b/include/elna/source/lexer.hpp @@ -33,6 +33,7 @@ namespace elna::source text_iterator& operator++(int); bool operator==(const text_iterator& that) const noexcept; bool operator!=(const text_iterator& that) const noexcept; + text_iterator operator+(std::size_t step); friend std::pair text_iterators(const std::string& buffer); }; @@ -50,19 +51,23 @@ namespace elna::source */ enum class type : std::uint16_t { - number = 0, - term_operator = 1, - let = 2, - identifier = 3, - equals = 4, - var = 5, - semicolon = 6, - left_paren = 7, - right_paren = 8, - bang = 9, - dot = 10, - comma = 11, - factor_operator = 12, + number, + term_operator, + let, + identifier, + equals, + var, + semicolon, + left_paren, + right_paren, + bang, + dot, + comma, + factor_operator, + eof, + begin, + end, + assignment }; /** @@ -113,6 +118,16 @@ namespace elna::source std::string what() const override; }; + class unexpected_token final : public error + { + token m_token; + + public: + explicit unexpected_token(const token& token); + + std::string what() const override; + }; + /** * Split the source into tokens. * diff --git a/include/elna/source/parser.hpp b/include/elna/source/parser.hpp index e39b015..b61cf00 100644 --- a/include/elna/source/parser.hpp +++ b/include/elna/source/parser.hpp @@ -1,7 +1,9 @@ #pragma once +#include #include #include +#include #include namespace elna::source @@ -14,8 +16,11 @@ namespace elna::source division }; + class declaration; class definition; class bang_statement; + class compound_statement; + class assignment_statement; class block; class binary_expression; class variable_expression; @@ -23,8 +28,11 @@ namespace elna::source struct parser_visitor { + virtual void visit(declaration *) = 0; virtual void visit(definition *) = 0; virtual void visit(bang_statement *) = 0; + virtual void visit(compound_statement *) = 0; + virtual void visit(assignment_statement *) = 0; virtual void visit(block *) = 0; virtual void visit(binary_expression *) = 0; virtual void visit(variable_expression *) = 0; @@ -48,6 +56,20 @@ namespace elna::source { }; + /** + * Variable declaration. + */ + class declaration : public node + { + std::string m_identifier; + + public: + declaration(const std::string& identifier); + virtual void accept(parser_visitor *visitor) override; + + std::string& identifier() noexcept; + }; + /** * Constant definition. */ @@ -57,7 +79,7 @@ namespace elna::source std::unique_ptr m_body; public: - definition(std::string&& identifier, std::unique_ptr&& body); + definition(const std::string& identifier, std::unique_ptr&& body); virtual void accept(parser_visitor *visitor) override; std::string& identifier() noexcept; @@ -75,6 +97,25 @@ namespace elna::source expression& body(); }; + class compound_statement : public statement + { + std::vector> m_statements; + + public: + compound_statement() = default; + compound_statement(std::vector>&& statements); + virtual void accept(parser_visitor *visitor) override; + + std::vector>& statements(); + }; + + class assignment_statement : public statement + { + std::unique_ptr lvalue; + std::unique_ptr rvalue; + virtual void accept(parser_visitor *visitor) override; + }; + /** * Block. */ @@ -82,13 +123,17 @@ namespace elna::source { std::unique_ptr m_body; std::vector> m_definitions; + std::vector> m_declarations; public: - block(std::vector>&& definitions, std::unique_ptr&& body); + block(std::vector>&& definitions, + std::vector>&& declarations, + std::unique_ptr&& body); virtual void accept(parser_visitor *visitor) override; statement& body(); std::vector>& definitions() noexcept; + std::vector>& declarations() noexcept; }; class integer_literal : public expression @@ -129,7 +174,7 @@ namespace elna::source binary_operator operation() const noexcept; }; - struct parser + struct parser : boost::noncopyable { parser(const std::vector& tokens); @@ -140,11 +185,19 @@ namespace elna::source std::unique_ptr parse_term(); std::unique_ptr parse_expression(); std::unique_ptr parse_definition(); - std::unique_ptr parse_bang_statement(); + std::unique_ptr parse_declaration(); + std::unique_ptr parse_statement(); + std::unique_ptr parse_bang_statement(); + std::unique_ptr parse_compound_statement(); std::vector> parse_definitions(); + std::vector> parse_declarations(); std::unique_ptr parse_block(); + std::optional> advance(const token::type token_type); + bool skip(const token::type token_type); + std::vector::const_iterator tokens; std::vector::const_iterator end; + std::list> errors; }; } diff --git a/include/elna/source/symboltable.hpp b/include/elna/source/symboltable.hpp index 9a54d4b..a5a59b3 100644 --- a/include/elna/source/symboltable.hpp +++ b/include/elna/source/symboltable.hpp @@ -6,6 +6,17 @@ namespace elna::source { + class name_collision final : public error + { + position previous; + std::string name; + + public: + name_collision(const std::string& name, const position current, const position previous); + + std::string what() const override; + }; + class info { public: @@ -27,8 +38,13 @@ namespace elna::source class variable_info final : public info { + std::size_t m_offset{ 0 }; + public: + variable_info(const std::size_t offset); ~variable_info() override; + + std::size_t offset() const noexcept; }; class symbol_table @@ -44,8 +60,11 @@ namespace elna::source class name_analysis_visitor final : public source::parser_visitor { + void visit(declaration *declaration) override; void visit(definition *definition) override; void visit(bang_statement *statement) override; + void visit(compound_statement *statement) override; + void visit(assignment_statement *statement) override; void visit(block *block) override; void visit(integer_literal *number) override; void visit(variable_expression *variable) override; diff --git a/source/lexer.cpp b/source/lexer.cpp index 1fc71e0..95f193e 100644 --- a/source/lexer.cpp +++ b/source/lexer.cpp @@ -58,6 +58,12 @@ namespace elna::source return *this; } + text_iterator text_iterator::operator+(std::size_t step) + { + auto result = *this; + return ++result; + } + bool text_iterator::operator==(const text_iterator& that) const noexcept { return this->m_buffer == that.m_buffer; @@ -309,6 +315,11 @@ namespace elna::source tokens.emplace_back(token::type::factor_operator, _operator.c_str(), iterator.position()); } + else if (*iterator == ':' && iterator + 1 != text_end && *(iterator + 1) == '=') + { + tokens.emplace_back(token::type::assignment, iterator.position()); + ++iterator; + } else { return source_result(unexpected_character{ std::string{ *iterator }, iterator.position() }); diff --git a/source/parser.cpp b/source/parser.cpp index 160f940..5a58501 100644 --- a/source/parser.cpp +++ b/source/parser.cpp @@ -175,6 +175,11 @@ namespace elna::source return m_statements; } + void assignment_statement::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + parser::parser(const std::vector& tokens) : tokens(tokens.cbegin()), end(tokens.cend()) { @@ -323,16 +328,25 @@ namespace elna::source auto result = std::make_unique(); std::unique_ptr next_statement; - do + while ((next_statement = parse_statement()) != nullptr) { - if ((next_statement = parse_statement()) == nullptr) - { - return nullptr; - } result->statements().push_back(std::move(next_statement)); + + if (tokens->of() == token::type::semicolon) + { + ++tokens; + } + else if (tokens->of() == token::type::end) + { + ++tokens; + break; + } + else + { + errors.push_back(std::make_unique(*tokens)); + break; + } } - while (tokens->of() != token::type::end); - ++tokens; return result; } diff --git a/source/symboltable.cpp b/source/symboltable.cpp index 49bff30..ee3b710 100644 --- a/source/symboltable.cpp +++ b/source/symboltable.cpp @@ -82,6 +82,10 @@ namespace elna::source { } + void name_analysis_visitor::visit(assignment_statement *statement) + { + } + void name_analysis_visitor::visit(block *block) { } diff --git a/tests/declare_variable.eln b/tests/declare_variable.eln index 9c17195..4f7131f 100644 --- a/tests/declare_variable.eln +++ b/tests/declare_variable.eln @@ -1,4 +1,5 @@ var x; begin + x := 5; ! 5 end. diff --git a/tests/expectations/multiline_output.txt b/tests/expectations/multiline_output.txt new file mode 100644 index 0000000..b3172d1 --- /dev/null +++ b/tests/expectations/multiline_output.txt @@ -0,0 +1,2 @@ +5 +7 diff --git a/tests/multiline_output.eln b/tests/multiline_output.eln new file mode 100644 index 0000000..d7a381f --- /dev/null +++ b/tests/multiline_output.eln @@ -0,0 +1,4 @@ +begin + ! 5; + ! 7 +end.