diff --git a/TODO b/TODO index ca8486b..c34f3a9 100644 --- a/TODO +++ b/TODO @@ -7,8 +7,8 @@ - Move constants to the symbol table, so we can check at parse time for duplicates. - Don't pass raw pointers to the visitor methods. - While loop. -- If condition. -- Introduce program node which contains global state and functions. +- Comparison operators. +- Procedures. - Calculate additional stack space needed for subexpressions in the allocator visitor and not in the backend. - Support immediates greater than 12 bits. diff --git a/backend/riscv.cpp b/backend/riscv.cpp index db4e31c..8a871ae 100644 --- a/backend/riscv.cpp +++ b/backend/riscv.cpp @@ -76,6 +76,12 @@ namespace elna::riscv return reinterpret_cast(&this->representation) + sizeof(this->representation); } + visitor::visitor(std::function write_text, + std::function write_read_only) + : write_text(write_text), write_read_only(write_read_only) + { + } + void visitor::visit(source::declaration *declaration) { } @@ -119,6 +125,13 @@ namespace elna::riscv .i(x_register::zero, funct3_t::jalr, x_register::ra, 0)); } + void visitor::visit(source::program *program) + { + visit(dynamic_cast(program)); + write_text("main", reinterpret_cast(this->instructions.data()), + this->instructions.size() * sizeof(instruction)); + } + void visitor::visit(source::bang_statement *statement) { statement->body().accept(this); @@ -127,15 +140,15 @@ namespace elna::riscv this->instructions.push_back(instruction(base_opcode::opImm) .i(x_register::a1, funct3_t::addi, x_register::a0, 0)); - auto format_string = this->read_only.label("%d\n"); + auto format_string = write_read_only(reinterpret_cast("%d\n\0"), 4); this->references.push_back(reference()); - this->references.back().name = format_string->first; + this->references.back().name = format_string; this->references.back().offset = instructions.size() * 4; this->references.back().target = address_t::high20; this->instructions.push_back(instruction(base_opcode::lui).u(x_register::a5, 
0)); this->references.push_back(reference()); - this->references.back().name = format_string->first; + this->references.back().name = format_string; this->references.back().offset = instructions.size() * 4; this->references.back().target = address_t::lower12i; @@ -158,15 +171,15 @@ namespace elna::riscv this->instructions.push_back(instruction(base_opcode::opImm) .i(x_register::a1, funct3_t::addi, x_register::a0, 0)); - auto format_string = this->read_only.label("%d\n"); + auto format_string = write_read_only(reinterpret_cast("%d\n\0"), 4); this->references.push_back(reference()); - this->references.back().name = format_string->first; + this->references.back().name = format_string; this->references.back().offset = instructions.size() * 4; this->references.back().target = address_t::high20; this->instructions.push_back(instruction(base_opcode::lui).u(x_register::a5, 0)); this->references.push_back(reference()); - this->references.back().name = format_string->first; + this->references.back().name = format_string; this->references.back().offset = instructions.size() * 4; this->references.back().target = address_t::lower12i; diff --git a/backend/target.cpp b/backend/target.cpp index 33b4efd..6b46b25 100644 --- a/backend/target.cpp +++ b/backend/target.cpp @@ -1,16 +1,97 @@ #include "elna/backend/target.hpp" #include "elna/backend/riscv.hpp" -#include +#include namespace elna::riscv { - void riscv32_elf(source::block *ast, const std::filesystem::path& out_file) + elfio_writer::iterator::reference elfio_writer::iterator::operator*() const noexcept { - auto _visitor = std::make_unique(); - _visitor->visit(ast); + return payload; + } + elfio_writer::iterator::pointer elfio_writer::iterator::operator->() const noexcept + { + return &payload; + } + + elfio_writer::iterator& elfio_writer::iterator::operator++() + { + this->payload.data += *this->sizes; + this->payload.label = *(++this->labels); + this->payload.size = *(++this->sizes); + + return *this; + } + + 
elfio_writer::iterator& elfio_writer::iterator::operator++(int) + { + auto tmp = *this; + ++(*this); + return *this; + } + + bool elfio_writer::iterator::operator==(const iterator& that) const + { + return this->labels == that.labels; + } + + bool elfio_writer::iterator::operator!=(const iterator& that) const + { + return !(*this == that); + } + + elfio_writer::elfio_writer(ELFIO::section *text) + : text(text), labels(std::make_shared>()), + sizes(std::make_shared>()) + { + } + + void elfio_writer::operator()(const std::string& label, const std::byte *data, std::size_t size) + { + labels->push_back(label + '\0'); + sizes->push_back(size); + text->append_data(reinterpret_cast(data), size); + } + + std::string_view elfio_writer::operator()(const std::byte *data, std::size_t size) + { + auto found = std::find_if(begin(), end(), + [data, size](elfio_writer::entry entry) { + return size == entry.size && std::memcmp(entry.data, data, size) == 0; + }); + if (found == end()) + { + (*this)(".CL" + std::to_string(labels->size()), data, size); + return labels->back(); + } + return found->label; + } + + elfio_writer::iterator elfio_writer::begin() const + { + return elfio_writer::iterator(labels->cbegin(), sizes->cbegin(), + reinterpret_cast(text->get_data())); + } + + elfio_writer::iterator elfio_writer::end() const + { + return elfio_writer::iterator(labels->cend(), sizes->cend()); + } + + std::ptrdiff_t elfio_writer::lookup(const std::string& label) + { + auto found = std::find(labels->cbegin(), labels->cend(), label); + + if (found == labels->cend()) + { + return -1; + } + return std::distance(labels->cbegin(), found); + } + + void riscv32_elf(source::program *ast, const std::filesystem::path& out_file) + { ELFIO::elfio writer; - const ELFIO::Elf_Word instructions_size = _visitor->instructions.size() * sizeof(instruction); writer.create(ELFIO::ELFCLASS32, ELFIO::ELFDATA2LSB); @@ -23,8 +104,6 @@ namespace elna::riscv text_sec->set_type(ELFIO::SHT_PROGBITS); 
text_sec->set_flags(ELFIO::SHF_ALLOC | ELFIO::SHF_EXECINSTR); text_sec->set_addr_align(0x1); - text_sec->set_data(reinterpret_cast(_visitor->instructions.data()), - instructions_size); // Create string table section ELFIO::section* str_sec = writer.sections.add(".strtab"); @@ -56,32 +135,40 @@ namespace elna::riscv ro_sec->set_flags(ELFIO::SHF_ALLOC); ro_sec->set_addr_align(0x4); + elfio_writer text_writer{ text_sec }; + elfio_writer read_only_writer{ ro_sec }; + visitor _visitor{ text_writer, read_only_writer }; + _visitor.visit(ast); + // Create symbol relocation table writers ELFIO::symbol_section_accessor syma(writer, sym_sec); ELFIO::relocation_section_accessor rela(writer, rel_sec); ELFIO::Elf_Word digit_symbol; - for (auto read_only_text : _visitor->read_only) + for (auto symbol : read_only_writer) { - ro_sec->append_data(read_only_text.second.data(), read_only_text.second.size()); - - syma.add_symbol(stra, read_only_text.first.c_str(), 0x00000000, - read_only_text.first.size() + 1, ELFIO::STB_LOCAL, ELFIO::STT_NOTYPE, 0, ro_sec->get_index()); + syma.add_symbol(stra, symbol.label.data(), 0x00000000, + symbol.label.size(), ELFIO::STB_LOCAL, ELFIO::STT_NOTYPE, 0, ro_sec->get_index()); } ELFIO::Elf_Word printf_symbol = syma.add_symbol(stra, "printf", 0x00000000, 0, ELFIO::STB_GLOBAL, ELFIO::STT_NOTYPE, 0, ELFIO::SHN_UNDEF); - for (auto& reference : _visitor->references) + for (auto symbol : text_writer) + { + syma.add_symbol(stra, symbol.label.data(), 0x00000000, symbol.label.size(), + ELFIO::STB_GLOBAL, ELFIO::STT_FUNC, 0, text_sec->get_index()); + } + for (auto& reference : _visitor.references) { switch (reference.target) { case address_t::high20: - digit_symbol = _visitor->read_only.lookup(reference.name) + 1; + digit_symbol = read_only_writer.lookup(reference.name) + 1; rela.add_entry(reference.offset, digit_symbol, 26 /* ELFIO::R_RISCV_HI20 */); rela.add_entry(reference.offset, digit_symbol, 51 /* ELFIO::R_RISCV_RELAX */); break; case address_t::lower12i: 
- digit_symbol = _visitor->read_only.lookup(reference.name) + 1; + digit_symbol = read_only_writer.lookup(reference.name) + 1; rela.add_entry(reference.offset, digit_symbol, 27 /* ELFIO::R_RISCV_LO12_I */); rela.add_entry(reference.offset, digit_symbol, 51 /* ELFIO::R_RISCV_RELAX */); break; @@ -91,8 +178,6 @@ namespace elna::riscv break; } } - syma.add_symbol(stra, "main", 0x00000000, instructions_size, - ELFIO::STB_GLOBAL, ELFIO::STT_FUNC, 0, text_sec->get_index()); // Create ELF object file writer.save(out_file); diff --git a/include/elna/backend/riscv.hpp b/include/elna/backend/riscv.hpp index 7083895..80ff833 100644 --- a/include/elna/backend/riscv.hpp +++ b/include/elna/backend/riscv.hpp @@ -1,8 +1,7 @@ #pragma once -#include #include -#include +#include #include "elna/source/parser.hpp" namespace elna::riscv @@ -159,169 +158,20 @@ namespace elna::riscv std::uint32_t representation{ 0 }; }; - /** - * Assigns sequentially numbered labels to text strings. - */ - template - struct read_only_table - { - private: - constexpr static const char get_prefix[] = { prefix... }; - constexpr static const std::size_t prefix_length = sizeof(get_prefix) / sizeof(char); - - public: - /** - * An iterator over label and string pairs. 
- */ - struct const_iterator - { - using iterator_category = std::forward_iterator_tag; - using difference_type = ptrdiff_t; - using value_type = std::pair; - using pointer = const value_type *; - using reference = const value_type&; - - reference operator*() const noexcept - { - return payload; - } - - pointer operator->() const noexcept - { - return &payload; - } - - const_iterator& operator++() - { - ++index; - ++iterator; - this->payload = std::pair(label(), *iterator); - - return *this; - } - - const_iterator& operator++(int) - { - auto tmp = *this; - ++(*this); - return *this; - } - - bool operator==(const const_iterator& that) const - { - return this->index == that.index; - } - - bool operator!=(const const_iterator& that) const - { - return !(*this == that); - } - - private: - std::vector::const_iterator iterator; - std::size_t index; - value_type payload; - - const_iterator(std::size_t index, std::vector::const_iterator iterator) - : iterator(iterator), index(index), payload({ label(), *iterator }) - { - } - - const_iterator(std::size_t index, std::vector::const_iterator iterator, - std::string_view value) - : iterator(iterator), index(index), payload({ get_prefix, value }) - { - } - - std::string label() const - { - return get_prefix + std::to_string(this->index); - } - - friend read_only_table; - }; - - const_iterator begin() - { - if (payload.empty()) - { - return end(); - } - else - { - return read_only_table::const_iterator(0, payload.cbegin()); - } - } - - const_iterator end() const - { - return read_only_table::const_iterator(size(), payload.cend(), ""); - } - - std::size_t size() const - { - return payload.size(); - } - - /** - * Looks up \a needle in the string storage and returns a label for it - * or create a new one. - * - * \param needle A string to search for. - * \return Label name. 
- */ - const_iterator label(std::string_view needle) - { - auto format_string = std::find(this->payload.cbegin(), this->payload.cend(), needle); - - if (format_string == this->payload.cend()) - { - format_string = this->payload.emplace(format_string, needle); - } - auto read_only_index = std::distance(this->payload.cbegin(), format_string); - - return read_only_table::const_iterator(read_only_index, format_string); - } - - /** - * Searches the content by label and returns its index or -1 when the - * label does not exist. - * - * \param needle Label name. - * \return Data index. - */ - std::ptrdiff_t lookup(std::string_view needle) - { - if (needle.size() <= prefix_length) - { - return -1; - } - auto needle_middle = needle.cbegin() + prefix_length; - auto needle_prefix = std::string_view(needle.cbegin(), prefix_length); - - std::size_t counter; - auto [position, char_error] = std::from_chars(needle_middle, needle.cend(), counter); - if (char_error != std::errc{} || position != needle.cend() - || needle_prefix != get_prefix || counter >= size()) - { - return -1; - } - return counter; - } - - private: - std::vector payload; - }; - class visitor final : public source::parser_visitor { + std::function write_text; + std::function write_read_only; + public: std::vector instructions; bool register_in_use{ true }; std::uint32_t variable_counter = 1; std::vector references; std::shared_ptr table; - read_only_table<'.', 'C', 'L'> read_only; + + visitor(std::function write_text, + std::function write_read_only); virtual void visit(source::declaration *declaration) override; virtual void visit(source::definition *definition) override; @@ -332,6 +182,7 @@ namespace elna::riscv virtual void visit(source::if_statement *statement) override; virtual void visit(source::while_statement *statement) override; virtual void visit(source::block *block) override; + virtual void visit(source::program *program) override; virtual void visit(source::variable_expression *variable) override; 
virtual void visit(source::binary_expression *expression) override; virtual void visit(source::integer_literal *number) override; diff --git a/include/elna/backend/target.hpp b/include/elna/backend/target.hpp index e2be438..72f60ac 100644 --- a/include/elna/backend/target.hpp +++ b/include/elna/backend/target.hpp @@ -1,7 +1,80 @@ #include "elna/source/parser.hpp" #include +#include namespace elna::riscv { - void riscv32_elf(source::block *ast, const std::filesystem::path& out_file); + struct elfio_writer + { + struct entry + { + std::string_view label; + const std::byte *data{ nullptr }; + std::size_t size{ 0 }; + }; + + /** + * An iterator over label and string pairs. + */ + struct iterator + { + using iterator_category = std::forward_iterator_tag; + using difference_type = ptrdiff_t; + using value_type = entry; + using pointer = const value_type *; + using reference = const value_type&; + + reference operator*() const noexcept; + pointer operator->() const noexcept; + iterator& operator++(); + iterator& operator++(int); + bool operator==(const iterator& that) const; + bool operator!=(const iterator& that) const; + + private: + std::vector::const_iterator labels; + std::vector::const_iterator sizes; + value_type payload; + + iterator(std::vector::const_iterator labels, std::vector::const_iterator sizes, + const std::byte *data) + : labels(labels), sizes(sizes) + { + if (data != nullptr) + { + payload = { *this->labels, data, *this->sizes}; + } + } + + iterator(std::vector::const_iterator labels, std::vector::const_iterator sizes) + : labels(labels), sizes(sizes), payload{} + { + } + + friend elfio_writer; + }; + explicit elfio_writer(ELFIO::section *text); + + void operator()(const std::string& label, const std::byte *data, std::size_t size); + std::string_view operator()(const std::byte *data, std::size_t size); + + iterator begin() const; + iterator end() const; + + /** + * Searches the content by label and returns its index or -1 when the + * label does not 
exist. + * + * \param needle Label name. + * \return Data index. + */ + std::ptrdiff_t lookup(const std::string& label); + + private: + std::shared_ptr> labels; + std::shared_ptr> sizes; + ELFIO::section *text; + }; + + void riscv32_elf(source::program *ast, const std::filesystem::path& out_file); } diff --git a/include/elna/source/parser.hpp b/include/elna/source/parser.hpp index 0b12552..5697191 100644 --- a/include/elna/source/parser.hpp +++ b/include/elna/source/parser.hpp @@ -23,6 +23,7 @@ namespace elna::source class if_statement; class while_statement; class block; + class program; class binary_expression; class variable_expression; class integer_literal; @@ -39,6 +40,7 @@ namespace elna::source virtual void visit(if_statement *) = 0; virtual void visit(while_statement *) = 0; virtual void visit(block *) = 0; + virtual void visit(program *) = 0; virtual void visit(binary_expression *) = 0; virtual void visit(variable_expression *) = 0; virtual void visit(integer_literal *) = 0; @@ -56,6 +58,7 @@ namespace elna::source virtual void visit(if_statement *) override; virtual void visit(while_statement *) override; virtual void visit(block *block) override; + virtual void visit(program *program) override; virtual void visit(binary_expression *expression) override; virtual void visit(variable_expression *variable) override; virtual void visit(integer_literal *number) override; @@ -186,9 +189,6 @@ namespace elna::source statement& body(); }; - /** - * Block. - */ class block : public node { std::unique_ptr m_body; @@ -208,6 +208,15 @@ namespace elna::source std::shared_ptr table(); }; + class program : public block + { + public: + program(std::vector>&& definitions, + std::vector>&& declarations, + std::unique_ptr&& body); + virtual void accept(parser_visitor *visitor) override; + }; + class integer_literal : public expression { std::int32_t m_number; @@ -286,7 +295,7 @@ namespace elna::source * * \return Parsed program or nothing if an error occurred. 
*/ - std::unique_ptr parse(); + std::unique_ptr parse(); /** * Gets produced errors. diff --git a/source/parser.cpp b/source/parser.cpp index 673de5b..ebf089e 100644 --- a/source/parser.cpp +++ b/source/parser.cpp @@ -60,6 +60,11 @@ namespace elna::source block->body().accept(this); } + void empty_visitor::visit(program *program) + { + visit(dynamic_cast(program)); + } + void empty_visitor::visit(binary_expression *expression) { expression->lhs().accept(this); @@ -154,6 +159,18 @@ namespace elna::source return m_table; } + program::program(std::vector>&& definitions, + std::vector>&& declarations, + std::unique_ptr&& body) + : block(std::move(definitions), std::move(declarations), std::move(body)) + { + } + + void program::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + integer_literal::integer_literal(const std::int32_t value) : m_number(value) { @@ -352,9 +369,18 @@ namespace elna::source { } - std::unique_ptr parser::parse() + std::unique_ptr parser::parse() { - return parse_block(); + auto definitions = parse_definitions(); + auto declarations = parse_declarations(); + auto parsed_statement = parse_statement(); + + if (parsed_statement == nullptr) + { + return nullptr; + } + return std::make_unique(std::move(definitions), + std::move(declarations), std::move(parsed_statement)); } const std::list>& parser::errors() const noexcept