From f84fd91426f0d996bd786ad05c8c5fb8f207108d Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Wed, 6 Mar 2024 07:51:56 +0100 Subject: [PATCH] Get rid of the raw pointers in the parser --- CMakeLists.txt | 4 +- include/elna/ir.hpp | 18 +- include/elna/lexer.hpp | 115 ------------- include/elna/parser.hpp | 125 -------------- include/elna/riscv.hpp | 25 ++- include/elna/source/lexer.hpp | 116 +++++++++++++ include/elna/source/parser.hpp | 153 +++++++++++++++++ source/cl.cpp | 6 +- source/ir.cpp | 22 +-- source/lexer.cpp | 195 ++++++++++++---------- source/parser.cpp | 294 +++++++++++++++++++-------------- source/riscv.cpp | 90 ++++------ 12 files changed, 601 insertions(+), 562 deletions(-) delete mode 100644 include/elna/lexer.hpp delete mode 100644 include/elna/parser.hpp create mode 100644 include/elna/source/lexer.hpp create mode 100644 include/elna/source/parser.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 961adc9..22d3555 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,10 +19,10 @@ add_executable(elnsh shell/main.cpp target_include_directories(elnsh PRIVATE include) add_executable(elna source/main.cpp - source/lexer.cpp include/elna/lexer.hpp + source/lexer.cpp include/elna/source/lexer.hpp + source/parser.cpp include/elna/source/parser.hpp source/result.cpp include/elna/result.hpp source/riscv.cpp include/elna/riscv.hpp - source/parser.cpp include/elna/parser.hpp source/ir.cpp include/elna/ir.hpp source/cl.cpp include/elna/cl.hpp ) diff --git a/include/elna/ir.hpp b/include/elna/ir.hpp index b92f2e0..ae4269c 100644 --- a/include/elna/ir.hpp +++ b/include/elna/ir.hpp @@ -1,18 +1,16 @@ #pragma once -#include "elna/parser.hpp" +#include "elna/source/parser.hpp" namespace elna { - class TransformVisitor final : public ParserVisitor + class TransformVisitor final : public source::ParserVisitor { - void visit(Node *node) override; - void visit(Definition *definition) override; - void visit(BangStatement *statement) override; - void visit(Block *block) override; - void visit(Expression *expression) override; - void visit(Number *number) override; - void visit(Variable *variable) override; - void visit(BinaryExpression *binaryExpression) override; + void visit(source::definition *definition) override; + void visit(source::bang_statement *statement) override; + void visit(source::block *block) override; + void visit(source::integer_literal *number) override; + void visit(source::variable *variable) override; + void visit(source::binary_expression *binaryExpression) override; }; } diff --git a/include/elna/lexer.hpp b/include/elna/lexer.hpp deleted file mode 100644 index a6383c9..0000000 --- a/include/elna/lexer.hpp +++ /dev/null @@ -1,115 +0,0 @@ -#pragma once - -#include -#include - -#include "elna/result.hpp" - -namespace elna -{ -namespace lex -{ - /** - * Range over the source text that keeps track of the current position. - */ - struct source - { - class const_iterator - { - std::string::const_iterator m_buffer; - elna::source::position m_position; - - const_iterator(std::string::const_iterator buffer, - const elna::source::position start_position = elna::source::position()); - - public: - using iterator_category = std::forward_iterator_tag; - using difference_type = ptrdiff_t; - using value_type = char; - using pointer = const value_type *; - using reference = const value_type&; - - const elna::source::position& position() const noexcept; - - reference operator*() const noexcept; - pointer operator->() const noexcept; - const_iterator& operator++(); - const_iterator& operator++(int); - bool operator==(const const_iterator& that) const noexcept; - bool operator!=(const const_iterator& that) const noexcept; - - friend source; - }; - - source(const std::string& buffer); - const_iterator begin() const; - const_iterator end() const; - - private: - const std::string m_buffer; - }; - - /** - * Union type representing a single token. - */ - struct Token - { - /** - * Token type. - */ - enum class type : std::uint16_t - { - number = 0, - term_operator = 1, - let = 2, - identifier = 3, - equals = 4, - var = 5, - semicolon = 6, - left_paren = 7, - right_paren = 8, - bang = 9, - dot = 10, - comma = 11, - factor_operator = 12, - }; - - /** - * Type of the token value. - */ - union Value - { - std::int32_t number; - const char *identifier; - }; - - Token(type of, elna::source::position position); - Token(type of, std::int32_t value, elna::source::position position); - Token(type of, const char *value, elna::source::position position); - Token(const Token& that); - Token(Token&& that); - ~Token(); - - Token& operator=(const Token& that); - Token& operator=(Token&& that); - - type of() const noexcept; - const char *identifier() const noexcept; - std::int32_t number() const noexcept; - const elna::source::position& position() const noexcept; - - private: - type m_type; - Value m_value; - elna::source::position m_position; - }; - - /** - * Split the source into tokens. - * - * \param buffer Source text. - * \return Tokens or error. - */ - elna::source::result> lex(const char *buffer); -} -} diff --git a/include/elna/parser.hpp b/include/elna/parser.hpp deleted file mode 100644 index 0ee369e..0000000 --- a/include/elna/parser.hpp +++ /dev/null @@ -1,125 +0,0 @@ -#pragma once - -#include -#include -#include - -namespace elna -{ - enum class BinaryOperator - { - sum, - subtraction, - multiplication, - division - }; - - class Node; - class Definition; - class BangStatement; - class Block; - class Expression; - class BinaryExpression; - class Variable; - class Number; - - struct ParserVisitor - { - virtual void visit(Node *) = 0; - virtual void visit(Definition *) = 0; - virtual void visit(BangStatement *) = 0; - virtual void visit(Block *) = 0; - virtual void visit(Expression *) = 0; - virtual void visit(BinaryExpression *) = 0; - virtual void visit(Variable *) = 0; - virtual void visit(Number *) = 0; - }; - - /** - * AST node. - */ - class Node - { - public: - virtual void accept(ParserVisitor *) = 0; - }; - - class Statement : public Node - { - }; - - /** - * Constant definition. - */ - class Definition : public Node - { - public: - Number *number; - const char *identifier; - - virtual void accept(ParserVisitor *visitor) override; - }; - - class BangStatement : public Statement - { - public: - Expression *expression; - - virtual void accept(ParserVisitor *visitor) override; - }; - - /** - * Block. - */ - class Block : public Node - { - public: - Definition** definitions{ nullptr }; - size_t definitionsLength{ 0 }; - Statement *statement{ nullptr }; - - virtual void accept(ParserVisitor *visitor) override; - }; - - class Expression : public Node - { - public: - virtual void accept(ParserVisitor *visitor) override; - }; - - class Number : public Expression - { - public: - std::int32_t value; - - virtual void accept(ParserVisitor *visitor) override; - }; - - class Variable : public Expression - { - public: - const char *identifier; - - virtual void accept(ParserVisitor *visitor) override; - }; - - class BinaryExpression : public Expression - { - public: - Expression *lhs, *rhs; - BinaryOperator _operator; - - BinaryExpression(Expression *lhs, Expression *rhs, unsigned char); - - virtual void accept(ParserVisitor *visitor) override; - }; - - Expression *parseFactor(lex::Token **tokens, std::size_t *length); - Expression *parseTerm(lex::Token **tokens, std::size_t *length); - Expression *parseExpression(lex::Token **tokens, std::size_t *length); - Definition *parseDefinition(lex::Token **tokens, std::size_t *length); - Statement *parseStatement(lex::Token **tokens, std::size_t *length); - Definition **parseDefinitions(lex::Token **tokens, std::size_t *length, std::size_t *resultLength); - Block *parseBlock(lex::Token **tokens, std::size_t *length); - Block *parse(lex::Token *tokenStream, std::size_t length); -} diff --git a/include/elna/riscv.hpp b/include/elna/riscv.hpp index a55723d..2d8cd2e 100644 --- a/include/elna/riscv.hpp +++ b/include/elna/riscv.hpp @@ -1,7 +1,8 @@ #pragma once #include -#include "elna/parser.hpp" +#include +#include "elna/source/parser.hpp" #include "elna/result.hpp" namespace elna @@ -141,7 +142,7 @@ namespace elna std::uint32_t instruction{ 0 }; }; - class RiscVVisitor : public ParserVisitor + class RiscVVisitor : public source::ParserVisitor { public: Instruction *instructions; @@ -149,19 +150,15 @@ namespace elna bool registerInUse{ true }; std::uint32_t variableCounter = 1; Reference references[3]; - std::int32_t *constValues{ nullptr }; - const char **constNames{ nullptr }; - std::size_t constCount{ 0 }; + std::unordered_map constants; - virtual void visit(Node *) override; - virtual void visit(Definition *definition) override; - virtual void visit(BangStatement *statement) override; - virtual void visit(Block *block) override; - virtual void visit(Expression *operand) override; - virtual void visit(Variable *variable) override; - virtual void visit(Number *number) override; - virtual void visit(BinaryExpression *expression) override; + virtual void visit(source::definition *definition) override; + virtual void visit(source::bang_statement *statement) override; + virtual void visit(source::block *block) override; + virtual void visit(source::variable *variable) override; + virtual void visit(source::integer_literal *number) override; + virtual void visit(source::binary_expression *expression) override; }; - Symbol writeNext(Block *ast); + Symbol writeNext(source::block *ast); } diff --git a/include/elna/source/lexer.hpp b/include/elna/source/lexer.hpp new file mode 100644 index 0000000..3f08770 --- /dev/null +++ b/include/elna/source/lexer.hpp @@ -0,0 +1,116 @@ +#pragma once + +#include +#include + +#include "elna/result.hpp" + +namespace elna +{ +namespace source +{ + /** + * Range over the source text that keeps track of the current position. + */ + class text_iterator + { + std::string::const_iterator m_buffer; + elna::source::position m_position; + + text_iterator(std::string::const_iterator buffer, + const elna::source::position start_position = elna::source::position()); + + public: + using iterator_category = std::forward_iterator_tag; + using difference_type = ptrdiff_t; + using value_type = char; + using pointer = const value_type *; + using reference = const value_type&; + + const elna::source::position& position() const noexcept; + + reference operator*() const noexcept; + pointer operator->() const noexcept; + text_iterator& operator++(); + text_iterator& operator++(int); + bool operator==(const text_iterator& that) const noexcept; + bool operator!=(const text_iterator& that) const noexcept; + + friend std::pair text_iterators(const std::string& buffer); + }; + + std::pair + text_iterators(const std::string &buffer); + + /** + * Union type representing a single token. + */ + struct token + { + /** + * Token type. + */ + enum class type : std::uint16_t + { + number = 0, + term_operator = 1, + let = 2, + identifier = 3, + equals = 4, + var = 5, + semicolon = 6, + left_paren = 7, + right_paren = 8, + bang = 9, + dot = 10, + comma = 11, + factor_operator = 12, + }; + + /** + * Type of the token value. + */ + union value + { + value(); + value(std::int32_t value); + value(const std::string& value); + ~value(); + + std::nullptr_t nil; + std::int32_t number; + std::string identifier; + }; + + token(type of, elna::source::position position); + token(type of, std::int32_t value, const elna::source::position position); + token(type of, const std::string& value, const elna::source::position position); + token(const token& that); + token(token&& that); + ~token(); + + token& operator=(const token& that); + token& operator=(token&& that); + + type of() const noexcept; + const std::string& identifier() const; + std::int32_t number() const; + const elna::source::position& position() const noexcept; + + private: + type m_type; + value m_value; + elna::source::position m_position; + + bool has_identifier() const noexcept; + }; + + /** + * Split the source into tokens. + * + * \param buffer Source text. + * \return Tokens or error. + */ + elna::source::result> lex(const std::string& buffer); +} +} diff --git a/include/elna/source/parser.hpp b/include/elna/source/parser.hpp new file mode 100644 index 0000000..975e0e9 --- /dev/null +++ b/include/elna/source/parser.hpp @@ -0,0 +1,153 @@ +#pragma once + +#include +#include +#include + +namespace elna +{ +namespace source +{ + enum class binary_operator + { + sum, + subtraction, + multiplication, + division + }; + + class definition; + class bang_statement; + class block; + class binary_expression; + class variable; + class integer_literal; + + struct ParserVisitor + { + virtual void visit(definition *) = 0; + virtual void visit(bang_statement *) = 0; + virtual void visit(block *) = 0; + virtual void visit(binary_expression *) = 0; + virtual void visit(variable *) = 0; + virtual void visit(integer_literal *) = 0; + }; + + /** + * AST node. + */ + class node + { + public: + virtual void accept(ParserVisitor *) = 0; + }; + + class statement : public node + { + }; + + class expression : public node + { + }; + + /** + * Constant definition. + */ + class definition : public node + { + std::string m_identifier; + std::unique_ptr m_body; + + public: + definition(std::string&& identifier, std::unique_ptr&& body); + virtual void accept(ParserVisitor *visitor) override; + + std::string& identifier() noexcept; + integer_literal& body(); + }; + + class bang_statement : public statement + { + std::unique_ptr m_body; + + public: + bang_statement(std::unique_ptr&& body); + virtual void accept(ParserVisitor *visitor) override; + + expression& body(); + }; + + /** + * Block. + */ + class block : public node + { + std::unique_ptr m_body; + std::vector> m_definitions; + + public: + block(std::vector>&& definitions, std::unique_ptr&& body); + virtual void accept(ParserVisitor *visitor) override; + + statement& body(); + std::vector>& definitions() noexcept; + }; + + class integer_literal : public expression + { + std::int32_t m_number; + + public: + integer_literal(const std::int32_t value); + virtual void accept(ParserVisitor *visitor) override; + + std::int32_t number() const noexcept; + }; + + class variable : public expression + { + std::string m_name; + + public: + variable(const std::string& name); + virtual void accept(ParserVisitor *visitor) override; + + const std::string& name() const noexcept; + }; + + class binary_expression : public expression + { + std::unique_ptr m_lhs; + std::unique_ptr m_rhs; + binary_operator m_operator; + + public: + binary_expression(std::unique_ptr&& lhs, + std::unique_ptr&& rhs, const unsigned char operation); + + virtual void accept(ParserVisitor *visitor) override; + expression& lhs(); + expression& rhs(); + binary_operator operation() const noexcept; + }; + + struct parser + { + parser(const std::vector& tokens); + + std::unique_ptr parse(); + + private: + std::unique_ptr parse_factor(); + std::unique_ptr parse_term(); + std::unique_ptr parse_expression(); + std::unique_ptr parse_definition(); + std::unique_ptr parse_bang_statement(); + std::vector> parse_definitions(); + std::unique_ptr parse_block(); + + std::vector::const_iterator tokens; + std::vector::const_iterator end; + }; +} +} diff --git a/source/cl.cpp b/source/cl.cpp index b3090a8..7ed6be5 100644 --- a/source/cl.cpp +++ b/source/cl.cpp @@ -41,7 +41,7 @@ namespace elna return 3; } size_t tokensCount{ 0 }; - auto lex_result = lex::lex(sourceText); + auto lex_result = source::lex(sourceText); free(sourceText); if (lex_result.has_errors()) { @@ -51,12 +51,12 @@ namespace elna } return 1; } - auto ast = parse(lex_result.success().data(), tokensCount); + auto ast = source::parser(lex_result.success()).parse(); if (ast == nullptr) { return 2; } - auto program = writeNext(ast); + auto program = writeNext(ast.get()); // Create code section ELFIO::section* text_sec = writer.sections.add(".text"); diff --git a/source/ir.cpp b/source/ir.cpp index e209866..bd8c622 100644 --- a/source/ir.cpp +++ b/source/ir.cpp @@ -4,42 +4,32 @@ namespace elna { - void TransformVisitor::visit(Node *node) + void TransformVisitor::visit(source::definition *definition) { assert(false); } - void TransformVisitor::visit(Definition *definition) + void TransformVisitor::visit(source::bang_statement *statement) { assert(false); } - void TransformVisitor::visit(BangStatement *statement) + void TransformVisitor::visit(source::block *block) { assert(false); } - void TransformVisitor::visit(Block *block) + void TransformVisitor::visit(source::integer_literal *number) { assert(false); } - void TransformVisitor::visit(Expression *expression) + void TransformVisitor::visit(source::variable *variable) { assert(false); } - void TransformVisitor::visit(Number *number) - { - assert(false); - } - - void TransformVisitor::visit(Variable *variable) - { - assert(false); - } - - void TransformVisitor::visit(BinaryExpression *binaryExpression) + void TransformVisitor::visit(source::binary_expression *binaryExpression) { assert(false); } diff --git a/source/lexer.cpp b/source/lexer.cpp index 732d63a..f39f49b 100644 --- a/source/lexer.cpp +++ b/source/lexer.cpp @@ -1,54 +1,42 @@ -#include "elna/lexer.hpp" - -#include +#include "elna/source/lexer.hpp" +#include namespace elna { -namespace lex +namespace source { using source_position = elna::source::position; using source_error = elna::source::error; - using source_result = elna::source::result>; + using source_result = elna::source::result>; - source::source(const std::string& buffer) - : m_buffer(buffer) + std::pair text_iterators(const std::string &buffer) { + return std::make_pair<>(text_iterator(std::cbegin(buffer)), + text_iterator(std::cend(buffer), position{0, 0})); } - source::const_iterator source::begin() const - { - return source::const_iterator(std::cbegin(m_buffer)); - } - - source::const_iterator source::end() const - { - source_position end_position{ 0, 0 }; - - return source::const_iterator(std::cend(m_buffer), end_position); - } - - source::const_iterator::const_iterator(std::string::const_iterator buffer, + text_iterator::text_iterator(std::string::const_iterator buffer, const source_position start_position) : m_buffer(buffer), m_position(start_position) { } - const source_position& source::const_iterator::position() const noexcept + const source_position& text_iterator::position() const noexcept { return this->m_position; } - source::const_iterator::reference source::const_iterator::operator*() const noexcept + text_iterator::reference text_iterator::operator*() const noexcept { return *m_buffer; } - source::const_iterator::pointer source::const_iterator::operator->() const noexcept + text_iterator::pointer text_iterator::operator->() const noexcept { return m_buffer.base(); } - source::const_iterator& source::const_iterator::operator++() + text_iterator& text_iterator::operator++() { if (*this->m_buffer == '\n') { @@ -64,129 +52,156 @@ namespace lex return *this; } - source::const_iterator& source::const_iterator::operator++(int) + text_iterator& text_iterator::operator++(int) { auto tmp = *this; ++(*this); return *this; } - bool source::const_iterator::operator==(const source::const_iterator& that) const noexcept + bool text_iterator::operator==(const text_iterator& that) const noexcept { return this->m_buffer == that.m_buffer; } - bool source::const_iterator::operator!=(const source::const_iterator& that) const noexcept + bool text_iterator::operator!=(const text_iterator& that) const noexcept { return !(*this == that); } - Token::Token(const type of, const char *value, source_position position) - : m_type(of), m_position(position) + token::value::value() + : nil(nullptr) { - std::size_t value_length = strlen(value); - char *buffer = reinterpret_cast(malloc(value_length + 1)); - - std::memcpy(buffer, value, value_length); - buffer[value_length] = 0; - - m_value.identifier = buffer; } - Token::Token(const type of, std::int32_t number, source_position position) - : m_type(of), m_position(position) + token::value::value(std::int32_t value) + : number(value) { - m_value.number = number; } - Token::Token(const type of, source_position position) + token::value::value(const std::string& value) + : identifier(value) + { + } + + token::value::~value() + { + } + + token::token(const type of, const std::string& value, const source_position position) + : m_type(of), m_value(value), m_position(position) + { + } + + token::token(const type of, std::int32_t number, const source_position position) + : m_type(of), m_value(number), m_position(position) + { + } + + token::token(const type of, source_position position) : m_type(of), m_position(position) { } - Token::Token(const Token& that) + token::token(const token& that) : m_type(that.of()), m_position(that.position()) { *this = that; } - Token::Token(Token&& that) + token::token(token&& that) : m_type(that.of()), m_position(that.position()) { *this = std::move(that); } - Token::~Token() + token::~token() { if (m_type == type::identifier || m_type == type::term_operator || m_type == type::factor_operator) { - std::free(const_cast(m_value.identifier)); + m_value.identifier.~basic_string(); } } - Token& Token::operator=(const Token& that) + token& token::operator=(const token& that) { m_type = that.of(); m_position = that.position(); - if (that.of() == type::identifier || that.of() == type::term_operator || m_type == type::factor_operator) - { - std::size_t value_length = strlen(that.identifier()); - char *buffer = reinterpret_cast(malloc(value_length + 1)); - - std::memcpy(buffer, that.identifier(), value_length); - buffer[value_length] = 0; - - m_value.identifier = buffer; - } - else if (that.of() == type::number) - { - m_value.number = that.number(); - } - return *this; - } - - Token& Token::operator=(Token&& that) - { - m_type = that.of(); - m_position = that.position(); - if (that.of() == type::identifier || that.of() == type::term_operator || that.of() == type::factor_operator) + if (that.has_identifier()) { m_value.identifier = that.identifier(); - that.m_value.identifier = nullptr; } else if (that.of() == type::number) { m_value.number = that.number(); } + else + { + m_value.nil = nullptr; + } return *this; } - Token::type Token::of() const noexcept + token& token::operator=(token&& that) + { + m_type = that.of(); + m_position = that.position(); + if (that.has_identifier()) + { + m_value.identifier = std::move(that.identifier()); + } + else if (that.of() == type::number) + { + m_value.number = that.number(); + } + else + { + m_value.nil = nullptr; + } + return *this; + } + + token::type token::of() const noexcept { return m_type; } - const char *Token::identifier() const noexcept + const std::string& token::identifier() const { + if (!has_identifier()) + { + throw std::bad_variant_access(); + } return m_value.identifier; } - std::int32_t Token::number() const noexcept + std::int32_t token::number() const { + if (of() != type::number) + { + throw std::bad_variant_access(); + } return m_value.number; } - const source_position& Token::position() const noexcept + const source_position& token::position() const noexcept { return m_position; } - source_result lex(const char *buffer) + bool token::has_identifier() const noexcept { - std::vector tokens; - source input{ buffer }; + return of() == type::identifier + || of() == type::term_operator + || of() == type::factor_operator; + } - for (auto iterator = input.begin(); iterator != input.end();) + source_result lex(const std::string& buffer) + { + std::vector tokens; + auto [iterator, text_end] = text_iterators(buffer); + + while (iterator != text_end) { if (*iterator == ' ' || *iterator == '\n') { @@ -194,59 +209,59 @@ namespace lex else if (std::isdigit(*iterator)) { tokens.emplace_back( - Token::type::number, + token::type::number, static_cast(*iterator - '0'), iterator.position() ); } else if (*iterator == '=') { - tokens.emplace_back(Token::type::equals, iterator.position()); + tokens.emplace_back(token::type::equals, iterator.position()); } else if (*iterator == '(') { - tokens.emplace_back(Token::type::left_paren, iterator.position()); + tokens.emplace_back(token::type::left_paren, iterator.position()); } else if (*iterator == ')') { - tokens.emplace_back(Token::type::right_paren, iterator.position()); + tokens.emplace_back(token::type::right_paren, iterator.position()); } else if (*iterator == ';') { - tokens.emplace_back(Token::type::semicolon, iterator.position()); + tokens.emplace_back(token::type::semicolon, iterator.position()); } else if (*iterator == ',') { - tokens.emplace_back(Token::type::comma, iterator.position()); + tokens.emplace_back(token::type::comma, iterator.position()); } else if (*iterator == '!') { - tokens.emplace_back(Token::type::bang, iterator.position()); + tokens.emplace_back(token::type::bang, iterator.position()); } else if (*iterator == '.') { - tokens.emplace_back(Token::type::dot, iterator.position()); + tokens.emplace_back(token::type::dot, iterator.position()); } else if (std::isalpha(*iterator)) { std::string word; auto i = iterator; - while (i != input.end() && std::isalpha(*i)) + while (i != text_end && std::isalpha(*i)) { word.push_back(*i); ++i; } if (word == "const") { - tokens.emplace_back(Token::type::let, iterator.position()); + tokens.emplace_back(token::type::let, iterator.position()); } else if (word == "var") { - tokens.emplace_back(Token::type::var, iterator.position()); + tokens.emplace_back(token::type::var, iterator.position()); } else { - tokens.emplace_back(Token::type::identifier, word.c_str(), iterator.position()); + tokens.emplace_back(token::type::identifier, word.c_str(), iterator.position()); } iterator = i; continue; @@ -255,13 +270,13 @@ namespace lex { std::string _operator{ *iterator }; - tokens.emplace_back(Token::type::term_operator, _operator.c_str(), iterator.position()); + tokens.emplace_back(token::type::term_operator, _operator.c_str(), iterator.position()); } else if (*iterator == '*' || *iterator == '/') { std::string _operator{ *iterator }; - tokens.emplace_back(Token::type::factor_operator, _operator.c_str(), iterator.position()); + tokens.emplace_back(token::type::factor_operator, _operator.c_str(), iterator.position()); } else { diff --git a/source/parser.cpp b/source/parser.cpp index 094a405..97dbc43 100644 --- a/source/parser.cpp +++ b/source/parser.cpp @@ -1,254 +1,294 @@ -#include "elna/parser.hpp" +#include "elna/source/parser.hpp" #include namespace elna +{ +namespace source { /** * AST node. */ - void Node::accept(ParserVisitor *) + void node::accept(ParserVisitor *) { } - void Definition::accept(ParserVisitor *visitor) + definition::definition(std::string&& identifier, std::unique_ptr&& body) + : m_identifier(std::move(identifier)), m_body(std::move(body)) + { + } + + void definition::accept(ParserVisitor *visitor) { visitor->visit(this); } - void Block::accept(ParserVisitor *visitor) + std::string& definition::identifier() noexcept + { + return m_identifier; + } + + integer_literal& definition::body() + { + return *m_body; + } + + block::block(std::vector>&& definitions, std::unique_ptr&& body) + : m_definitions(std::move(definitions)), m_body(std::move(body)) + { + } + + void block::accept(ParserVisitor *visitor) { visitor->visit(this); } - void Expression::accept(ParserVisitor *visitor) + statement& block::body() + { + return *m_body; + } + + std::vector>& block::definitions() noexcept + { + return m_definitions; + } + + integer_literal::integer_literal(const std::int32_t value) + : m_number(value) + { + } + + void integer_literal::accept(ParserVisitor *visitor) { visitor->visit(this); } - void Number::accept(ParserVisitor *visitor) + std::int32_t integer_literal::number() const noexcept + { + return m_number; + } + + variable::variable(const std::string& name) + : m_name(name) + { + } + + void variable::accept(ParserVisitor *visitor) { visitor->visit(this); } - void Variable::accept(ParserVisitor *visitor) + const std::string& variable::name() const noexcept { - visitor->visit(this); + return m_name; } - BinaryExpression::BinaryExpression(Expression *lhs, Expression *rhs, unsigned char _operator) + binary_expression::binary_expression(std::unique_ptr&& lhs, + std::unique_ptr&& rhs, const unsigned char operation) + : m_lhs(std::move(lhs)), m_rhs(std::move(rhs)) { - this->lhs = lhs; - this->rhs = rhs; - - switch (_operator) + switch (operation) { case '+': - this->_operator = BinaryOperator::sum; + this->m_operator = binary_operator::sum; break; case '-': - this->_operator = BinaryOperator::subtraction; + this->m_operator = binary_operator::subtraction; break; case '*': - this->_operator = BinaryOperator::multiplication; + this->m_operator = binary_operator::multiplication; break; case '/': - this->_operator = BinaryOperator::division; + this->m_operator = binary_operator::division; break; default: throw std::logic_error("Invalid binary operator"); } } - void BinaryExpression::accept(ParserVisitor *visitor) + void binary_expression::accept(ParserVisitor *visitor) { visitor->visit(this); } - void BangStatement::accept(ParserVisitor *visitor) + expression& binary_expression::lhs() + { + return *m_lhs; + } + + expression& binary_expression::rhs() + { + return *m_rhs; + } + + binary_operator binary_expression::operation() const noexcept + { + return m_operator; + } + + bang_statement::bang_statement(std::unique_ptr&& body) + : m_body(std::move(body)) + { + } + + void bang_statement::accept(ParserVisitor *visitor) { visitor->visit(this); } - Block *parse(lex::Token *tokenStream, std::size_t length) + expression& bang_statement::body() { - return parseBlock(&tokenStream, &length); + return *m_body; } - Expression *parseFactor(lex::Token **tokens, size_t *length) + parser::parser(const std::vector& tokens) + : tokens(tokens.cbegin()), end(tokens.cend()) { - if ((*tokens)[0].of() == lex::Token::type::identifier) - { - auto variable = new Variable(); - variable->identifier = (*tokens)[0].identifier(); - ++(*tokens); - --(*length); - return variable; - } - else if ((*tokens)[0].of() == lex::Token::Token::type::number) - { - auto number = new Number(); - number->value = (*tokens)[0].number(); - ++(*tokens); - --(*length); - return number; - } - else if ((*tokens)[0].of() == lex::Token::type::left_paren) - { - ++(*tokens); - --(*length); + } - auto expression = parseExpression(tokens, length); + std::unique_ptr parser::parse() + { + return parse_block(); + } - ++(*tokens); - --(*length); + std::unique_ptr parser::parse_factor() + { + if (tokens->of() == source::token::type::identifier) + { + auto result = std::make_unique(tokens->identifier()); + ++tokens; + return result; + } + else if (tokens->of() == source::token::token::type::number) + { + auto result = std::make_unique(tokens->number()); + ++tokens; + return result; + } + else if (tokens->of() == source::token::type::left_paren) + { + ++tokens; + + auto expression = parse_expression(); + + ++tokens; return expression; } return nullptr; } - Expression *parseTerm(lex::Token **tokens, size_t *length) + std::unique_ptr parser::parse_term() { - auto lhs = parseFactor(tokens, length); - if (lhs == nullptr || *length == 0 || (*tokens)[0].of() != lex::Token::type::factor_operator) + auto lhs = parse_factor(); + if (lhs == nullptr || tokens == end || tokens->of() != source::token::type::factor_operator) { return lhs; } - auto _operator = (*tokens)[0].identifier()[0]; - ++(*tokens); - --(*length); + auto _operator = tokens->identifier()[0]; + ++tokens; - auto rhs = parseFactor(tokens, length); + auto rhs = parse_factor(); if (rhs != nullptr) { - return new BinaryExpression(lhs, rhs, _operator); + return std::make_unique(std::move(lhs), + std::move(rhs), _operator); } return nullptr; } - Expression *parseExpression(lex::Token **tokens, size_t *length) + std::unique_ptr parser::parse_expression() { - auto term = parseTerm(tokens, length); - if (term == nullptr || *length == 0 || (*tokens)[0].of() != lex::Token::type::term_operator) + auto term = parse_term(); + if (term == nullptr || tokens == end || tokens->of() != source::token::type::term_operator) { return term; } - auto _operator = (*tokens)[0].identifier()[0]; - ++(*tokens); - --(*length); + auto _operator = tokens->identifier()[0]; + ++tokens; - auto expression = parseExpression(tokens, length); + auto rhs = parse_expression(); - if (expression != nullptr) + if (rhs != nullptr) { - return new BinaryExpression(term, expression, _operator); + return std::make_unique(std::move(term), + std::move(rhs), _operator); } return nullptr; } - Definition *parseDefinition(lex::Token **tokens, size_t *length) + std::unique_ptr parser::parse_definition() { - auto definition = new Definition(); - definition->identifier = (*tokens)[0].identifier(); // Copy. + std::string definition_identifier = tokens->identifier(); // Copy. - ++(*tokens); - ++(*tokens); // Skip the equals sign. - *length -= 2; + ++tokens; + ++tokens; // Skip the equals sign. - if ((*tokens)[0].of() == lex::Token::type::number) + if (tokens->of() == source::token::type::number) { - auto number = new Number(); - number->value = (*tokens)[0].number(); - definition->number = number; - ++(*tokens); - --(*length); - return definition; + auto result = std::make_unique(std::move(definition_identifier), + std::make_unique(tokens->number())); + ++tokens; + return result; } return nullptr; } - Statement *parseStatement(lex::Token **tokens, std::size_t *length) + std::unique_ptr parser::parse_bang_statement() { - if ((*tokens)[0].of() == lex::Token::type::bang) + if (tokens->of() == source::token::type::bang) { - ++(*tokens); - --(*length); - auto statement = new BangStatement(); - auto expression = parseExpression(tokens, length); - if (expression != nullptr) + ++tokens; + auto bang_body = parse_expression(); + if (bang_body != nullptr) { - statement->expression = expression; + return std::make_unique(std::move(bang_body)); } - else - { - return nullptr; - } - return statement; } return nullptr; } - Definition **parseDefinitions(lex::Token **tokens, size_t *length, size_t *resultLength) + std::vector> parser::parse_definitions() { - ++(*tokens); // Skip const. - --(*length); + ++tokens; // Skip const. - Definition **definitions; - *resultLength = 0; + std::vector> definitions; - while (*length != 0) + while (tokens != end) { - auto definition = parseDefinition(tokens, length); - if (definition == nullptr) + auto parsed_definition = parse_definition(); + if (parsed_definition == nullptr) { - return nullptr; + return definitions; } - definitions = reinterpret_cast( - realloc(definitions, (*resultLength + 1) * sizeof(Definition*))); - definitions[(*resultLength)++] = definition; + definitions.push_back(std::move(parsed_definition)); - if ((*tokens)[0].of() == lex::Token::type::semicolon) + if (tokens->of() == source::token::type::semicolon) { break; } - if ((*tokens)[0].of() == lex::Token::type::comma) + if (tokens->of() == source::token::type::comma) { - ++(*tokens); - --(*length); + ++tokens; } } return definitions; } - Block *parseBlock(lex::Token **tokens, std::size_t *length) + std::unique_ptr parser::parse_block() { - auto block = new Block(); - if ((*tokens)[0].of() == lex::Token::type::let) + std::vector> definitions; + if (tokens->of() == source::token::type::let) { - size_t length_ = 0; - auto constDefinitions = parseDefinitions(tokens, length, &length_); - if (constDefinitions != nullptr) - { - block->definitionsLength = length_; - block->definitions = constDefinitions; - } - else - { - return nullptr; - } - ++(*tokens); - --(*length); + definitions = parse_definitions(); + ++tokens; } - auto statement = parseStatement(tokens, length); - if (statement != nullptr) - { - block->statement = statement; - } - else + auto parsed_statement = parse_bang_statement(); + if (parsed_statement == nullptr) { return nullptr; } - return block; + return std::make_unique(std::move(definitions), std::move(parsed_statement)); } } +} diff --git a/source/riscv.cpp b/source/riscv.cpp index c2bf3cc..13b79b1 100644 --- a/source/riscv.cpp +++ b/source/riscv.cpp @@ -1,4 +1,3 @@ -#include "elna/parser.hpp" #include "elna/riscv.hpp" #include #include @@ -59,31 +58,22 @@ namespace elna return reinterpret_cast(&this->instruction) + sizeof(this->instruction); } - void RiscVVisitor::visit(Node *) + void RiscVVisitor::visit(source::definition *definition) { + constants[definition->identifier()] = definition->body().number(); } - void RiscVVisitor::visit(Definition *definition) + void RiscVVisitor::visit(source::block *block) { - ++constCount; - constNames = reinterpret_cast(realloc(constNames, sizeof(const char *) * constCount)); - constValues = reinterpret_cast(realloc(constValues, sizeof(std::int32_t) * constCount)); - - constNames[constCount - 1] = definition->identifier; - constValues[constCount - 1] = definition->number->value; - } - - void RiscVVisitor::visit(Block *block) - { - for (std::size_t i = 0; i < block->definitionsLength; ++i) + for (const auto& block_definition : block->definitions()) { - block->definitions[i]->accept(this); + block_definition->accept(this); } this->instructionsLength += 4; this->instructions = reinterpret_cast( realloc(this->instructions, this->instructionsLength * sizeof(Instruction))); - block->statement->accept(this); + block->body().accept(this); // Prologue. const uint stackSize = static_cast(variableCounter * 4 + 12); @@ -137,44 +127,12 @@ namespace elna .i(XRegister::zero, Funct3::jalr, XRegister::ra, 0); } - void RiscVVisitor::visit(BangStatement *statement) + void RiscVVisitor::visit(source::bang_statement *statement) { - statement->expression->accept(this); + statement->body().accept(this); } - void RiscVVisitor::visit(Expression *operand) - { - if (dynamic_cast(operand) != nullptr) - { - return dynamic_cast(operand)->accept(this); - } - if (dynamic_cast(operand) != nullptr) - { - return dynamic_cast(operand)->accept(this); - } - } - - void RiscVVisitor::visit(Variable *variable) - { - std::size_t i = 0; - for (; i < constCount; ++i) - { - if (std::strcmp(variable->identifier, constNames[i]) == 0) - { - break; - } - } - const auto freeRegister = this->registerInUse ? XRegister::a0 : XRegister::t0; - - ++this->instructionsLength; - this->instructions = reinterpret_cast( - realloc(this->instructions, this->instructionsLength * sizeof(Instruction))); - this->instructions[this->instructionsLength - 1] = - Instruction(BaseOpcode::opImm) // movl $x, %eax; where $x is a number. - .i(freeRegister, Funct3::addi, XRegister::zero, constValues[i]); - } - - void RiscVVisitor::visit(Number *number) + void RiscVVisitor::visit(source::variable *variable) { const auto freeRegister = this->registerInUse ? XRegister::a0 : XRegister::t0; @@ -183,15 +141,27 @@ namespace elna realloc(this->instructions, this->instructionsLength * sizeof(Instruction))); this->instructions[this->instructionsLength - 1] = Instruction(BaseOpcode::opImm) // movl $x, %eax; where $x is a number. - .i(freeRegister, Funct3::addi, XRegister::zero, number->value); + .i(freeRegister, Funct3::addi, XRegister::zero, constants[variable->name()]); } - void RiscVVisitor::visit(BinaryExpression *expression) + void RiscVVisitor::visit(source::integer_literal *number) + { + const auto freeRegister = this->registerInUse ? XRegister::a0 : XRegister::t0; + + ++this->instructionsLength; + this->instructions = reinterpret_cast( + realloc(this->instructions, this->instructionsLength * sizeof(Instruction))); + this->instructions[this->instructionsLength - 1] = + Instruction(BaseOpcode::opImm) // movl $x, %eax; where $x is a number. + .i(freeRegister, Funct3::addi, XRegister::zero, number->number()); + } + + void RiscVVisitor::visit(source::binary_expression *expression) { const auto lhs_register = this->registerInUse ? XRegister::a0 : XRegister::t0; this->registerInUse = true; - expression->lhs->accept(this); + expression->lhs().accept(this); ++this->instructionsLength; this->instructions = reinterpret_cast( @@ -202,7 +172,7 @@ namespace elna auto lhs_stack_position = ++this->variableCounter; this->registerInUse = false; - expression->rhs->accept(this); + expression->rhs().accept(this); this->instructionsLength += 2; this->instructions = reinterpret_cast( @@ -213,24 +183,24 @@ namespace elna static_cast((lhs_stack_position - 1) * 4)); // Calculate the result and assign it to a variable on the stack. - switch (expression->_operator) + switch (expression->operation()) { - case BinaryOperator::sum: + case source::binary_operator::sum: this->instructions[instructionsLength - 1] = Instruction(BaseOpcode::op) .r(lhs_register, Funct3::add, XRegister::a0, XRegister::t0); break; - case BinaryOperator::subtraction: + case source::binary_operator::subtraction: this->instructions[instructionsLength - 1] = Instruction(BaseOpcode::op) .r(lhs_register, Funct3::sub, XRegister::a0, XRegister::t0, Funct7::sub); break; - case BinaryOperator::multiplication: + case source::binary_operator::multiplication: this->instructions[instructionsLength - 1] = Instruction(BaseOpcode::op) .r(lhs_register, Funct3::mul, XRegister::a0, XRegister::t0, Funct7::muldiv); break; } } - Symbol writeNext(Block *ast) + Symbol writeNext(source::block *ast) { auto visitor = std::make_unique(); visitor->visit(ast);