Get rid of the raw pointers in the parser

This commit is contained in:
Eugen Wissner 2024-03-06 07:51:56 +01:00
parent 632dc53b53
commit f84fd91426
Signed by: belka
GPG Key ID: A27FDC1E8EE902C0
12 changed files with 601 additions and 562 deletions

View File

@ -19,10 +19,10 @@ add_executable(elnsh shell/main.cpp
target_include_directories(elnsh PRIVATE include) target_include_directories(elnsh PRIVATE include)
add_executable(elna source/main.cpp add_executable(elna source/main.cpp
source/lexer.cpp include/elna/lexer.hpp source/lexer.cpp include/elna/source/lexer.hpp
source/parser.cpp include/elna/source/parser.hpp
source/result.cpp include/elna/result.hpp source/result.cpp include/elna/result.hpp
source/riscv.cpp include/elna/riscv.hpp source/riscv.cpp include/elna/riscv.hpp
source/parser.cpp include/elna/parser.hpp
source/ir.cpp include/elna/ir.hpp source/ir.cpp include/elna/ir.hpp
source/cl.cpp include/elna/cl.hpp source/cl.cpp include/elna/cl.hpp
) )

View File

@ -1,18 +1,16 @@
#pragma once #pragma once
#include "elna/parser.hpp" #include "elna/source/parser.hpp"
namespace elna namespace elna
{ {
class TransformVisitor final : public ParserVisitor class TransformVisitor final : public source::ParserVisitor
{ {
void visit(Node *node) override; void visit(source::definition *definition) override;
void visit(Definition *definition) override; void visit(source::bang_statement *statement) override;
void visit(BangStatement *statement) override; void visit(source::block *block) override;
void visit(Block *block) override; void visit(source::integer_literal *number) override;
void visit(Expression *expression) override; void visit(source::variable *variable) override;
void visit(Number *number) override; void visit(source::binary_expression *binaryExpression) override;
void visit(Variable *variable) override;
void visit(BinaryExpression *binaryExpression) override;
}; };
} }

View File

@ -1,115 +0,0 @@
#pragma once
#include <cstdint>
#include <string>
#include "elna/result.hpp"
namespace elna
{
namespace lex
{
/**
 * Range over the source text that keeps track of the current position.
 *
 * Holds its own copy of the text (m_buffer) and hands out const_iterators
 * over it through begin() and end().
 */
struct source
{
// Forward iterator over the characters of the text. Next to the underlying
// string iterator it stores an elna::source::position.
class const_iterator
{
std::string::const_iterator m_buffer;
elna::source::position m_position;
// Private: only the befriended enclosing `source` creates iterators.
const_iterator(std::string::const_iterator buffer,
const elna::source::position start_position = elna::source::position());
public:
using iterator_category = std::forward_iterator_tag;
using difference_type = ptrdiff_t;
using value_type = char;
using pointer = const value_type *;
using reference = const value_type&;
// Position stored alongside the character iterator.
const elna::source::position& position() const noexcept;
reference operator*() const noexcept;
pointer operator->() const noexcept;
const_iterator& operator++();
// NOTE(review): post-increment is declared to return a reference; the
// conventional form returns the previous value by value.
const_iterator& operator++(int);
bool operator==(const const_iterator& that) const noexcept;
bool operator!=(const const_iterator& that) const noexcept;
friend source;
};
source(const std::string& buffer);
const_iterator begin() const;
const_iterator end() const;
private:
const std::string m_buffer;
};
/**
 * Union type representing a single token.
 *
 * A token has a type, a source position and, depending on the type, either a
 * number or a C string value.
 */
struct Token
{
/**
 * Token type.
 */
enum class type : std::uint16_t
{
number = 0,
term_operator = 1,
let = 2,
identifier = 3,
equals = 4,
var = 5,
semicolon = 6,
left_paren = 7,
right_paren = 8,
bang = 9,
dot = 10,
comma = 11,
factor_operator = 12,
};
/**
 * Type of the token value.
 */
union Value
{
std::int32_t number;
const char *identifier;
};
// Token without a value.
Token(type of, elna::source::position position);
// Token carrying a number.
Token(type of, std::int32_t value, elna::source::position position);
// Token carrying a string value.
Token(type of, const char *value, elna::source::position position);
Token(const Token& that);
Token(Token&& that);
~Token();
Token& operator=(const Token& that);
Token& operator=(Token&& that);
type of() const noexcept;
const char *identifier() const noexcept;
std::int32_t number() const noexcept;
const elna::source::position& position() const noexcept;
private:
type m_type;
Value m_value;
elna::source::position m_position;
};
/**
 * Split the source into tokens.
 *
 * \param buffer Source text.
 * \return Tokens or error.
 */
elna::source::result<std::vector<Token>> lex(const char *buffer);
}
}

View File

@ -1,125 +0,0 @@
#pragma once
#include <cstddef>
#include <cstdint>
#include <elna/lexer.hpp>
namespace elna
{
/**
 * Operators a BinaryExpression can carry.
 */
enum class BinaryOperator
{
sum,
subtraction,
multiplication,
division
};
// Forward declarations of the node classes named by ParserVisitor.
class Node;
class Definition;
class BangStatement;
class Block;
class Expression;
class BinaryExpression;
class Variable;
class Number;
/**
 * Visitor interface with one visit() overload per node class.
 */
struct ParserVisitor
{
virtual void visit(Node *) = 0;
virtual void visit(Definition *) = 0;
virtual void visit(BangStatement *) = 0;
virtual void visit(Block *) = 0;
virtual void visit(Expression *) = 0;
virtual void visit(BinaryExpression *) = 0;
virtual void visit(Variable *) = 0;
virtual void visit(Number *) = 0;
};
/**
 * AST node.
 */
class Node
{
public:
// Entry point for visitors.
virtual void accept(ParserVisitor *) = 0;
};
/**
 * Base class of statement nodes.
 */
class Statement : public Node
{
};
/**
 * Constant definition.
 */
class Definition : public Node
{
public:
// Raw pointer to the literal holding the constant's value.
Number *number;
// Raw pointer to the name; no length is stored next to it.
const char *identifier;
virtual void accept(ParserVisitor *visitor) override;
};
/**
 * Statement consisting of a single expression.
 */
class BangStatement : public Statement
{
public:
Expression *expression;
virtual void accept(ParserVisitor *visitor) override;
};
/**
 * Block.
 */
class Block : public Node
{
public:
// Array of definition pointers together with its element count.
Definition** definitions{ nullptr };
size_t definitionsLength{ 0 };
Statement *statement{ nullptr };
virtual void accept(ParserVisitor *visitor) override;
};
/**
 * Base class of expression nodes.
 */
class Expression : public Node
{
public:
virtual void accept(ParserVisitor *visitor) override;
};
/**
 * Integer literal.
 */
class Number : public Expression
{
public:
std::int32_t value;
virtual void accept(ParserVisitor *visitor) override;
};
/**
 * Reference to a name.
 */
class Variable : public Expression
{
public:
const char *identifier;
virtual void accept(ParserVisitor *visitor) override;
};
/**
 * Expression with two operands and an operator.
 */
class BinaryExpression : public Expression
{
public:
Expression *lhs, *rhs;
BinaryOperator _operator;
// The third argument is the operator character.
BinaryExpression(Expression *lhs, Expression *rhs, unsigned char);
virtual void accept(ParserVisitor *visitor) override;
};
// Parsing functions. Each one receives the token cursor and the remaining
// token count by pointer and returns a raw pointer to the parsed node.
Expression *parseFactor(lex::Token **tokens, std::size_t *length);
Expression *parseTerm(lex::Token **tokens, std::size_t *length);
Expression *parseExpression(lex::Token **tokens, std::size_t *length);
Definition *parseDefinition(lex::Token **tokens, std::size_t *length);
Statement *parseStatement(lex::Token **tokens, std::size_t *length);
// resultLength is an out-parameter; presumably it receives the number of
// parsed definitions -- confirm against the definition.
Definition **parseDefinitions(lex::Token **tokens, std::size_t *length, std::size_t *resultLength);
Block *parseBlock(lex::Token **tokens, std::size_t *length);
// Entry point taking the token array and its length.
Block *parse(lex::Token *tokenStream, std::size_t length);
}

View File

@ -1,7 +1,8 @@
#pragma once #pragma once
#include <cstdint> #include <cstdint>
#include "elna/parser.hpp" #include <unordered_map>
#include "elna/source/parser.hpp"
#include "elna/result.hpp" #include "elna/result.hpp"
namespace elna namespace elna
@ -141,7 +142,7 @@ namespace elna
std::uint32_t instruction{ 0 }; std::uint32_t instruction{ 0 };
}; };
class RiscVVisitor : public ParserVisitor class RiscVVisitor : public source::ParserVisitor
{ {
public: public:
Instruction *instructions; Instruction *instructions;
@ -149,19 +150,15 @@ namespace elna
bool registerInUse{ true }; bool registerInUse{ true };
std::uint32_t variableCounter = 1; std::uint32_t variableCounter = 1;
Reference references[3]; Reference references[3];
std::int32_t *constValues{ nullptr }; std::unordered_map<std::string, std::int32_t> constants;
const char **constNames{ nullptr };
std::size_t constCount{ 0 };
virtual void visit(Node *) override; virtual void visit(source::definition *definition) override;
virtual void visit(Definition *definition) override; virtual void visit(source::bang_statement *statement) override;
virtual void visit(BangStatement *statement) override; virtual void visit(source::block *block) override;
virtual void visit(Block *block) override; virtual void visit(source::variable *variable) override;
virtual void visit(Expression *operand) override; virtual void visit(source::integer_literal *number) override;
virtual void visit(Variable *variable) override; virtual void visit(source::binary_expression *expression) override;
virtual void visit(Number *number) override;
virtual void visit(BinaryExpression *expression) override;
}; };
Symbol writeNext(Block *ast); Symbol writeNext(source::block *ast);
} }

View File

@ -0,0 +1,116 @@
#pragma once
#include <cstdint>
#include <string>
#include "elna/result.hpp"
namespace elna
{
namespace source
{
/**
 * Forward iterator over the source text that keeps track of the current position.
 *
 * Wraps a std::string::const_iterator together with an elna::source::position.
 * The constructor is private; text_iterators() is the only friend and therefore
 * the only way to obtain instances.
 */
class text_iterator
{
std::string::const_iterator m_buffer;
elna::source::position m_position;
// Private: only the befriended text_iterators() constructs iterators.
text_iterator(std::string::const_iterator buffer,
const elna::source::position start_position = elna::source::position());
public:
using iterator_category = std::forward_iterator_tag;
using difference_type = ptrdiff_t;
using value_type = char;
using pointer = const value_type *;
using reference = const value_type&;
// Position stored alongside the character iterator.
const elna::source::position& position() const noexcept;
reference operator*() const noexcept;
pointer operator->() const noexcept;
text_iterator& operator++();
// NOTE(review): post-increment is declared to return a reference; the
// conventional signature is `text_iterator operator++(int)` returning the
// previous value. Changing it requires updating the out-of-line definition too.
text_iterator& operator++(int);
// Comparison of two iterators over the same text.
bool operator==(const text_iterator& that) const noexcept;
bool operator!=(const text_iterator& that) const noexcept;
friend std::pair<text_iterator, text_iterator> text_iterators(const std::string& buffer);
};
/**
 * Creates a pair of text_iterators for the given text.
 *
 * \param buffer Source text. It is taken by reference, so it presumably has to
 *               outlive the returned iterators -- confirm against the definition.
 * \return Pair of iterators, presumably (begin, end) over \a buffer.
 */
std::pair<text_iterator, text_iterator>
text_iterators(const std::string &buffer);
/**
 * Union type representing a single token.
 *
 * A token has a type, a source position and, depending on the type, a number,
 * a string or no value at all.
 */
struct token
{
/**
 * Token type.
 */
enum class type : std::uint16_t
{
number = 0,
term_operator = 1,
let = 2,
identifier = 3,
equals = 4,
var = 5,
semicolon = 6,
left_paren = 7,
right_paren = 8,
bang = 9,
dot = 10,
comma = 11,
factor_operator = 12,
};
/**
 * Type of the token value.
 *
 * The union has a std::string member, so it declares its own constructors
 * and destructor. Nothing in the union records which member is active;
 * the enclosing token tracks that through its type.
 */
union value
{
value();
value(std::int32_t value);
value(const std::string& value);
~value();
std::nullptr_t nil;
std::int32_t number;
std::string identifier;
};
// Token without a value.
token(type of, elna::source::position position);
// Token carrying a number.
token(type of, std::int32_t value, const elna::source::position position);
// Token carrying a string value.
token(type of, const std::string& value, const elna::source::position position);
// NOTE(review): `identifier` is a std::string inside a union, so it must be
// constructed before it is assigned to. Confirm that the copy/move
// constructors and assignment operators construct it (e.g. with placement
// new) when the active member changes.
token(const token& that);
token(token&& that);
~token();
token& operator=(const token& that);
token& operator=(token&& that);
type of() const noexcept;
// Not noexcept, unlike of() and position(); presumably these check the token
// type and throw on a mismatch -- confirm against the definitions.
const std::string& identifier() const;
std::int32_t number() const;
const elna::source::position& position() const noexcept;
private:
type m_type;
value m_value;
elna::source::position m_position;
// Presumably tells whether the string member of m_value is the active one.
bool has_identifier() const noexcept;
};
/**
 * Split the source into tokens.
 *
 * \param buffer Source text.
 * \return Tokens or error.
 */
elna::source::result<std::vector<token>> lex(const std::string& buffer);
}
}

View File

@ -0,0 +1,153 @@
#pragma once
#include <cstdint>
#include <memory>
#include <elna/source/lexer.hpp>
namespace elna
{
namespace source
{
/**
 * Operators a binary_expression can carry.
 */
enum class binary_operator
{
sum,
subtraction,
multiplication,
division
};
// Forward declarations of the concrete node classes named by ParserVisitor.
class definition;
class bang_statement;
class block;
class binary_expression;
class variable;
class integer_literal;
/**
 * Visitor interface with one visit() overload per concrete AST node class.
 *
 * Implementations override every overload; a node's accept() is the entry
 * point through which a visitor is applied to it.
 */
struct ParserVisitor
{
    // Polymorphic interface: a virtual destructor makes destroying a visitor
    // through a ParserVisitor pointer well-defined.
    virtual ~ParserVisitor() = default;

    virtual void visit(definition *) = 0;
    virtual void visit(bang_statement *) = 0;
    virtual void visit(block *) = 0;
    virtual void visit(binary_expression *) = 0;
    virtual void visit(variable *) = 0;
    virtual void visit(integer_literal *) = 0;
};
/**
 * AST node.
 *
 * Common base of every syntax tree class. Nodes are owned through
 * std::unique_ptr to base types (for example std::unique_ptr<statement> and
 * std::unique_ptr<expression>), so they are destroyed through a base pointer.
 */
class node
{
public:
    // Required: without a virtual destructor, destroying a derived node
    // through a pointer to one of its bases is undefined behaviour.
    virtual ~node() = default;

    // Entry point for visitors.
    virtual void accept(ParserVisitor *) = 0;
};
/**
 * Base class of statement nodes; adds nothing to node.
 */
class statement : public node
{
};
/**
 * Base class of expression nodes; adds nothing to node.
 */
class expression : public node
{
};
/**
* Constant definition.
*/
class definition : public node
{
std::string m_identifier;
std::unique_ptr<integer_literal> m_body;
public:
definition(std::string&& identifier, std::unique_ptr<integer_literal>&& body);
virtual void accept(ParserVisitor *visitor) override;
std::string& identifier() noexcept;
integer_literal& body();
};
class bang_statement : public statement
{
std::unique_ptr<expression> m_body;
public:
bang_statement(std::unique_ptr<expression>&& body);
virtual void accept(ParserVisitor *visitor) override;
expression& body();
};
/**
* Block.
*/
class block : public node
{
std::unique_ptr<statement> m_body;
std::vector<std::unique_ptr<definition>> m_definitions;
public:
block(std::vector<std::unique_ptr<definition>>&& definitions, std::unique_ptr<statement>&& body);
virtual void accept(ParserVisitor *visitor) override;
statement& body();
std::vector<std::unique_ptr<definition>>& definitions() noexcept;
};
class integer_literal : public expression
{
std::int32_t m_number;
public:
integer_literal(const std::int32_t value);
virtual void accept(ParserVisitor *visitor) override;
std::int32_t number() const noexcept;
};
class variable : public expression
{
std::string m_name;
public:
variable(const std::string& name);
virtual void accept(ParserVisitor *visitor) override;
const std::string& name() const noexcept;
};
class binary_expression : public expression
{
std::unique_ptr<expression> m_lhs;
std::unique_ptr<expression> m_rhs;
binary_operator m_operator;
public:
binary_expression(std::unique_ptr<expression>&& lhs,
std::unique_ptr<expression>&& rhs, const unsigned char operation);
virtual void accept(ParserVisitor *visitor) override;
expression& lhs();
expression& rhs();
binary_operator operation() const noexcept;
};
/**
 * Parser turning a token sequence into an AST.
 *
 * Only iterators are stored, not the tokens themselves.
 */
struct parser
{
public:
    /// \param tokens Tokens to parse.
    parser(const std::vector<token>& tokens);

    /// Parses the token sequence and returns the resulting block.
    std::unique_ptr<block> parse();

private:
    // One private member function per grammar rule.
    std::unique_ptr<expression> parse_factor();
    std::unique_ptr<expression> parse_term();
    std::unique_ptr<expression> parse_expression();
    std::unique_ptr<definition> parse_definition();
    std::unique_ptr<statement> parse_bang_statement();
    std::vector<std::unique_ptr<definition>> parse_definitions();
    std::unique_ptr<block> parse_block();

    // Two iterators into the token sequence.
    std::vector<token>::const_iterator tokens;
    std::vector<token>::const_iterator end;
};
}
}

View File

@ -41,7 +41,7 @@ namespace elna
return 3; return 3;
} }
size_t tokensCount{ 0 }; size_t tokensCount{ 0 };
auto lex_result = lex::lex(sourceText); auto lex_result = source::lex(sourceText);
free(sourceText); free(sourceText);
if (lex_result.has_errors()) if (lex_result.has_errors())
{ {
@ -51,12 +51,12 @@ namespace elna
} }
return 1; return 1;
} }
auto ast = parse(lex_result.success().data(), tokensCount); auto ast = source::parser(lex_result.success()).parse();
if (ast == nullptr) if (ast == nullptr)
{ {
return 2; return 2;
} }
auto program = writeNext(ast); auto program = writeNext(ast.get());
// Create code section // Create code section
ELFIO::section* text_sec = writer.sections.add(".text"); ELFIO::section* text_sec = writer.sections.add(".text");

View File

@ -4,42 +4,32 @@
namespace elna namespace elna
{ {
void TransformVisitor::visit(Node *node) void TransformVisitor::visit(source::definition *definition)
{ {
assert(false); assert(false);
} }
void TransformVisitor::visit(Definition *definition) void TransformVisitor::visit(source::bang_statement *statement)
{ {
assert(false); assert(false);
} }
void TransformVisitor::visit(BangStatement *statement) void TransformVisitor::visit(source::block *block)
{ {
assert(false); assert(false);
} }
void TransformVisitor::visit(Block *block) void TransformVisitor::visit(source::integer_literal *number)
{ {
assert(false); assert(false);
} }
void TransformVisitor::visit(Expression *expression) void TransformVisitor::visit(source::variable *variable)
{ {
assert(false); assert(false);
} }
void TransformVisitor::visit(Number *number) void TransformVisitor::visit(source::binary_expression *binaryExpression)
{
assert(false);
}
void TransformVisitor::visit(Variable *variable)
{
assert(false);
}
void TransformVisitor::visit(BinaryExpression *binaryExpression)
{ {
assert(false); assert(false);
} }

View File

@ -1,54 +1,42 @@
#include "elna/lexer.hpp" #include "elna/source/lexer.hpp"
#include <variant>
#include <cstring>
namespace elna namespace elna
{ {
namespace lex namespace source
{ {
using source_position = elna::source::position; using source_position = elna::source::position;
using source_error = elna::source::error; using source_error = elna::source::error;
using source_result = elna::source::result<std::vector<Token>>; using source_result = elna::source::result<std::vector<token>>;
source::source(const std::string& buffer) std::pair<text_iterator, text_iterator> text_iterators(const std::string &buffer)
: m_buffer(buffer)
{ {
return std::make_pair<>(text_iterator(std::cbegin(buffer)),
text_iterator(std::cend(buffer), position{0, 0}));
} }
source::const_iterator source::begin() const text_iterator::text_iterator(std::string::const_iterator buffer,
{
return source::const_iterator(std::cbegin(m_buffer));
}
source::const_iterator source::end() const
{
source_position end_position{ 0, 0 };
return source::const_iterator(std::cend(m_buffer), end_position);
}
source::const_iterator::const_iterator(std::string::const_iterator buffer,
const source_position start_position) const source_position start_position)
: m_buffer(buffer), m_position(start_position) : m_buffer(buffer), m_position(start_position)
{ {
} }
const source_position& source::const_iterator::position() const noexcept const source_position& text_iterator::position() const noexcept
{ {
return this->m_position; return this->m_position;
} }
source::const_iterator::reference source::const_iterator::operator*() const noexcept text_iterator::reference text_iterator::operator*() const noexcept
{ {
return *m_buffer; return *m_buffer;
} }
source::const_iterator::pointer source::const_iterator::operator->() const noexcept text_iterator::pointer text_iterator::operator->() const noexcept
{ {
return m_buffer.base(); return m_buffer.base();
} }
source::const_iterator& source::const_iterator::operator++() text_iterator& text_iterator::operator++()
{ {
if (*this->m_buffer == '\n') if (*this->m_buffer == '\n')
{ {
@ -64,129 +52,156 @@ namespace lex
return *this; return *this;
} }
source::const_iterator& source::const_iterator::operator++(int) text_iterator& text_iterator::operator++(int)
{ {
auto tmp = *this; auto tmp = *this;
++(*this); ++(*this);
return *this; return *this;
} }
bool source::const_iterator::operator==(const source::const_iterator& that) const noexcept bool text_iterator::operator==(const text_iterator& that) const noexcept
{ {
return this->m_buffer == that.m_buffer; return this->m_buffer == that.m_buffer;
} }
bool source::const_iterator::operator!=(const source::const_iterator& that) const noexcept bool text_iterator::operator!=(const text_iterator& that) const noexcept
{ {
return !(*this == that); return !(*this == that);
} }
Token::Token(const type of, const char *value, source_position position) token::value::value()
: m_type(of), m_position(position) : nil(nullptr)
{ {
std::size_t value_length = strlen(value);
char *buffer = reinterpret_cast<char *>(malloc(value_length + 1));
std::memcpy(buffer, value, value_length);
buffer[value_length] = 0;
m_value.identifier = buffer;
} }
Token::Token(const type of, std::int32_t number, source_position position) token::value::value(std::int32_t value)
: m_type(of), m_position(position) : number(value)
{ {
m_value.number = number;
} }
Token::Token(const type of, source_position position) token::value::value(const std::string& value)
: identifier(value)
{
}
token::value::~value()
{
}
token::token(const type of, const std::string& value, const source_position position)
: m_type(of), m_value(value), m_position(position)
{
}
token::token(const type of, std::int32_t number, const source_position position)
: m_type(of), m_value(number), m_position(position)
{
}
token::token(const type of, source_position position)
: m_type(of), m_position(position) : m_type(of), m_position(position)
{ {
} }
Token::Token(const Token& that) token::token(const token& that)
: m_type(that.of()), m_position(that.position()) : m_type(that.of()), m_position(that.position())
{ {
*this = that; *this = that;
} }
Token::Token(Token&& that) token::token(token&& that)
: m_type(that.of()), m_position(that.position()) : m_type(that.of()), m_position(that.position())
{ {
*this = std::move(that); *this = std::move(that);
} }
Token::~Token() token::~token()
{ {
if (m_type == type::identifier || m_type == type::term_operator || m_type == type::factor_operator) if (m_type == type::identifier || m_type == type::term_operator || m_type == type::factor_operator)
{ {
std::free(const_cast<char*>(m_value.identifier)); m_value.identifier.~basic_string();
} }
} }
Token& Token::operator=(const Token& that) token& token::operator=(const token& that)
{ {
m_type = that.of(); m_type = that.of();
m_position = that.position(); m_position = that.position();
if (that.of() == type::identifier || that.of() == type::term_operator || m_type == type::factor_operator) if (that.has_identifier())
{
std::size_t value_length = strlen(that.identifier());
char *buffer = reinterpret_cast<char *>(malloc(value_length + 1));
std::memcpy(buffer, that.identifier(), value_length);
buffer[value_length] = 0;
m_value.identifier = buffer;
}
else if (that.of() == type::number)
{
m_value.number = that.number();
}
return *this;
}
Token& Token::operator=(Token&& that)
{
m_type = that.of();
m_position = that.position();
if (that.of() == type::identifier || that.of() == type::term_operator || that.of() == type::factor_operator)
{ {
m_value.identifier = that.identifier(); m_value.identifier = that.identifier();
that.m_value.identifier = nullptr;
} }
else if (that.of() == type::number) else if (that.of() == type::number)
{ {
m_value.number = that.number(); m_value.number = that.number();
} }
else
{
m_value.nil = nullptr;
}
return *this; return *this;
} }
Token::type Token::of() const noexcept token& token::operator=(token&& that)
{
m_type = that.of();
m_position = that.position();
if (that.has_identifier())
{
m_value.identifier = std::move(that.identifier());
}
else if (that.of() == type::number)
{
m_value.number = that.number();
}
else
{
m_value.nil = nullptr;
}
return *this;
}
token::type token::of() const noexcept
{ {
return m_type; return m_type;
} }
const char *Token::identifier() const noexcept const std::string& token::identifier() const
{ {
if (!has_identifier())
{
throw std::bad_variant_access();
}
return m_value.identifier; return m_value.identifier;
} }
std::int32_t Token::number() const noexcept std::int32_t token::number() const
{ {
if (of() != type::number)
{
throw std::bad_variant_access();
}
return m_value.number; return m_value.number;
} }
const source_position& Token::position() const noexcept const source_position& token::position() const noexcept
{ {
return m_position; return m_position;
} }
source_result lex(const char *buffer) bool token::has_identifier() const noexcept
{ {
std::vector<Token> tokens; return of() == type::identifier
source input{ buffer }; || of() == type::term_operator
|| of() == type::factor_operator;
}
for (auto iterator = input.begin(); iterator != input.end();) source_result lex(const std::string& buffer)
{
std::vector<token> tokens;
auto [iterator, text_end] = text_iterators(buffer);
while (iterator != text_end)
{ {
if (*iterator == ' ' || *iterator == '\n') if (*iterator == ' ' || *iterator == '\n')
{ {
@ -194,59 +209,59 @@ namespace lex
else if (std::isdigit(*iterator)) else if (std::isdigit(*iterator))
{ {
tokens.emplace_back( tokens.emplace_back(
Token::type::number, token::type::number,
static_cast<std::int32_t>(*iterator - '0'), static_cast<std::int32_t>(*iterator - '0'),
iterator.position() iterator.position()
); );
} }
else if (*iterator == '=') else if (*iterator == '=')
{ {
tokens.emplace_back(Token::type::equals, iterator.position()); tokens.emplace_back(token::type::equals, iterator.position());
} }
else if (*iterator == '(') else if (*iterator == '(')
{ {
tokens.emplace_back(Token::type::left_paren, iterator.position()); tokens.emplace_back(token::type::left_paren, iterator.position());
} }
else if (*iterator == ')') else if (*iterator == ')')
{ {
tokens.emplace_back(Token::type::right_paren, iterator.position()); tokens.emplace_back(token::type::right_paren, iterator.position());
} }
else if (*iterator == ';') else if (*iterator == ';')
{ {
tokens.emplace_back(Token::type::semicolon, iterator.position()); tokens.emplace_back(token::type::semicolon, iterator.position());
} }
else if (*iterator == ',') else if (*iterator == ',')
{ {
tokens.emplace_back(Token::type::comma, iterator.position()); tokens.emplace_back(token::type::comma, iterator.position());
} }
else if (*iterator == '!') else if (*iterator == '!')
{ {
tokens.emplace_back(Token::type::bang, iterator.position()); tokens.emplace_back(token::type::bang, iterator.position());
} }
else if (*iterator == '.') else if (*iterator == '.')
{ {
tokens.emplace_back(Token::type::dot, iterator.position()); tokens.emplace_back(token::type::dot, iterator.position());
} }
else if (std::isalpha(*iterator)) else if (std::isalpha(*iterator))
{ {
std::string word; std::string word;
auto i = iterator; auto i = iterator;
while (i != input.end() && std::isalpha(*i)) while (i != text_end && std::isalpha(*i))
{ {
word.push_back(*i); word.push_back(*i);
++i; ++i;
} }
if (word == "const") if (word == "const")
{ {
tokens.emplace_back(Token::type::let, iterator.position()); tokens.emplace_back(token::type::let, iterator.position());
} }
else if (word == "var") else if (word == "var")
{ {
tokens.emplace_back(Token::type::var, iterator.position()); tokens.emplace_back(token::type::var, iterator.position());
} }
else else
{ {
tokens.emplace_back(Token::type::identifier, word.c_str(), iterator.position()); tokens.emplace_back(token::type::identifier, word.c_str(), iterator.position());
} }
iterator = i; iterator = i;
continue; continue;
@ -255,13 +270,13 @@ namespace lex
{ {
std::string _operator{ *iterator }; std::string _operator{ *iterator };
tokens.emplace_back(Token::type::term_operator, _operator.c_str(), iterator.position()); tokens.emplace_back(token::type::term_operator, _operator.c_str(), iterator.position());
} }
else if (*iterator == '*' || *iterator == '/') else if (*iterator == '*' || *iterator == '/')
{ {
std::string _operator{ *iterator }; std::string _operator{ *iterator };
tokens.emplace_back(Token::type::factor_operator, _operator.c_str(), iterator.position()); tokens.emplace_back(token::type::factor_operator, _operator.c_str(), iterator.position());
} }
else else
{ {

View File

@ -1,254 +1,294 @@
#include "elna/parser.hpp" #include "elna/source/parser.hpp"
#include <stdexcept> #include <stdexcept>
namespace elna namespace elna
{
namespace source
{ {
/** /**
* AST node. * AST node.
*/ */
void Node::accept(ParserVisitor *) void node::accept(ParserVisitor *)
{ {
} }
void Definition::accept(ParserVisitor *visitor) definition::definition(std::string&& identifier, std::unique_ptr<integer_literal>&& body)
: m_identifier(std::move(identifier)), m_body(std::move(body))
{
}
void definition::accept(ParserVisitor *visitor)
{ {
visitor->visit(this); visitor->visit(this);
} }
void Block::accept(ParserVisitor *visitor) std::string& definition::identifier() noexcept
{
return m_identifier;
}
integer_literal& definition::body()
{
return *m_body;
}
block::block(std::vector<std::unique_ptr<definition>>&& definitions, std::unique_ptr<statement>&& body)
: m_definitions(std::move(definitions)), m_body(std::move(body))
{
}
void block::accept(ParserVisitor *visitor)
{ {
visitor->visit(this); visitor->visit(this);
} }
void Expression::accept(ParserVisitor *visitor) statement& block::body()
{
return *m_body;
}
std::vector<std::unique_ptr<definition>>& block::definitions() noexcept
{
return m_definitions;
}
integer_literal::integer_literal(const std::int32_t value)
: m_number(value)
{
}
void integer_literal::accept(ParserVisitor *visitor)
{ {
visitor->visit(this); visitor->visit(this);
} }
void Number::accept(ParserVisitor *visitor) std::int32_t integer_literal::number() const noexcept
{
return m_number;
}
variable::variable(const std::string& name)
: m_name(name)
{
}
void variable::accept(ParserVisitor *visitor)
{ {
visitor->visit(this); visitor->visit(this);
} }
void Variable::accept(ParserVisitor *visitor) const std::string& variable::name() const noexcept
{ {
visitor->visit(this); return m_name;
} }
BinaryExpression::BinaryExpression(Expression *lhs, Expression *rhs, unsigned char _operator) binary_expression::binary_expression(std::unique_ptr<expression>&& lhs,
std::unique_ptr<expression>&& rhs, const unsigned char operation)
: m_lhs(std::move(lhs)), m_rhs(std::move(rhs))
{ {
this->lhs = lhs; switch (operation)
this->rhs = rhs;
switch (_operator)
{ {
case '+': case '+':
this->_operator = BinaryOperator::sum; this->m_operator = binary_operator::sum;
break; break;
case '-': case '-':
this->_operator = BinaryOperator::subtraction; this->m_operator = binary_operator::subtraction;
break; break;
case '*': case '*':
this->_operator = BinaryOperator::multiplication; this->m_operator = binary_operator::multiplication;
break; break;
case '/': case '/':
this->_operator = BinaryOperator::division; this->m_operator = binary_operator::division;
break; break;
default: default:
throw std::logic_error("Invalid binary operator"); throw std::logic_error("Invalid binary operator");
} }
} }
void BinaryExpression::accept(ParserVisitor *visitor) void binary_expression::accept(ParserVisitor *visitor)
{ {
visitor->visit(this); visitor->visit(this);
} }
void BangStatement::accept(ParserVisitor *visitor) expression& binary_expression::lhs()
{
return *m_lhs;
}
expression& binary_expression::rhs()
{
return *m_rhs;
}
binary_operator binary_expression::operation() const noexcept
{
return m_operator;
}
bang_statement::bang_statement(std::unique_ptr<expression>&& body)
: m_body(std::move(body))
{
}
void bang_statement::accept(ParserVisitor *visitor)
{ {
visitor->visit(this); visitor->visit(this);
} }
Block *parse(lex::Token *tokenStream, std::size_t length) expression& bang_statement::body()
{ {
return parseBlock(&tokenStream, &length); return *m_body;
} }
Expression *parseFactor(lex::Token **tokens, size_t *length) parser::parser(const std::vector<token>& tokens)
: tokens(tokens.cbegin()), end(tokens.cend())
{ {
if ((*tokens)[0].of() == lex::Token::type::identifier)
{
auto variable = new Variable();
variable->identifier = (*tokens)[0].identifier();
++(*tokens);
--(*length);
return variable;
} }
else if ((*tokens)[0].of() == lex::Token::Token::type::number)
std::unique_ptr<block> parser::parse()
{ {
auto number = new Number(); return parse_block();
number->value = (*tokens)[0].number();
++(*tokens);
--(*length);
return number;
} }
else if ((*tokens)[0].of() == lex::Token::type::left_paren)
std::unique_ptr<expression> parser::parse_factor()
{ {
++(*tokens); if (tokens->of() == source::token::type::identifier)
--(*length); {
auto result = std::make_unique<variable>(tokens->identifier());
++tokens;
return result;
}
else if (tokens->of() == source::token::token::type::number)
{
auto result = std::make_unique<integer_literal>(tokens->number());
++tokens;
return result;
}
else if (tokens->of() == source::token::type::left_paren)
{
++tokens;
auto expression = parseExpression(tokens, length); auto expression = parse_expression();
++(*tokens); ++tokens;
--(*length);
return expression; return expression;
} }
return nullptr; return nullptr;
} }
Expression *parseTerm(lex::Token **tokens, size_t *length) std::unique_ptr<expression> parser::parse_term()
{ {
auto lhs = parseFactor(tokens, length); auto lhs = parse_factor();
if (lhs == nullptr || *length == 0 || (*tokens)[0].of() != lex::Token::type::factor_operator) if (lhs == nullptr || tokens == end || tokens->of() != source::token::type::factor_operator)
{ {
return lhs; return lhs;
} }
auto _operator = (*tokens)[0].identifier()[0]; auto _operator = tokens->identifier()[0];
++(*tokens); ++tokens;
--(*length);
auto rhs = parseFactor(tokens, length); auto rhs = parse_factor();
if (rhs != nullptr) if (rhs != nullptr)
{ {
return new BinaryExpression(lhs, rhs, _operator); return std::make_unique<binary_expression>(std::move(lhs),
std::move(rhs), _operator);
} }
return nullptr; return nullptr;
} }
Expression *parseExpression(lex::Token **tokens, size_t *length) std::unique_ptr<expression> parser::parse_expression()
{ {
auto term = parseTerm(tokens, length); auto term = parse_term();
if (term == nullptr || *length == 0 || (*tokens)[0].of() != lex::Token::type::term_operator) if (term == nullptr || tokens == end || tokens->of() != source::token::type::term_operator)
{ {
return term; return term;
} }
auto _operator = (*tokens)[0].identifier()[0]; auto _operator = tokens->identifier()[0];
++(*tokens); ++tokens;
--(*length);
auto expression = parseExpression(tokens, length); auto rhs = parse_expression();
if (expression != nullptr) if (rhs != nullptr)
{ {
return new BinaryExpression(term, expression, _operator); return std::make_unique<binary_expression>(std::move(term),
std::move(rhs), _operator);
} }
return nullptr; return nullptr;
} }
Definition *parseDefinition(lex::Token **tokens, size_t *length) std::unique_ptr<definition> parser::parse_definition()
{ {
auto definition = new Definition(); std::string definition_identifier = tokens->identifier(); // Copy.
definition->identifier = (*tokens)[0].identifier(); // Copy.
++(*tokens); ++tokens;
++(*tokens); // Skip the equals sign. ++tokens; // Skip the equals sign.
*length -= 2;
if ((*tokens)[0].of() == lex::Token::type::number) if (tokens->of() == source::token::type::number)
{ {
auto number = new Number(); auto result = std::make_unique<definition>(std::move(definition_identifier),
number->value = (*tokens)[0].number(); std::make_unique<integer_literal>(tokens->number()));
definition->number = number; ++tokens;
++(*tokens); return result;
--(*length);
return definition;
} }
return nullptr; return nullptr;
} }
Statement *parseStatement(lex::Token **tokens, std::size_t *length) std::unique_ptr<statement> parser::parse_bang_statement()
{ {
if ((*tokens)[0].of() == lex::Token::type::bang) if (tokens->of() == source::token::type::bang)
{ {
++(*tokens); ++tokens;
--(*length); auto bang_body = parse_expression();
auto statement = new BangStatement(); if (bang_body != nullptr)
auto expression = parseExpression(tokens, length);
if (expression != nullptr)
{ {
statement->expression = expression; return std::make_unique<bang_statement>(std::move(bang_body));
} }
else
{
return nullptr;
}
return statement;
} }
return nullptr; return nullptr;
} }
Definition **parseDefinitions(lex::Token **tokens, size_t *length, size_t *resultLength) std::vector<std::unique_ptr<definition>> parser::parse_definitions()
{ {
++(*tokens); // Skip const. ++tokens; // Skip const.
--(*length);
Definition **definitions; std::vector<std::unique_ptr<definition>> definitions;
*resultLength = 0;
while (*length != 0) while (tokens != end)
{ {
auto definition = parseDefinition(tokens, length); auto parsed_definition = parse_definition();
if (definition == nullptr) if (parsed_definition == nullptr)
{ {
return nullptr; return definitions;
} }
definitions = reinterpret_cast<Definition **>( definitions.push_back(std::move(parsed_definition));
realloc(definitions, (*resultLength + 1) * sizeof(Definition*)));
definitions[(*resultLength)++] = definition;
if ((*tokens)[0].of() == lex::Token::type::semicolon) if (tokens->of() == source::token::type::semicolon)
{ {
break; break;
} }
if ((*tokens)[0].of() == lex::Token::type::comma) if (tokens->of() == source::token::type::comma)
{ {
++(*tokens); ++tokens;
--(*length);
} }
} }
return definitions; return definitions;
} }
Block *parseBlock(lex::Token **tokens, std::size_t *length) std::unique_ptr<block> parser::parse_block()
{ {
auto block = new Block(); std::vector<std::unique_ptr<definition>> definitions;
if ((*tokens)[0].of() == lex::Token::type::let) if (tokens->of() == source::token::type::let)
{ {
size_t length_ = 0; definitions = parse_definitions();
auto constDefinitions = parseDefinitions(tokens, length, &length_); ++tokens;
if (constDefinitions != nullptr)
{
block->definitionsLength = length_;
block->definitions = constDefinitions;
} }
else auto parsed_statement = parse_bang_statement();
if (parsed_statement == nullptr)
{ {
return nullptr; return nullptr;
} }
++(*tokens); return std::make_unique<block>(std::move(definitions), std::move(parsed_statement));
--(*length); }
}
auto statement = parseStatement(tokens, length);
if (statement != nullptr)
{
block->statement = statement;
}
else
{
return nullptr;
}
return block;
} }
} }

View File

@ -1,4 +1,3 @@
#include "elna/parser.hpp"
#include "elna/riscv.hpp" #include "elna/riscv.hpp"
#include <memory> #include <memory>
#include <cstring> #include <cstring>
@ -59,31 +58,22 @@ namespace elna
return reinterpret_cast<std::byte *>(&this->instruction) + sizeof(this->instruction); return reinterpret_cast<std::byte *>(&this->instruction) + sizeof(this->instruction);
} }
void RiscVVisitor::visit(Node *) void RiscVVisitor::visit(source::definition *definition)
{ {
constants[definition->identifier()] = definition->body().number();
} }
void RiscVVisitor::visit(Definition *definition) void RiscVVisitor::visit(source::block *block)
{ {
++constCount; for (const auto& block_definition : block->definitions())
constNames = reinterpret_cast<const char **>(realloc(constNames, sizeof(const char *) * constCount));
constValues = reinterpret_cast<std::int32_t *>(realloc(constValues, sizeof(std::int32_t) * constCount));
constNames[constCount - 1] = definition->identifier;
constValues[constCount - 1] = definition->number->value;
}
void RiscVVisitor::visit(Block *block)
{ {
for (std::size_t i = 0; i < block->definitionsLength; ++i) block_definition->accept(this);
{
block->definitions[i]->accept(this);
} }
this->instructionsLength += 4; this->instructionsLength += 4;
this->instructions = reinterpret_cast<Instruction *>( this->instructions = reinterpret_cast<Instruction *>(
realloc(this->instructions, this->instructionsLength * sizeof(Instruction))); realloc(this->instructions, this->instructionsLength * sizeof(Instruction)));
block->statement->accept(this); block->body().accept(this);
// Prologue. // Prologue.
const uint stackSize = static_cast<std::uint32_t>(variableCounter * 4 + 12); const uint stackSize = static_cast<std::uint32_t>(variableCounter * 4 + 12);
@ -137,44 +127,12 @@ namespace elna
.i(XRegister::zero, Funct3::jalr, XRegister::ra, 0); .i(XRegister::zero, Funct3::jalr, XRegister::ra, 0);
} }
void RiscVVisitor::visit(BangStatement *statement) void RiscVVisitor::visit(source::bang_statement *statement)
{ {
statement->expression->accept(this); statement->body().accept(this);
} }
void RiscVVisitor::visit(Expression *operand) void RiscVVisitor::visit(source::variable *variable)
{
if (dynamic_cast<Variable *>(operand) != nullptr)
{
return dynamic_cast<Variable *>(operand)->accept(this);
}
if (dynamic_cast<Number *>(operand) != nullptr)
{
return dynamic_cast<Number *>(operand)->accept(this);
}
}
void RiscVVisitor::visit(Variable *variable)
{
std::size_t i = 0;
for (; i < constCount; ++i)
{
if (std::strcmp(variable->identifier, constNames[i]) == 0)
{
break;
}
}
const auto freeRegister = this->registerInUse ? XRegister::a0 : XRegister::t0;
++this->instructionsLength;
this->instructions = reinterpret_cast<Instruction *>(
realloc(this->instructions, this->instructionsLength * sizeof(Instruction)));
this->instructions[this->instructionsLength - 1] =
Instruction(BaseOpcode::opImm) // movl $x, %eax; where $x is a number.
.i(freeRegister, Funct3::addi, XRegister::zero, constValues[i]);
}
void RiscVVisitor::visit(Number *number)
{ {
const auto freeRegister = this->registerInUse ? XRegister::a0 : XRegister::t0; const auto freeRegister = this->registerInUse ? XRegister::a0 : XRegister::t0;
@ -183,15 +141,27 @@ namespace elna
realloc(this->instructions, this->instructionsLength * sizeof(Instruction))); realloc(this->instructions, this->instructionsLength * sizeof(Instruction)));
this->instructions[this->instructionsLength - 1] = this->instructions[this->instructionsLength - 1] =
Instruction(BaseOpcode::opImm) // movl $x, %eax; where $x is a number. Instruction(BaseOpcode::opImm) // movl $x, %eax; where $x is a number.
.i(freeRegister, Funct3::addi, XRegister::zero, number->value); .i(freeRegister, Funct3::addi, XRegister::zero, constants[variable->name()]);
} }
void RiscVVisitor::visit(BinaryExpression *expression) void RiscVVisitor::visit(source::integer_literal *number)
{
const auto freeRegister = this->registerInUse ? XRegister::a0 : XRegister::t0;
++this->instructionsLength;
this->instructions = reinterpret_cast<Instruction *>(
realloc(this->instructions, this->instructionsLength * sizeof(Instruction)));
this->instructions[this->instructionsLength - 1] =
Instruction(BaseOpcode::opImm) // movl $x, %eax; where $x is a number.
.i(freeRegister, Funct3::addi, XRegister::zero, number->number());
}
void RiscVVisitor::visit(source::binary_expression *expression)
{ {
const auto lhs_register = this->registerInUse ? XRegister::a0 : XRegister::t0; const auto lhs_register = this->registerInUse ? XRegister::a0 : XRegister::t0;
this->registerInUse = true; this->registerInUse = true;
expression->lhs->accept(this); expression->lhs().accept(this);
++this->instructionsLength; ++this->instructionsLength;
this->instructions = reinterpret_cast<Instruction *>( this->instructions = reinterpret_cast<Instruction *>(
@ -202,7 +172,7 @@ namespace elna
auto lhs_stack_position = ++this->variableCounter; auto lhs_stack_position = ++this->variableCounter;
this->registerInUse = false; this->registerInUse = false;
expression->rhs->accept(this); expression->rhs().accept(this);
this->instructionsLength += 2; this->instructionsLength += 2;
this->instructions = reinterpret_cast<Instruction *>( this->instructions = reinterpret_cast<Instruction *>(
@ -213,24 +183,24 @@ namespace elna
static_cast<std::int8_t>((lhs_stack_position - 1) * 4)); static_cast<std::int8_t>((lhs_stack_position - 1) * 4));
// Calculate the result and assign it to a variable on the stack. // Calculate the result and assign it to a variable on the stack.
switch (expression->_operator) switch (expression->operation())
{ {
case BinaryOperator::sum: case source::binary_operator::sum:
this->instructions[instructionsLength - 1] = Instruction(BaseOpcode::op) this->instructions[instructionsLength - 1] = Instruction(BaseOpcode::op)
.r(lhs_register, Funct3::add, XRegister::a0, XRegister::t0); .r(lhs_register, Funct3::add, XRegister::a0, XRegister::t0);
break; break;
case BinaryOperator::subtraction: case source::binary_operator::subtraction:
this->instructions[instructionsLength - 1] = Instruction(BaseOpcode::op) this->instructions[instructionsLength - 1] = Instruction(BaseOpcode::op)
.r(lhs_register, Funct3::sub, XRegister::a0, XRegister::t0, Funct7::sub); .r(lhs_register, Funct3::sub, XRegister::a0, XRegister::t0, Funct7::sub);
break; break;
case BinaryOperator::multiplication: case source::binary_operator::multiplication:
this->instructions[instructionsLength - 1] = Instruction(BaseOpcode::op) this->instructions[instructionsLength - 1] = Instruction(BaseOpcode::op)
.r(lhs_register, Funct3::mul, XRegister::a0, XRegister::t0, Funct7::muldiv); .r(lhs_register, Funct3::mul, XRegister::a0, XRegister::t0, Funct7::muldiv);
break; break;
} }
} }
Symbol writeNext(Block *ast) Symbol writeNext(source::block *ast)
{ {
auto visitor = std::make_unique<RiscVVisitor>(); auto visitor = std::make_unique<RiscVVisitor>();
visitor->visit(ast); visitor->visit(ast);