Get rid of the raw pointers in the parser
This commit is contained in:
195
source/lexer.cpp
195
source/lexer.cpp
@ -1,54 +1,42 @@
|
||||
#include "elna/lexer.hpp"
|
||||
|
||||
#include <cstring>
|
||||
#include "elna/source/lexer.hpp"
|
||||
#include <variant>
|
||||
|
||||
namespace elna
|
||||
{
|
||||
namespace lex
|
||||
namespace source
|
||||
{
|
||||
using source_position = elna::source::position;
|
||||
using source_error = elna::source::error;
|
||||
using source_result = elna::source::result<std::vector<Token>>;
|
||||
using source_result = elna::source::result<std::vector<token>>;
|
||||
|
||||
source::source(const std::string& buffer)
|
||||
: m_buffer(buffer)
|
||||
std::pair<text_iterator, text_iterator> text_iterators(const std::string &buffer)
|
||||
{
|
||||
return std::make_pair<>(text_iterator(std::cbegin(buffer)),
|
||||
text_iterator(std::cend(buffer), position{0, 0}));
|
||||
}
|
||||
|
||||
source::const_iterator source::begin() const
|
||||
{
|
||||
return source::const_iterator(std::cbegin(m_buffer));
|
||||
}
|
||||
|
||||
source::const_iterator source::end() const
|
||||
{
|
||||
source_position end_position{ 0, 0 };
|
||||
|
||||
return source::const_iterator(std::cend(m_buffer), end_position);
|
||||
}
|
||||
|
||||
source::const_iterator::const_iterator(std::string::const_iterator buffer,
|
||||
text_iterator::text_iterator(std::string::const_iterator buffer,
|
||||
const source_position start_position)
|
||||
: m_buffer(buffer), m_position(start_position)
|
||||
{
|
||||
}
|
||||
|
||||
const source_position& source::const_iterator::position() const noexcept
|
||||
const source_position& text_iterator::position() const noexcept
|
||||
{
|
||||
return this->m_position;
|
||||
}
|
||||
|
||||
source::const_iterator::reference source::const_iterator::operator*() const noexcept
|
||||
text_iterator::reference text_iterator::operator*() const noexcept
|
||||
{
|
||||
return *m_buffer;
|
||||
}
|
||||
|
||||
source::const_iterator::pointer source::const_iterator::operator->() const noexcept
|
||||
text_iterator::pointer text_iterator::operator->() const noexcept
|
||||
{
|
||||
return m_buffer.base();
|
||||
}
|
||||
|
||||
source::const_iterator& source::const_iterator::operator++()
|
||||
text_iterator& text_iterator::operator++()
|
||||
{
|
||||
if (*this->m_buffer == '\n')
|
||||
{
|
||||
@ -64,129 +52,156 @@ namespace lex
|
||||
return *this;
|
||||
}
|
||||
|
||||
source::const_iterator& source::const_iterator::operator++(int)
|
||||
text_iterator& text_iterator::operator++(int)
|
||||
{
|
||||
auto tmp = *this;
|
||||
++(*this);
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool source::const_iterator::operator==(const source::const_iterator& that) const noexcept
|
||||
bool text_iterator::operator==(const text_iterator& that) const noexcept
|
||||
{
|
||||
return this->m_buffer == that.m_buffer;
|
||||
}
|
||||
|
||||
bool source::const_iterator::operator!=(const source::const_iterator& that) const noexcept
|
||||
bool text_iterator::operator!=(const text_iterator& that) const noexcept
|
||||
{
|
||||
return !(*this == that);
|
||||
}
|
||||
|
||||
Token::Token(const type of, const char *value, source_position position)
|
||||
: m_type(of), m_position(position)
|
||||
token::value::value()
|
||||
: nil(nullptr)
|
||||
{
|
||||
std::size_t value_length = strlen(value);
|
||||
char *buffer = reinterpret_cast<char *>(malloc(value_length + 1));
|
||||
|
||||
std::memcpy(buffer, value, value_length);
|
||||
buffer[value_length] = 0;
|
||||
|
||||
m_value.identifier = buffer;
|
||||
}
|
||||
|
||||
Token::Token(const type of, std::int32_t number, source_position position)
|
||||
: m_type(of), m_position(position)
|
||||
token::value::value(std::int32_t value)
|
||||
: number(value)
|
||||
{
|
||||
m_value.number = number;
|
||||
}
|
||||
|
||||
Token::Token(const type of, source_position position)
|
||||
token::value::value(const std::string& value)
|
||||
: identifier(value)
|
||||
{
|
||||
}
|
||||
|
||||
token::value::~value()
|
||||
{
|
||||
}
|
||||
|
||||
token::token(const type of, const std::string& value, const source_position position)
|
||||
: m_type(of), m_value(value), m_position(position)
|
||||
{
|
||||
}
|
||||
|
||||
token::token(const type of, std::int32_t number, const source_position position)
|
||||
: m_type(of), m_value(number), m_position(position)
|
||||
{
|
||||
}
|
||||
|
||||
token::token(const type of, source_position position)
|
||||
: m_type(of), m_position(position)
|
||||
{
|
||||
}
|
||||
|
||||
Token::Token(const Token& that)
|
||||
token::token(const token& that)
|
||||
: m_type(that.of()), m_position(that.position())
|
||||
{
|
||||
*this = that;
|
||||
}
|
||||
|
||||
Token::Token(Token&& that)
|
||||
token::token(token&& that)
|
||||
: m_type(that.of()), m_position(that.position())
|
||||
{
|
||||
*this = std::move(that);
|
||||
}
|
||||
|
||||
Token::~Token()
|
||||
token::~token()
|
||||
{
|
||||
if (m_type == type::identifier || m_type == type::term_operator || m_type == type::factor_operator)
|
||||
{
|
||||
std::free(const_cast<char*>(m_value.identifier));
|
||||
m_value.identifier.~basic_string();
|
||||
}
|
||||
}
|
||||
|
||||
Token& Token::operator=(const Token& that)
|
||||
token& token::operator=(const token& that)
|
||||
{
|
||||
m_type = that.of();
|
||||
m_position = that.position();
|
||||
if (that.of() == type::identifier || that.of() == type::term_operator || m_type == type::factor_operator)
|
||||
{
|
||||
std::size_t value_length = strlen(that.identifier());
|
||||
char *buffer = reinterpret_cast<char *>(malloc(value_length + 1));
|
||||
|
||||
std::memcpy(buffer, that.identifier(), value_length);
|
||||
buffer[value_length] = 0;
|
||||
|
||||
m_value.identifier = buffer;
|
||||
}
|
||||
else if (that.of() == type::number)
|
||||
{
|
||||
m_value.number = that.number();
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
Token& Token::operator=(Token&& that)
|
||||
{
|
||||
m_type = that.of();
|
||||
m_position = that.position();
|
||||
if (that.of() == type::identifier || that.of() == type::term_operator || that.of() == type::factor_operator)
|
||||
if (that.has_identifier())
|
||||
{
|
||||
m_value.identifier = that.identifier();
|
||||
that.m_value.identifier = nullptr;
|
||||
}
|
||||
else if (that.of() == type::number)
|
||||
{
|
||||
m_value.number = that.number();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_value.nil = nullptr;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
Token::type Token::of() const noexcept
|
||||
token& token::operator=(token&& that)
|
||||
{
|
||||
m_type = that.of();
|
||||
m_position = that.position();
|
||||
if (that.has_identifier())
|
||||
{
|
||||
m_value.identifier = std::move(that.identifier());
|
||||
}
|
||||
else if (that.of() == type::number)
|
||||
{
|
||||
m_value.number = that.number();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_value.nil = nullptr;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
token::type token::of() const noexcept
|
||||
{
|
||||
return m_type;
|
||||
}
|
||||
|
||||
const char *Token::identifier() const noexcept
|
||||
const std::string& token::identifier() const
|
||||
{
|
||||
if (!has_identifier())
|
||||
{
|
||||
throw std::bad_variant_access();
|
||||
}
|
||||
return m_value.identifier;
|
||||
}
|
||||
|
||||
std::int32_t Token::number() const noexcept
|
||||
std::int32_t token::number() const
|
||||
{
|
||||
if (of() != type::number)
|
||||
{
|
||||
throw std::bad_variant_access();
|
||||
}
|
||||
return m_value.number;
|
||||
}
|
||||
|
||||
const source_position& Token::position() const noexcept
|
||||
const source_position& token::position() const noexcept
|
||||
{
|
||||
return m_position;
|
||||
}
|
||||
|
||||
source_result lex(const char *buffer)
|
||||
bool token::has_identifier() const noexcept
|
||||
{
|
||||
std::vector<Token> tokens;
|
||||
source input{ buffer };
|
||||
return of() == type::identifier
|
||||
|| of() == type::term_operator
|
||||
|| of() == type::factor_operator;
|
||||
}
|
||||
|
||||
for (auto iterator = input.begin(); iterator != input.end();)
|
||||
source_result lex(const std::string& buffer)
|
||||
{
|
||||
std::vector<token> tokens;
|
||||
auto [iterator, text_end] = text_iterators(buffer);
|
||||
|
||||
while (iterator != text_end)
|
||||
{
|
||||
if (*iterator == ' ' || *iterator == '\n')
|
||||
{
|
||||
@ -194,59 +209,59 @@ namespace lex
|
||||
else if (std::isdigit(*iterator))
|
||||
{
|
||||
tokens.emplace_back(
|
||||
Token::type::number,
|
||||
token::type::number,
|
||||
static_cast<std::int32_t>(*iterator - '0'),
|
||||
iterator.position()
|
||||
);
|
||||
}
|
||||
else if (*iterator == '=')
|
||||
{
|
||||
tokens.emplace_back(Token::type::equals, iterator.position());
|
||||
tokens.emplace_back(token::type::equals, iterator.position());
|
||||
}
|
||||
else if (*iterator == '(')
|
||||
{
|
||||
tokens.emplace_back(Token::type::left_paren, iterator.position());
|
||||
tokens.emplace_back(token::type::left_paren, iterator.position());
|
||||
}
|
||||
else if (*iterator == ')')
|
||||
{
|
||||
tokens.emplace_back(Token::type::right_paren, iterator.position());
|
||||
tokens.emplace_back(token::type::right_paren, iterator.position());
|
||||
}
|
||||
else if (*iterator == ';')
|
||||
{
|
||||
tokens.emplace_back(Token::type::semicolon, iterator.position());
|
||||
tokens.emplace_back(token::type::semicolon, iterator.position());
|
||||
}
|
||||
else if (*iterator == ',')
|
||||
{
|
||||
tokens.emplace_back(Token::type::comma, iterator.position());
|
||||
tokens.emplace_back(token::type::comma, iterator.position());
|
||||
}
|
||||
else if (*iterator == '!')
|
||||
{
|
||||
tokens.emplace_back(Token::type::bang, iterator.position());
|
||||
tokens.emplace_back(token::type::bang, iterator.position());
|
||||
}
|
||||
else if (*iterator == '.')
|
||||
{
|
||||
tokens.emplace_back(Token::type::dot, iterator.position());
|
||||
tokens.emplace_back(token::type::dot, iterator.position());
|
||||
}
|
||||
else if (std::isalpha(*iterator))
|
||||
{
|
||||
std::string word;
|
||||
auto i = iterator;
|
||||
while (i != input.end() && std::isalpha(*i))
|
||||
while (i != text_end && std::isalpha(*i))
|
||||
{
|
||||
word.push_back(*i);
|
||||
++i;
|
||||
}
|
||||
if (word == "const")
|
||||
{
|
||||
tokens.emplace_back(Token::type::let, iterator.position());
|
||||
tokens.emplace_back(token::type::let, iterator.position());
|
||||
}
|
||||
else if (word == "var")
|
||||
{
|
||||
tokens.emplace_back(Token::type::var, iterator.position());
|
||||
tokens.emplace_back(token::type::var, iterator.position());
|
||||
}
|
||||
else
|
||||
{
|
||||
tokens.emplace_back(Token::type::identifier, word.c_str(), iterator.position());
|
||||
tokens.emplace_back(token::type::identifier, word.c_str(), iterator.position());
|
||||
}
|
||||
iterator = i;
|
||||
continue;
|
||||
@ -255,13 +270,13 @@ namespace lex
|
||||
{
|
||||
std::string _operator{ *iterator };
|
||||
|
||||
tokens.emplace_back(Token::type::term_operator, _operator.c_str(), iterator.position());
|
||||
tokens.emplace_back(token::type::term_operator, _operator.c_str(), iterator.position());
|
||||
}
|
||||
else if (*iterator == '*' || *iterator == '/')
|
||||
{
|
||||
std::string _operator{ *iterator };
|
||||
|
||||
tokens.emplace_back(Token::type::factor_operator, _operator.c_str(), iterator.position());
|
||||
tokens.emplace_back(token::type::factor_operator, _operator.c_str(), iterator.position());
|
||||
}
|
||||
else
|
||||
{
|
||||
|
Reference in New Issue
Block a user