elna/source/lexer.cpp

326 lines
7.9 KiB
C++

#include "elna/source/lexer.hpp"
#include <cassert>
#include <variant>
namespace elna::source
{
using source_position = elna::source::position;
using source_error = elna::source::error;
token::value::value()
: nil(nullptr)
{
}
token::value::value(std::int32_t value)
: number(value)
{
}
token::value::value(const std::string& value)
: identifier(value)
{
}
token::value::~value()
{
}
token::token(const type of, const std::string& value, const source_position position)
: m_type(of), m_value(value), m_position(position)
{
}
token::token(const type of, std::int32_t number, const source_position position)
: m_type(of), m_value(number), m_position(position)
{
}
token::token(type of, value&& value, const elna::source::position position)
: m_type(of), m_position(position)
{
if (has_identifier())
{
new((void *) &m_value.identifier) std::string(std::move(value.identifier));
}
else if (is_numeric())
{
m_value.number = value.number;
}
else
{
m_value.nil = nullptr;
}
}
token::token(const type of, source_position position)
: m_type(of), m_position(position)
{
}
token::token(const token& that)
{
*this = that;
}
token::token(token&& that)
{
*this = std::move(that);
}
token::~token()
{
if (has_identifier())
{
m_value.identifier.~basic_string();
}
}
token& token::operator=(const token& that)
{
if (has_identifier())
{
m_value.identifier.~basic_string();
}
m_type = that.of();
m_position = that.position();
if (that.has_identifier())
{
new((void *) &m_value.identifier) std::string(that.identifier());
}
else if (that.is_numeric())
{
m_value.number = that.number();
}
else
{
m_value.nil = nullptr;
}
return *this;
}
token& token::operator=(token&& that)
{
if (has_identifier())
{
m_value.identifier.~basic_string();
}
m_type = that.of();
m_position = that.position();
if (that.has_identifier())
{
new((void *) &m_value.identifier) std::string(std::move(that.identifier()));
}
else if (that.is_numeric())
{
m_value.number = that.number();
}
else
{
m_value.nil = nullptr;
}
return *this;
}
token::type token::of() const noexcept
{
return m_type;
}
const std::string& token::identifier() const
{
if (!has_identifier())
{
throw std::bad_variant_access();
}
return m_value.identifier;
}
std::int32_t token::number() const
{
if (!is_numeric())
{
throw std::bad_variant_access();
}
return m_value.number;
}
const source_position& token::position() const noexcept
{
return m_position;
}
bool token::has_identifier() const noexcept
{
return of() == type::identifier
|| of() == type::term_operator
|| of() == type::factor_operator
|| of() == type::comparison_operator;
}
bool token::is_numeric() const noexcept
{
return of() == type::number
|| of() == type::boolean;
}
std::string token::to_string() const
{
switch (this->m_type)
{
case type::number:
return "«number»";
case type::boolean:
return "«boolean»";
case type::term_operator:
return "«term_operator»";
case type::let:
return "«const»";
case type::identifier:
return "«identifier»";
case type::equals:
return "«=»";
case type::var:
return "«var»";
case type::semicolon:
return "«;»";
case type::left_paren:
return "«(»";
case type::right_paren:
return "«)»";
case type::dot:
return "«)»";
case type::comma:
return "«,»";
case type::factor_operator:
return "«*»";
case type::eof:
return "«EOF»";
case type::begin:
return "«begin»";
case type::end:
return "«end»";
case type::assignment:
return "«:=»";
case type::colon:
return "«:»";
case type::when:
return "«if»";
case type::then:
return "«then»";
case type::loop:
return "«while»";
case type::_do:
return "«do»";
case type::procedure:
return "«proc»";
case type::comparison_operator:
return "«comparison_operator»";
};
assert(false);
}
unexpected_character::unexpected_character(const std::string& character, const std::filesystem::path& path,
const source::position position)
: error(path, position), character(character)
{
}
std::string unexpected_character::what() const
{
std::string ss{ "Unexpected character '" };
ss.insert(ss.cend(), character.cbegin(), character.cend());
ss.push_back('\'');
return ss;
}
unexpected_token::unexpected_token(const token& token, const std::filesystem::path& path)
: error(path, token.position()), m_token(token)
{
}
std::string unexpected_token::what() const
{
return "Unexpected token " + m_token.to_string();
}
lexer::lexer(std::vector<token>&& tokens, const position last_position, const std::filesystem::path& path)
: tokens(std::move(tokens)), iterator(this->tokens.cbegin()), eof(token(token::type::eof, last_position)),
source_file(path)
{
}
lexer& lexer::operator++()
{
++iterator;
return *this;
}
const token& lexer::operator*() const
{
return *iterator;
}
const token *lexer::operator->() const
{
return iterator.base();
}
const token& lexer::current() const noexcept
{
if (iterator == tokens.cend())
{
return this->eof;
}
return *iterator;
}
bool lexer::current(const token::type token_type) const noexcept
{
return current().of() == token_type;
}
void lexer::add_error(const token& expected)
{
m_errors.push_back(std::make_unique<unexpected_token>(expected, this->source_file));
}
std::optional<std::reference_wrapper<const token>> lexer::advance(const token::type token_type)
{
if (iterator != tokens.cend() && iterator->of() == token_type)
{
return std::make_optional<>(std::cref(*iterator++));
}
add_error(current());
return std::optional<std::reference_wrapper<const token>>();
}
const token& lexer::look_ahead() const
{
auto tmp = iterator;
++tmp;
if (iterator == tokens.cend() || tmp == tokens.cend())
{
return eof;
}
return *tmp;
}
bool lexer::look_ahead(const token::type token_type) const
{
return look_ahead().of() == token_type;
}
bool lexer::skip(const token::type token_type)
{
return advance(token_type).has_value();
}
const std::list<std::unique_ptr<error>>& lexer::errors() const noexcept
{
return m_errors;
}
}