elna/source/lexer.cpp

330 lines
8.0 KiB
C++
Raw Normal View History

#include "elna/source/lexer.hpp"
2024-03-23 14:53:26 +01:00
#include <cassert>
#include <variant>
2024-03-03 13:11:39 +01:00
2024-03-07 09:15:11 +01:00
namespace elna::source
2024-03-03 13:11:39 +01:00
{
// Short local aliases for the position and error types of elna::source,
// used in the signatures below.
using source_position = elna::source::position;
using source_error = elna::source::error;
2024-02-22 21:29:25 +01:00
token::value::value()
: nil(nullptr)
2024-02-22 21:29:25 +01:00
{
}
2024-02-28 16:18:39 +01:00
token::value::value(std::int32_t value)
: number(value)
{
}
2024-02-28 16:18:39 +01:00
token::value::value(const std::string& value)
: identifier(value)
{
2024-02-28 16:18:39 +01:00
}
/**
 * Intentionally does nothing. The identifier string, when one is held, is
 * destroyed explicitly by token::~token().
 */
token::value::~value()
{
}
/**
 * Creates a token that carries a string payload.
 *
 * \param of Token type.
 * \param value String stored as the token's identifier.
 * \param position Source position of the token.
 */
token::token(const type of, const std::string& value, const source_position position)
    : m_type(of),
      m_value(value),
      m_position(position)
{
}
/**
 * Creates a token that carries a numeric payload.
 *
 * \param of Token type.
 * \param number Number stored in the token.
 * \param position Source position of the token.
 */
token::token(const type of, std::int32_t number, const source_position position)
    : m_type(of),
      m_value(number),
      m_position(position)
{
}
2024-04-06 16:10:07 +02:00
/**
 * Creates a token from an already built value, picking the member to
 * initialize from the token type.
 *
 * \param of Token type; decides which member of the value is read.
 * \param value Payload; its identifier is moved from when the type carries one.
 * \param position Source position of the token.
 */
token::token(type of, value&& value, const elna::source::position position)
    : m_type(of), m_position(position)
{
    if (is_numeric())
    {
        m_value.number = value.number;
        return;
    }
    if (!has_identifier())
    {
        m_value.nil = nullptr;
        return;
    }
    // The string member has not been constructed yet, so build it in place.
    new (static_cast<void *>(&m_value.identifier)) std::string(std::move(value.identifier));
}
/**
 * Creates a token without an explicit payload; the value is default
 * constructed.
 *
 * Note: the destructor destroys the identifier string whenever
 * has_identifier() is true, so this overload should only be used with types
 * that carry no identifier.
 *
 * \param of Token type.
 * \param position Source position of the token.
 */
token::token(const type of, source_position position)
    : m_type(of),
      m_position(position)
{
}
/**
 * Copy constructor.
 *
 * Initializes the type and the position directly and then constructs the
 * matching payload member. It does not delegate to the assignment operator,
 * because that operator inspects the current type (and may destroy the
 * current identifier) before anything in this object has been initialized.
 *
 * \param that Token to copy.
 */
token::token(const token& that)
    : m_type(that.m_type), m_position(that.m_position)
{
    if (that.has_identifier())
    {
        // No string lives in this object yet, so construct it in place.
        new (static_cast<void *>(&m_value.identifier)) std::string(that.m_value.identifier);
    }
    else if (that.is_numeric())
    {
        m_value.number = that.m_value.number;
    }
    else
    {
        m_value.nil = nullptr;
    }
}
/**
 * Move constructor.
 *
 * Initializes the type and the position directly and then constructs the
 * matching payload member, moving the identifier string when there is one.
 * It does not delegate to the assignment operator, because that operator
 * inspects the current type (and may destroy the current identifier) before
 * anything in this object has been initialized.
 *
 * \param that Token to move from; it keeps its type and is left holding a
 *             moved-from string if it carried an identifier.
 */
token::token(token&& that)
    : m_type(that.m_type), m_position(that.m_position)
{
    if (that.has_identifier())
    {
        // Access the member directly: identifier() returns a const reference,
        // which would turn the move into a copy.
        new (static_cast<void *>(&m_value.identifier)) std::string(std::move(that.m_value.identifier));
    }
    else if (that.is_numeric())
    {
        m_value.number = that.m_value.number;
    }
    else
    {
        m_value.nil = nullptr;
    }
}
/**
 * Destroys the identifier string if the token type carries one; other
 * payloads need no cleanup.
 */
token::~token()
{
    if (!has_identifier())
    {
        return;
    }
    m_value.identifier.~basic_string();
}
/**
 * Copy assignment.
 *
 * Self-assignment is a no-op; without the guard the identifier would be
 * destroyed and then copied from itself. The type is updated only after the
 * payload has been replaced, so that a throwing string copy cannot leave the
 * token claiming to hold an identifier that was never constructed.
 *
 * \param that Token to copy.
 * \return Reference to this token.
 */
token& token::operator=(const token& that)
{
    if (this == &that)
    {
        return *this;
    }
    if (that.has_identifier())
    {
        if (has_identifier())
        {
            // Both sides hold a live string: plain assignment is enough.
            m_value.identifier = that.m_value.identifier;
        }
        else
        {
            new (static_cast<void *>(&m_value.identifier)) std::string(that.m_value.identifier);
        }
    }
    else
    {
        if (has_identifier())
        {
            m_value.identifier.~basic_string();
        }
        if (that.is_numeric())
        {
            m_value.number = that.m_value.number;
        }
        else
        {
            m_value.nil = nullptr;
        }
    }
    m_type = that.m_type;
    m_position = that.m_position;

    return *this;
}
/**
 * Move assignment.
 *
 * Self-assignment is a no-op. The identifier is moved out of the source
 * member directly; going through identifier() would yield a const reference
 * and silently copy the string instead. The type is updated only after the
 * payload has been replaced.
 *
 * \param that Token to move from; it keeps its type and is left holding a
 *             moved-from string if it carried an identifier.
 * \return Reference to this token.
 */
token& token::operator=(token&& that)
{
    if (this == &that)
    {
        return *this;
    }
    if (that.has_identifier())
    {
        if (has_identifier())
        {
            // Both sides hold a live string: move-assign into the existing one.
            m_value.identifier = std::move(that.m_value.identifier);
        }
        else
        {
            new (static_cast<void *>(&m_value.identifier)) std::string(std::move(that.m_value.identifier));
        }
    }
    else
    {
        if (has_identifier())
        {
            m_value.identifier.~basic_string();
        }
        if (that.is_numeric())
        {
            m_value.number = that.m_value.number;
        }
        else
        {
            m_value.nil = nullptr;
        }
    }
    m_type = that.m_type;
    m_position = that.m_position;

    return *this;
}
/**
 * \return The type of this token.
 */
token::type token::of() const noexcept
{
    return this->m_type;
}
const std::string& token::identifier() const
2024-02-28 16:18:39 +01:00
{
if (!has_identifier())
{
throw std::bad_variant_access();
}
2024-02-28 16:18:39 +01:00
return m_value.identifier;
}
std::int32_t token::number() const
2024-02-22 21:29:25 +01:00
{
2024-03-17 01:00:44 +01:00
if (!is_numeric())
{
throw std::bad_variant_access();
}
2024-02-28 16:18:39 +01:00
return m_value.number;
2024-02-22 21:29:25 +01:00
}
/**
 * \return The source position stored in this token.
 */
const source_position& token::position() const noexcept
{
    return this->m_position;
}
/**
 * Tells whether the token type carries a string payload: identifiers and
 * the term, factor and comparison operators do.
 *
 * \return true if the identifier member is the one in use.
 */
bool token::has_identifier() const noexcept
{
    switch (of())
    {
        case type::identifier:
        case type::term_operator:
        case type::factor_operator:
        case type::comparison_operator:
            return true;
        default:
            return false;
    }
}
2024-03-17 01:00:44 +01:00
bool token::is_numeric() const noexcept
{
return of() == type::number
|| of() == type::boolean;
}
2024-03-23 14:53:26 +01:00
/**
 * Produces a printable label for the token's type. The label depends only
 * on the type, not on the payload; unexpected_token::what() uses it to build
 * its message.
 *
 * \return The label wrapped in «» quotes.
 */
std::string token::to_string() const
{
    switch (this->m_type)
    {
        case type::number:
            return "«number»";
        case type::boolean:
            return "«boolean»";
        case type::term_operator:
            return "«term_operator»";
        case type::let:
            return "«const»";
        case type::identifier:
            return "«identifier»";
        case type::equals:
            return "«=»";
        case type::var:
            return "«var»";
        case type::semicolon:
            return "«;»";
        case type::left_paren:
            return "«(»";
        case type::right_paren:
            return "«)»";
        case type::dot:
            // Previously printed «)», duplicating the right_paren label.
            return "«.»";
        case type::comma:
            return "«,»";
        case type::factor_operator:
            return "«*»";
        case type::eof:
            return "«EOF»";
        case type::begin:
            return "«begin»";
        case type::end:
            return "«end»";
        case type::assignment:
            return "«:=»";
        case type::colon:
            return "«:»";
        case type::when:
            return "«if»";
        case type::then:
            return "«then»";
        case type::loop:
            return "«while»";
        case type::_do:
            return "«do»";
        case type::procedure:
            return "«proc»";
        case type::comparison_operator:
            return "«comparison_operator»";
        case type::hat:
            return "«^»";
        case type::at:
            return "«@»";
    }
    // Not reachable for the token types handled above. The explicit return
    // keeps builds with NDEBUG (where assert expands to nothing) from
    // running off the end of a non-void function.
    assert(false);
    return "«unknown»";
}
/**
 * Creates an error for a character the lexer could not handle.
 *
 * \param character The offending character as a string.
 * \param path Source file path passed on to the base error.
 * \param position Position passed on to the base error.
 */
unexpected_character::unexpected_character(
    const std::string& character,
    const std::filesystem::path& path,
    const source::position position)
    : error(path, position),
      character(character)
{
}
std::string unexpected_character::what() const
{
2024-04-06 16:10:07 +02:00
std::string ss{ "Unexpected character '" };
2024-03-09 08:36:07 +01:00
2024-04-06 16:10:07 +02:00
ss.insert(ss.cend(), character.cbegin(), character.cend());
ss.push_back('\'');
2024-03-09 08:36:07 +01:00
2024-04-06 16:10:07 +02:00
return ss;
2024-03-09 08:36:07 +01:00
}
/**
 * Creates an error for a token that was not expected where it appeared.
 * The error position is taken from the token.
 *
 * \param token The offending token; a copy is stored.
 * \param path Source file path passed on to the base error.
 */
unexpected_token::unexpected_token(const token& token, const std::filesystem::path& path)
    : error(path, token.position()),
      m_token(token)
{
}
std::string unexpected_token::what() const
{
2024-03-23 14:53:26 +01:00
return "Unexpected token " + m_token.to_string();
2024-03-10 08:50:55 +01:00
}
/**
 * Creates a lexer over an already tokenized input.
 *
 * \param tokens Token sequence; ownership is taken by moving.
 * \param last_position Position given to the synthetic end-of-file token.
 * \param path Path of the source file, used when recording errors.
 */
lexer::lexer(std::vector<token>&& tokens, const position last_position, const std::filesystem::path& path)
    : tokens(std::move(tokens)),
      // Must refer to the member, not the moved-from parameter.
      // NOTE(review): relies on `tokens` being declared before `iterator`
      // in the class - confirm against the header.
      iterator(this->tokens.cbegin()),
      eof(token::type::eof, last_position),
      source_file(path)
{
}
/**
 * Moves to the next token. No end-of-input check is performed.
 *
 * \return Reference to this lexer.
 */
lexer& lexer::operator++()
{
    ++this->iterator;

    return *this;
}
/**
 * Dereferences the underlying iterator without an end-of-input check;
 * current() is the checked alternative.
 *
 * \return The token at the current position.
 */
const token& lexer::operator*() const
{
    const token& pointee = *this->iterator;

    return pointee;
}
/**
 * Member access to the token at the current position. Like operator*, this
 * performs no end-of-input check, so it must not be used once the input is
 * exhausted.
 *
 * The address is taken through a dereference rather than iterator.base():
 * base() is not part of the standard vector iterator interface and is only
 * available with some standard library implementations.
 *
 * \return Pointer to the current token.
 */
const token *lexer::operator->() const
{
    return &*iterator;
}
/**
 * \return The token at the current position, or the end-of-file token once
 *         the input is exhausted.
 */
const token& lexer::current() const noexcept
{
    const bool exhausted = iterator == tokens.cend();

    return exhausted ? this->eof : *iterator;
}
/**
 * \param token_type Type to compare against.
 * \return true if the current token (or the end-of-file token at the end of
 *         input) has the given type.
 */
bool lexer::current(const token::type token_type) const noexcept
{
    const token& present = current();

    return present.of() == token_type;
}
/**
 * Appends an unexpected_token error for the given token to the error list.
 *
 * \param expected The token to report. Despite the parameter name, this is
 *                 the token that was actually found.
 */
void lexer::add_error(const token& expected)
{
    auto failure = std::make_unique<unexpected_token>(expected, this->source_file);

    m_errors.push_back(std::move(failure));
}
/**
 * Consumes the current token if it has the requested type.
 *
 * On a mismatch (or at the end of input) the position is left unchanged and
 * an error for the current token is recorded.
 *
 * \param token_type The type the current token must have.
 * \return Reference to the consumed token, or an empty optional on mismatch.
 */
std::optional<std::reference_wrapper<const token>> lexer::advance(const token::type token_type)
{
    if (iterator == tokens.cend() || iterator->of() != token_type)
    {
        add_error(current());
        return std::nullopt;
    }
    const token& matched = *iterator;
    ++iterator;

    return std::cref(matched);
}
/**
 * Peeks at the token following the current one without consuming anything.
 *
 * The end check is done before stepping forward: incrementing an iterator
 * that already equals cend() is undefined behaviour.
 *
 * \return The next token, or the end-of-file token if the current position
 *         is at the end of input or at the last token.
 */
const token& lexer::look_ahead() const
{
    if (iterator == tokens.cend())
    {
        return eof;
    }
    auto next = iterator;
    ++next;

    if (next == tokens.cend())
    {
        return eof;
    }
    return *next;
}
/**
 * \param token_type Type to compare against.
 * \return true if the token returned by look_ahead() has the given type.
 */
bool lexer::look_ahead(const token::type token_type) const
{
    const token& upcoming = look_ahead();

    return upcoming.of() == token_type;
}
/**
 * Same as advance(), but only reports whether a token was consumed.
 *
 * \param token_type The type the current token must have.
 * \return true if the token matched and was consumed.
 */
bool lexer::skip(const token::type token_type)
{
    const auto consumed = advance(token_type);

    return consumed.has_value();
}
const std::list<std::unique_ptr<error>>& lexer::errors() const noexcept
{
return m_errors;
}
2024-02-22 21:29:25 +01:00
}