2024-03-06 07:51:56 +01:00
|
|
|
#include "elna/source/lexer.hpp"
|
2024-03-23 14:53:26 +01:00
|
|
|
#include <cassert>
|
2024-03-06 07:51:56 +01:00
|
|
|
#include <variant>
|
2024-03-03 13:11:39 +01:00
|
|
|
|
2024-03-07 09:15:11 +01:00
|
|
|
namespace elna::source
|
2024-03-03 13:11:39 +01:00
|
|
|
{
|
|
|
|
// Alias for elna::source::position, used in the signatures below.
using source_position = elna::source::position;
|
|
|
|
// Alias for elna::source::error.
using source_error = elna::source::error;
|
2024-02-22 21:29:25 +01:00
|
|
|
|
2024-03-06 07:51:56 +01:00
|
|
|
token::value::value()
|
|
|
|
: nil(nullptr)
|
2024-02-22 21:29:25 +01:00
|
|
|
{
|
2024-03-06 07:51:56 +01:00
|
|
|
}
|
2024-02-28 16:18:39 +01:00
|
|
|
|
2024-03-06 07:51:56 +01:00
|
|
|
token::value::value(std::int32_t value)
|
|
|
|
: number(value)
|
|
|
|
{
|
|
|
|
}
|
2024-02-28 16:18:39 +01:00
|
|
|
|
2024-03-06 07:51:56 +01:00
|
|
|
// Copy-constructs the `identifier` member from the given string.
token::value::value(const std::string& value)
    : identifier(value)
{
    // Nothing else to do: the string member is now the one holding data.
}
|
|
|
|
|
2024-03-06 07:51:56 +01:00
|
|
|
// Intentionally empty: this destructor destroys no member. A stored
// identifier string is destroyed explicitly by token, which decides from
// the token type whether one is present.
token::value::~value()
{
}
|
|
|
|
|
2024-03-06 07:51:56 +01:00
|
|
|
// Creates a token of type `of` that carries a string payload.
//
// of       - token type.
// value    - text stored in the token value.
// position - source position recorded for the token.
token::token(const type of, const std::string& value, const source_position position)
    : m_type(of),
      m_value(value),
      m_position(position)
{
}
|
|
|
|
|
|
|
|
// Creates a token of type `of` that carries an integer payload.
//
// of       - token type.
// number   - integer stored in the token value.
// position - source position recorded for the token.
token::token(const type of, std::int32_t number, const source_position position)
    : m_type(of),
      m_value(number),
      m_position(position)
{
}
|
|
|
|
|
2024-04-06 16:10:07 +02:00
|
|
|
// Creates a token from an already built value.
//
// The token type `of` decides which member of `value` is read: the string
// is moved out for identifier-like types, the integer is copied for numeric
// types, and every other type stores a null `nil`.
token::token(type of, value&& value, const elna::source::position position)
    : m_type(of),
      m_position(position)
{
    if (has_identifier())
    {
        // Construct the string in place inside the union storage.
        ::new (static_cast<void *>(&m_value.identifier)) std::string(std::move(value.identifier));
        return;
    }
    if (is_numeric())
    {
        m_value.number = value.number;
        return;
    }
    m_value.nil = nullptr;
}
|
|
|
|
|
2024-03-06 07:51:56 +01:00
|
|
|
// Creates a token without an explicit payload; m_value is left to its
// default initialization.
//
// of       - token type.
// position - source position recorded for the token.
token::token(const type of, source_position position)
    : m_type(of),
      m_position(position)
{
}
|
|
|
|
|
2024-03-06 07:51:56 +01:00
|
|
|
// Copy constructor.
//
// Initializes the type and position directly and then constructs the
// payload that matches the source token. It deliberately does not delegate
// to operator=: the assignment operator inspects the current type of *this
// to decide whether a string must be destroyed, and during construction
// that type has not been set yet.
token::token(const token& that)
    : m_type(that.m_type), m_position(that.m_position)
{
    if (that.has_identifier())
    {
        // No string is alive in m_value yet, so construct one in place.
        new((void *) &m_value.identifier) std::string(that.m_value.identifier);
    }
    else if (that.is_numeric())
    {
        m_value.number = that.m_value.number;
    }
    else
    {
        m_value.nil = nullptr;
    }
}
|
|
|
|
|
2024-03-06 07:51:56 +01:00
|
|
|
// Move constructor.
//
// Initializes the type and position directly and then takes over the
// payload of the source token. It deliberately does not delegate to
// operator=: the assignment operator inspects the current type of *this to
// decide whether a string must be destroyed, and during construction that
// type has not been set yet.
//
// The source keeps its type; if it held a string, that string is left in a
// moved-from state and is still destroyed by the source's destructor.
token::token(token&& that)
    : m_type(that.m_type), m_position(that.m_position)
{
    if (that.has_identifier())
    {
        // Move from the member itself: the identifier() accessor returns a
        // const reference, which would silently turn the move into a copy.
        new((void *) &m_value.identifier) std::string(std::move(that.m_value.identifier));
    }
    else if (that.is_numeric())
    {
        m_value.number = that.m_value.number;
    }
    else
    {
        m_value.nil = nullptr;
    }
}
|
|
|
|
|
2024-03-06 07:51:56 +01:00
|
|
|
// Destroys the string payload if the token type says one is stored.
// Numeric and nil payloads need no cleanup.
token::~token()
{
    if (!has_identifier())
    {
        return;
    }
    m_value.identifier.~basic_string();
}
|
|
|
|
|
2024-03-06 07:51:56 +01:00
|
|
|
// Copy assignment.
//
// Replaces the type, position and payload of this token with copies of
// those of `that`. Self-assignment is a no-op. When a string has to be
// copied, the copy is made before this token is modified, so an exception
// thrown by the copy leaves this token unchanged.
//
// Returns a reference to this token.
token& token::operator=(const token& that)
{
    if (this == &that)
    {
        return *this;
    }
    if (that.has_identifier())
    {
        // May throw; nothing in *this has been touched yet.
        std::string copy(that.m_value.identifier);

        if (has_identifier())
        {
            // A string is already alive here, reuse it.
            m_value.identifier = std::move(copy);
        }
        else
        {
            // The current payload is trivial, construct the string in place.
            new((void *) &m_value.identifier) std::string(std::move(copy));
        }
    }
    else
    {
        if (has_identifier())
        {
            m_value.identifier.~basic_string();
        }
        if (that.is_numeric())
        {
            m_value.number = that.m_value.number;
        }
        else
        {
            m_value.nil = nullptr;
        }
    }
    // Update the type last so has_identifier() above still described the old payload.
    m_type = that.m_type;
    m_position = that.m_position;

    return *this;
}
|
|
|
|
|
2024-03-06 07:51:56 +01:00
|
|
|
// Move assignment.
//
// Replaces the type, position and payload of this token with those of
// `that`. Self-assignment is a no-op. A string payload is moved out of
// `that`, which keeps its type and is left holding a moved-from string.
//
// Returns a reference to this token.
token& token::operator=(token&& that)
{
    if (this == &that)
    {
        return *this;
    }
    if (that.has_identifier())
    {
        // Move from the member itself: the identifier() accessor returns a
        // const reference, which would silently turn the move into a copy.
        if (has_identifier())
        {
            // A string is already alive here, reuse it.
            m_value.identifier = std::move(that.m_value.identifier);
        }
        else
        {
            // The current payload is trivial, construct the string in place.
            new((void *) &m_value.identifier) std::string(std::move(that.m_value.identifier));
        }
    }
    else
    {
        if (has_identifier())
        {
            m_value.identifier.~basic_string();
        }
        if (that.is_numeric())
        {
            m_value.number = that.m_value.number;
        }
        else
        {
            m_value.nil = nullptr;
        }
    }
    // Update the type last so has_identifier() above still described the old payload.
    m_type = that.m_type;
    m_position = that.m_position;

    return *this;
}
|
|
|
|
|
2024-03-06 07:51:56 +01:00
|
|
|
// Returns the type of this token.
token::type token::of() const noexcept
{
    return this->m_type;
}
|
|
|
|
|
2024-03-06 07:51:56 +01:00
|
|
|
const std::string& token::identifier() const
|
2024-02-28 16:18:39 +01:00
|
|
|
{
|
2024-03-06 07:51:56 +01:00
|
|
|
if (!has_identifier())
|
|
|
|
{
|
|
|
|
throw std::bad_variant_access();
|
|
|
|
}
|
2024-02-28 16:18:39 +01:00
|
|
|
return m_value.identifier;
|
|
|
|
}
|
|
|
|
|
2024-03-06 07:51:56 +01:00
|
|
|
std::int32_t token::number() const
|
2024-02-22 21:29:25 +01:00
|
|
|
{
|
2024-03-17 01:00:44 +01:00
|
|
|
if (!is_numeric())
|
2024-03-06 07:51:56 +01:00
|
|
|
{
|
|
|
|
throw std::bad_variant_access();
|
|
|
|
}
|
2024-02-28 16:18:39 +01:00
|
|
|
return m_value.number;
|
2024-02-22 21:29:25 +01:00
|
|
|
}
|
|
|
|
|
2024-03-06 07:51:56 +01:00
|
|
|
// Returns the source position stored in this token.
const source_position& token::position() const noexcept
{
    return this->m_position;
}
|
|
|
|
|
2024-03-06 07:51:56 +01:00
|
|
|
bool token::has_identifier() const noexcept
|
|
|
|
{
|
|
|
|
return of() == type::identifier
|
|
|
|
|| of() == type::term_operator
|
2024-04-02 09:07:13 +02:00
|
|
|
|| of() == type::factor_operator
|
|
|
|
|| of() == type::comparison_operator;
|
2024-03-06 07:51:56 +01:00
|
|
|
}
|
|
|
|
|
2024-03-17 01:00:44 +01:00
|
|
|
bool token::is_numeric() const noexcept
|
|
|
|
{
|
|
|
|
return of() == type::number
|
|
|
|
|| of() == type::boolean;
|
|
|
|
}
|
|
|
|
|
2024-03-23 14:53:26 +01:00
|
|
|
std::string token::to_string() const
|
|
|
|
{
|
|
|
|
switch (this->m_type)
|
|
|
|
{
|
|
|
|
case type::number:
|
2024-04-02 09:07:13 +02:00
|
|
|
return "«number»";
|
2024-03-23 14:53:26 +01:00
|
|
|
case type::boolean:
|
2024-04-02 09:07:13 +02:00
|
|
|
return "«boolean»";
|
2024-03-23 14:53:26 +01:00
|
|
|
case type::term_operator:
|
2024-04-02 09:07:13 +02:00
|
|
|
return "«term_operator»";
|
2024-03-23 14:53:26 +01:00
|
|
|
case type::let:
|
2024-04-02 09:07:13 +02:00
|
|
|
return "«const»";
|
2024-03-23 14:53:26 +01:00
|
|
|
case type::identifier:
|
2024-04-02 09:07:13 +02:00
|
|
|
return "«identifier»";
|
2024-03-23 14:53:26 +01:00
|
|
|
case type::equals:
|
2024-04-02 09:07:13 +02:00
|
|
|
return "«=»";
|
2024-03-23 14:53:26 +01:00
|
|
|
case type::var:
|
2024-04-02 09:07:13 +02:00
|
|
|
return "«var»";
|
2024-03-23 14:53:26 +01:00
|
|
|
case type::semicolon:
|
2024-04-02 09:07:13 +02:00
|
|
|
return "«;»";
|
2024-03-23 14:53:26 +01:00
|
|
|
case type::left_paren:
|
2024-04-02 09:07:13 +02:00
|
|
|
return "«(»";
|
2024-03-23 14:53:26 +01:00
|
|
|
case type::right_paren:
|
2024-04-02 09:07:13 +02:00
|
|
|
return "«)»";
|
2024-03-23 14:53:26 +01:00
|
|
|
case type::dot:
|
2024-04-02 09:07:13 +02:00
|
|
|
return "«)»";
|
2024-03-23 14:53:26 +01:00
|
|
|
case type::comma:
|
2024-04-02 09:07:13 +02:00
|
|
|
return "«,»";
|
2024-03-23 14:53:26 +01:00
|
|
|
case type::factor_operator:
|
2024-04-02 09:07:13 +02:00
|
|
|
return "«*»";
|
2024-03-23 14:53:26 +01:00
|
|
|
case type::eof:
|
2024-04-02 09:07:13 +02:00
|
|
|
return "«EOF»";
|
2024-03-23 14:53:26 +01:00
|
|
|
case type::begin:
|
2024-04-02 09:07:13 +02:00
|
|
|
return "«begin»";
|
2024-03-23 14:53:26 +01:00
|
|
|
case type::end:
|
2024-04-02 09:07:13 +02:00
|
|
|
return "«end»";
|
2024-03-23 14:53:26 +01:00
|
|
|
case type::assignment:
|
2024-04-02 09:07:13 +02:00
|
|
|
return "«:=»";
|
2024-03-23 14:53:26 +01:00
|
|
|
case type::colon:
|
2024-04-02 09:07:13 +02:00
|
|
|
return "«:»";
|
2024-03-23 14:53:26 +01:00
|
|
|
case type::when:
|
2024-04-02 09:07:13 +02:00
|
|
|
return "«if»";
|
2024-03-23 14:53:26 +01:00
|
|
|
case type::then:
|
2024-04-02 09:07:13 +02:00
|
|
|
return "«then»";
|
|
|
|
case type::loop:
|
|
|
|
return "«while»";
|
2024-03-23 14:53:26 +01:00
|
|
|
case type::_do:
|
2024-04-02 09:07:13 +02:00
|
|
|
return "«do»";
|
2024-03-23 14:53:26 +01:00
|
|
|
case type::procedure:
|
2024-04-02 09:07:13 +02:00
|
|
|
return "«proc»";
|
|
|
|
case type::comparison_operator:
|
|
|
|
return "«comparison_operator»";
|
2024-05-28 23:39:04 +02:00
|
|
|
case type::hat:
|
|
|
|
return "«^»";
|
|
|
|
case type::at:
|
|
|
|
return "«@»";
|
2024-03-23 14:53:26 +01:00
|
|
|
};
|
|
|
|
assert(false);
|
|
|
|
}
|
|
|
|
|
2024-03-30 00:21:58 +01:00
|
|
|
// Creates an error for a character the lexer could not handle.
//
// character - offending character, kept for the message built by what().
// path      - passed on to the error base class.
// position  - passed on to the error base class.
unexpected_character::unexpected_character(const std::string& character,
        const std::filesystem::path& path, const source::position position)
    : error(path, position),
      character(character)
{
}
|
|
|
|
|
|
|
|
std::string unexpected_character::what() const
|
|
|
|
{
|
2024-04-06 16:10:07 +02:00
|
|
|
std::string ss{ "Unexpected character '" };
|
2024-03-09 08:36:07 +01:00
|
|
|
|
2024-04-06 16:10:07 +02:00
|
|
|
ss.insert(ss.cend(), character.cbegin(), character.cend());
|
|
|
|
ss.push_back('\'');
|
2024-03-09 08:36:07 +01:00
|
|
|
|
2024-04-06 16:10:07 +02:00
|
|
|
return ss;
|
2024-03-09 08:36:07 +01:00
|
|
|
}
|
|
|
|
|
2024-03-30 00:21:58 +01:00
|
|
|
// Creates an error for a token that was not expected.
//
// token - offending token; a copy is kept and its position is passed on to
//         the error base class.
// path  - passed on to the error base class.
unexpected_token::unexpected_token(const token& token, const std::filesystem::path& path)
    : error(path, token.position()),
      m_token(token)
{
}
|
|
|
|
|
|
|
|
std::string unexpected_token::what() const
|
|
|
|
{
|
2024-03-23 14:53:26 +01:00
|
|
|
return "Unexpected token " + m_token.to_string();
|
2024-03-10 08:50:55 +01:00
|
|
|
}
|
|
|
|
|
2024-03-30 00:21:58 +01:00
|
|
|
// Creates a lexer over an already tokenized input.
//
// tokens        - token sequence; the lexer takes ownership of it.
// last_position - position given to the eof token that stands in for the
//                 end of the sequence.
// path          - source file path, used when errors are recorded.
lexer::lexer(std::vector<token>&& tokens, const position last_position, const std::filesystem::path& path)
    : tokens(std::move(tokens)),
      // Must refer to the member, not the moved-from parameter of the same name.
      iterator(this->tokens.cbegin()),
      eof(token::type::eof, last_position),
      source_file(path)
{
}
|
|
|
|
|
|
|
|
// Moves to the next token and returns this lexer. No end-of-sequence check
// is made here.
lexer& lexer::operator++()
{
    ++this->iterator;

    return *this;
}
|
|
|
|
|
|
|
|
// Returns the current token.
//
// Goes through current() so that dereferencing an exhausted lexer yields
// the eof token instead of dereferencing the end iterator of the token
// vector, which is undefined behaviour.
const token& lexer::operator*() const
{
    return current();
}
|
|
|
|
|
|
|
|
// Gives member access to the current token.
//
// Takes the address of the token returned by current() rather than calling
// base() on the vector iterator: base() is not part of the standard
// iterator interface. An exhausted lexer yields a pointer to the eof token
// rather than a past-the-end pointer.
const token *lexer::operator->() const
{
    return &current();
}
|
|
|
|
|
|
|
|
// Returns the token under the cursor, or the eof token once the cursor has
// reached the end of the sequence.
const token& lexer::current() const noexcept
{
    return iterator == tokens.cend() ? this->eof : *iterator;
}
|
|
|
|
|
|
|
|
// Tells whether the current token (eof at the end) has the given type.
bool lexer::current(const token::type token_type) const noexcept
{
    const token& head = current();

    return head.of() == token_type;
}
|
|
|
|
|
|
|
|
void lexer::add_error(const token& expected)
|
|
|
|
{
|
2024-03-30 00:21:58 +01:00
|
|
|
m_errors.push_back(std::make_unique<unexpected_token>(expected, this->source_file));
|
2024-03-14 08:52:45 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Consumes the current token if it has the requested type.
//
// On a match the cursor moves one token forward and a reference to the
// consumed token is returned. Otherwise the cursor stays where it is, an
// error is recorded for the current token (eof at the end) and an empty
// optional is returned.
std::optional<std::reference_wrapper<const token>> lexer::advance(const token::type token_type)
{
    const bool matches = iterator != tokens.cend() && iterator->of() == token_type;

    if (!matches)
    {
        add_error(current());
        return std::nullopt;
    }
    const token& consumed = *iterator;
    ++iterator;

    return std::cref(consumed);
}
|
|
|
|
|
|
|
|
// Returns the token after the current one without moving the cursor.
//
// Returns the eof token if the cursor is already at the end or the current
// token is the last one. The end check comes before the increment because
// advancing an end iterator is undefined behaviour.
const token& lexer::look_ahead() const
{
    if (iterator == tokens.cend())
    {
        return eof;
    }
    auto next = iterator;
    ++next;

    if (next == tokens.cend())
    {
        return eof;
    }
    return *next;
}
|
|
|
|
|
|
|
|
// Tells whether the token returned by look_ahead() has the given type.
bool lexer::look_ahead(const token::type token_type) const
{
    const token& upcoming = look_ahead();

    return upcoming.of() == token_type;
}
|
|
|
|
|
|
|
|
// Same as advance(), but only reports whether a token was consumed.
bool lexer::skip(const token::type token_type)
{
    const auto consumed = advance(token_type);

    return consumed.has_value();
}
|
|
|
|
|
|
|
|
const std::list<std::unique_ptr<error>>& lexer::errors() const noexcept
|
|
|
|
{
|
|
|
|
return m_errors;
|
|
|
|
}
|
2024-02-22 21:29:25 +01:00
|
|
|
}
|