#include "elna/source/lexer.hpp"
#include <cassert>
#include <cctype>
#include <sstream>
#include <variant>

namespace elna::source
{
    using source_position = elna::source::position;
    using source_error = elna::source::error;

    /**
     * Creates a begin/end pair of position-tracking iterators over a buffer.
     *
     * \param buffer Source text. It must outlive the returned iterators.
     * \return Iterator at the first character and a past-the-end iterator.
     */
    std::pair<text_iterator, text_iterator> text_iterators(const std::string &buffer)
    {
        // Iterators compare by buffer position only, so the position attached
        // to the end iterator never takes part in a comparison.
        return std::make_pair<>(text_iterator(std::cbegin(buffer)),
                text_iterator(std::cend(buffer), position{0, 0}));
    }

    text_iterator::text_iterator(std::string::const_iterator buffer,
            const source_position start_position)
        : m_buffer(buffer), m_position(start_position)
    {
    }

    /// Line and column of the character the iterator currently points at.
    const source_position& text_iterator::position() const noexcept
    {
        return this->m_position;
    }

    text_iterator::reference text_iterator::operator*() const noexcept
    {
        return *m_buffer;
    }

    text_iterator::pointer text_iterator::operator->() const noexcept
    {
        // &* is portable; .base() exists only on libstdc++ iterators.
        return &*m_buffer;
    }

    /**
     * Moves to the next character and updates the tracked position: a newline
     * starts a new line at column 1, anything else advances the column.
     */
    text_iterator& text_iterator::operator++()
    {
        if (*this->m_buffer == '\n')
        {
            this->m_position.column = 1;
            ++this->m_position.line;
        }
        else
        {
            ++this->m_position.column;
        }
        ++this->m_buffer;

        return *this;
    }

    text_iterator& text_iterator::operator++(int)
    {
        // NOTE(review): the return type is a reference, so this cannot hand
        // back a copy of the previous state and behaves like the prefix form.
        // Conventional postfix semantics need a by-value return type.
        ++(*this);
        return *this;
    }

    /**
     * Returns a copy of the iterator advanced by \a step characters.
     *
     * The caller must make sure at least \a step characters remain, because
     * every step inspects the character being passed over.
     */
    text_iterator text_iterator::operator+(std::size_t step)
    {
        auto result = *this;

        for (std::size_t i = 0; i < step; ++i)
        {
            ++result;
        }
        return result;
    }

    bool text_iterator::operator==(const text_iterator& that) const noexcept
    {
        return this->m_buffer == that.m_buffer;
    }

    bool text_iterator::operator!=(const text_iterator& that) const noexcept
    {
        return !(*this == that);
    }

    token::value::value()
        : nil(nullptr)
    {
    }

    token::value::value(std::int32_t value)
        : number(value)
    {
    }

    token::value::value(const std::string& value)
        : identifier(value)
    {
    }

    // The active member is only known to the owning token, which destroys the
    // string itself when needed.
    token::value::~value()
    {
    }

    token::token(const type of, const std::string& value, const source_position position)
        : m_type(of), m_value(value), m_position(position)
    {
    }

    token::token(const type of, std::int32_t number, const source_position position)
        : m_type(of), m_value(number), m_position(position)
    {
    }

    token::token(const type of, source_position position)
        : m_type(of), m_position(position)
    {
    }

    // Both constructors delegate to assignment, which inspects the current
    // type to decide whether a string has to be destroyed. The type therefore
    // has to start out as a payload-free one.
    token::token(const token& that)
        : m_type(type::eof)
    {
        *this = that;
    }

    token::token(token&& that)
        : m_type(type::eof)
    {
        *this = std::move(that);
    }

    token::~token()
    {
        if (has_identifier())
        {
            m_value.identifier.~basic_string();
        }
    }

    token& token::operator=(const token& that)
    {
        if (this == &that)
        {
            return *this;
        }
        if (has_identifier())
        {
            m_value.identifier.~basic_string();
        }
        // Stay payload-free until the new value exists, so that a throwing
        // string copy leaves a token that is still safe to destroy.
        m_type = type::eof;
        m_value.nil = nullptr;
        m_position = that.position();

        if (that.has_identifier())
        {
            new(static_cast<void *>(&m_value.identifier)) std::string(that.identifier());
        }
        else if (that.is_numeric())
        {
            m_value.number = that.number();
        }
        m_type = that.of();

        return *this;
    }

    token& token::operator=(token&& that)
    {
        if (this == &that)
        {
            return *this;
        }
        if (has_identifier())
        {
            m_value.identifier.~basic_string();
        }
        m_type = type::eof;
        m_value.nil = nullptr;
        m_position = that.position();

        if (that.has_identifier())
        {
            // Move from the member itself: identifier() returns a const
            // reference, and moving from that would silently copy.
            new(static_cast<void *>(&m_value.identifier))
                std::string(std::move(that.m_value.identifier));
        }
        else if (that.is_numeric())
        {
            m_value.number = that.number();
        }
        m_type = that.of();

        return *this;
    }

    token::type token::of() const noexcept
    {
        return m_type;
    }

    /// \throws std::bad_variant_access if the token carries no string.
    const std::string& token::identifier() const
    {
        if (!has_identifier())
        {
            throw std::bad_variant_access();
        }
        return m_value.identifier;
    }

    /// \throws std::bad_variant_access if the token carries no number.
    std::int32_t token::number() const
    {
        if (!is_numeric())
        {
            throw std::bad_variant_access();
        }
        return m_value.number;
    }

    const source_position& token::position() const noexcept
    {
        return m_position;
    }

    /// Whether the string member of the value is the active one.
    bool token::has_identifier() const noexcept
    {
        return of() == type::identifier
            || of() == type::term_operator
            || of() == type::factor_operator
            || of() == type::comparison_operator;
    }

    /// Whether the numeric member of the value is the active one.
    bool token::is_numeric() const noexcept
    {
        return of() == type::number
            || of() == type::boolean;
    }

    /// Human-readable name of the token type, used in diagnostics.
    std::string token::to_string() const
    {
        switch (this->m_type)
        {
            case type::number:
                return "«number»";
            case type::boolean:
                return "«boolean»";
            case type::term_operator:
                return "«term_operator»";
            case type::let:
                return "«const»";
            case type::identifier:
                return "«identifier»";
            case type::equals:
                return "«=»";
            case type::var:
                return "«var»";
            case type::semicolon:
                return "«;»";
            case type::left_paren:
                return "«(»";
            case type::right_paren:
                return "«)»";
            case type::dot:
                return "«.»";
            case type::comma:
                return "«,»";
            case type::factor_operator:
                // NOTE(review): this type also covers '/'; the other operator
                // classes print their class name instead of a symbol.
                return "«*»";
            case type::eof:
                return "«EOF»";
            case type::begin:
                return "«begin»";
            case type::end:
                return "«end»";
            case type::assignment:
                return "«:=»";
            case type::colon:
                return "«:»";
            case type::when:
                return "«if»";
            case type::then:
                return "«then»";
            case type::loop:
                return "«while»";
            case type::_do:
                return "«do»";
            case type::procedure:
                return "«proc»";
            case type::comparison_operator:
                return "«comparison_operator»";
        }
        assert(false);
        // Reached only when assertions are compiled out.
        return "«unknown»";
    }

    unexpected_character::unexpected_character(const std::string& character,
            const std::filesystem::path& path, const source::position position)
        : error(path, position), character(character)
    {
    }

    std::string unexpected_character::what() const
    {
        // Plain concatenation: a stream constructed from a string starts
        // writing at offset 0 and would overwrite the prefix.
        return "Unexpected character '" + character + "'";
    }

    unexpected_token::unexpected_token(const token& token, const std::filesystem::path& path)
        : error(path, token.position()), m_token(token)
    {
    }

    std::string unexpected_token::what() const
    {
        return "Unexpected token " + m_token.to_string();
    }

    /**
     * \param tokens Token sequence to iterate over.
     * \param last_position Position given to the synthetic end-of-file token.
     * \param path Source file name attached to the recorded errors.
     */
    lexer::lexer(std::vector<token>&& tokens, const position last_position,
            const std::filesystem::path& path)
        : tokens(std::move(tokens)), iterator(this->tokens.cbegin()),
        eof(token(token::type::eof, last_position)), source_file(path)
    {
    }

    lexer& lexer::operator++()
    {
        ++iterator;
        return *this;
    }

    const token& lexer::operator*() const
    {
        return *iterator;
    }

    const token *lexer::operator->() const
    {
        // &* is portable; .base() exists only on libstdc++ iterators.
        return &*iterator;
    }

    /// Current token, or the end-of-file token once the input is exhausted.
    const token& lexer::current() const noexcept
    {
        if (iterator == tokens.cend())
        {
            return this->eof;
        }
        return *iterator;
    }

    bool lexer::current(const token::type token_type) const noexcept
    {
        return current().of() == token_type;
    }

    /// Records \a expected as an unexpected token in the error list.
    void lexer::add_error(const token& expected)
    {
        m_errors.push_back(std::make_unique<unexpected_token>(expected, this->source_file));
    }

    /**
     * Consumes the current token if it has the requested type.
     *
     * \return The consumed token. Otherwise an empty optional, in which case
     *         an error for the current token has been recorded and nothing
     *         was consumed.
     */
    std::optional<std::reference_wrapper<const token>> lexer::advance(const token::type token_type)
    {
        if (iterator != tokens.cend() && iterator->of() == token_type)
        {
            return std::make_optional<>(std::cref(*iterator++));
        }
        add_error(current());
        return std::nullopt;
    }

    /// Token after the current one, or the end-of-file token if there is none.
    const token& lexer::look_ahead() const
    {
        // Check before stepping: incrementing a past-the-end iterator is
        // undefined.
        if (iterator == tokens.cend())
        {
            return eof;
        }
        auto next = iterator;
        ++next;

        if (next == tokens.cend())
        {
            return eof;
        }
        return *next;
    }

    bool lexer::look_ahead(const token::type token_type) const
    {
        return look_ahead().of() == token_type;
    }

    /// Like advance(), but only reports whether a token was consumed.
    bool lexer::skip(const token::type token_type)
    {
        return advance(token_type).has_value();
    }

    const std::list<std::unique_ptr<source_error>>& lexer::errors() const noexcept
    {
        return m_errors;
    }

    /**
     * Splits the source text into tokens.
     *
     * \param buffer Source text.
     * \param path Source file name, used for error reporting.
     * \return A lexer over the recognised tokens, or an unexpected_character
     *         error for the first character that starts no token.
     */
    result<lexer> lex(const std::string& buffer, const std::filesystem::path& path)
    {
        std::vector<token> tokens;
        auto [iterator, text_end] = text_iterators(buffer);

        while (iterator != text_end)
        {
            const char current = *iterator;
            const source_position current_position = iterator.position();
            const auto next = iterator + 1;
            // Marks the two-character operators "/=", "<=", ">=" and ":=".
            const bool equals_follows = next != text_end && *next == '=';

            if (current == ' ' || current == '\n')
            {
                // Separators produce no token.
            }
            // <cctype> functions need a value representable as unsigned char.
            else if (std::isdigit(static_cast<unsigned char>(current)))
            {
                // NOTE(review): every digit becomes a number token of its
                // own; multi-digit literals are not combined here.
                tokens.emplace_back(token::type::number,
                        static_cast<std::int32_t>(current - '0'), current_position);
            }
            else if (current == '=')
            {
                tokens.emplace_back(token::type::equals, current_position);
            }
            else if (current == '(')
            {
                tokens.emplace_back(token::type::left_paren, current_position);
            }
            else if (current == ')')
            {
                tokens.emplace_back(token::type::right_paren, current_position);
            }
            else if (current == ';')
            {
                tokens.emplace_back(token::type::semicolon, current_position);
            }
            else if (current == ',')
            {
                tokens.emplace_back(token::type::comma, current_position);
            }
            else if (current == '.')
            {
                tokens.emplace_back(token::type::dot, current_position);
            }
            else if (std::isalpha(static_cast<unsigned char>(current)))
            {
                // Collect the longest run of letters, then classify it.
                std::string word;
                auto word_end = iterator;

                while (word_end != text_end && std::isalpha(static_cast<unsigned char>(*word_end)))
                {
                    word.push_back(*word_end);
                    ++word_end;
                }
                if (word == "const")
                {
                    tokens.emplace_back(token::type::let, current_position);
                }
                else if (word == "var")
                {
                    tokens.emplace_back(token::type::var, current_position);
                }
                else if (word == "begin")
                {
                    tokens.emplace_back(token::type::begin, current_position);
                }
                else if (word == "end")
                {
                    tokens.emplace_back(token::type::end, current_position);
                }
                else if (word == "if")
                {
                    tokens.emplace_back(token::type::when, current_position);
                }
                else if (word == "then")
                {
                    tokens.emplace_back(token::type::then, current_position);
                }
                else if (word == "while")
                {
                    tokens.emplace_back(token::type::loop, current_position);
                }
                else if (word == "do")
                {
                    tokens.emplace_back(token::type::_do, current_position);
                }
                else if (word == "True")
                {
                    tokens.emplace_back(token::type::boolean, 1, current_position);
                }
                else if (word == "False")
                {
                    tokens.emplace_back(token::type::boolean, 0, current_position);
                }
                else if (word == "proc")
                {
                    // A keyword carries no payload.
                    tokens.emplace_back(token::type::procedure, current_position);
                }
                else
                {
                    tokens.emplace_back(token::type::identifier, word, current_position);
                }
                // The iterator already points past the word.
                iterator = word_end;
                continue;
            }
            else if (current == '+' || current == '-')
            {
                tokens.emplace_back(token::type::term_operator,
                        std::string(1, current), current_position);
            }
            else if (current == '/' && equals_follows)
            {
                // "/=" is stored as "n".
                tokens.emplace_back(token::type::comparison_operator, "n", current_position);
                ++iterator;
            }
            else if (current == '*' || current == '/')
            {
                tokens.emplace_back(token::type::factor_operator,
                        std::string(1, current), current_position);
            }
            else if (current == '<')
            {
                // "<=" is stored as "l", a lone "<" as itself.
                if (equals_follows)
                {
                    ++iterator;
                    tokens.emplace_back(token::type::comparison_operator, "l", current_position);
                }
                else
                {
                    tokens.emplace_back(token::type::comparison_operator, "<", current_position);
                }
            }
            else if (current == '>')
            {
                // ">=" is stored as "g", a lone ">" as itself.
                if (equals_follows)
                {
                    ++iterator;
                    tokens.emplace_back(token::type::comparison_operator, "g", current_position);
                }
                else
                {
                    tokens.emplace_back(token::type::comparison_operator, ">", current_position);
                }
            }
            else if (current == ':' && equals_follows)
            {
                tokens.emplace_back(token::type::assignment, current_position);
                ++iterator;
            }
            else if (current == ':')
            {
                tokens.emplace_back(token::type::colon, current_position);
            }
            else
            {
                return result<lexer>(unexpected_character{
                        std::string(1, current), path, current_position });
            }
            ++iterator;
        }
        return result<lexer>(std::in_place, std::move(tokens), iterator.position(), path);
    }
}