#include "elna/source/lexer.hpp"

#include <cctype>
#include <cstdint>
#include <new>
#include <sstream>
#include <string>
#include <utility>
#include <variant>
#include <vector>

namespace elna::source
{
    // Aliases that stay usable inside classes where a member function named
    // position() hides the type of the same name.
    using source_position = elna::source::position;
    using source_error = elna::source::error;
    using source_result = elna::source::result<std::vector<token>>;

    /**
     * Creates the begin/end iterator pair over the given text.
     *
     * The begin iterator uses text_iterator's default start position, the end
     * iterator is tagged with position {0, 0}. Both iterators refer into
     * \a buffer, so the buffer has to outlive them.
     *
     * \param buffer Text to iterate over.
     * \return Pair of (begin, end) iterators.
     */
    std::pair<text_iterator, text_iterator> text_iterators(const std::string &buffer)
    {
        return std::make_pair<>(text_iterator(std::cbegin(buffer)),
                text_iterator(std::cend(buffer), position{0, 0}));
    }

    /**
     * \param buffer Position in the underlying string.
     * \param start_position Source position associated with \a buffer.
     */
    text_iterator::text_iterator(std::string::const_iterator buffer,
            const source_position start_position)
        : m_buffer(buffer), m_position(start_position)
    {
    }

    /// Returns the source position of the character the iterator points to.
    const source_position& text_iterator::position() const noexcept
    {
        return this->m_position;
    }

    /// Returns the current character.
    text_iterator::reference text_iterator::operator*() const noexcept
    {
        return *m_buffer;
    }

    /// Returns a pointer to the current character.
    text_iterator::pointer text_iterator::operator->() const noexcept
    {
        // &* is portable; iterator::base() is a libstdc++ implementation detail.
        return &*m_buffer;
    }

    /**
     * Advances to the next character and updates the tracked position:
     * a line feed starts a new line at column 1, anything else moves one
     * column to the right.
     */
    text_iterator& text_iterator::operator++()
    {
        if (*this->m_buffer == '\n')
        {
            this->m_position.column = 1;
            ++this->m_position.line;
        }
        else
        {
            ++this->m_position.column;
        }
        std::advance(this->m_buffer, 1);

        return *this;
    }

    /**
     * Post-increment.
     *
     * NOTE(review): the return type is a reference, so the previous state
     * cannot be returned without changing the declaration in the header; this
     * operator therefore behaves like pre-increment and yields the advanced
     * iterator.
     */
    text_iterator& text_iterator::operator++(int)
    {
        ++(*this);
        return *this;
    }

    /// Iterators are equal if they point to the same place in the buffer;
    /// the tracked source position is not compared.
    bool text_iterator::operator==(const text_iterator& that) const noexcept
    {
        return this->m_buffer == that.m_buffer;
    }

    bool text_iterator::operator!=(const text_iterator& that) const noexcept
    {
        return !(*this == that);
    }

    /// Constructs an empty value (the nil member is active).
    token::value::value()
        : nil(nullptr)
    {
    }

    /// Constructs a value holding a number.
    token::value::value(std::int32_t value)
        : number(value)
    {
    }

    /// Constructs a value holding a string.
    token::value::value(const std::string& value)
        : identifier(value)
    {
    }

    /// Intentionally empty: the union does not know its active member,
    /// the owning token destroys the string when needed.
    token::value::~value()
    {
    }

    /// Constructs a token carrying a string payload (identifier or operator).
    token::token(const type of, const std::string& value, const source_position position)
        : m_type(of), m_value(value), m_position(position)
    {
    }

    /// Constructs a token carrying a numeric payload.
    token::token(const type of, std::int32_t number, const source_position position)
        : m_type(of), m_value(number), m_position(position)
    {
    }

    /// Constructs a token without payload.
    token::token(const type of, source_position position)
        : m_type(of), m_position(position)
    {
    }

    /**
     * Copy constructor.
     *
     * m_value starts out as nil, so a string payload has to be constructed
     * in place; assigning to the not yet constructed std::string member
     * would be undefined behaviour.
     */
    token::token(const token& that)
        : m_type(that.of()), m_position(that.position())
    {
        if (that.has_identifier())
        {
            new (&m_value.identifier) std::string(that.m_value.identifier);
        }
        else if (that.of() == type::number)
        {
            m_value.number = that.m_value.number;
        }
    }

    /**
     * Move constructor. The string payload, if any, is moved out of \a that;
     * \a that keeps its type and is left with a valid, unspecified string.
     */
    token::token(token&& that)
        : m_type(that.of()), m_position(that.position())
    {
        if (that.has_identifier())
        {
            new (&m_value.identifier) std::string(std::move(that.m_value.identifier));
        }
        else if (that.of() == type::number)
        {
            m_value.number = that.m_value.number;
        }
    }

    /// Destroys the string payload if this token type carries one.
    token::~token()
    {
        if (has_identifier())
        {
            m_value.identifier.~basic_string();
        }
    }

    /**
     * Copy assignment. Switches the active union member correctly: the
     * string is destroyed when it is replaced by a number or nil, and is
     * constructed in place when this token did not hold a string before.
     */
    token& token::operator=(const token& that)
    {
        if (this == &that)
        {
            return *this;
        }
        if (that.has_identifier())
        {
            // Copy first: if this throws, the token has not been touched yet.
            std::string payload{ that.m_value.identifier };

            if (has_identifier())
            {
                m_value.identifier = std::move(payload);
            }
            else
            {
                new (&m_value.identifier) std::string(std::move(payload));
            }
        }
        else
        {
            if (has_identifier())
            {
                m_value.identifier.~basic_string();
            }
            if (that.of() == type::number)
            {
                m_value.number = that.m_value.number;
            }
            else
            {
                m_value.nil = nullptr;
            }
        }
        // The type is updated last, has_identifier() above has to see the
        // old type.
        m_type = that.of();
        m_position = that.position();

        return *this;
    }

    /**
     * Move assignment. Same member switching as the copy assignment, but the
     * string payload is moved from \a that (through the non-const member,
     * moving the const reference returned by identifier() would copy).
     */
    token& token::operator=(token&& that)
    {
        if (this == &that)
        {
            return *this;
        }
        if (that.has_identifier())
        {
            if (has_identifier())
            {
                m_value.identifier = std::move(that.m_value.identifier);
            }
            else
            {
                new (&m_value.identifier) std::string(std::move(that.m_value.identifier));
            }
        }
        else
        {
            if (has_identifier())
            {
                m_value.identifier.~basic_string();
            }
            if (that.of() == type::number)
            {
                m_value.number = that.m_value.number;
            }
            else
            {
                m_value.nil = nullptr;
            }
        }
        m_type = that.of();
        m_position = that.position();

        return *this;
    }

    /// Returns the token type.
    token::type token::of() const noexcept
    {
        return m_type;
    }

    /**
     * Returns the string payload.
     *
     * \throws std::bad_variant_access if the token type has no string payload.
     */
    const std::string& token::identifier() const
    {
        if (!has_identifier())
        {
            throw std::bad_variant_access();
        }
        return m_value.identifier;
    }

    /**
     * Returns the numeric payload.
     *
     * \throws std::bad_variant_access if the token is not a number.
     */
    std::int32_t token::number() const
    {
        if (of() != type::number)
        {
            throw std::bad_variant_access();
        }
        return m_value.number;
    }

    /// Returns the position of the token in the source text.
    const source_position& token::position() const noexcept
    {
        return m_position;
    }

    /// Tells whether the token carries a string payload: identifiers and
    /// term/factor operators do.
    bool token::has_identifier() const noexcept
    {
        return of() == type::identifier
            || of() == type::term_operator
            || of() == type::factor_operator;
    }

    /**
     * \param character The character that could not be lexed.
     * \param position Where it was found.
     */
    unexpected_character::unexpected_character(const std::string& character,
            const source::position position)
        : error(position), character(character)
    {
    }

    /// Returns the error message: Unexpected character '<character>'.
    std::string unexpected_character::what() const
    {
        // Plain concatenation; a stringstream constructed from the prefix
        // starts writing at offset 0 and overwrites the prefix.
        return "Unexpected character '" + character + "'";
    }

    /**
     * Splits the source text into tokens.
     *
     * Spaces and line feeds are skipped. Each decimal digit becomes its own
     * number token, a run of letters becomes a keyword ("const", "var") or an
     * identifier, and the punctuation characters = ( ) ; , ! . + - * / become
     * the corresponding tokens.
     *
     * NOTE(review): multi-digit numbers are emitted as one token per digit,
     * and tabs or carriage returns are reported as unexpected characters;
     * confirm whether this is intended.
     *
     * \param buffer Source text.
     * \return The token list, or an unexpected_character error for the first
     *         character that is not recognized.
     */
    source_result lex(const std::string& buffer)
    {
        std::vector<token> tokens;
        auto [iterator, text_end] = text_iterators(buffer);

        while (iterator != text_end)
        {
            const char current = *iterator;
            // <cctype> functions require a value representable as unsigned char.
            const auto current_byte = static_cast<unsigned char>(current);

            if (current == ' ' || current == '\n')
            {
                // Whitespace separates tokens but produces none.
            }
            else if (std::isdigit(current_byte))
            {
                tokens.emplace_back(
                        token::type::number,
                        static_cast<std::int32_t>(current - '0'),
                        iterator.position()
                );
            }
            else if (current == '=')
            {
                tokens.emplace_back(token::type::equals, iterator.position());
            }
            else if (current == '(')
            {
                tokens.emplace_back(token::type::left_paren, iterator.position());
            }
            else if (current == ')')
            {
                tokens.emplace_back(token::type::right_paren, iterator.position());
            }
            else if (current == ';')
            {
                tokens.emplace_back(token::type::semicolon, iterator.position());
            }
            else if (current == ',')
            {
                tokens.emplace_back(token::type::comma, iterator.position());
            }
            else if (current == '!')
            {
                tokens.emplace_back(token::type::bang, iterator.position());
            }
            else if (current == '.')
            {
                tokens.emplace_back(token::type::dot, iterator.position());
            }
            else if (std::isalpha(current_byte))
            {
                std::string word;
                auto word_end = iterator;

                while (word_end != text_end
                        && std::isalpha(static_cast<unsigned char>(*word_end)))
                {
                    word.push_back(*word_end);
                    ++word_end;
                }
                // The token is positioned at the first letter of the word.
                if (word == "const")
                {
                    tokens.emplace_back(token::type::let, iterator.position());
                }
                else if (word == "var")
                {
                    tokens.emplace_back(token::type::var, iterator.position());
                }
                else
                {
                    tokens.emplace_back(token::type::identifier, word, iterator.position());
                }
                // word_end already points past the word, skip the increment below.
                iterator = word_end;
                continue;
            }
            else if (current == '+' || current == '-')
            {
                tokens.emplace_back(token::type::term_operator,
                        std::string(1, current), iterator.position());
            }
            else if (current == '*' || current == '/')
            {
                tokens.emplace_back(token::type::factor_operator,
                        std::string(1, current), iterator.position());
            }
            else
            {
                return source_result(unexpected_character{
                        std::string(1, current), iterator.position() });
            }
            ++iterator;
        }
        return source_result(std::move(tokens));
    }
}