#include "elna/lexer.hpp"

#include <cctype>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iterator>
#include <limits>
#include <new>
#include <string>
#include <utility>
#include <vector>

namespace elna
{
namespace lex
{
    using source_position = elna::source::position;
    using source_error = elna::source::error;
    using source_result = elna::source::result<std::vector<Token>>;

    namespace
    {
        /// Tells whether a token of the given type stores a heap-allocated
        /// string in its value (identifiers and both operator kinds).
        bool owns_text(const Token::type of) noexcept
        {
            return of == Token::type::identifier
                || of == Token::type::term_operator
                || of == Token::type::factor_operator;
        }

        /// Returns a malloc'ed, null-terminated copy of \a text, to be released
        /// with std::free. A null \a text (e.g. the value of a moved-from
        /// token) yields a null result.
        ///
        /// \throws std::bad_alloc if the allocation fails.
        const char *duplicate_text(const char *text)
        {
            if (text == nullptr)
            {
                return nullptr;
            }
            const std::size_t size_with_terminator = std::strlen(text) + 1;
            char *copy = static_cast<char *>(std::malloc(size_with_terminator));

            if (copy == nullptr)
            {
                throw std::bad_alloc();
            }
            std::memcpy(copy, text, size_with_terminator);
            return copy;
        }

        /// <cctype> classifiers have undefined behaviour for negative values,
        /// so plain char has to go through unsigned char first.
        bool is_digit(const char character) noexcept
        {
            return std::isdigit(static_cast<unsigned char>(character)) != 0;
        }

        /// \copydoc is_digit
        bool is_letter(const char character) noexcept
        {
            return std::isalpha(static_cast<unsigned char>(character)) != 0;
        }
    }

    /// Wraps the text to be tokenized.
    source::source(const std::string& buffer)
        : m_buffer(buffer)
    {
    }

    /// Iterator to the first character. The start position is whatever the
    /// iterator constructor's default argument (declared in the header) says.
    source::const_iterator source::begin() const
    {
        return source::const_iterator(std::cbegin(m_buffer));
    }

    /// Past-the-end iterator. Its position is a placeholder (0, 0); iterator
    /// comparison looks only at the underlying buffer iterator.
    source::const_iterator source::end() const
    {
        source_position end_position{ 0, 0 };

        return source::const_iterator(std::cend(m_buffer), end_position);
    }

    source::const_iterator::const_iterator(std::string::const_iterator buffer,
            const source_position start_position)
        : m_buffer(buffer), m_position(start_position)
    {
    }

    /// Line and column of the character the iterator currently points at.
    const source_position& source::const_iterator::position() const noexcept
    {
        return this->m_position;
    }

    source::const_iterator::reference source::const_iterator::operator*() const noexcept
    {
        return *m_buffer;
    }

    source::const_iterator::pointer source::const_iterator::operator->() const noexcept
    {
        // Taking the address of the dereferenced iterator is portable, unlike
        // the libstdc++-only base() member.
        return &*m_buffer;
    }

    /// Advances by one character. Stepping over a newline moves the position
    /// to column 1 of the next line, anything else bumps the column.
    source::const_iterator& source::const_iterator::operator++()
    {
        if (*this->m_buffer == '\n')
        {
            this->m_position.column = 1;
            ++this->m_position.line;
        }
        else
        {
            ++this->m_position.column;
        }
        std::advance(this->m_buffer, 1);

        return *this;
    }

    /// NOTE: this overload returns a reference, so it cannot hand back a copy
    /// of the previous state; it advances and returns *this exactly like the
    /// prefix form. Changing that requires changing the declaration as well.
    source::const_iterator& source::const_iterator::operator++(int)
    {
        ++(*this);
        return *this;
    }

    bool source::const_iterator::operator==(const source::const_iterator& that) const noexcept
    {
        return this->m_buffer == that.m_buffer;
    }

    bool source::const_iterator::operator!=(const source::const_iterator& that) const noexcept
    {
        return !(*this == that);
    }

    /// Creates a token carrying text; \a value is copied, the token owns the copy.
    Token::Token(const type of, const char *value, source_position position)
        : m_type(of), m_position(position)
    {
        m_value.identifier = duplicate_text(value);
    }

    /// Creates a token carrying an integer value.
    Token::Token(const type of, std::int32_t number, source_position position)
        : m_type(of), m_position(position)
    {
        m_value.number = number;
    }

    /// Creates a token without a payload.
    Token::Token(const type of, source_position position)
        : m_type(of), m_position(position)
    {
        if (owns_text(m_type))
        {
            // Keep the destructor's std::free well-defined even if a
            // text-owning type is constructed without text.
            m_value.identifier = nullptr;
        }
    }

    /// Deep-copies \a that. The value is initialised directly instead of going
    /// through operator=, which releases the previous (here: nonexistent) text.
    Token::Token(const Token& that)
        : m_type(that.of()), m_position(that.position())
    {
        if (owns_text(m_type))
        {
            m_value.identifier = duplicate_text(that.identifier());
        }
        else if (m_type == type::number)
        {
            m_value.number = that.number();
        }
    }

    /// Takes over the payload of \a that; a text payload is stolen and the
    /// source is left with a null text pointer.
    Token::Token(Token&& that)
        : m_type(that.of()), m_position(that.position())
    {
        if (owns_text(m_type))
        {
            m_value.identifier = that.m_value.identifier;
            that.m_value.identifier = nullptr;
        }
        else if (m_type == type::number)
        {
            m_value.number = that.number();
        }
    }

    Token::~Token()
    {
        if (owns_text(m_type))
        {
            std::free(const_cast<char *>(m_value.identifier));
        }
    }

    /// Copy assignment: releases the text this token owned before taking a
    /// deep copy of \a that. Self-assignment is a no-op.
    Token& Token::operator=(const Token& that)
    {
        if (this == &that)
        {
            return *this;
        }
        // Duplicate first so a failed allocation leaves *this untouched.
        const char *text_copy = owns_text(that.of()) ? duplicate_text(that.identifier()) : nullptr;

        if (owns_text(m_type))
        {
            std::free(const_cast<char *>(m_value.identifier));
        }
        m_type = that.of();
        m_position = that.position();

        if (owns_text(m_type))
        {
            m_value.identifier = text_copy;
        }
        else if (m_type == type::number)
        {
            m_value.number = that.number();
        }
        return *this;
    }

    /// Move assignment: releases the text this token owned before stealing
    /// the payload of \a that. Self-assignment is a no-op.
    Token& Token::operator=(Token&& that)
    {
        if (this == &that)
        {
            return *this;
        }
        if (owns_text(m_type))
        {
            std::free(const_cast<char *>(m_value.identifier));
        }
        m_type = that.of();
        m_position = that.position();

        if (owns_text(m_type))
        {
            m_value.identifier = that.m_value.identifier;
            that.m_value.identifier = nullptr;
        }
        else if (m_type == type::number)
        {
            m_value.number = that.number();
        }
        return *this;
    }

    Token::type Token::of() const noexcept
    {
        return m_type;
    }

    /// Text payload; meaningful only for identifier and operator tokens.
    const char *Token::identifier() const noexcept
    {
        return m_value.identifier;
    }

    /// Integer payload; meaningful only for number tokens.
    std::int32_t Token::number() const noexcept
    {
        return m_value.number;
    }

    const source_position& Token::position() const noexcept
    {
        return m_position;
    }

    /// Splits \a buffer into tokens.
    ///
    /// Spaces and newlines are skipped. Recognised are: decimal number
    /// literals, the punctuation `= ( ) ; , ! .`, the words `const` (token
    /// type `let`) and `var`, other alphabetic words (identifiers), `+ -`
    /// (term operators) and `* /` (factor operators).
    ///
    /// \return The token list, or an error carrying the position of the
    ///         offending character or literal.
    source_result lex(const char *buffer)
    {
        std::vector<Token> tokens;
        source input{ buffer };
        const auto last = input.end();

        for (auto iterator = input.begin(); iterator != last;)
        {
            const char current = *iterator;

            if (current == ' ' || current == '\n')
            {
                // Whitespace separates tokens but produces none.
            }
            else if (is_digit(current))
            {
                // Consume the whole run of digits as a single literal.
                const source_position start = iterator.position();
                std::int64_t value = 0;
                auto cursor = iterator;

                while (cursor != last && is_digit(*cursor))
                {
                    value = value * 10 + (*cursor - '0');
                    if (value > std::numeric_limits<std::int32_t>::max())
                    {
                        return source_result("Number literal is too large", start);
                    }
                    ++cursor;
                }
                tokens.emplace_back(Token::type::number, static_cast<std::int32_t>(value), start);
                iterator = cursor;
                continue;
            }
            else if (current == '=')
            {
                tokens.emplace_back(Token::type::equals, iterator.position());
            }
            else if (current == '(')
            {
                tokens.emplace_back(Token::type::left_paren, iterator.position());
            }
            else if (current == ')')
            {
                tokens.emplace_back(Token::type::right_paren, iterator.position());
            }
            else if (current == ';')
            {
                tokens.emplace_back(Token::type::semicolon, iterator.position());
            }
            else if (current == ',')
            {
                tokens.emplace_back(Token::type::comma, iterator.position());
            }
            else if (current == '!')
            {
                tokens.emplace_back(Token::type::bang, iterator.position());
            }
            else if (current == '.')
            {
                tokens.emplace_back(Token::type::dot, iterator.position());
            }
            else if (is_letter(current))
            {
                std::string word;
                auto cursor = iterator;

                while (cursor != last && is_letter(*cursor))
                {
                    word.push_back(*cursor);
                    ++cursor;
                }
                if (word == "const")
                {
                    tokens.emplace_back(Token::type::let, iterator.position());
                }
                else if (word == "var")
                {
                    tokens.emplace_back(Token::type::var, iterator.position());
                }
                else
                {
                    tokens.emplace_back(Token::type::identifier, word.c_str(), iterator.position());
                }
                // The cursor already stands on the first character after the word.
                iterator = cursor;
                continue;
            }
            else if (current == '+' || current == '-')
            {
                const std::string symbol(1, current);

                tokens.emplace_back(Token::type::term_operator, symbol.c_str(), iterator.position());
            }
            else if (current == '*' || current == '/')
            {
                const std::string symbol(1, current);

                tokens.emplace_back(Token::type::factor_operator, symbol.c_str(), iterator.position());
            }
            else
            {
                return source_result("Unexpected next character", iterator.position());
            }
            ++iterator;
        }
        return source_result(tokens);
    }
}
}