#include "elna/lexer.hpp"

#include <cctype>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iterator>
#include <limits>
#include <new>
#include <string>
#include <utility>
#include <vector>

namespace elna
{
namespace
{
    /**
     * Creates a malloc-allocated copy of a NUL-terminated string.
     *
     * \param value String to copy; may be nullptr (e.g. the payload of a
     *              moved-from token).
     * \return Newly allocated copy, or nullptr if \a value is nullptr.
     * \throws std::bad_alloc if the allocation fails.
     */
    char *duplicate_string(const char *value)
    {
        if (value == nullptr)
        {
            return nullptr;
        }
        const std::size_t value_length = std::strlen(value);
        char *copy = static_cast<char *>(std::malloc(value_length + 1));

        if (copy == nullptr)
        {
            throw std::bad_alloc();
        }
        // + 1 copies the terminating NUL together with the characters.
        std::memcpy(copy, value, value_length + 1);
        return copy;
    }

    /// Tells whether tokens of the given type own a heap-allocated string.
    bool owns_string(const Token::Type type) noexcept
    {
        return type == Token::TOKEN_IDENTIFIER || type == Token::TOKEN_OPERATOR;
    }

    // The <cctype> classifiers have undefined behavior for negative values
    // other than EOF, so plain char is converted to unsigned char first.
    bool is_digit(const char character) noexcept
    {
        return std::isdigit(static_cast<unsigned char>(character)) != 0;
    }

    bool is_alpha(const char character) noexcept
    {
        return std::isalpha(static_cast<unsigned char>(character)) != 0;
    }
}

    /// Wraps the given text so it can be iterated with position tracking.
    source::source(const std::string& buffer)
        : m_buffer(buffer)
    {
    }

    /// Returns an iterator to the first character of the text.
    source::const_iterator source::begin() const
    {
        return source::const_iterator(std::cbegin(m_buffer));
    }

    /**
     * Returns the past-the-end iterator.
     *
     * Its position is a { 0, 0 } placeholder; iterators are compared by their
     * buffer location only, so the placeholder never affects comparisons.
     */
    source::const_iterator source::end() const
    {
        Position end_position{ 0, 0 };

        return source::const_iterator(std::cend(m_buffer), end_position);
    }

    /// Creates an iterator at the given buffer location and source position.
    source::const_iterator::const_iterator(std::string::const_iterator buffer,
            const Position start_position)
        : m_buffer(buffer), m_position(start_position)
    {
    }

    /// Returns the position of the character the iterator refers to.
    const Position& source::const_iterator::position() const noexcept
    {
        return this->m_position;
    }

    /// Returns the current character. Must not be called on the end iterator.
    source::const_iterator::reference source::const_iterator::operator*() const noexcept
    {
        return *m_buffer;
    }

    /// Returns a pointer to the current character. Must not be called on the
    /// end iterator.
    source::const_iterator::pointer source::const_iterator::operator->() const noexcept
    {
        // base() is not part of the standard iterator interface; taking the
        // address of the referenced character is portable.
        return &*m_buffer;
    }

    /**
     * Advances to the next character and updates the tracked position: a
     * newline resets the column to 1 and increments the line, any other
     * character increments the column.
     */
    source::const_iterator& source::const_iterator::operator++()
    {
        if (*this->m_buffer == '\n')
        {
            this->m_position.column = 1;
            ++this->m_position.line;
        }
        else
        {
            ++this->m_position.column;
        }
        std::advance(this->m_buffer, 1);

        return *this;
    }

    /**
     * Advances the iterator.
     *
     * NOTE(review): the return type is a reference, so the pre-increment copy
     * cannot be returned without dangling. This operator therefore yields the
     * already advanced iterator, exactly like the prefix form. Returning the
     * old value requires changing the declaration to return by value.
     */
    source::const_iterator& source::const_iterator::operator++(int)
    {
        ++(*this);

        return *this;
    }

    /// Iterators are equal when they refer to the same buffer location; the
    /// tracked position is not compared.
    bool source::const_iterator::operator==(const source::const_iterator& that) const noexcept
    {
        return this->m_buffer == that.m_buffer;
    }

    bool source::const_iterator::operator!=(const source::const_iterator& that) const noexcept
    {
        return !(*this == that);
    }

    /**
     * Creates a token that carries a string payload (identifier or operator).
     *
     * \param of       Token type.
     * \param value    NUL-terminated text; the token stores its own copy.
     * \param position Position of the token in the source text.
     * \throws std::bad_alloc if the copy cannot be allocated.
     */
    Token::Token(const Type of, const char *value, Position position)
        : m_type(of), m_position(position)
    {
        m_value.identifier = duplicate_string(value);
    }

    /// Creates a token that carries a numeric payload.
    Token::Token(const Type of, std::int32_t number, Position position)
        : m_type(of), m_position(position)
    {
        m_value.number = number;
    }

    /// Creates a token without a payload.
    Token::Token(const Type of, Position position)
        : m_type(of), m_position(position)
    {
        // Keeps the destructor well-defined even if a string-owning type is
        // passed to this constructor by mistake.
        m_value.identifier = nullptr;
    }

    /// Copy-constructs a token; string payloads are deep-copied.
    Token::Token(const Token& that)
        : m_type(that.of()), m_position(that.position())
    {
        if (owns_string(m_type))
        {
            m_value.identifier = duplicate_string(that.identifier());
        }
        else if (m_type == TOKEN_NUMBER)
        {
            m_value.number = that.number();
        }
        else
        {
            m_value.identifier = nullptr;
        }
    }

    /// Move-constructs a token; a string payload is taken over and the source
    /// token is left with a null string.
    Token::Token(Token&& that)
        : m_type(that.of()), m_position(that.position())
    {
        if (owns_string(m_type))
        {
            m_value.identifier = that.m_value.identifier;
            that.m_value.identifier = nullptr;
        }
        else if (m_type == TOKEN_NUMBER)
        {
            m_value.number = that.number();
        }
        else
        {
            m_value.identifier = nullptr;
        }
    }

    /// Releases the string payload of identifier and operator tokens.
    Token::~Token()
    {
        if (m_type == TOKEN_IDENTIFIER || m_type == TOKEN_OPERATOR)
        {
            std::free(const_cast<char *>(m_value.identifier));
        }
    }

    /**
     * Copy-assigns a token.
     *
     * The copy is made first and then moved in, so the previous payload is
     * released only after the new one has been allocated successfully.
     */
    Token& Token::operator=(const Token& that)
    {
        if (this != &that)
        {
            Token copy(that);

            *this = std::move(copy);
        }
        return *this;
    }

    /**
     * Move-assigns a token.
     *
     * The string currently owned by this token (if any) is freed before the
     * payload of \a that is taken over. The target must be a constructed
     * token; raw storage has to be initialized with placement-new instead.
     */
    Token& Token::operator=(Token&& that)
    {
        if (this == &that)
        {
            return *this;
        }
        if (owns_string(m_type))
        {
            std::free(const_cast<char *>(m_value.identifier));
        }
        m_type = that.of();
        m_position = that.position();

        if (owns_string(m_type))
        {
            m_value.identifier = that.m_value.identifier;
            that.m_value.identifier = nullptr;
        }
        else if (m_type == TOKEN_NUMBER)
        {
            m_value.number = that.number();
        }
        else
        {
            m_value.identifier = nullptr;
        }
        return *this;
    }

    /// Returns the token type.
    Token::Type Token::of() const noexcept
    {
        return m_type;
    }

    /// Returns the string payload; meaningful only for identifier and
    /// operator tokens.
    const char *Token::identifier() const noexcept
    {
        return m_value.identifier;
    }

    /// Returns the numeric payload; meaningful only for number tokens.
    std::int32_t Token::number() const noexcept
    {
        return m_value.number;
    }

    /// Returns the position of the token in the source text.
    const Position& Token::position() const noexcept
    {
        return m_position;
    }

    /**
     * Splits the source text into tokens.
     *
     * Spaces and newlines separate tokens and are skipped. A run of decimal
     * digits forms one number token, a run of letters forms a keyword
     * ("const", "var") or an identifier, and "+" / "-" form operator tokens.
     *
     * \param buffer        NUL-terminated source text.
     * \param compile_error Receives the error description on failure.
     * \param length        Receives the number of tokens (0 on failure).
     * \return Array of \a length tokens allocated with malloc, or nullptr on
     *         failure. The array is non-null even when no tokens were found.
     *         NOTE(review): presumably the caller destroys the tokens and
     *         frees the array; confirm against the call sites.
     */
    Token *lex(const char *buffer, CompileError *compile_error, std::size_t *length)
    {
        std::vector<Token> tokens;
        // Named string: if source only keeps a reference to its argument, a
        // temporary created from the char pointer would dangle immediately.
        const std::string text{ buffer };
        source input{ text };
        const auto input_end = input.end();

        for (auto iterator = input.begin(); iterator != input_end;)
        {
            const char character = *iterator;
            const Position start = iterator.position();

            if (character == ' ' || character == '\n')
            {
                ++iterator;
            }
            else if (is_digit(character))
            {
                std::int32_t number = 0;

                while (iterator != input_end && is_digit(*iterator))
                {
                    const std::int32_t digit = *iterator - '0';

                    // Reject the literal before number * 10 + digit overflows.
                    if (number > (std::numeric_limits<std::int32_t>::max() - digit) / 10)
                    {
                        *compile_error = CompileError("Integer literal is too large", start);
                        *length = 0;
                        return nullptr;
                    }
                    number = number * 10 + digit;
                    ++iterator;
                }
                tokens.emplace_back(Token::TOKEN_NUMBER, number, start);
            }
            else if (is_alpha(character))
            {
                std::string word;

                while (iterator != input_end && is_alpha(*iterator))
                {
                    word.push_back(*iterator);
                    ++iterator;
                }
                if (word == "const")
                {
                    tokens.emplace_back(Token::TOKEN_LET, start);
                }
                else if (word == "var")
                {
                    tokens.emplace_back(Token::TOKEN_VAR, start);
                }
                else
                {
                    tokens.emplace_back(Token::TOKEN_IDENTIFIER, word.c_str(), start);
                }
            }
            else
            {
                switch (character)
                {
                    case '=':
                        tokens.emplace_back(Token::TOKEN_EQUALS, start);
                        break;
                    case '(':
                        tokens.emplace_back(Token::TOKEN_LEFT_PAREN, start);
                        break;
                    case ')':
                        tokens.emplace_back(Token::TOKEN_RIGHT_PAREN, start);
                        break;
                    case ';':
                        tokens.emplace_back(Token::TOKEN_SEMICOLON, start);
                        break;
                    case ',':
                        tokens.emplace_back(Token::TOKEN_COMMA, start);
                        break;
                    case '!':
                        tokens.emplace_back(Token::TOKEN_BANG, start);
                        break;
                    case '.':
                        tokens.emplace_back(Token::TOKEN_DOT, start);
                        break;
                    case '+':
                    case '-':
                    {
                        const char operator_text[] = { character, '\0' };

                        tokens.emplace_back(Token::TOKEN_OPERATOR, operator_text, start);
                        break;
                    }
                    default:
                        *compile_error = CompileError("Unexpected next character", start);
                        *length = 0;
                        return nullptr;
                }
                ++iterator;
            }
        }
        // One spare slot is allocated, as before, so the request is never
        // zero-sized and an empty input still yields a non-null result.
        Token *target = static_cast<Token *>(std::malloc((tokens.size() + 1) * sizeof(Token)));

        if (target == nullptr)
        {
            *compile_error = CompileError("Out of memory", Position{ 0, 0 });
            *length = 0;
            return nullptr;
        }
        std::size_t count = 0;

        for (auto& token : tokens)
        {
            // malloc returns raw storage, so the tokens are constructed in
            // place; assigning to them would operate on uninitialized objects.
            ::new (static_cast<void *>(target + count)) Token(std::move(token));
            ++count;
        }
        *length = count;

        return target;
    }
}