elna/source/lexer.cpp

276 lines
7.6 KiB
C++
Raw Normal View History

2024-02-22 21:29:25 +01:00
#include "elna/lexer.hpp"

#include <cctype>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <new>
2024-02-22 21:29:25 +01:00
namespace elna
{
2024-03-03 13:11:39 +01:00
namespace lex
{
// Local shorthands for names that live in the elna::source namespace.
// source_position is what the iterator and every Token carry as their location.
using source_position = elna::source::position;
// Not referenced in the visible part of this file.
using source_error = elna::source::error;
// Return type of lex(): built either from a token vector or from an error
// message together with a position.
using source_result = elna::source::result<std::vector<Token>>;
2024-02-22 21:29:25 +01:00
source::source(const std::string& buffer)
: m_buffer(buffer)
{
}
/**
 * @return Iterator placed on the first character of the stored text, using
 *         the iterator's default start position.
 */
source::const_iterator source::begin() const
{
    return const_iterator(m_buffer.cbegin());
}
/**
 * @return Past-the-end iterator. Its position is the placeholder { 0, 0 };
 *         iterator comparison only looks at the underlying string iterator.
 */
source::const_iterator source::end() const
{
    return const_iterator(m_buffer.cend(), source_position{ 0, 0 });
}
2024-02-25 15:16:19 +01:00
source::const_iterator::const_iterator(std::string::const_iterator buffer,
2024-03-03 13:11:39 +01:00
const source_position start_position)
2024-02-22 21:29:25 +01:00
: m_buffer(buffer), m_position(start_position)
{
}
2024-03-03 13:11:39 +01:00
/**
 * @return Position tracked for the character the iterator currently refers to.
 */
const source_position& source::const_iterator::position() const noexcept
{
    return m_position;
}
/**
 * @return The character the wrapped string iterator refers to.
 */
source::const_iterator::reference source::const_iterator::operator*() const noexcept
{
    return *this->m_buffer;
}
/**
 * Gives pointer access to the current character.
 *
 * The address is obtained by dereferencing the wrapped iterator rather than
 * by calling base(): base() is an implementation detail of particular
 * standard libraries and not part of the portable interface of
 * std::string::const_iterator. As with operator*, the iterator has to be
 * dereferenceable.
 *
 * @return Address of the character the iterator refers to.
 */
source::const_iterator::pointer source::const_iterator::operator->() const noexcept
{
    return &*m_buffer;
}
/**
 * Steps to the next character and keeps the tracked position in sync.
 *
 * Leaving a line feed moves to the next line and resets the column to 1;
 * leaving any other character only advances the column.
 *
 * @return This iterator after the step.
 */
source::const_iterator& source::const_iterator::operator++()
{
    const bool leaving_line_feed = (*m_buffer == '\n');

    if (leaving_line_feed)
    {
        ++m_position.line;
        m_position.column = 1;
    }
    else
    {
        ++m_position.column;
    }
    ++m_buffer;

    return *this;
}
/**
 * Postfix increment.
 *
 * NOTE(review): the return type is a reference, so the value from before the
 * step cannot be handed back (a reference to a local copy would dangle).
 * The operator therefore behaves exactly like the prefix form and yields the
 * already advanced iterator. Returning the previous value would require
 * changing the declared return type to const_iterator by value.
 *
 * @return This iterator after the step.
 */
source::const_iterator& source::const_iterator::operator++(int)
{
    // No snapshot of the old state is taken: it could never be returned
    // through the reference and copying it would be wasted work.
    return ++(*this);
}
/**
 * Two iterators are equal when they refer to the same place in the text;
 * the tracked positions are not compared.
 */
bool source::const_iterator::operator==(const source::const_iterator& that) const noexcept
{
    return m_buffer == that.m_buffer;
}
/**
 * Negation of operator==: true when the iterators refer to different places
 * in the text.
 */
bool source::const_iterator::operator!=(const source::const_iterator& that) const noexcept
{
    return m_buffer != that.m_buffer;
}
2024-03-03 13:11:39 +01:00
/**
 * Creates a token that carries text (identifiers and operators).
 *
 * The text is copied into a malloc-allocated buffer owned by the token, so
 * the caller's string does not have to outlive it. The destructor releases
 * that buffer for the identifier, term_operator and factor_operator types.
 *
 * @param of       Token type.
 * @param value    NUL-terminated text to copy; a null pointer is stored as
 *                 an empty string.
 * @param position Location of the token in the source.
 * @throws std::bad_alloc If the buffer for the copy cannot be allocated.
 */
Token::Token(const type of, const char *value, source_position position)
    : m_type(of), m_position(position)
{
    const char *text = (value != nullptr) ? value : "";
    // Size includes the terminating NUL so that a single memcpy suffices.
    const std::size_t size = std::strlen(text) + 1;
    char *buffer = static_cast<char *>(std::malloc(size));

    if (buffer == nullptr)
    {
        throw std::bad_alloc();
    }
    std::memcpy(buffer, text, size);
    m_value.identifier = buffer;
}
2024-03-03 13:11:39 +01:00
Token::Token(const type of, std::int32_t number, source_position position)
2024-02-28 16:18:39 +01:00
: m_type(of), m_position(position)
{
m_value.number = number;
}
2024-03-03 13:11:39 +01:00
Token::Token(const type of, source_position position)
2024-02-28 16:18:39 +01:00
: m_type(of), m_position(position)
{
}
Token::Token(const Token& that)
: m_type(that.of()), m_position(that.position())
{
*this = that;
}
Token::Token(Token&& that)
: m_type(that.of()), m_position(that.position())
{
*this = std::move(that);
}
Token::~Token()
{
2024-03-03 13:11:39 +01:00
if (m_type == type::identifier || m_type == type::term_operator || m_type == type::factor_operator)
2024-02-28 16:18:39 +01:00
{
std::free(const_cast<char*>(m_value.identifier));
}
}
Token& Token::operator=(const Token& that)
{
m_type = that.of();
m_position = that.position();
2024-03-03 13:11:39 +01:00
if (that.of() == type::identifier || that.of() == type::term_operator || m_type == type::factor_operator)
2024-02-28 16:18:39 +01:00
{
std::size_t value_length = strlen(that.identifier());
char *buffer = reinterpret_cast<char *>(malloc(value_length + 1));
std::memcpy(buffer, that.identifier(), value_length);
buffer[value_length] = 0;
m_value.identifier = buffer;
}
2024-03-03 13:11:39 +01:00
else if (that.of() == type::number)
2024-02-28 16:18:39 +01:00
{
m_value.number = that.number();
}
return *this;
}
Token& Token::operator=(Token&& that)
{
m_type = that.of();
m_position = that.position();
2024-03-03 13:11:39 +01:00
if (that.of() == type::identifier || that.of() == type::term_operator || that.of() == type::factor_operator)
2024-02-28 16:18:39 +01:00
{
m_value.identifier = that.identifier();
that.m_value.identifier = nullptr;
}
2024-03-03 13:11:39 +01:00
else if (that.of() == type::number)
2024-02-28 16:18:39 +01:00
{
m_value.number = that.number();
}
return *this;
2024-02-22 21:29:25 +01:00
}
2024-03-03 13:11:39 +01:00
/**
 * @return The type this token was created with.
 */
Token::type Token::of() const noexcept
{
    return this->m_type;
}
2024-02-28 16:18:39 +01:00
const char *Token::identifier() const noexcept
{
return m_value.identifier;
}
std::int32_t Token::number() const noexcept
2024-02-22 21:29:25 +01:00
{
2024-02-28 16:18:39 +01:00
return m_value.number;
2024-02-22 21:29:25 +01:00
}
2024-03-03 13:11:39 +01:00
/**
 * @return The source position stored in the token.
 */
const source_position& Token::position() const noexcept
{
    return this->m_position;
}
2024-03-03 13:11:39 +01:00
namespace
{
/**
 * Digit test that is safe for any char value: the <cctype> functions
 * require an argument representable as unsigned char.
 */
bool is_decimal_digit(const char character) noexcept
{
    return std::isdigit(static_cast<unsigned char>(character)) != 0;
}

/** Letter test that is safe for any char value (see is_decimal_digit). */
bool is_letter(const char character) noexcept
{
    return std::isalpha(static_cast<unsigned char>(character)) != 0;
}
}

/**
 * Splits source text into tokens.
 *
 * Recognised input:
 *  - blanks (space, tab, carriage return, line feed), which are skipped;
 *  - unsigned decimal integer literals, one number token per literal;
 *  - words made of letters: "const" gives a let token, "var" a var token,
 *    anything else an identifier token;
 *  - the single characters = ( ) ; , ! . and the operators + - (term
 *    operators) and * / (factor operators).
 *
 * Every token records the position of its first character.
 *
 * @param buffer NUL-terminated source text.
 * @return The token list, or an error carrying a message and the position
 *         of the offending character or literal.
 */
source_result lex(const char *buffer)
{
    std::vector<Token> tokens;
    source input{ buffer };
    // The end iterator does not change while scanning, so build it once.
    const auto end = input.end();
    auto iterator = input.begin();

    while (iterator != end)
    {
        const char current = *iterator;
        // Copy the position: the iterator moves on while a literal or word is read.
        const source_position start = iterator.position();

        if (is_decimal_digit(current))
        {
            constexpr std::int32_t maximum = std::numeric_limits<std::int32_t>::max();
            std::int32_t value = 0;

            for (; iterator != end && is_decimal_digit(*iterator); ++iterator)
            {
                const std::int32_t digit = *iterator - '0';

                // Equivalent to value * 10 + digit > maximum, without overflowing.
                if (value > (maximum - digit) / 10)
                {
                    return source_result("Integer literal is too large", start);
                }
                value = value * 10 + digit;
            }
            tokens.emplace_back(Token::type::number, value, start);
            continue;
        }
        if (is_letter(current))
        {
            std::string word;

            for (; iterator != end && is_letter(*iterator); ++iterator)
            {
                word.push_back(*iterator);
            }
            if (word == "const")
            {
                tokens.emplace_back(Token::type::let, start);
            }
            else if (word == "var")
            {
                tokens.emplace_back(Token::type::var, start);
            }
            else
            {
                tokens.emplace_back(Token::type::identifier, word.c_str(), start);
            }
            continue;
        }
        switch (current)
        {
            case ' ':
            case '\t':
            case '\r':
            case '\n':
                break;
            case '=':
                tokens.emplace_back(Token::type::equals, start);
                break;
            case '(':
                tokens.emplace_back(Token::type::left_paren, start);
                break;
            case ')':
                tokens.emplace_back(Token::type::right_paren, start);
                break;
            case ';':
                tokens.emplace_back(Token::type::semicolon, start);
                break;
            case ',':
                tokens.emplace_back(Token::type::comma, start);
                break;
            case '!':
                tokens.emplace_back(Token::type::bang, start);
                break;
            case '.':
                tokens.emplace_back(Token::type::dot, start);
                break;
            case '+':
            case '-':
            {
                const char text[2] = { current, '\0' };
                tokens.emplace_back(Token::type::term_operator,
                        static_cast<const char *>(text), start);
                break;
            }
            case '*':
            case '/':
            {
                const char text[2] = { current, '\0' };
                tokens.emplace_back(Token::type::factor_operator,
                        static_cast<const char *>(text), start);
                break;
            }
            default:
                return source_result("Unexpected next character", start);
        }
        ++iterator;
    }
    return source_result(tokens);
}
}
2024-03-03 13:11:39 +01:00
}