Tokenize the input with flex
This commit is contained in:
260
source/lexer.cpp
260
source/lexer.cpp
@ -1,79 +1,12 @@
|
||||
#include "elna/source/lexer.hpp"
|
||||
#include <cassert>
|
||||
#include <variant>
|
||||
#include <sstream>
|
||||
|
||||
namespace elna::source
|
||||
{
|
||||
using source_position = elna::source::position;
|
||||
using source_error = elna::source::error;
|
||||
|
||||
std::pair<text_iterator, text_iterator> text_iterators(const std::string &buffer)
|
||||
{
|
||||
return std::make_pair<>(text_iterator(std::cbegin(buffer)),
|
||||
text_iterator(std::cend(buffer), position{0, 0}));
|
||||
}
|
||||
|
||||
/**
 * Creates an iterator over source text.
 *
 * \param buffer         Iterator into the string being read.
 * \param start_position Source position associated with \a buffer.
 */
text_iterator::text_iterator(std::string::const_iterator buffer,
        const source_position start_position)
    : m_buffer(buffer),
      m_position(start_position)
{
}
|
||||
|
||||
/**
 * \return Source position of the character the iterator currently points at.
 */
const source_position& text_iterator::position() const noexcept
{
    return m_position;
}
|
||||
|
||||
/**
 * \return The character the iterator currently points at.
 */
text_iterator::reference text_iterator::operator*() const noexcept
{
    return *this->m_buffer;
}
|
||||
|
||||
/**
 * \return Pointer to the character the iterator currently points at.
 *
 * The address is taken from the dereferenced iterator instead of calling
 * base() on it: base() is not part of the standard iterator interface of
 * std::string::const_iterator and is only available in some standard
 * library implementations.
 */
text_iterator::pointer text_iterator::operator->() const noexcept
{
    return &*m_buffer;
}
|
||||
|
||||
/**
 * Moves to the next character and keeps the source position in sync.
 *
 * Stepping over a line feed starts a new line (the column is reset to 1 and
 * the line number grows by one); stepping over any other character only
 * advances the column.
 *
 * \return This iterator after it has been advanced.
 */
text_iterator& text_iterator::operator++()
{
    const bool leaving_line = (*m_buffer == '\n');

    if (leaving_line)
    {
        m_position.column = 1;
        ++m_position.line;
    }
    else
    {
        ++m_position.column;
    }
    ++m_buffer;

    return *this;
}
|
||||
|
||||
/**
 * Postfix increment.
 *
 * NOTE(review): unlike the conventional postfix increment, this returns the
 * iterator *after* it has been advanced. The return type is a reference, so
 * a copy of the previous state cannot be handed back without changing the
 * declaration. The copy that used to be taken here was never used and has
 * been dropped.
 *
 * \return This iterator after it has been advanced.
 */
text_iterator& text_iterator::operator++(int)
{
    return ++(*this);
}
|
||||
|
||||
/**
 * Returns a copy of this iterator advanced by \a step characters.
 *
 * The copy is advanced one character at a time through the prefix
 * increment, so the line/column bookkeeping stays correct. Previously the
 * \a step argument was ignored and the copy was always advanced by exactly
 * one character.
 *
 * \param step Number of characters to move forward. The caller has to make
 *             sure that the result does not run past the end of the text.
 * \return The advanced copy; this iterator is left untouched.
 */
text_iterator text_iterator::operator+(std::size_t step)
{
    auto result = *this;

    for (std::size_t taken = 0; taken < step; ++taken)
    {
        ++result;
    }
    return result;
}
|
||||
|
||||
bool text_iterator::operator==(const text_iterator& that) const noexcept
|
||||
{
|
||||
return this->m_buffer == that.m_buffer;
|
||||
}
|
||||
|
||||
bool text_iterator::operator!=(const text_iterator& that) const noexcept
|
||||
{
|
||||
return !(*this == that);
|
||||
}
|
||||
|
||||
token::value::value()
|
||||
: nil(nullptr)
|
||||
{
|
||||
@ -103,6 +36,23 @@ namespace elna::source
|
||||
{
|
||||
}
|
||||
|
||||
/**
 * Creates a token of the given type, taking its payload from \a value.
 *
 * Which member of \a value is read depends on the token type:
 * - types for which has_identifier() holds take over the identifier string,
 * - types for which is_numeric() holds copy the number,
 * - every other type stores a null payload.
 *
 * \param of       Token type.
 * \param value    Payload to take the data from.
 * \param position Source position of the token.
 */
token::token(type of, value&& value, const elna::source::position position)
    : m_type(of), m_position(position)
{
    if (has_identifier())
    {
        // The string member is constructed in place inside m_value.
        new(static_cast<void *>(&m_value.identifier)) std::string(std::move(value.identifier));
        return;
    }
    if (is_numeric())
    {
        m_value.number = value.number;
        return;
    }
    m_value.nil = nullptr;
}
|
||||
|
||||
token::token(const type of, source_position position)
|
||||
: m_type(of), m_position(position)
|
||||
{
|
||||
@ -278,11 +228,12 @@ namespace elna::source
|
||||
|
||||
std::string unexpected_character::what() const
|
||||
{
|
||||
std::stringstream ss{ "Unexpected character '" };
|
||||
std::string ss{ "Unexpected character '" };
|
||||
|
||||
ss << character << "'";
|
||||
ss.insert(ss.cend(), character.cbegin(), character.cend());
|
||||
ss.push_back('\'');
|
||||
|
||||
return ss.str();
|
||||
return ss;
|
||||
}
|
||||
|
||||
unexpected_token::unexpected_token(const token& token, const std::filesystem::path& path)
|
||||
@ -371,173 +322,4 @@ namespace elna::source
|
||||
{
|
||||
return m_errors;
|
||||
}
|
||||
|
||||
/**
 * Splits source text into tokens.
 *
 * Spaces and line feeds separate tokens and produce none. Words made of
 * letters are either keywords ("const", "var", "begin", "end", "if", "then",
 * "while", "do", "proc"), the boolean literals "True" and "False", or
 * identifiers. Comparison operators consisting of two characters are stored
 * under a one-letter code: "/=" as "n", "<=" as "l" and ">=" as "g".
 *
 * \param buffer Source text.
 * \param path   Path of the source file, attached to the result and to a
 *               reported error.
 * \return The lexer over the produced tokens, or an unexpected_character
 *         error for the first character that cannot start a token.
 */
result<lexer> lex(const std::string& buffer, const std::filesystem::path& path)
{
    std::vector<token> tokens;
    auto [iterator, text_end] = text_iterators(buffer);

    while (iterator != text_end)
    {
        // The <cctype> classifiers are only defined for values representable
        // as unsigned char (and EOF). Passing a plain char is undefined
        // behaviour for negative values, i.e. for any non-ASCII byte.
        const auto current = static_cast<unsigned char>(*iterator);

        if (*iterator == ' ' || *iterator == '\n')
        {
            // Whitespace is skipped.
        }
        else if (std::isdigit(current))
        {
            // NOTE(review): every digit becomes a number token of its own;
            // consecutive digits are not combined into one number.
            tokens.emplace_back(
                token::type::number,
                static_cast<std::int32_t>(*iterator - '0'),
                iterator.position()
            );
        }
        else if (*iterator == '=')
        {
            tokens.emplace_back(token::type::equals, iterator.position());
        }
        else if (*iterator == '(')
        {
            tokens.emplace_back(token::type::left_paren, iterator.position());
        }
        else if (*iterator == ')')
        {
            tokens.emplace_back(token::type::right_paren, iterator.position());
        }
        else if (*iterator == ';')
        {
            tokens.emplace_back(token::type::semicolon, iterator.position());
        }
        else if (*iterator == ',')
        {
            tokens.emplace_back(token::type::comma, iterator.position());
        }
        else if (*iterator == '.')
        {
            tokens.emplace_back(token::type::dot, iterator.position());
        }
        else if (std::isalpha(current))
        {
            // Collect the whole run of letters; the token is positioned at
            // the first letter of the word.
            std::string word;
            auto i = iterator;
            while (i != text_end && std::isalpha(static_cast<unsigned char>(*i)))
            {
                word.push_back(*i);
                ++i;
            }
            if (word == "const")
            {
                tokens.emplace_back(token::type::let, iterator.position());
            }
            else if (word == "var")
            {
                tokens.emplace_back(token::type::var, iterator.position());
            }
            else if (word == "begin")
            {
                tokens.emplace_back(token::type::begin, iterator.position());
            }
            else if (word == "end")
            {
                tokens.emplace_back(token::type::end, iterator.position());
            }
            else if (word == "if")
            {
                tokens.emplace_back(token::type::when, iterator.position());
            }
            else if (word == "then")
            {
                tokens.emplace_back(token::type::then, iterator.position());
            }
            else if (word == "while")
            {
                tokens.emplace_back(token::type::loop, iterator.position());
            }
            else if (word == "do")
            {
                tokens.emplace_back(token::type::_do, iterator.position());
            }
            else if (word == "True")
            {
                tokens.emplace_back(token::type::boolean, 1, iterator.position());
            }
            else if (word == "False")
            {
                tokens.emplace_back(token::type::boolean, 0, iterator.position());
            }
            else if (word == "proc")
            {
                tokens.emplace_back(token::type::procedure, 0, iterator.position());
            }
            else
            {
                tokens.emplace_back(token::type::identifier, word.c_str(), iterator.position());
            }
            // The inner loop already stopped on the first character after
            // the word, so the increment at the end of the loop is skipped.
            iterator = i;
            continue;
        }
        else if (*iterator == '+' || *iterator == '-')
        {
            std::string _operator{ *iterator };

            tokens.emplace_back(token::type::term_operator, _operator.c_str(), iterator.position());
        }
        else if (*iterator == '/' && iterator + 1 != text_end && *(iterator + 1) == '=')
        {
            // "/=" has to be tested before the plain division operator.
            tokens.emplace_back(token::type::comparison_operator, "n", iterator.position());
            ++iterator;
        }
        else if (*iterator == '*' || *iterator == '/')
        {
            std::string _operator{ *iterator };

            tokens.emplace_back(token::type::factor_operator, _operator.c_str(), iterator.position());
        }
        else if (*iterator == '<')
        {
            std::string _operator;
            // Remembered up front because "<=" moves the iterator forward.
            auto operator_position = iterator.position();

            if (iterator + 1 == text_end || *(iterator + 1) != '=')
            {
                _operator.push_back(*iterator);
            }
            else
            {
                ++iterator;
                _operator.push_back('l');
            }
            tokens.emplace_back(token::type::comparison_operator, _operator.c_str(), operator_position);
        }
        else if (*iterator == '>')
        {
            std::string _operator;
            // Remembered up front because ">=" moves the iterator forward.
            auto operator_position = iterator.position();

            if (iterator + 1 == text_end || *(iterator + 1) != '=')
            {
                _operator.push_back(*iterator);
            }
            else
            {
                ++iterator;
                _operator.push_back('g');
            }
            tokens.emplace_back(token::type::comparison_operator, _operator.c_str(), operator_position);
        }
        else if (*iterator == ':' && iterator + 1 != text_end && *(iterator + 1) == '=')
        {
            // ":=" has to be tested before the plain colon.
            tokens.emplace_back(token::type::assignment, iterator.position());
            ++iterator;
        }
        else if (*iterator == ':')
        {
            tokens.emplace_back(token::type::colon, iterator.position());
        }
        else
        {
            return result<lexer>(unexpected_character{ std::string{ *iterator }, path, iterator.position() });
        }
        ++iterator;
    }
    return result<lexer>(std::in_place, std::move(tokens), iterator.position(), path);
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user