elna/include/elna/source/lexer.hpp

139 lines
3.5 KiB
C++

#pragma once
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <string>
#include <utility>
#include <vector>
#include "elna/source/result.hpp"
namespace elna::source
{
/**
* Range over the source text that keeps track of the current position.
*/
class text_iterator
{
std::string::const_iterator m_buffer;
elna::source::position m_position;
text_iterator(std::string::const_iterator buffer,
const elna::source::position start_position = elna::source::position());
public:
using iterator_category = std::forward_iterator_tag;
using difference_type = ptrdiff_t;
using value_type = char;
using pointer = const value_type *;
using reference = const value_type&;
const elna::source::position& position() const noexcept;
reference operator*() const noexcept;
pointer operator->() const noexcept;
text_iterator& operator++();
text_iterator& operator++(int);
bool operator==(const text_iterator& that) const noexcept;
bool operator!=(const text_iterator& that) const noexcept;
text_iterator operator+(std::size_t step);
friend std::pair<text_iterator, text_iterator> text_iterators(const std::string& buffer);
};
/**
 * Creates a pair of iterators over the given source text.
 *
 * \param buffer Source text.
 * \return Pair of text iterators; presumably the begin and end of \a buffer
 *         (TODO confirm against the definition).
 */
std::pair<text_iterator, text_iterator> text_iterators(const std::string& buffer);
/**
 * A single token: its type, an optional value stored in a union, and the
 * source position it is associated with.
 */
struct token
{
    /**
     * Token type.
     */
    enum class type : std::uint16_t
    {
        number,
        term_operator,
        let,
        identifier,
        equals,
        var,
        semicolon,
        left_paren,
        right_paren,
        bang,
        dot,
        comma,
        factor_operator,
        eof,
        begin,
        end,
        assignment
    };

    /**
     * Type of the token value.
     *
     * One of the members is a std::string, so the union declares its own
     * constructors and destructor. The union itself does not record which
     * member is active.
     *
     * NOTE(review): the single-argument constructors are not explicit;
     * confirm whether implicit conversion is intended.
     */
    union value
    {
        value();
        value(std::int32_t value);
        value(const std::string& value);
        ~value();

        std::nullptr_t nil;
        std::int32_t number;
        std::string identifier;
    };

    /**
     * Constructs a token without an explicit value.
     */
    token(type of, elna::source::position position);

    /**
     * Constructs a token with an integer value.
     */
    token(type of, std::int32_t value, elna::source::position position);

    /**
     * Constructs a token with a string value.
     */
    token(type of, const std::string& value, elna::source::position position);

    // Copy, move and destruction are user-declared.
    // NOTE(review): the move operations are not declared noexcept; adding it
    // requires the same change on the out-of-line definitions.
    token(const token& that);
    token(token&& that);
    ~token();

    token& operator=(const token& that);
    token& operator=(token&& that);

    /**
     * \return Type of this token.
     */
    type of() const noexcept;

    /**
     * \return String value of the token. Not noexcept - presumably only
     *         valid for tokens that hold a string (TODO confirm).
     */
    const std::string& identifier() const;

    /**
     * \return Integer value of the token. Not noexcept - presumably only
     *         valid for tokens that hold a number (TODO confirm).
     */
    std::int32_t number() const;

    /**
     * \return Position stored in this token.
     */
    const elna::source::position& position() const noexcept;

private:
    /**
     * \return Presumably whether the string member of the value union is
     *         the active one (TODO confirm against the definition).
     */
    bool has_identifier() const noexcept;

    type m_type;
    value m_value;
    elna::source::position m_position;
};
class unexpected_character final : public error
{
std::string character;
public:
unexpected_character(const std::string& character, const source::position position);
std::string what() const override;
};
/**
 * Error describing an unexpected token; keeps its own copy of the token.
 */
class unexpected_token final : public error
{
    token m_token;

public:
    /**
     * \param token The token this error is constructed from.
     */
    explicit unexpected_token(const token& token);

    /**
     * \return Error description.
     */
    std::string what() const override;
};
/**
 * Tokenizes the source text.
 *
 * \param buffer Source text to split into tokens.
 * \return The resulting tokens, or an error.
 */
elna::source::result<std::vector<token>>
lex(const std::string& buffer);
}