elna/source/elna/lexer.d

module elna.lexer;

import core.stdc.stdlib;
import core.stdc.ctype;
import core.stdc.string;
import elna.result;
import std.range;
import tanya.container.array;
import tanya.container.string;

/**
 * A single lexical token: its kind, an optional payload (a number or a
 * string) and the position in the source text where it was found.
 */
extern(C++)
struct Token
{
    /**
     * Token kinds. Only `number`, `identifier` and `operator` tokens carry a
     * payload (see the constructors below); the others are fully described
     * by their kind and position.
     */
    enum Type
    {
        number,
        operator,
        let,
        identifier,
        equals,
        var,
        semicolon,
        leftParen,
        rightParen,
        bang,
        dot,
        comma,
    }

    /**
     * Payload storage. `number` is used by `Type.number` tokens, `identifier`
     * by `Type.identifier` and `Type.operator` tokens.
     *
     * NOTE(review): the D specification says destructors of union fields are
     * not run automatically, and this struct declares no destructor or
     * postblit of its own - confirm who owns and releases the String payload.
     */
    union Value
    {
        int number;
        String identifier;
    }

    private Type type;
    private Value value_;
    private Position position_;

    // A token always needs at least a type and a position.
    @disable this();

    /**
     * Creates a token without a payload.
     *
     * Params:
     *   type     = Token type.
     *   position = Position of the token in the source text.
     */
    this(Type type, Position position) @nogc nothrow pure @safe
    {
        this.type = type;
        this.position_ = position;
    }

    /**
     * Creates a number token.
     *
     * Params:
     *   type     = Token type, has to be `Type.number`.
     *   value    = Numeric value of the token.
     *   position = Position of the token in the source text.
     */
    this(Type type, int value, Position position) @nogc nothrow pure @trusted
    in (type == Type.number)
    {
        this(type, position);
        this.value_.number = value;
    }

    /**
     * Creates a token with a string payload.
     *
     * Params:
     *   type     = Token type, has to be `Type.identifier` or `Type.operator`.
     *   value    = String stored in the token.
     *   position = Position of the token in the source text.
     */
    this()(Type type, auto ref String value, Position position)
    @nogc nothrow pure @trusted
    in (type == Type.identifier || type == Type.operator)
    {
        this(type, position);
        this.value_.identifier = value;
    }

    /**
     * Params:
     *   type = Expected type.
     *
     * Returns: Whether this token is of the expected type.
     */
    bool ofType(Type type) const @nogc nothrow pure @safe
    {
        return this.type == type;
    }

    /**
     * Reads the payload of the token. The requested type is a compile-time
     * argument; requesting a type other than `number`, `identifier` or
     * `operator` is rejected at compile time by the `static assert`.
     *
     * Params:
     *   type = Type the token is expected to have.
     *
     * Returns: The stored number for `Type.number`, the stored string for
     *          `Type.identifier` and `Type.operator`.
     */
    @property auto value(Type type)() @nogc nothrow pure @trusted
    in (ofType(type), "Expected type: " ~ type.stringof)
    {
        static if (type == Type.number)
        {
            return this.value_.number;
        }
        else static if (type == Type.identifier || type == Type.operator)
        {
            return this.value_.identifier;
        }
        else
        {
            static assert(false, "This type doesn't have a value");
        }
    }

    /**
     * Returns: The token position in the source text.
     */
    @property const(Position) position() const @nogc nothrow pure @safe
    {
        return this.position_;
    }
}

/**
 * View of the not yet consumed part of the source text. It provides the
 * `empty`/`front`/`popFront` range primitives and updates `position` as
 * characters are consumed.
 */
extern(C++)
struct Source
{
    char* buffer_;
    size_t length_;
    Position position;

    /**
     * Params:
     *   buffer = Pointer to the first character of the text.
     *   length = Number of characters available at `buffer`.
     */
    this(char* buffer, const size_t length) @nogc nothrow pure
    {
        this.length_ = length;
        this.buffer_ = buffer;
    }

    @disable this();

    /**
     * Returns: Whether all characters have been consumed.
     */
    bool empty() @nogc nothrow pure @safe
    {
        return this.length_ == 0;
    }

    /**
     * Returns: The next character to be consumed.
     */
    char front() @nogc nothrow pure
    in (!empty)
    {
        return *this.buffer_;
    }

    /**
     * Consumes one character and moves the position one column forward.
     */
    void popFront() @nogc nothrow pure
    in (!empty)
    {
        this.buffer_ += 1;
        this.length_ -= 1;
        this.position.column += 1;
    }

    /**
     * Consumes one character and moves the position to the first column of
     * the following line.
     */
    void breakLine() @nogc nothrow pure
    in (!empty)
    {
        this.buffer_ += 1;
        this.length_ -= 1;
        this.position.line += 1;
        this.position.column = 1;
    }

    /**
     * Returns: Number of characters left to consume.
     */
    @property size_t length() const @nogc nothrow pure @safe
    {
        return this.length_;
    }

    /**
     * Params:
     *   index = Offset counted from the front character.
     *
     * Returns: The character at the given offset.
     */
    char opIndex(size_t index) @nogc nothrow pure
    in (index < this.length_)
    {
        return *(this.buffer_ + index);
    }

    /**
     * Returns: Pointer to the front character.
     */
    char* buffer() @nogc nothrow pure
    {
        return this.buffer_;
    }
}

/**
 * Splits the source text into tokens.
 *
 * Spaces, tabs and carriage returns are skipped, a line feed moves the
 * position to the next line. A run of decimal digits forms a single number
 * token, a run of letters forms a keyword (`const`, `var`) or an identifier,
 * `+` and `-` form operator tokens, and `= ( ) ; , ! .` form their own
 * payload-free tokens.
 *
 * Params:
 *   buffer = Source text.
 *
 * Returns: The tokens in source order on success. Otherwise a result built
 *          from an error message and the position of the offending character
 *          or literal.
 */
Result!(Array!Token) lex(char[] buffer) @nogc
{
    Array!Token tokens;
    auto source = Source(buffer.ptr, buffer.length);

    while (!source.empty)
    {
        if (source.front == ' ' || source.front == '\t' || source.front == '\r')
        {
            // Insignificant whitespace, '\r' is accepted so that CRLF line
            // endings can be lexed.
            source.popFront;
        }
        else if (source.front >= '0' && source.front <= '9')
        {
            // Consume the whole run of digits as one number. The token is
            // positioned at the first digit.
            auto start = source.position;
            int number = 0;

            while (!source.empty && source.front >= '0' && source.front <= '9')
            {
                const int digit = source.front - '0';

                // Equivalent to number * 10 + digit > int.max, but without
                // overflowing while checking.
                if (number > (int.max - digit) / 10)
                {
                    return typeof(return)("Integer literal is too large", start);
                }
                number = number * 10 + digit;
                source.popFront;
            }
            tokens.insertBack(Token(Token.Type.number, number, start));
        }
        else if (source.front == '=')
        {
            tokens.insertBack(Token(Token.Type.equals, source.position));
            source.popFront;
        }
        else if (source.front == '(')
        {
            tokens.insertBack(Token(Token.Type.leftParen, source.position));
            source.popFront;
        }
        else if (source.front == ')')
        {
            tokens.insertBack(Token(Token.Type.rightParen, source.position));
            source.popFront;
        }
        else if (source.front == ';')
        {
            tokens.insertBack(Token(Token.Type.semicolon, source.position));
            source.popFront;
        }
        else if (source.front == ',')
        {
            tokens.insertBack(Token(Token.Type.comma, source.position));
            source.popFront;
        }
        else if (source.front == '!')
        {
            tokens.insertBack(Token(Token.Type.bang, source.position));
            source.popFront;
        }
        else if (source.front == '.')
        {
            tokens.insertBack(Token(Token.Type.dot, source.position));
            source.popFront;
        }
        else if (isalpha(source.front))
        {
            // Find the end of the run of letters starting at the front.
            size_t i = 1;
            while (i < source.length && isalpha(source[i]))
            {
                ++i;
            }
            char[] word = source.buffer[0 .. i];

            if (word == "const")
            {
                tokens.insertBack(Token(Token.Type.let, source.position));
            }
            else if (word == "var")
            {
                tokens.insertBack(Token(Token.Type.var, source.position));
            }
            else
            {
                auto identifier = String(word);
                tokens.insertBack(Token(Token.Type.identifier, identifier, source.position));
            }
            source.popFrontN(i);
        }
        else if (source.front == '+' || source.front == '-')
        {
            String operator;

            operator.insertBack(source.front);
            tokens.insertBack(Token(Token.Type.operator, operator, source.position));
            source.popFront;
        }
        else if (source.front == '\n')
        {
            source.breakLine;
        }
        else
        {
            return typeof(return)("Unexpected next character", source.position);
        }
    }
    return typeof(return)(tokens);
}