253 lines
5.8 KiB
D
253 lines
5.8 KiB
D
module elna.lexer;
|
|
|
|
import core.stdc.stdlib;
|
|
import core.stdc.ctype;
|
|
import core.stdc.string;
|
|
import elna.result;
|
|
import std.range;
|
|
import tanya.container.array;
|
|
import tanya.container.string;
|
|
import tanya.memory.mmappool;
|
|
|
|
/**
 * A single lexical unit: its kind, an optional payload and the position it
 * was created with.
 */
struct Token
{
    /// Kinds of tokens.
    enum Type
    {
        number,
        operator,
        let,
        identifier,
        equals,
        var,
        semicolon,
        leftParen,
        rightParen,
        bang,
        dot,
        comma,
    }

    /// Payload storage; which member is meaningful depends on the token type.
    union Value
    {
        int number;
        String identifier;
    }

    private Type type;
    private Value value_;
    private Position position_;

    @disable this();

    /**
     * Creates a token without a payload.
     *
     * Params:
     *  type     = Token type.
     *  position = Position to associate with the token.
     */
    this(Type type, Position position) @nogc nothrow pure @safe
    {
        this.position_ = position;
        this.type = type;
    }

    /**
     * Creates a number token.
     *
     * Params:
     *  type     = Token type, has to be $(D Type.number).
     *  value    = Numeric payload.
     *  position = Position to associate with the token.
     */
    this(Type type, int value, Position position) @nogc nothrow pure @trusted
    in
    {
        assert(type == Type.number);
    }
    do
    {
        this(type, position);
        this.value_.number = value;
    }

    /**
     * Creates an identifier token.
     *
     * Params:
     *  type     = Token type, has to be $(D Type.identifier).
     *  value    = Identifier payload.
     *  position = Position to associate with the token.
     */
    this()(Type type, auto ref String value, Position position)
    @nogc nothrow pure @trusted
    in
    {
        assert(type == Type.identifier);
    }
    do
    {
        this(type, position);
        this.value_.identifier = value;
    }

    /**
     * Params:
     *  type = Type to compare with.
     *
     * Returns: $(D true) if the token has the given type, $(D false)
     *          otherwise.
     */
    bool ofType(Type type) const @nogc nothrow pure @safe
    {
        return type == this.type;
    }

    /**
     * Reads the payload of the token.
     *
     * Only $(D Type.number) and $(D Type.identifier) can be requested, any
     * other type is rejected at compile time. The token has to be of the
     * requested type.
     *
     * Params:
     *  type = Token type selecting the payload member.
     *
     * Returns: The number or the identifier stored in the token.
     */
    @property auto value(Type type)() @nogc nothrow pure @trusted
    in
    {
        assert(ofType(type));
    }
    do
    {
        static if (type == Type.identifier)
        {
            return this.value_.identifier;
        }
        else static if (type == Type.number)
        {
            return this.value_.number;
        }
        else
        {
            static assert(false, "This type doesn't have a value");
        }
    }

    /**
     * Returns: The position the token was created with.
     */
    @property const(Position) position() const @nogc nothrow pure @safe
    {
        return position_;
    }
}
|
|
|
|
/**
 * Range over the characters of the source text. Alongside the remaining text
 * it stores a position whose line and column are updated whenever a character
 * is consumed.
 */
struct Source
{
    char[] buffer;
    Position position;

    @disable this();

    /**
     * Params:
     *  buffer = Text to iterate over.
     */
    this(char[] buffer) @nogc nothrow pure @safe
    {
        this.buffer = buffer;
    }

    /**
     * Returns: Whether all characters have been consumed.
     */
    bool empty() @nogc nothrow pure @safe
    {
        return this.buffer.length == 0;
    }

    /**
     * Returns: The next character that hasn't been consumed yet.
     */
    char front() @nogc nothrow pure @safe
    in
    {
        assert(!empty);
    }
    do
    {
        return buffer[0];
    }

    /**
     * Consumes one character and moves the position one column forward.
     */
    void popFront() @nogc nothrow pure @safe
    in
    {
        assert(!empty);
    }
    do
    {
        this.buffer = this.buffer[1 .. this.buffer.length];
        ++this.position.column;
    }

    /**
     * Consumes one character, moves the position to the next line and resets
     * the column to 1.
     */
    void breakLine() @nogc nothrow pure @safe
    in
    {
        assert(!empty);
    }
    do
    {
        this.buffer = this.buffer[1 .. this.buffer.length];
        ++this.position.line;
        this.position.column = 1;
    }

    /**
     * Returns: Number of characters that haven't been consumed yet.
     */
    @property size_t length() const @nogc nothrow pure @safe
    {
        return buffer.length;
    }

    /**
     * Params:
     *  index = Offset from the current front, has to be less than the length.
     *
     * Returns: The character at the given offset.
     */
    char opIndex(size_t index) @nogc nothrow pure @safe
    in
    {
        assert(index < length);
    }
    do
    {
        return buffer[index];
    }

    /**
     * Params:
     *  i = Offset of the first character of the slice.
     *  j = Offset one past the last character of the slice.
     *
     * Returns: The characters in the range $(D [i, j$(RPAREN)) of the
     *          remaining text.
     */
    char[] opSlice(size_t i, size_t j) @nogc nothrow pure @safe
    in (i <= j)
    in (j <= length)
    {
        return buffer[i .. j];
    }
}
|
|
|
|
/**
 * Splits the source text into tokens.
 *
 * Spaces, tabs and carriage returns are skipped. A line feed is skipped as
 * well and moves the tracked position to the next line. A run of decimal
 * digits produces a single number token. A run of alphabetic characters
 * produces a keyword token for "const" and "var" and an identifier token for
 * everything else. Every token is created with the position of its first
 * character.
 *
 * Params:
 *  buffer = Source text.
 *
 * Returns: The tokens in the order they appear in the text. An empty array
 *          is returned if the text contains a character that doesn't start
 *          any token or a number literal that doesn't fit into an $(D int);
 *          an empty array is therefore also the result for a text without
 *          tokens.
 */
Array!Token lex(char[] buffer) @nogc
{
    Array!Token tokens;
    auto source = Source(buffer);

    while (!source.empty)
    {
        const char current = source.front;

        if (current == ' ' || current == '\t' || current == '\r')
        {
            source.popFront;
        }
        else if (current == '\n')
        {
            source.breakLine;
        }
        else if (current >= '0' && current <= '9')
        {
            // Consume the whole run of digits so that a literal like "12"
            // becomes one token instead of one token per digit.
            int number;
            size_t digits;

            while (digits < source.length
                && source[digits] >= '0' && source[digits] <= '9')
            {
                const int digit = source[digits] - '0';

                // number * 10 + digit would exceed int.max.
                if (number > (int.max - digit) / 10)
                {
                    return typeof(tokens)(); // Error.
                }
                number = number * 10 + digit;
                ++digits;
            }
            tokens.insertBack(Token(Token.Type.number, number, source.position));
            source.popFrontN(digits);
        }
        else if (isalpha(current))
        {
            size_t end = 1;

            while (end < source.length && isalpha(source[end]))
            {
                ++end;
            }
            auto word = source[0 .. end];

            if (word == "const")
            {
                tokens.insertBack(Token(Token.Type.let, source.position));
            }
            else if (word == "var")
            {
                tokens.insertBack(Token(Token.Type.var, source.position));
            }
            else
            {
                auto identifier = String(word);
                tokens.insertBack(Token(Token.Type.identifier, identifier, source.position));
            }
            source.popFrontN(end);
        }
        else
        {
            // Tokens consisting of exactly one character.
            Token.Type type;

            switch (current)
            {
                case '=':
                    type = Token.Type.equals;
                    break;
                case '(':
                    type = Token.Type.leftParen;
                    break;
                case ')':
                    type = Token.Type.rightParen;
                    break;
                case ';':
                    type = Token.Type.semicolon;
                    break;
                case ',':
                    type = Token.Type.comma;
                    break;
                case '!':
                    type = Token.Type.bang;
                    break;
                case '.':
                    type = Token.Type.dot;
                    break;
                case '+': // The only operator recognized so far.
                    type = Token.Type.operator;
                    break;
                default:
                    return typeof(tokens)(); // Error.
            }
            tokens.insertBack(Token(type, source.position));
            source.popFront;
        }
    }
    return tokens;
}
|