255 lines
5.9 KiB
D
255 lines
5.9 KiB
D
module elna.lexer;
|
|
|
|
import core.stdc.stdlib;
|
|
import core.stdc.ctype;
|
|
import core.stdc.string;
|
|
import elna.result;
|
|
import std.range;
|
|
import tanya.container.array;
|
|
import tanya.container.string;
|
|
|
|
/**
 * Lexical token: a type tag, an optional payload (a number or a string) and
 * the source position the token was created with.
 */
extern(C++)
struct Token
{
    /// Kinds of tokens.
    enum Type
    {
        number,
        operator,
        let,
        identifier,
        equals,
        var,
        semicolon,
        leftParen,
        rightParen,
        bang,
        dot,
        comma,
    }

    /// Payload storage; the members overlap, so only one of them is meaningful at a time.
    union Value
    {
        int number;
        String identifier;
    }

    private Type type;
    private Value value_;
    private Position position_;

    /// A token cannot be created without a type and a position.
    @disable this();

    /**
     * Creates a token that carries no payload.
     *
     * Params:
     *     type     = Token type.
     *     position = Position to associate with the token.
     */
    this(Type type, Position position) @nogc nothrow pure @safe
    {
        this.position_ = position;
        this.type = type;
    }

    /**
     * Creates a token with an integer payload.
     *
     * Params:
     *     type     = Token type, has to be $(D Type.number).
     *     value    = Integer payload.
     *     position = Position to associate with the token.
     */
    this(Type type, int value, Position position) @nogc nothrow pure @trusted
    in (type == Type.number)
    {
        this(type, position);
        this.value_.number = value;
    }

    /**
     * Creates a token with a string payload.
     *
     * Params:
     *     type     = Token type, has to be $(D Type.identifier) or
     *                $(D Type.operator).
     *     value    = String payload.
     *     position = Position to associate with the token.
     */
    this()(Type type, auto ref String value, Position position)
    @nogc nothrow pure @trusted
    in (type == Type.identifier || type == Type.operator)
    {
        this(type, position);
        this.value_.identifier = value;
    }

    /**
     * Compares the type of this token with the given one.
     *
     * Params:
     *     type = Expected type.
     *
     * Returns: $(D true) if this token has the expected type, $(D false)
     *          otherwise.
     */
    bool ofType(Type type) const @nogc nothrow pure @safe
    {
        return type == this.type;
    }

    /**
     * Reads the payload of the token.
     *
     * The payload kind is chosen at compile time from the template argument;
     * instantiating the template with a type that has no payload is rejected
     * by a $(D static assert).
     *
     * Params:
     *     type = Type this token is expected to have.
     *
     * Returns: The integer payload for $(D Type.number), the string payload
     *          for $(D Type.identifier) and $(D Type.operator).
     */
    @property auto value(Type type)() @nogc nothrow pure @trusted
    in (ofType(type), "Expected type: " ~ type.stringof)
    {
        static if (type == Type.identifier || type == Type.operator)
        {
            return this.value_.identifier;
        }
        else static if (type == Type.number)
        {
            return this.value_.number;
        }
        else
        {
            static assert(false, "This type doesn't have a value");
        }
    }

    /**
     * Returns: The position this token was constructed with.
     */
    @property const(Position) position() const @nogc nothrow pure @safe
    {
        return this.position_;
    }
}
|
|
|
|
/**
 * Input range over the characters of the source text. Besides the remaining
 * text it tracks the position of the current character.
 */
extern(C++)
struct Source
{
    char* buffer_;
    size_t length_;
    Position position;

    /// A source cannot be created without a buffer.
    @disable this();

    /**
     * Params:
     *     buffer = Pointer to the first character of the text.
     *     length = Number of characters available at $(D_PARAM buffer).
     *
     * The position is not assigned here and keeps its default value.
     */
    this(char* buffer, const size_t length) @nogc nothrow pure
    {
        this.length_ = length;
        this.buffer_ = buffer;
    }

    /**
     * Returns: Whether all characters have been consumed.
     */
    bool empty() @nogc nothrow pure @safe
    {
        return this.length_ == 0;
    }

    /**
     * Returns: The current character. The range must not be empty.
     */
    char front() @nogc nothrow pure
    in (!empty)
    {
        return *this.buffer_;
    }

    /**
     * Consumes the current character and moves one column forward.
     * The range must not be empty.
     */
    void popFront() @nogc nothrow pure
    in (!empty)
    {
        this.buffer_ += 1;
        this.length_ -= 1;
        ++this.position.column;
    }

    /**
     * Consumes the current character and moves to the first column of the
     * next line. The range must not be empty.
     */
    void breakLine() @nogc nothrow pure
    in (!empty)
    {
        this.buffer_ += 1;
        this.length_ -= 1;
        ++this.position.line;
        this.position.column = 1;
    }

    /**
     * Returns: Number of characters that have not been consumed yet.
     */
    @property size_t length() const @nogc nothrow pure @safe
    {
        return this.length_;
    }

    /**
     * Looks ahead without consuming anything.
     *
     * Params:
     *     index = Offset from the current character, has to be less than
     *             $(D length).
     *
     * Returns: The character at the given offset.
     */
    char opIndex(size_t index) @nogc nothrow pure
    in (index < length)
    {
        return *(this.buffer_ + index);
    }

    /**
     * Returns: Pointer to the current character.
     */
    char* buffer() @nogc nothrow pure
    {
        return this.buffer_;
    }
}
|
|
|
|
/**
 * Splits the source text into tokens.
 *
 * Spaces and line feeds are skipped; a line feed additionally moves the
 * tracked position to the next line. A run of decimal digits becomes a single
 * number token, a run of alphabetic characters becomes a keyword token
 * ("const", "var") or an identifier token, '+' and '-' become operator tokens
 * and '=', '(', ')', ';', ',', '!' and '.' become punctuation tokens.
 *
 * Params:
 *     buffer = Source text.
 *
 * Returns: The tokens in source order, or an error message together with a
 *          position if a character is not recognized or an integer literal
 *          doesn't fit into an $(D int).
 */
Result!(Array!Token) lex(char[] buffer) @nogc
{
    Array!Token tokens;
    auto source = Source(buffer.ptr, buffer.length);

    while (!source.empty)
    {
        const char current = source.front;

        if (current == ' ')
        {
            source.popFront;
        }
        else if (current == '\n')
        {
            source.breakLine;
        }
        else if (current >= '0' && current <= '9')
        {
            // The position moves while the digits are consumed, so remember
            // where the literal starts.
            auto start = source.position;
            int number = 0;

            while (!source.empty && source.front >= '0' && source.front <= '9')
            {
                const int digit = source.front - '0';

                // number * 10 + digit would exceed int.max: report it instead
                // of silently wrapping around.
                if (number > (int.max - digit) / 10)
                {
                    return typeof(return)("Integer literal is too large", start);
                }
                number = number * 10 + digit;
                source.popFront;
            }
            tokens.insertBack(Token(Token.Type.number, number, start));
        }
        else if (isalpha(current))
        {
            // Find the end of the word without consuming it yet.
            size_t wordLength = 1;
            while (wordLength < source.length && isalpha(source[wordLength]))
            {
                ++wordLength;
            }
            auto word = source.buffer[0 .. wordLength];

            if (word == "const")
            {
                tokens.insertBack(Token(Token.Type.let, source.position));
            }
            else if (word == "var")
            {
                tokens.insertBack(Token(Token.Type.var, source.position));
            }
            else
            {
                auto identifier = String(word);
                tokens.insertBack(Token(Token.Type.identifier, identifier, source.position));
            }
            source.popFrontN(wordLength);
        }
        else if (current == '+' || current == '-')
        {
            String operator;

            operator.insertBack(current);
            tokens.insertBack(Token(Token.Type.operator, operator, source.position));
            source.popFront;
        }
        else
        {
            // Single-character punctuation without a payload.
            Token.Type kind;

            switch (current)
            {
                case '=':
                    kind = Token.Type.equals;
                    break;
                case '(':
                    kind = Token.Type.leftParen;
                    break;
                case ')':
                    kind = Token.Type.rightParen;
                    break;
                case ';':
                    kind = Token.Type.semicolon;
                    break;
                case ',':
                    kind = Token.Type.comma;
                    break;
                case '!':
                    kind = Token.Type.bang;
                    break;
                case '.':
                    kind = Token.Type.dot;
                    break;
                default:
                    return typeof(return)("Unexpected next character", source.position);
            }
            tokens.insertBack(Token(kind, source.position));
            source.popFront;
        }
    }
    return typeof(return)(tokens);
}
|