Merge GCC frontend into the branch
This commit is contained in:
16
source/CommandLineInterface.def
Normal file
16
source/CommandLineInterface.def
Normal file
@@ -0,0 +1,16 @@
|
||||
DEFINITION MODULE CommandLineInterface;
|
||||
|
||||
FROM Common IMPORT ShortString;
|
||||
|
||||
TYPE
|
||||
CommandLine = RECORD
|
||||
input: ShortString;
|
||||
output: ShortString;
|
||||
lex: BOOLEAN;
|
||||
parse: BOOLEAN
|
||||
END;
|
||||
PCommandLine = POINTER TO CommandLine;
|
||||
|
||||
PROCEDURE parse_command_line(): PCommandLine;
|
||||
|
||||
END CommandLineInterface.
|
||||
12
source/Common.def
Normal file
12
source/Common.def
Normal file
@@ -0,0 +1,12 @@
|
||||
DEFINITION MODULE Common;
|
||||
|
||||
TYPE
|
||||
ShortString = ARRAY[1..256] OF CHAR;
|
||||
Identifier = ARRAY[1..256] OF CHAR;
|
||||
PIdentifier = POINTER TO Identifier;
|
||||
TextLocation = RECORD
|
||||
line: CARDINAL;
|
||||
column: CARDINAL
|
||||
END;
|
||||
|
||||
END Common.
|
||||
107
source/Lexer.def
Normal file
107
source/Lexer.def
Normal file
@@ -0,0 +1,107 @@
|
||||
DEFINITION MODULE Lexer;
|
||||
|
||||
FROM FIO IMPORT File;
|
||||
|
||||
FROM Common IMPORT Identifier, ShortString, TextLocation;
|
||||
|
||||
TYPE
|
||||
PLexerBuffer = POINTER TO CHAR;
|
||||
BufferPosition = RECORD
|
||||
iterator: PLexerBuffer;
|
||||
location: TextLocation
|
||||
END;
|
||||
PBufferPosition = POINTER TO BufferPosition;
|
||||
Lexer = RECORD
|
||||
input: File;
|
||||
buffer: PLexerBuffer;
|
||||
size: CARDINAL;
|
||||
length: CARDINAL;
|
||||
start: BufferPosition;
|
||||
current: BufferPosition
|
||||
END;
|
||||
PLexer = POINTER TO Lexer;
|
||||
LexerKind = (
|
||||
lexerKindEof,
|
||||
lexerKindIdentifier,
|
||||
lexerKindIf,
|
||||
lexerKindThen,
|
||||
lexerKindElse,
|
||||
lexerKindElsif,
|
||||
lexerKindWhile,
|
||||
lexerKindDo,
|
||||
lexerKindProc,
|
||||
lexerKindBegin,
|
||||
lexerKindEnd,
|
||||
lexerKindXor,
|
||||
lexerKindConst,
|
||||
lexerKindVar,
|
||||
lexerKindCase,
|
||||
lexerKindOf,
|
||||
lexerKindType,
|
||||
lexerKindRecord,
|
||||
lexerKindUnion,
|
||||
lexerKindPipe,
|
||||
lexerKindTo,
|
||||
lexerKindBoolean,
|
||||
lexerKindNull,
|
||||
lexerKindAnd,
|
||||
lexerKindOr,
|
||||
lexerKindTilde,
|
||||
lexerKindReturn,
|
||||
lexerKindDefer,
|
||||
lexerKindRange,
|
||||
lexerKindLeftParen,
|
||||
lexerKindRightParen,
|
||||
lexerKindLeftSquare,
|
||||
lexerKindRightSquare,
|
||||
lexerKindGreaterEqual,
|
||||
lexerKindLessEqual,
|
||||
lexerKindGreaterThan,
|
||||
lexerKindLessThan,
|
||||
lexerKindNotEqual,
|
||||
lexerKindEqual,
|
||||
lexerKindSemicolon,
|
||||
lexerKindDot,
|
||||
lexerKindComma,
|
||||
lexerKindPlus,
|
||||
lexerKindMinus,
|
||||
lexerKindAsterisk,
|
||||
lexerKindDivision,
|
||||
lexerKindRemainder,
|
||||
lexerKindAssignment,
|
||||
lexerKindColon,
|
||||
lexerKindHat,
|
||||
lexerKindAt,
|
||||
lexerKindComment,
|
||||
lexerKindInteger,
|
||||
lexerKindWord,
|
||||
lexerKindCharacter,
|
||||
lexerKindString,
|
||||
lexerKindFrom,
|
||||
lexerKindPointer,
|
||||
lexerKindArray,
|
||||
lexerKindArrow,
|
||||
lexerKindProgram,
|
||||
lexerKindModule,
|
||||
lexerKindImport
|
||||
);
|
||||
LexerToken = RECORD
|
||||
CASE kind: LexerKind OF
|
||||
lexerKindBoolean: booleanKind: BOOLEAN |
|
||||
lexerKindIdentifier: identifierKind: Identifier |
|
||||
lexerKindInteger: integerKind: INTEGER |
|
||||
lexerKindString: stringKind: ShortString
|
||||
END;
|
||||
start_location: TextLocation;
|
||||
end_location: TextLocation
|
||||
END;
|
||||
PLexerToken = POINTER TO LexerToken;
|
||||
|
||||
PROCEDURE lexer_initialize(lexer: PLexer; input: File);
|
||||
PROCEDURE lexer_destroy(lexer: PLexer);
|
||||
(* Returns the last read token. *)
|
||||
PROCEDURE lexer_current(lexer: PLexer): LexerToken;
|
||||
(* Read and return the next token. *)
|
||||
PROCEDURE lexer_lex(lexer: PLexer): LexerToken;
|
||||
|
||||
END Lexer.
|
||||
200
source/Parser.def
Normal file
200
source/Parser.def
Normal file
@@ -0,0 +1,200 @@
|
||||
DEFINITION MODULE Parser;
|
||||
|
||||
FROM Common IMPORT Identifier, PIdentifier, ShortString;
|
||||
FROM Lexer IMPORT PLexer;
|
||||
|
||||
TYPE
|
||||
Parser = RECORD
|
||||
lexer: PLexer
|
||||
END;
|
||||
PParser = POINTER TO Parser;
|
||||
|
||||
AstLiteralKind = (
|
||||
astLiteralKindInteger,
|
||||
astLiteralKindString,
|
||||
astLiteralKindNull,
|
||||
astLiteralKindBoolean
|
||||
);
|
||||
AstLiteral = RECORD
|
||||
CASE kind: AstLiteralKind OF
|
||||
astLiteralKindInteger: integer: INTEGER |
|
||||
astLiteralKindString: string: ShortString |
|
||||
astLiteralKindNull: |
|
||||
astLiteralKindBoolean: boolean: BOOLEAN
|
||||
END
|
||||
END;
|
||||
PAstLiteral = POINTER TO AstLiteral;
|
||||
|
||||
AstUnaryOperator = (
|
||||
astUnaryOperatorReference,
|
||||
astUnaryOperatorNot,
|
||||
astUnaryOperatorMinus
|
||||
);
|
||||
AstBinaryOperator = (
|
||||
astBinaryOperatorSum,
|
||||
astBinaryOperatorSubtraction,
|
||||
astBinaryOperatorMultiplication,
|
||||
astBinaryOperatorDivision,
|
||||
astBinaryOperatorRemainder,
|
||||
astBinaryOperatorEquals,
|
||||
astBinaryOperatorNotEquals,
|
||||
astBinaryOperatorLess,
|
||||
astBinaryOperatorGreater,
|
||||
astBinaryOperatorLessEqual,
|
||||
astBinaryOperatorGreaterEqual,
|
||||
astBinaryOperatorDisjunction,
|
||||
astBinaryOperatorConjunction,
|
||||
astBinaryOperatorExclusiveDisjunction,
|
||||
astBinaryOperatorShiftLeft,
|
||||
astBinaryOperatorShiftRight
|
||||
);
|
||||
|
||||
AstExpressionKind = (
|
||||
astExpressionKindLiteral,
|
||||
astExpressionKindIdentifier,
|
||||
astExpressionKindArrayAccess,
|
||||
astExpressionKindDereference,
|
||||
astExpressionKindFieldAccess,
|
||||
astExpressionKindUnary,
|
||||
astExpressionKindBinary,
|
||||
astExpressionKindCall
|
||||
);
|
||||
AstExpression = RECORD
|
||||
CASE kind: AstExpressionKind OF
|
||||
astExpressionKindLiteral: literal: PAstLiteral |
|
||||
astExpressionKindIdentifier: identifier: Identifier |
|
||||
astExpressionKindDereference: reference: PAstExpression |
|
||||
astExpressionKindArrayAccess:
|
||||
array: PAstExpression;
|
||||
index: PAstExpression |
|
||||
astExpressionKindFieldAccess:
|
||||
aggregate: PAstExpression;
|
||||
field: Identifier |
|
||||
astExpressionKindUnary:
|
||||
unary_operator: AstUnaryOperator;
|
||||
unary_operand: PAstExpression |
|
||||
astExpressionKindBinary:
|
||||
binary_operator: AstBinaryOperator;
|
||||
lhs: PAstExpression;
|
||||
rhs: PAstExpression |
|
||||
astExpressionKindCall:
|
||||
callable: PAstExpression;
|
||||
argument_count: CARDINAL;
|
||||
arguments: PPAstExpression
|
||||
END
|
||||
END;
|
||||
PAstExpression = POINTER TO AstExpression;
|
||||
PPAstExpression = POINTER TO PAstExpression;
|
||||
|
||||
AstStatementKind = (
|
||||
astStatementKindIf,
|
||||
astStatementKindWhile,
|
||||
astStatementKindAssignment,
|
||||
astStatementKindReturn,
|
||||
astStatementKindCall
|
||||
);
|
||||
AstStatement = RECORD
|
||||
CASE kind: AstStatementKind OF
|
||||
astStatementKindIf:
|
||||
if_condition: PAstExpression;
|
||||
if_branch: AstCompoundStatement |
|
||||
astStatementKindWhile:
|
||||
while_condition: PAstExpression;
|
||||
while_body: AstCompoundStatement |
|
||||
astStatementKindAssignment:
|
||||
assignee: PAstExpression;
|
||||
assignment: PAstExpression |
|
||||
astStatementKindReturn: returned: PAstExpression |
|
||||
astStatementKindCall: call: PAstExpression
|
||||
END
|
||||
END;
|
||||
PAstStatement = POINTER TO AstStatement;
|
||||
PPAstStatement = POINTER TO PAstStatement;
|
||||
AstCompoundStatement = RECORD
|
||||
count: CARDINAL;
|
||||
statements: PPAstStatement
|
||||
END;
|
||||
|
||||
AstImportStatement = RECORD
|
||||
package: Identifier;
|
||||
symbols: PIdentifier
|
||||
END;
|
||||
PAstImportStatement = POINTER TO AstImportStatement;
|
||||
PPAstImportStatement = POINTER TO PAstImportStatement;
|
||||
|
||||
AstConstantDeclaration = RECORD
|
||||
constant_name: Identifier;
|
||||
constant_value: INTEGER
|
||||
END;
|
||||
PAstConstantDeclaration = POINTER TO AstConstantDeclaration;
|
||||
PPAstConstantDeclaration = POINTER TO PAstConstantDeclaration;
|
||||
|
||||
AstFieldDeclaration = RECORD
|
||||
field_name: Identifier;
|
||||
field_type: PAstTypeExpression
|
||||
END;
|
||||
PAstFieldDeclaration = POINTER TO AstFieldDeclaration;
|
||||
|
||||
AstTypeExpressionKind = (
|
||||
astTypeExpressionKindNamed,
|
||||
astTypeExpressionKindRecord,
|
||||
astTypeExpressionKindEnumeration,
|
||||
astTypeExpressionKindArray,
|
||||
astTypeExpressionKindPointer,
|
||||
astTypeExpressionKindProcedure
|
||||
);
|
||||
AstTypeExpression = RECORD
|
||||
CASE kind: AstTypeExpressionKind OF
|
||||
astTypeExpressionKindNamed: name: Identifier |
|
||||
astTypeExpressionKindEnumeration: cases: PIdentifier |
|
||||
astTypeExpressionKindPointer: target: PAstTypeExpression |
|
||||
astTypeExpressionKindRecord: fields: PAstFieldDeclaration |
|
||||
astTypeExpressionKindArray:
|
||||
base: PAstTypeExpression;
|
||||
length: CARDINAL |
|
||||
astTypeExpressionKindProcedure: parameters: PPAstTypeExpression
|
||||
END
|
||||
END;
|
||||
PAstTypeExpression = POINTER TO AstTypeExpression;
|
||||
PPAstTypeExpression = POINTER TO PAstTypeExpression;
|
||||
|
||||
AstTypedDeclaration = RECORD
|
||||
identifier: Identifier;
|
||||
type_expression: PAstTypeExpression
|
||||
END;
|
||||
PAstTypedDeclaration = POINTER TO AstTypedDeclaration;
|
||||
PPAstTypedDeclaration = POINTER TO PAstTypedDeclaration;
|
||||
|
||||
AstVariableDeclaration = RECORD
|
||||
variable_name: Identifier;
|
||||
variable_type: PAstTypeExpression
|
||||
END;
|
||||
PAstVariableDeclaration = POINTER TO AstVariableDeclaration;
|
||||
PPAstVariableDeclaration = POINTER TO PAstVariableDeclaration;
|
||||
|
||||
AstProcedureDeclaration = RECORD
|
||||
name: Identifier;
|
||||
parameter_count: CARDINAL;
|
||||
parameters: PAstTypedDeclaration;
|
||||
return_type: PAstTypeExpression;
|
||||
constants: PPAstConstantDeclaration;
|
||||
variables: PPAstVariableDeclaration;
|
||||
statements: AstCompoundStatement
|
||||
END;
|
||||
PAstProcedureDeclaration = POINTER TO AstProcedureDeclaration;
|
||||
PPAstProcedureDeclaration = POINTER TO PAstProcedureDeclaration;
|
||||
|
||||
AstModule = RECORD
|
||||
main: BOOLEAN;
|
||||
imports: PPAstImportStatement;
|
||||
constants: PPAstConstantDeclaration;
|
||||
types: PPAstTypedDeclaration;
|
||||
variables: PPAstVariableDeclaration;
|
||||
procedures: PPAstProcedureDeclaration;
|
||||
statements: AstCompoundStatement
|
||||
END;
|
||||
PAstModule = POINTER TO AstModule;
|
||||
|
||||
PROCEDURE parse(lexer: PLexer): PAstModule;
|
||||
|
||||
END Parser.
|
||||
1174
source/Parser.elna
Normal file
1174
source/Parser.elna
Normal file
File diff suppressed because it is too large
Load Diff
20
source/Transpiler.def
Normal file
20
source/Transpiler.def
Normal file
@@ -0,0 +1,20 @@
|
||||
DEFINITION MODULE Transpiler;
|
||||
|
||||
FROM FIO IMPORT File;
|
||||
|
||||
FROM Common IMPORT ShortString;
|
||||
FROM Lexer IMPORT PLexer, Lexer;
|
||||
FROM Parser IMPORT PAstModule;
|
||||
|
||||
TYPE
|
||||
TranspilerContext = RECORD
|
||||
input_name: ShortString;
|
||||
output: File;
|
||||
definition: File;
|
||||
indentation: CARDINAL
|
||||
END;
|
||||
PTranspilerContext = POINTER TO TranspilerContext;
|
||||
|
||||
PROCEDURE transpile(ast_module: PAstModule; output: File; definition: File; input_name: ShortString);
|
||||
|
||||
END Transpiler.
|
||||
631
source/Transpiler.elna
Normal file
631
source/Transpiler.elna
Normal file
@@ -0,0 +1,631 @@
|
||||
(* This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
obtain one at https://mozilla.org/MPL/2.0/. *)
|
||||
module;
|
||||
|
||||
from FIO import File, WriteNBytes, WriteLine, WriteChar, WriteString;
|
||||
from NumberIO import IntToStr;
|
||||
|
||||
import common, Parser;
|
||||
|
||||
type
|
||||
TranspilerContext* = record
|
||||
input_name: String;
|
||||
output: File;
|
||||
definition: File;
|
||||
indentation: Word
|
||||
end;
|
||||
|
||||
proc indent(context: ^TranspilerContext);
|
||||
var
|
||||
count: Word;
|
||||
begin
|
||||
count := 0;
|
||||
|
||||
while count < context^.indentation do
|
||||
WriteString(context^.output, " ");
|
||||
count := count + 1u
|
||||
end
|
||||
end;
|
||||
|
||||
(* Write a semicolon followed by a newline. *)
|
||||
proc write_semicolon(output: File);
|
||||
begin
|
||||
WriteChar(output, ';');
|
||||
WriteLine(output)
|
||||
end;
|
||||
|
||||
proc transpile_import_statement(context: ^TranspilerContext, import_statement: ^AstImportStatement);
|
||||
var
|
||||
current_symbol: ^Identifier;
|
||||
begin
|
||||
WriteString(context^.output, "FROM ");
|
||||
transpile_identifier(context, import_statement^.package);
|
||||
|
||||
WriteString(context^.output, " IMPORT ");
|
||||
|
||||
current_symbol := import_statement^.symbols;
|
||||
transpile_identifier(context, current_symbol^);
|
||||
current_symbol := current_symbol + 1;
|
||||
|
||||
while current_symbol^[1] <> '\0' do
|
||||
WriteString(context^.output, ", ");
|
||||
transpile_identifier(context, current_symbol^);
|
||||
current_symbol := current_symbol + 1;
|
||||
end;
|
||||
write_semicolon(context^.output)
|
||||
end;
|
||||
|
||||
proc transpile_import_part(context: ^TranspilerContext, imports: ^^AstImportStatement);
|
||||
var
|
||||
import_statement: ^AstImportStatement;
|
||||
begin
|
||||
while imports^ <> nil do
|
||||
transpile_import_statement(context, imports^);
|
||||
imports := imports + 1
|
||||
end;
|
||||
WriteLine(context^.output)
|
||||
end;
|
||||
|
||||
proc transpile_constant_declaration(context: ^TranspilerContext, declaration: ^AstConstantDeclaration);
|
||||
var
|
||||
buffer: [20]Char;
|
||||
begin
|
||||
WriteString(context^.output, " ");
|
||||
transpile_identifier(context, declaration^.constant_name);
|
||||
|
||||
WriteString(context^.output, " = ");
|
||||
|
||||
IntToStr(declaration^.constant_value, 0, buffer);
|
||||
WriteString(context^.output, buffer);
|
||||
|
||||
write_semicolon(context^.output)
|
||||
end;
|
||||
|
||||
proc transpile_constant_part(context: ^TranspilerContext, declarations: ^^AstConstantDeclaration, extra_newline: Bool);
|
||||
var
|
||||
current_declaration: ^^AstConstantDeclaration;
|
||||
begin
|
||||
if declarations^ <> nil then
|
||||
WriteString(context^.output, "CONST");
|
||||
WriteLine(context^.output);
|
||||
|
||||
current_declaration := declarations;
|
||||
while current_declaration^ <> nil do
|
||||
transpile_constant_declaration(context, current_declaration^);
|
||||
|
||||
current_declaration := current_declaration + 1
|
||||
end;
|
||||
if extra_newline then
|
||||
WriteLine(context^.output)
|
||||
end
|
||||
end
|
||||
end;
|
||||
|
||||
proc transpile_module(context: ^TranspilerContext, result: ^AstModule);
|
||||
begin
|
||||
if result^.main = false then
|
||||
WriteString(context^.output, "IMPLEMENTATION ")
|
||||
end;
|
||||
WriteString(context^.output, "MODULE ");
|
||||
|
||||
(* Write the module name and end the line with a semicolon and newline. *)
|
||||
transpile_module_name(context);
|
||||
|
||||
write_semicolon(context^.output);
|
||||
WriteLine(context^.output);
|
||||
|
||||
(* Write the module body. *)
|
||||
|
||||
transpile_import_part(context, result^.imports);
|
||||
transpile_constant_part(context, result^.constants, true);
|
||||
transpile_type_part(context, result^.types);
|
||||
transpile_variable_part(context, result^.variables, true);
|
||||
transpile_procedure_part(context, result^.procedures);
|
||||
transpile_statement_part(context, result^.statements);
|
||||
|
||||
WriteString(context^.output, "END ");
|
||||
transpile_module_name(context);
|
||||
|
||||
WriteChar(context^.output, ".");
|
||||
WriteLine(context^.output)
|
||||
end;
|
||||
|
||||
proc transpile_type_fields(context: ^TranspilerContext, fields: ^AstFieldDeclaration);
|
||||
var
|
||||
current_field: ^AstFieldDeclaration;
|
||||
begin
|
||||
current_field := fields;
|
||||
|
||||
while current_field^.field_name[1] <> '\0' do
|
||||
WriteString(context^.output, " ");
|
||||
transpile_identifier(context, current_field^.field_name);
|
||||
|
||||
WriteString(context^.output, ": ");
|
||||
transpile_type_expression(context, current_field^.field_type);
|
||||
|
||||
current_field := current_field + 1;
|
||||
|
||||
if current_field^.field_name[1] <> '\0' then
|
||||
WriteChar(context^.output, ';')
|
||||
end;
|
||||
WriteLine(context^.output)
|
||||
end
|
||||
end;
|
||||
|
||||
proc transpile_record_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression);
|
||||
begin
|
||||
WriteString(context^.output, "RECORD");
|
||||
WriteLine(context^.output);
|
||||
transpile_type_fields(context, type_expression^.fields);
|
||||
WriteString(context^.output, " END")
|
||||
end;
|
||||
|
||||
proc transpile_pointer_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression);
|
||||
begin
|
||||
WriteString(context^.output, "POINTER TO ");
|
||||
|
||||
transpile_type_expression(context, type_expression^.target)
|
||||
end;
|
||||
|
||||
proc transpile_array_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression);
|
||||
var
|
||||
buffer: [20]Char;
|
||||
begin
|
||||
WriteString(context^.output, "ARRAY");
|
||||
|
||||
if type_expression^.length <> 0 then
|
||||
WriteString(context^.output, "[1..");
|
||||
|
||||
IntToStr(type_expression^.length, 0, buffer);
|
||||
WriteString(context^.output, buffer);
|
||||
|
||||
WriteChar(context^.output, ']')
|
||||
end;
|
||||
WriteString(context^.output, " OF ");
|
||||
|
||||
transpile_type_expression(context, type_expression^.base)
|
||||
end;
|
||||
|
||||
proc transpile_enumeration_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression);
|
||||
var
|
||||
current_case: ^Identifier;
|
||||
begin
|
||||
current_case := type_expression^.cases;
|
||||
|
||||
WriteString(context^.output, "(");
|
||||
WriteLine(context^.output);
|
||||
WriteString(context^.output, " ");
|
||||
transpile_identifier(context, current_case^);
|
||||
current_case := current_case + 1;
|
||||
|
||||
while current_case^[1] <> '\0' do
|
||||
WriteChar(context^.output, ',');
|
||||
WriteLine(context^.output);
|
||||
WriteString(context^.output, " ");
|
||||
transpile_identifier(context, current_case^);
|
||||
|
||||
current_case := current_case + 1
|
||||
end;
|
||||
WriteLine(context^.output);
|
||||
WriteString(context^.output, " )")
|
||||
end;
|
||||
|
||||
proc transpile_identifier(context: ^TranspilerContext, identifier: Identifier);
|
||||
var
|
||||
written_bytes: Word;
|
||||
begin
|
||||
written_bytes := WriteNBytes(context^.output, cast(identifier[1]: Word), @identifier[2])
|
||||
end;
|
||||
|
||||
proc transpile_procedure_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression);
|
||||
var
|
||||
result: ^AstTypeExpression;
|
||||
current_parameter: ^^AstTypeExpression;
|
||||
parameter_count: Word;
|
||||
begin
|
||||
WriteString(context^.output, "PROCEDURE(");
|
||||
current_parameter := type_expression^.parameters;
|
||||
|
||||
while current_parameter^ <> nil do
|
||||
transpile_type_expression(context, current_parameter^);
|
||||
|
||||
current_parameter := current_parameter + 1;
|
||||
|
||||
if current_parameter^ <> nil then
|
||||
WriteString(context^.output, ", ")
|
||||
end
|
||||
end;
|
||||
WriteChar(context^.output, ')')
|
||||
end;
|
||||
|
||||
proc transpile_type_expression(context: ^TranspilerContext, type_expression: ^AstTypeExpression);
|
||||
begin
|
||||
if type_expression^.kind = astTypeExpressionKindRecord then
|
||||
transpile_record_type(context, type_expression)
|
||||
end;
|
||||
if type_expression^.kind = astTypeExpressionKindEnumeration then
|
||||
transpile_enumeration_type(context, type_expression)
|
||||
end;
|
||||
if type_expression^.kind = astTypeExpressionKindArray then
|
||||
transpile_array_type(context, type_expression)
|
||||
end;
|
||||
if type_expression^.kind = astTypeExpressionKindPointer then
|
||||
transpile_pointer_type(context, type_expression)
|
||||
end;
|
||||
if type_expression^.kind = astTypeExpressionKindProcedure then
|
||||
transpile_procedure_type(context, type_expression)
|
||||
end;
|
||||
if type_expression^.kind = astTypeExpressionKindNamed then
|
||||
transpile_identifier(context, type_expression^.name)
|
||||
end
|
||||
end;
|
||||
|
||||
proc transpile_type_declaration(context: ^TranspilerContext, declaration: ^AstTypedDeclaration);
|
||||
var
|
||||
written_bytes: Word;
|
||||
begin
|
||||
WriteString(context^.output, " ");
|
||||
|
||||
transpile_identifier(context^.output, declaration^.identifier);
|
||||
WriteString(context^.output, " = ");
|
||||
|
||||
transpile_type_expression(context, declaration^.type_expression);
|
||||
write_semicolon(context^.output)
|
||||
end;
|
||||
|
||||
proc transpile_type_part(context: ^TranspilerContext, declarations: ^^AstTypedDeclaration);
|
||||
var
|
||||
current_declaration: ^^AstTypedDeclaration;
|
||||
begin
|
||||
if declarations^ <> nil then
|
||||
WriteString(context^.output, "TYPE");
|
||||
WriteLine(context^.output);
|
||||
|
||||
current_declaration := declarations;
|
||||
while current_declaration^ <> nil do
|
||||
transpile_type_declaration(context, current_declaration^);
|
||||
|
||||
current_declaration := current_declaration + 1
|
||||
end;
|
||||
WriteLine(context^.output)
|
||||
end
|
||||
end;
|
||||
|
||||
proc transpile_variable_declaration(context: ^TranspilerContext, declaration: ^AstVariableDeclaration);
|
||||
begin
|
||||
WriteString(context^.output, " ");
|
||||
transpile_identifier(context, declaration^.variable_name);
|
||||
|
||||
WriteString(context^.output, ": ");
|
||||
|
||||
transpile_type_expression(context, declaration^.variable_type);
|
||||
write_semicolon(context^.output)
|
||||
end;
|
||||
|
||||
proc transpile_variable_part(context: ^TranspilerContext, declarations: ^^AstVariableDeclaration, extra_newline: Bool);
|
||||
var
|
||||
current_declaration: ^^AstVariableDeclaration;
|
||||
begin
|
||||
if declarations^ <> nil then
|
||||
WriteString(context^.output, "VAR");
|
||||
WriteLine(context^.output);
|
||||
|
||||
current_declaration := declarations;
|
||||
while current_declaration^ <> nil do
|
||||
transpile_variable_declaration(context, current_declaration^);
|
||||
|
||||
current_declaration := current_declaration + 1
|
||||
end;
|
||||
if extra_newline then
|
||||
WriteLine(context^.output)
|
||||
end
|
||||
end
|
||||
end;
|
||||
|
||||
proc transpile_procedure_heading(context: ^TranspilerContext, declaration: ^AstProcedureDeclaration);
|
||||
var
|
||||
parameter_index: Word;
|
||||
current_parameter: ^AstTypedDeclaration;
|
||||
begin
|
||||
WriteString(context^.output, "PROCEDURE ");
|
||||
transpile_identifier(context, declaration^.name);
|
||||
WriteChar(context^.output, '(');
|
||||
|
||||
parameter_index := 0;
|
||||
current_parameter := declaration^.parameters;
|
||||
|
||||
while parameter_index < declaration^.parameter_count do
|
||||
transpile_identifier(context, current_parameter^.identifier);
|
||||
WriteString(context^.output, ": ");
|
||||
transpile_type_expression(context, current_parameter^.type_expression);
|
||||
|
||||
parameter_index := parameter_index + 1u;
|
||||
current_parameter := current_parameter + 1;
|
||||
|
||||
if parameter_index <> declaration^.parameter_count then
|
||||
WriteString(context^.output, "; ")
|
||||
end
|
||||
end;
|
||||
|
||||
WriteString(context^.output, ")");
|
||||
|
||||
(* Check for the return type and write it. *)
|
||||
if declaration^.return_type <> nil then
|
||||
WriteString(context^.output, ": ");
|
||||
transpile_type_expression(context, declaration^.return_type)
|
||||
end;
|
||||
write_semicolon(context^.output)
|
||||
end;
|
||||
|
||||
proc transpile_unary_operator(context: ^TranspilerContext, operator: AstUnaryOperator);
|
||||
begin
|
||||
if operator = AstUnaryOperator.minus then
|
||||
WriteChar(context^.output, '-')
|
||||
end;
|
||||
if operator = AstUnaryOperator.not then
|
||||
WriteChar(context^.output, '~')
|
||||
end
|
||||
end;
|
||||
|
||||
proc transpile_binary_operator(context: ^TranspilerContext, operator: AstBinaryOperator);
|
||||
begin
|
||||
case operator of
|
||||
AstBinaryOperator.sum: WriteChar(context^.output, '+')
|
||||
| AstBinaryOperator.subtraction: WriteChar(context^.output, '-')
|
||||
| AstBinaryOperator.multiplication: WriteChar(context^.output, '*')
|
||||
| AstBinaryOperator.equals: WriteChar(context^.output, '=')
|
||||
| AstBinaryOperator.not_equals: WriteChar(context^.output, '#')
|
||||
| AstBinaryOperator.less: WriteChar(context^.output, '<')
|
||||
| AstBinaryOperator.greater: WriteChar(context^.output, '>')
|
||||
| AstBinaryOperator.less_equal: WriteString(context^.output, "<=")
|
||||
| AstBinaryOperator.greater_equal: WriteString(context^.output, ">=")
|
||||
| AstBinaryOperator.disjunction: WriteString(context^.output, "OR")
|
||||
| AstBinaryOperatorConjunction: WriteString(context^.output, "AND")
|
||||
end
|
||||
end;
|
||||
|
||||
proc transpile_expression(context: ^TranspilerContext, expression: ^AstExpression);
|
||||
var
|
||||
literal: ^AstLiteral;
|
||||
buffer: [20]Char;
|
||||
argument_index: Word;
|
||||
current_argument: ^^AstExpression;
|
||||
begin
|
||||
if expression^.kind = astExpressionKindLiteral then
|
||||
literal := expression^.literal;
|
||||
|
||||
if literal^.kind = AstLiteralKind.integer then
|
||||
IntToStr(literal^.integer, 0, buffer);
|
||||
WriteString(context^.output, buffer)
|
||||
end;
|
||||
if literal^.kind = AstLiteralKind.string then
|
||||
WriteString(context^.output, literal^.string)
|
||||
end;
|
||||
if literal^.kind = AstLiteralKind.null then
|
||||
WriteString(context^.output, "NIL")
|
||||
end;
|
||||
if (literal^.kind = AstLiteralKind.boolean) & literal^.boolean then
|
||||
WriteString(context^.output, "TRUE")
|
||||
end;
|
||||
if (literal^.kind = AstLiteralKind.boolean) & (literal^.boolean = false) then
|
||||
WriteString(context^.output, "FALSE")
|
||||
end
|
||||
end;
|
||||
if expression^.kind = astExpressionKindIdentifier then
|
||||
transpile_identifier(context, expression^.identifier)
|
||||
end;
|
||||
if expression^.kind = astExpressionKindDereference then
|
||||
transpile_expression(context, expression^.reference);
|
||||
WriteChar(context^.output, '^')
|
||||
end;
|
||||
if expression^.kind = astExpressionKindArrayAccess then
|
||||
transpile_expression(context, expression^.array);
|
||||
WriteChar(context^.output, '[');
|
||||
transpile_expression(context, expression^.index);
|
||||
WriteChar(context^.output, ']')
|
||||
end;
|
||||
if expression^.kind = astExpressionKindFieldAccess then
|
||||
transpile_expression(context, expression^.aggregate);
|
||||
WriteChar(context^.output, '.');
|
||||
transpile_identifier(contextexpression^.field)
|
||||
end;
|
||||
if expression^.kind = astExpressionKindUnary then
|
||||
transpile_unary_operator(context, expression^.unary_operator);
|
||||
transpile_expression(context, expression^.unary_operand)
|
||||
end;
|
||||
if expression^.kind = astExpressionKindBinary then
|
||||
WriteChar(context^.output, '(');
|
||||
transpile_expression(context, expression^.lhs);
|
||||
WriteChar(context^.output, ' ');
|
||||
transpile_binary_operator(context, expression^.binary_operator);
|
||||
WriteChar(context^.output, ' ');
|
||||
transpile_expression(context, expression^.rhs);
|
||||
WriteChar(context^.output, ')')
|
||||
end;
|
||||
if expression^.kind = astExpressionKindCall then
|
||||
transpile_expression(context, expression^.callable);
|
||||
WriteChar(context^.output, '(');
|
||||
|
||||
current_argument := expression^.arguments;
|
||||
if expression^.argument_count > 0 then
|
||||
transpile_expression(context, current_argument^);
|
||||
|
||||
argument_index := 1u;
|
||||
current_argument := current_argument + 1;
|
||||
|
||||
while argument_index < expression^.argument_count do
|
||||
WriteString(context^.output, ", ");
|
||||
|
||||
transpile_expression(context, current_argument^);
|
||||
|
||||
current_argument := current_argument + 1;
|
||||
argument_index := argument_index + 1u
|
||||
end
|
||||
end;
|
||||
WriteChar(context^.output, ')')
|
||||
end
|
||||
end;
|
||||
|
||||
proc transpile_if_statement(context: ^TranspilerContext, statement: ^AstStatement);
|
||||
begin
|
||||
WriteString(context^.output, "IF ");
|
||||
transpile_expression(context, statement^.if_condition);
|
||||
|
||||
WriteString(context^.output, " THEN");
|
||||
WriteLine(context^.output);
|
||||
context^.indentation := context^.indentation + 1u;
|
||||
|
||||
transpile_compound_statement(context, statement^.if_branch);
|
||||
context^.indentation := context^.indentation - 1u;
|
||||
indent(context);
|
||||
WriteString(context^.output, "END")
|
||||
end;
|
||||
|
||||
proc transpile_while_statement(context: ^TranspilerContext, statement: ^AstStatement);
|
||||
begin
|
||||
WriteString(context^.output, "WHILE ");
|
||||
transpile_expression(context, statement^.while_condition);
|
||||
|
||||
WriteString(context^.output, " DO");
|
||||
WriteLine(context^.output);
|
||||
context^.indentation := context^.indentation + 1u;
|
||||
|
||||
transpile_compound_statement(context, statement^.while_body);
|
||||
context^.indentation := context^.indentation - 1u;
|
||||
indent(context);
|
||||
WriteString(context^.output, "END")
|
||||
end;
|
||||
|
||||
proc transpile_assignment_statement(context: ^TranspilerContext, statement: ^AstStatement);
|
||||
begin
|
||||
transpile_expression(context, statement^.assignee);
|
||||
WriteString(context^.output, " := ");
|
||||
transpile_expression(context, statement^.assignment)
|
||||
end;
|
||||
|
||||
proc transpile_return_statement(context: ^TranspilerContext, statement: ^AstStatement);
|
||||
begin
|
||||
WriteString(context^.output, "RETURN ");
|
||||
|
||||
transpile_expression(context, statement^.returned)
|
||||
end;
|
||||
|
||||
proc transpile_compound_statement(context: ^TranspilerContext, statement: AstCompoundStatement);
|
||||
var
|
||||
current_statement: ^^AstStatement;
|
||||
index: Word;
|
||||
begin
|
||||
index := 0;
|
||||
current_statement := statement.statements;
|
||||
|
||||
while index < statement.count do
|
||||
transpile_statement(context, current_statement^);
|
||||
|
||||
current_statement := current_statement + 1;
|
||||
index := index + 1u;
|
||||
|
||||
if index <> statement.count then
|
||||
WriteChar(context^.output, ';')
|
||||
end;
|
||||
WriteLine(context^.output)
|
||||
end
|
||||
end;
|
||||
|
||||
proc transpile_statement(context: ^TranspilerContext, statement: ^AstStatement);
|
||||
begin
|
||||
indent(context);
|
||||
|
||||
if statement^.kind = astStatementKindIf then
|
||||
transpile_if_statement(context, statement)
|
||||
end;
|
||||
if statement^.kind = astStatementKindWhile then
|
||||
transpile_while_statement(context, statement)
|
||||
end;
|
||||
if statement^.kind = astStatementKindReturn then
|
||||
transpile_return_statement(context, statement)
|
||||
end;
|
||||
if statement^.kind = astStatementKindAssignment then
|
||||
transpile_assignment_statement(context, statement)
|
||||
end;
|
||||
if statement^.kind = astStatementKindCall then
|
||||
transpile_expression(context, statement^.call)
|
||||
end
|
||||
end;
|
||||
|
||||
proc transpile_statement_part(context: ^TranspilerContext, compound: AstCompoundStatement);
|
||||
begin
|
||||
if compound.count > 0 then
|
||||
WriteString(context^.output, "BEGIN");
|
||||
WriteLine(context^.output);
|
||||
|
||||
context^.indentation := context^.indentation + 1u;
|
||||
transpile_compound_statement(context, compound);
|
||||
context^.indentation := context^.indentation - 1u;
|
||||
end
|
||||
end;
|
||||
|
||||
proc transpile_procedure_declaration(context: ^TranspilerContext, declaration: ^AstProcedureDeclaration);
|
||||
begin
|
||||
transpile_procedure_heading(context, declaration);
|
||||
|
||||
transpile_constant_part(context, declaration^.constants, false);
|
||||
transpile_variable_part(context, declaration^.variables, false);
|
||||
transpile_statement_part(context, declaration^.statements);
|
||||
|
||||
WriteString(context^.output, "END ");
|
||||
transpile_identifier(context^.output, declaration^.name);
|
||||
|
||||
write_semicolon(context^.output)
|
||||
end;
|
||||
|
||||
proc transpile_procedure_part(context: ^TranspilerContext, declaration: ^^AstProcedureDeclaration);
|
||||
begin
|
||||
while declaration^ <> nil do
|
||||
transpile_procedure_declaration(context, declaration^);
|
||||
WriteLine(context^.output);
|
||||
|
||||
declaration := declaration + 1
|
||||
end
|
||||
end;
|
||||
|
||||
proc transpile_module_name(context: ^TranspilerContext);
|
||||
var
|
||||
counter: Word;
|
||||
last_slash: Word;
|
||||
begin
|
||||
counter := 1u;
|
||||
last_slash := 0u;
|
||||
|
||||
while context^.input_name[counter] <> '.' & context^.input_name[counter] <> '\0' do
|
||||
if context^.input_name[counter] = '/' then
|
||||
last_slash := counter
|
||||
end;
|
||||
counter := counter + 1u
|
||||
end;
|
||||
|
||||
if last_slash = 0u then
|
||||
counter := 1u
|
||||
end;
|
||||
if last_slash <> 0u then
|
||||
counter := last_slash + 1u
|
||||
end;
|
||||
while context^.input_name[counter] <> '.' & context^.input_name[counter] <> '\0' do
|
||||
WriteChar(context^.output, context^.input_name[counter]);
|
||||
counter := counter + 1u
|
||||
end
|
||||
end;
|
||||
|
||||
proc transpile*(ast_module: ^AstModule, output: File, definition: File, input_name: String);
|
||||
var
|
||||
context: TranspilerContext;
|
||||
begin
|
||||
context.input_name := input_name;
|
||||
context.output := output;
|
||||
context.definition := definition;
|
||||
context.indentation := 0u;
|
||||
|
||||
transpile_module(@context, ast_module)
|
||||
end;
|
||||
|
||||
end.
|
||||
14
source/cctype.elna
Normal file
14
source/cctype.elna
Normal file
@@ -0,0 +1,14 @@
|
||||
(* This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
obtain one at https://mozilla.org/MPL/2.0/. *)
|
||||
module;
|
||||
|
||||
proc isdigit*(c: Int ) -> Int; extern;
|
||||
proc isalnum*(c: Int) -> Int; extern;
|
||||
proc isalpha*(c: Int) -> Int; extern;
|
||||
proc isspace*(c: Int) -> Int; extern;
|
||||
|
||||
proc tolower*(c: Int) -> Int; extern;
|
||||
proc toupper*(c: Int) -> Int; extern;
|
||||
|
||||
end.
|
||||
93
source/command_line_interface.elna
Normal file
93
source/command_line_interface.elna
Normal file
@@ -0,0 +1,93 @@
|
||||
(* This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
obtain one at https://mozilla.org/MPL/2.0/. *)
|
||||
|
||||
(*
|
||||
Command line handling.
|
||||
*)
|
||||
module;
|
||||
|
||||
import cstdlib, cstring, common;
|
||||
|
||||
type
|
||||
CommandLine* = record
|
||||
input: ^Char;
|
||||
output: ^Char;
|
||||
lex: Bool;
|
||||
parse: Bool
|
||||
end;
|
||||
|
||||
proc parse_command_line*(argc: Int, argv: ^^Char) -> ^CommandLine;
|
||||
var
|
||||
parameter: ^Char;
|
||||
i: Int;
|
||||
result: ^CommandLine;
|
||||
parsed: Bool;
|
||||
begin
|
||||
i := 1;
|
||||
result := cast(malloc(#size(CommandLine)): ^CommandLine);
|
||||
result^.lex := false;
|
||||
result^.parse := false;
|
||||
result^.input := nil;
|
||||
result^.output := nil;
|
||||
|
||||
while i < argc & result <> nil do
|
||||
parameter := (argv + i)^;
|
||||
parsed := false;
|
||||
|
||||
if strcmp(parameter, "--lex\0".ptr) = 0 then
|
||||
parsed := true;
|
||||
result^.lex := true
|
||||
end;
|
||||
if strcmp(parameter, "--parse\0".ptr) = 0 then
|
||||
parsed := true;
|
||||
result^.parse := true
|
||||
end;
|
||||
if strcmp(parameter, "-o\0".ptr) = 0 then
|
||||
i := i + 1;
|
||||
|
||||
if i = argc then
|
||||
write_s("Fatal error: expecting a file name following -o.");
|
||||
result := nil
|
||||
end;
|
||||
if i < argc then
|
||||
parameter := (argv + i)^;
|
||||
result^.output := parameter
|
||||
end;
|
||||
parsed := true
|
||||
end;
|
||||
if (parameter^ <> '-') & ~parsed then
|
||||
parsed := true;
|
||||
|
||||
if result^.input <> nil then
|
||||
write_s("Fatal error: only one source file can be compiled at once. First given \"");
|
||||
write_z(result^.input);
|
||||
write_s("\", then \"");
|
||||
write_z(parameter);
|
||||
write_s("\".\n");
|
||||
result := nil
|
||||
end;
|
||||
if result <> nil then
|
||||
result^.input := parameter
|
||||
end
|
||||
end;
|
||||
if ~parsed then
|
||||
write_s("Fatal error: unknown command line options: ");
|
||||
|
||||
write_z(parameter);
|
||||
write_s(".\n");
|
||||
|
||||
result := nil
|
||||
end;
|
||||
|
||||
i := i + 1
|
||||
end;
|
||||
if result <> nil & result^.input = nil then
|
||||
write_s("Fatal error: no input files.\n");
|
||||
result := nil
|
||||
end;
|
||||
|
||||
return result
|
||||
end;
|
||||
|
||||
end.
|
||||
72
source/common.elna
Normal file
72
source/common.elna
Normal file
@@ -0,0 +1,72 @@
|
||||
(* This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
obtain one at https://mozilla.org/MPL/2.0/. *)
|
||||
module;
|
||||
|
||||
import cstring, cstdio;
|
||||
|
||||
type
|
||||
Identifier = [256]Char;
|
||||
TextLocation* = record
|
||||
line: Word;
|
||||
column: Word
|
||||
end;
|
||||
|
||||
proc write*(fd: Int, buf: Pointer, Word: Int) -> Int; extern;
|
||||
|
||||
proc write_s*(value: String);
|
||||
begin
|
||||
(* fwrite(cast(value.ptr: Pointer), value.length, 1u, stdout) *)
|
||||
write(1, cast(value.ptr: Pointer), cast(value.length: Int))
|
||||
end;
|
||||
|
||||
proc write_z*(value: ^Char);
|
||||
begin
|
||||
write(1, cast(value: Pointer), cast(strlen(value): Int))
|
||||
end;
|
||||
|
||||
proc write_b*(value: Bool);
|
||||
begin
|
||||
if value then
|
||||
write_s("true")
|
||||
else
|
||||
write_s("false")
|
||||
end
|
||||
end;
|
||||
|
||||
proc write_c*(value: Char);
|
||||
begin
|
||||
putchar(cast(value: Int));
|
||||
fflush(nil)
|
||||
end;
|
||||
|
||||
proc write_i*(value: Int);
|
||||
var
|
||||
digit: Int;
|
||||
n: Word;
|
||||
buffer: [10]Char;
|
||||
begin
|
||||
n := 10u;
|
||||
|
||||
if value = 0 then
|
||||
write_c('0')
|
||||
end;
|
||||
while value <> 0 do
|
||||
digit := value % 10;
|
||||
value := value / 10;
|
||||
|
||||
buffer[n] := cast(cast('0': Int) + digit: Char);
|
||||
n := n - 1u
|
||||
end;
|
||||
while n < 10u do
|
||||
n := n + 1u;
|
||||
write_c(buffer[n])
|
||||
end
|
||||
end;
|
||||
|
||||
proc write_u*(value: Word);
|
||||
begin
|
||||
write_i(cast(value: Int))
|
||||
end;
|
||||
|
||||
end.
|
||||
29
source/cstdio.elna
Normal file
29
source/cstdio.elna
Normal file
@@ -0,0 +1,29 @@
|
||||
(* This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
obtain one at https://mozilla.org/MPL/2.0/. *)
|
||||
module;
|
||||
|
||||
type
|
||||
FILE* = record end;
|
||||
|
||||
var
|
||||
stdin*: ^FILE := extern;
|
||||
stdout*: ^FILE := extern;
|
||||
stderr*: ^FILE := extern;
|
||||
|
||||
proc fopen*(pathname: ^Char, mode: ^Char) -> ^FILE; extern;
|
||||
proc fclose*(stream: ^FILE) -> Int; extern;
|
||||
proc fseek*(stream: ^FILE, off: Int, whence: Int) -> Int; extern;
|
||||
proc rewind*(stream: ^FILE); extern;
|
||||
proc ftell*(stream: ^FILE) -> Int; extern;
|
||||
proc fflush*(stream: ^FILE) -> Int; extern;
|
||||
|
||||
proc fread*(ptr: Pointer, size: Word, nmemb: Word, stream: ^FILE) -> Word; extern;
|
||||
proc fwrite*(ptr: Pointer, size: Word, nitems: Word, stream: ^FILE) -> Word; extern;
|
||||
|
||||
proc perror(s: ^Char); extern;
|
||||
|
||||
proc puts(s: ^Char) -> Int; extern;
|
||||
proc putchar(c: Int) -> Int; extern;
|
||||
|
||||
end.
|
||||
15
source/cstdlib.elna
Normal file
15
source/cstdlib.elna
Normal file
@@ -0,0 +1,15 @@
|
||||
(* This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
obtain one at https://mozilla.org/MPL/2.0/. *)
|
||||
module;
|
||||
|
||||
proc malloc(size: Word) -> Pointer; extern;
|
||||
proc free(ptr: Pointer); extern;
|
||||
proc calloc(nmemb: Word, size: Word) -> Pointer; extern;
|
||||
proc realloc(ptr: Pointer, size: Word) -> Pointer; extern;
|
||||
|
||||
proc atoi(str: ^Char) -> Int; extern;
|
||||
|
||||
proc exit(code: Int) -> !; extern;
|
||||
|
||||
end.
|
||||
15
source/cstring.elna
Normal file
15
source/cstring.elna
Normal file
@@ -0,0 +1,15 @@
|
||||
(* This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
obtain one at https://mozilla.org/MPL/2.0/. *)
|
||||
module;
|
||||
|
||||
proc memset(ptr: Pointer, c: Int, n: Word) -> ^Char; extern;
|
||||
proc memcpy(dst: Pointer, src: Pointer, n: Word); extern;
|
||||
|
||||
proc strcmp(s1: ^Char, s2: ^Char) -> Int; extern;
|
||||
proc strncmp(s1: ^Char, s2: ^Char, n: Word) -> Int; extern;
|
||||
proc strncpy(dst: ^Char, src: ^Char, dsize: Word) -> ^Char; extern;
|
||||
proc strcpy(dst: ^Char, src: ^Char) -> ^Char; extern;
|
||||
proc strlen(ptr: ^Char) -> Word; extern;
|
||||
|
||||
end.
|
||||
952
source/lexer.elna
Normal file
952
source/lexer.elna
Normal file
@@ -0,0 +1,952 @@
|
||||
(* This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
obtain one at https://mozilla.org/MPL/2.0/. *)
|
||||
module;
|
||||
|
||||
import cstdio, cstring, cctype, cstdlib, common;
|
||||
|
||||
const
|
||||
CHUNK_SIZE := 85536u;
|
||||
|
||||
type
|
||||
(*
|
||||
* Classification table assigns each possible character to a group (class). All
|
||||
* characters of the same group are handled equivalently.
|
||||
*
|
||||
* Classification:
|
||||
*)
|
||||
TransitionClass = (
|
||||
invalid,
|
||||
digit,
|
||||
alpha,
|
||||
space,
|
||||
colon,
|
||||
equals,
|
||||
left_paren,
|
||||
right_paren,
|
||||
asterisk,
|
||||
underscore,
|
||||
single,
|
||||
hex,
|
||||
zero,
|
||||
x,
|
||||
eof,
|
||||
dot,
|
||||
minus,
|
||||
single_quote,
|
||||
double_quote,
|
||||
greater,
|
||||
less,
|
||||
other
|
||||
);
|
||||
TransitionState = (
|
||||
start,
|
||||
colon,
|
||||
identifier,
|
||||
decimal,
|
||||
greater,
|
||||
minus,
|
||||
left_paren,
|
||||
less,
|
||||
dot,
|
||||
comment,
|
||||
closing_comment,
|
||||
character,
|
||||
string,
|
||||
leading_zero,
|
||||
decimal_suffix,
|
||||
finish
|
||||
);
|
||||
LexerToken = record
|
||||
kind: LexerKind;
|
||||
value: union
|
||||
booleanKind: Bool;
|
||||
identifierKind: Identifier;
|
||||
integerKind: Int;
|
||||
stringKind: String
|
||||
end;
|
||||
start_location: TextLocation;
|
||||
end_location: TextLocation
|
||||
end;
|
||||
TransitionAction = proc(^Lexer, ^LexerToken);
|
||||
Transition = record
|
||||
action: TransitionAction;
|
||||
next_state: TransitionState
|
||||
end;
|
||||
TransitionClasses = [22]Transition;
|
||||
|
||||
BufferPosition* = record
|
||||
iterator: ^Char;
|
||||
location: TextLocation
|
||||
end;
|
||||
Lexer* = record
|
||||
input: ^FILE;
|
||||
buffer: ^Char;
|
||||
size: Word;
|
||||
length: Word;
|
||||
start: BufferPosition;
|
||||
current: BufferPosition
|
||||
end;
|
||||
LexerKind* = (
|
||||
unknown,
|
||||
identifier,
|
||||
_if,
|
||||
_then,
|
||||
_else,
|
||||
_elsif,
|
||||
_while,
|
||||
_do,
|
||||
_proc,
|
||||
_begin,
|
||||
_end,
|
||||
_extern,
|
||||
_const,
|
||||
_var,
|
||||
_case,
|
||||
_of,
|
||||
_type,
|
||||
_record,
|
||||
_union,
|
||||
pipe,
|
||||
to,
|
||||
boolean,
|
||||
null,
|
||||
and,
|
||||
_or,
|
||||
_xor,
|
||||
not,
|
||||
_return,
|
||||
_cast,
|
||||
shift_left,
|
||||
shift_right,
|
||||
left_paren,
|
||||
right_paren,
|
||||
left_square,
|
||||
right_square,
|
||||
greater_equal,
|
||||
less_equal,
|
||||
greater_than,
|
||||
less_than,
|
||||
not_equal,
|
||||
equal,
|
||||
semicolon,
|
||||
dot,
|
||||
comma,
|
||||
plus,
|
||||
minus,
|
||||
multiplication,
|
||||
division,
|
||||
remainder,
|
||||
assignment,
|
||||
colon,
|
||||
hat,
|
||||
at,
|
||||
comment,
|
||||
integer,
|
||||
word,
|
||||
character,
|
||||
string,
|
||||
_defer,
|
||||
exclamation,
|
||||
arrow,
|
||||
trait,
|
||||
_program,
|
||||
_module,
|
||||
_import
|
||||
);
|
||||
|
||||
var
|
||||
classification: [128]TransitionClass;
|
||||
transitions: [16]TransitionClasses;
|
||||
|
||||
proc initialize_classification();
|
||||
var
|
||||
i: Word;
|
||||
begin
|
||||
classification[1] := TransitionClass.eof; (* NUL *)
|
||||
classification[2] := TransitionClass.invalid; (* SOH *)
|
||||
classification[3] := TransitionClass.invalid; (* STX *)
|
||||
classification[4] := TransitionClass.invalid; (* ETX *)
|
||||
classification[5] := TransitionClass.invalid; (* EOT *)
|
||||
classification[6] := TransitionClass.invalid; (* EMQ *)
|
||||
classification[7] := TransitionClass.invalid; (* ACK *)
|
||||
classification[8] := TransitionClass.invalid; (* BEL *)
|
||||
classification[9] := TransitionClass.invalid; (* BS *)
|
||||
classification[10] := TransitionClass.space; (* HT *)
|
||||
classification[11] := TransitionClass.space; (* LF *)
|
||||
classification[12] := TransitionClass.invalid; (* VT *)
|
||||
classification[13] := TransitionClass.invalid; (* FF *)
|
||||
classification[14] := TransitionClass.space; (* CR *)
|
||||
classification[15] := TransitionClass.invalid; (* SO *)
|
||||
classification[16] := TransitionClass.invalid; (* SI *)
|
||||
classification[17] := TransitionClass.invalid; (* DLE *)
|
||||
classification[18] := TransitionClass.invalid; (* DC1 *)
|
||||
classification[19] := TransitionClass.invalid; (* DC2 *)
|
||||
classification[20] := TransitionClass.invalid; (* DC3 *)
|
||||
classification[21] := TransitionClass.invalid; (* DC4 *)
|
||||
classification[22] := TransitionClass.invalid; (* NAK *)
|
||||
classification[23] := TransitionClass.invalid; (* SYN *)
|
||||
classification[24] := TransitionClass.invalid; (* ETB *)
|
||||
classification[25] := TransitionClass.invalid; (* CAN *)
|
||||
classification[26] := TransitionClass.invalid; (* EM *)
|
||||
classification[27] := TransitionClass.invalid; (* SUB *)
|
||||
classification[28] := TransitionClass.invalid; (* ESC *)
|
||||
classification[29] := TransitionClass.invalid; (* FS *)
|
||||
classification[30] := TransitionClass.invalid; (* GS *)
|
||||
classification[31] := TransitionClass.invalid; (* RS *)
|
||||
classification[32] := TransitionClass.invalid; (* US *)
|
||||
classification[33] := TransitionClass.space; (* Space *)
|
||||
classification[34] := TransitionClass.single; (* ! *)
|
||||
classification[35] := TransitionClass.double_quote; (* " *)
|
||||
classification[36] := TransitionClass.other; (* # *)
|
||||
classification[37] := TransitionClass.other; (* $ *)
|
||||
classification[38] := TransitionClass.single; (* % *)
|
||||
classification[39] := TransitionClass.single; (* & *)
|
||||
classification[40] := TransitionClass.single_quote; (* ' *)
|
||||
classification[41] := TransitionClass.left_paren; (* ( *)
|
||||
classification[42] := TransitionClass.right_paren; (* ) *)
|
||||
classification[43] := TransitionClass.asterisk; (* * *)
|
||||
classification[44] := TransitionClass.single; (* + *)
|
||||
classification[45] := TransitionClass.single; (* , *)
|
||||
classification[46] := TransitionClass.minus; (* - *)
|
||||
classification[47] := TransitionClass.dot; (* . *)
|
||||
classification[48] := TransitionClass.single; (* / *)
|
||||
classification[49] := TransitionClass.zero; (* 0 *)
|
||||
classification[50] := TransitionClass.digit; (* 1 *)
|
||||
classification[51] := TransitionClass.digit; (* 2 *)
|
||||
classification[52] := TransitionClass.digit; (* 3 *)
|
||||
classification[53] := TransitionClass.digit; (* 4 *)
|
||||
classification[54] := TransitionClass.digit; (* 5 *)
|
||||
classification[55] := TransitionClass.digit; (* 6 *)
|
||||
classification[56] := TransitionClass.digit; (* 7 *)
|
||||
classification[57] := TransitionClass.digit; (* 8 *)
|
||||
classification[58] := TransitionClass.digit; (* 9 *)
|
||||
classification[59] := TransitionClass.colon; (* : *)
|
||||
classification[60] := TransitionClass.single; (* ; *)
|
||||
classification[61] := TransitionClass.less; (* < *)
|
||||
classification[62] := TransitionClass.equals; (* = *)
|
||||
classification[63] := TransitionClass.greater; (* > *)
|
||||
classification[64] := TransitionClass.other; (* ? *)
|
||||
classification[65] := TransitionClass.single; (* @ *)
|
||||
classification[66] := TransitionClass.alpha; (* A *)
|
||||
classification[67] := TransitionClass.alpha; (* B *)
|
||||
classification[68] := TransitionClass.alpha; (* C *)
|
||||
classification[69] := TransitionClass.alpha; (* D *)
|
||||
classification[70] := TransitionClass.alpha; (* E *)
|
||||
classification[71] := TransitionClass.alpha; (* F *)
|
||||
classification[72] := TransitionClass.alpha; (* G *)
|
||||
classification[73] := TransitionClass.alpha; (* H *)
|
||||
classification[74] := TransitionClass.alpha; (* I *)
|
||||
classification[75] := TransitionClass.alpha; (* J *)
|
||||
classification[76] := TransitionClass.alpha; (* K *)
|
||||
classification[77] := TransitionClass.alpha; (* L *)
|
||||
classification[78] := TransitionClass.alpha; (* M *)
|
||||
classification[79] := TransitionClass.alpha; (* N *)
|
||||
classification[80] := TransitionClass.alpha; (* O *)
|
||||
classification[81] := TransitionClass.alpha; (* P *)
|
||||
classification[82] := TransitionClass.alpha; (* Q *)
|
||||
classification[83] := TransitionClass.alpha; (* R *)
|
||||
classification[84] := TransitionClass.alpha; (* S *)
|
||||
classification[85] := TransitionClass.alpha; (* T *)
|
||||
classification[86] := TransitionClass.alpha; (* U *)
|
||||
classification[87] := TransitionClass.alpha; (* V *)
|
||||
classification[88] := TransitionClass.alpha; (* W *)
|
||||
classification[89] := TransitionClass.alpha; (* X *)
|
||||
classification[90] := TransitionClass.alpha; (* Y *)
|
||||
classification[91] := TransitionClass.alpha; (* Z *)
|
||||
classification[92] := TransitionClass.single; (* [ *)
|
||||
classification[93] := TransitionClass.other; (* \ *)
|
||||
classification[94] := TransitionClass.single; (* ] *)
|
||||
classification[95] := TransitionClass.single; (* ^ *)
|
||||
classification[96] := TransitionClass.underscore; (* _ *)
|
||||
classification[97] := TransitionClass.other; (* ` *)
|
||||
classification[98] := TransitionClass.hex; (* a *)
|
||||
classification[99] := TransitionClass.hex; (* b *)
|
||||
classification[100] := TransitionClass.hex; (* c *)
|
||||
classification[101] := TransitionClass.hex; (* d *)
|
||||
classification[102] := TransitionClass.hex; (* e *)
|
||||
classification[103] := TransitionClass.hex; (* f *)
|
||||
classification[104] := TransitionClass.alpha; (* g *)
|
||||
classification[105] := TransitionClass.alpha; (* h *)
|
||||
classification[106] := TransitionClass.alpha; (* i *)
|
||||
classification[107] := TransitionClass.alpha; (* j *)
|
||||
classification[108] := TransitionClass.alpha; (* k *)
|
||||
classification[109] := TransitionClass.alpha; (* l *)
|
||||
classification[110] := TransitionClass.alpha; (* m *)
|
||||
classification[111] := TransitionClass.alpha; (* n *)
|
||||
classification[112] := TransitionClass.alpha; (* o *)
|
||||
classification[113] := TransitionClass.alpha; (* p *)
|
||||
classification[114] := TransitionClass.alpha; (* q *)
|
||||
classification[115] := TransitionClass.alpha; (* r *)
|
||||
classification[116] := TransitionClass.alpha; (* s *)
|
||||
classification[117] := TransitionClass.alpha; (* t *)
|
||||
classification[118] := TransitionClass.alpha; (* u *)
|
||||
classification[119] := TransitionClass.alpha; (* v *)
|
||||
classification[120] := TransitionClass.alpha; (* w *)
|
||||
classification[121] := TransitionClass.x; (* x *)
|
||||
classification[122] := TransitionClass.alpha; (* y *)
|
||||
classification[123] := TransitionClass.alpha; (* z *)
|
||||
classification[124] := TransitionClass.other; (* { *)
|
||||
classification[125] := TransitionClass.single; (* | *)
|
||||
classification[126] := TransitionClass.other; (* } *)
|
||||
classification[127] := TransitionClass.single; (* ~ *)
|
||||
classification[128] := TransitionClass.invalid; (* DEL *)
|
||||
|
||||
i := 129u;
|
||||
while i <= 256u do
|
||||
classification[i] := TransitionClass.other;
|
||||
i := i + 1u
|
||||
end
|
||||
end;
|
||||
|
||||
proc compare_keyword(keyword: String, token_start: BufferPosition, token_end: ^Char) -> Bool;
|
||||
var
|
||||
result: Bool;
|
||||
index: Word;
|
||||
continue: Bool;
|
||||
begin
|
||||
index := 0u;
|
||||
result := true;
|
||||
continue := (index < keyword.length) & (token_start.iterator <> token_end);
|
||||
|
||||
while continue & result do
|
||||
result := keyword[index] = token_start.iterator^
|
||||
or cast(tolower(cast(keyword[index]: Int)): Char) = token_start.iterator^;
|
||||
token_start.iterator := token_start.iterator + 1;
|
||||
index := index + 1u;
|
||||
continue := (index < keyword.length) & (token_start.iterator <> token_end)
|
||||
end;
|
||||
result := result & index = keyword.length;
|
||||
|
||||
return result & (token_start.iterator = token_end)
|
||||
end;
|
||||
|
||||
(* Reached the end of file. *)
|
||||
proc transition_action_eof(lexer: ^Lexer, token: ^LexerToken);
|
||||
begin
|
||||
token^.kind := LexerKind.unknown
|
||||
end;
|
||||
|
||||
proc increment(position: ^BufferPosition);
|
||||
begin
|
||||
position^.iterator := position^.iterator + 1
|
||||
end;
|
||||
|
||||
(* Add the character to the token currently read and advance to the next character. *)
|
||||
proc transition_action_accumulate(lexer: ^Lexer, token: ^LexerToken);
|
||||
begin
|
||||
increment(@lexer^.current)
|
||||
end;
|
||||
|
||||
(* The current character is not a part of the token. Finish the token already
|
||||
* read. Don't advance to the next character. *)
|
||||
proc transition_action_finalize(lexer: ^Lexer, token: ^LexerToken);
|
||||
begin
|
||||
if lexer^.start.iterator^ = ':' then
|
||||
token^.kind := LexerKind.colon
|
||||
end;
|
||||
if lexer^.start.iterator^ = '>' then
|
||||
token^.kind := LexerKind.greater_than
|
||||
end;
|
||||
if lexer^.start.iterator^ = '<' then
|
||||
token^.kind := LexerKind.less_than
|
||||
end;
|
||||
if lexer^.start.iterator^ = '(' then
|
||||
token^.kind := LexerKind.left_paren
|
||||
end;
|
||||
if lexer^.start.iterator^ = '-' then
|
||||
token^.kind := LexerKind.minus
|
||||
end;
|
||||
if lexer^.start.iterator^ = '.' then
|
||||
token^.kind := LexerKind.dot
|
||||
end
|
||||
end;
|
||||
|
||||
(* An action for tokens containing multiple characters. *)
|
||||
proc transition_action_composite(lexer: ^Lexer, token: ^LexerToken);
|
||||
begin
|
||||
if lexer^.start.iterator^ = '<' then
|
||||
if lexer^.current.iterator^ = '>' then
|
||||
token^.kind := LexerKind.not_equal
|
||||
end;
|
||||
if lexer^.current.iterator^ = '=' then
|
||||
token^.kind := LexerKind.less_equal
|
||||
end
|
||||
end;
|
||||
if (lexer^.start.iterator^ = '>') & (lexer^.current.iterator^ = '=') then
|
||||
token^.kind := LexerKind.greater_equal
|
||||
end;
|
||||
if (lexer^.start.iterator^ = ':') & (lexer^.current.iterator^ = '=') then
|
||||
token^.kind := LexerKind.assignment
|
||||
end;
|
||||
if (lexer^.start.iterator^ = '-') & (lexer^.current.iterator^ = '>') then
|
||||
token^.kind := LexerKind.arrow
|
||||
end;
|
||||
increment(@lexer^.current)
|
||||
end;
|
||||
|
||||
(* Skip a space. *)
|
||||
proc transition_action_skip(lexer: ^Lexer, token: ^LexerToken);
|
||||
begin
|
||||
increment(@lexer^.start);
|
||||
|
||||
if lexer^.start.iterator^ = '\n' then
|
||||
lexer^.start.location.line := lexer^.start.location.line + 1u;
|
||||
lexer^.start.location.column := 1u
|
||||
end;
|
||||
lexer^.current := lexer^.start
|
||||
end;
|
||||
|
||||
(* Delimited string action. *)
|
||||
proc transition_action_delimited(lexer: ^Lexer, token: ^LexerToken);
|
||||
var
|
||||
text_length: Word;
|
||||
begin
|
||||
if lexer^.start.iterator^ = '(' then
|
||||
token^.kind := LexerKind.comment
|
||||
end;
|
||||
if lexer^.start.iterator^ = '"' then
|
||||
text_length := cast(lexer^.current.iterator - lexer^.start.iterator + 1: Word);
|
||||
|
||||
token^.value.stringKind := String(cast(malloc(text_length): ^Char), text_length);
|
||||
memcpy(cast(token^.value.stringKind.ptr: Pointer), cast(lexer^.start.iterator: Pointer), text_length);
|
||||
|
||||
token^.kind := LexerKind.character
|
||||
end;
|
||||
if lexer^.start.iterator^ = '\'' then
|
||||
text_length := cast(lexer^.current.iterator - lexer^.start.iterator + 1: Word);
|
||||
|
||||
token^.value.stringKind := String(cast(malloc(text_length): ^Char), text_length);
|
||||
memcpy(cast(token^.value.stringKind.ptr: Pointer), cast(lexer^.start.iterator: Pointer), text_length);
|
||||
|
||||
token^.kind := LexerKind.string
|
||||
end;
|
||||
increment(@lexer^.current)
|
||||
end;
|
||||
|
||||
(* Finalize keyword or identifier. *)
|
||||
proc transition_action_key_id(lexer: ^Lexer, token: ^LexerToken);
|
||||
begin
|
||||
token^.kind := LexerKind.identifier;
|
||||
|
||||
token^.value.identifierKind[1] := cast(lexer^.current.iterator - lexer^.start.iterator: Char);
|
||||
memcpy(cast(@token^.value.identifierKind[2]: Pointer), cast(lexer^.start.iterator: Pointer), cast(token^.value.identifierKind[1]: Word));
|
||||
|
||||
if compare_keyword("program", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind._program
|
||||
end;
|
||||
if compare_keyword("import", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind._import
|
||||
end;
|
||||
if compare_keyword("const", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind._const
|
||||
end;
|
||||
if compare_keyword("var", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind._var
|
||||
end;
|
||||
if compare_keyword("if", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind._if
|
||||
end;
|
||||
if compare_keyword("then", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind._then
|
||||
end;
|
||||
if compare_keyword("elsif", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind._elsif
|
||||
end;
|
||||
if compare_keyword("else", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind._else
|
||||
end;
|
||||
if compare_keyword("while", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind._while
|
||||
end;
|
||||
if compare_keyword("do", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind._do
|
||||
end;
|
||||
if compare_keyword("proc", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind._proc
|
||||
end;
|
||||
if compare_keyword("begin", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind._begin
|
||||
end;
|
||||
if compare_keyword("end", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind._end
|
||||
end;
|
||||
if compare_keyword("type", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind._type
|
||||
end;
|
||||
if compare_keyword("record", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind._record
|
||||
end;
|
||||
if compare_keyword("union", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind._union
|
||||
end;
|
||||
if compare_keyword("NIL", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind.null
|
||||
end;
|
||||
if compare_keyword("or", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind._or
|
||||
end;
|
||||
if compare_keyword("return", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind._return
|
||||
end;
|
||||
if compare_keyword("defer", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind._defer
|
||||
end;
|
||||
if compare_keyword("TO", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind.to
|
||||
end;
|
||||
if compare_keyword("CASE", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind._case
|
||||
end;
|
||||
if compare_keyword("OF", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind._of
|
||||
end;
|
||||
if compare_keyword("module", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind._module
|
||||
end;
|
||||
if compare_keyword("xor", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind._xor
|
||||
end;
|
||||
if compare_keyword("TRUE", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind.boolean;
|
||||
token^.value.booleanKind := true
|
||||
end;
|
||||
if compare_keyword("FALSE", lexer^.start, lexer^.current.iterator) then
|
||||
token^.kind := LexerKind.boolean;
|
||||
token^.value.booleanKind := false
|
||||
end
|
||||
end;
|
||||
|
||||
(* Action for tokens containing only one character. The character cannot be
|
||||
* followed by other characters forming a composite token. *)
|
||||
proc transition_action_single(lexer: ^Lexer, token: ^LexerToken);
|
||||
begin
|
||||
if lexer^.current.iterator^ = '&' then
|
||||
token^.kind := LexerKind.and
|
||||
end;
|
||||
if lexer^.current.iterator^ = ';' then
|
||||
token^.kind := LexerKind.semicolon
|
||||
end;
|
||||
if lexer^.current.iterator^ = ',' then
|
||||
token^.kind := LexerKind.comma
|
||||
end;
|
||||
if lexer^.current.iterator^ = '~' then
|
||||
token^.kind := LexerKind.not
|
||||
end;
|
||||
if lexer^.current.iterator^ = ')' then
|
||||
token^.kind := LexerKind.right_paren
|
||||
end;
|
||||
if lexer^.current.iterator^ = '[' then
|
||||
token^.kind := LexerKind.left_square
|
||||
end;
|
||||
if lexer^.current.iterator^ = ']' then
|
||||
token^.kind := LexerKind.right_square
|
||||
end;
|
||||
if lexer^.current.iterator^ = '^' then
|
||||
token^.kind := LexerKind.hat
|
||||
end;
|
||||
if lexer^.current.iterator^ = '=' then
|
||||
token^.kind := LexerKind.equal
|
||||
end;
|
||||
if lexer^.current.iterator^ = '+' then
|
||||
token^.kind := LexerKind.plus
|
||||
end;
|
||||
if lexer^.current.iterator^ = '*' then
|
||||
token^.kind := LexerKind.multiplication
|
||||
end;
|
||||
if lexer^.current.iterator^ = '/' then
|
||||
token^.kind := LexerKind.division
|
||||
end;
|
||||
if lexer^.current.iterator^ = '%' then
|
||||
token^.kind := LexerKind.remainder
|
||||
end;
|
||||
if lexer^.current.iterator^ = '@' then
|
||||
token^.kind := LexerKind.at
|
||||
end;
|
||||
if lexer^.current.iterator^ = '|' then
|
||||
token^.kind := LexerKind.pipe
|
||||
end;
|
||||
increment(@lexer^.current)
|
||||
end;
|
||||
|
||||
(* Handle an integer literal. *)
|
||||
proc transition_action_integer(lexer: ^Lexer, token: ^LexerToken);
|
||||
var
|
||||
buffer: String;
|
||||
integer_length: Word;
|
||||
found: Bool;
|
||||
begin
|
||||
token^.kind := LexerKind.integer;
|
||||
|
||||
integer_length := cast(lexer^.current.iterator - lexer^.start.iterator: Word);
|
||||
memset(cast(token^.value.identifierKind.ptr: Pointer), 0, #size(Identifier));
|
||||
memcpy(cast(@token^.value.identifierKind[1]: Pointer), cast(lexer^.start.iterator: Pointer), integer_length);
|
||||
|
||||
token^.value.identifierKind[cast(token^.value.identifierKind[1]: Int) + 2] := '\0';
|
||||
token^.value.integerKind := atoi(@token^.value.identifierKind[2])
|
||||
end;
|
||||
|
||||
proc set_default_transition(current_state: TransitionState, default_action: TransitionAction, next_state: TransitionState) -> Int;
|
||||
var
|
||||
default_transition: Transition;
|
||||
state_index: Int;
|
||||
begin
|
||||
default_transition.action := default_action;
|
||||
default_transition.next_state := next_state;
|
||||
state_index := cast(current_state: Int) + 1;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.invalid: Int) + 1] := default_transition;
|
||||
transitions[state_index][cast(TransitionClass.digit: Int) + 1] := default_transition;
|
||||
transitions[state_index][cast(TransitionClass.alpha: Int) + 1] := default_transition;
|
||||
transitions[state_index][cast(TransitionClass.space: Int) + 1] := default_transition;
|
||||
transitions[state_index][cast(TransitionClass.colon: Int) + 1] := default_transition;
|
||||
transitions[state_index][cast(TransitionClass.equals: Int) + 1] := default_transition;
|
||||
transitions[state_index][cast(TransitionClass.left_paren: Int) + 1] := default_transition;
|
||||
transitions[state_index][cast(TransitionClass.right_paren: Int) + 1] := default_transition;
|
||||
transitions[state_index][cast(TransitionClass.asterisk: Int) + 1] := default_transition;
|
||||
transitions[state_index][cast(TransitionClass.underscore: Int) + 1] := default_transition;
|
||||
transitions[state_index][cast(TransitionClass.single: Int) + 1] := default_transition;
|
||||
transitions[state_index][cast(TransitionClass.hex: Int) + 1] := default_transition;
|
||||
transitions[state_index][cast(TransitionClass.zero: Int) + 1] := default_transition;
|
||||
transitions[state_index][cast(TransitionClass.x: Int) + 1] := default_transition;
|
||||
transitions[state_index][cast(TransitionClass.eof: Int) + 1] := default_transition;
|
||||
transitions[state_index][cast(TransitionClass.dot: Int) + 1] := default_transition;
|
||||
transitions[state_index][cast(TransitionClass.minus: Int) + 1] := default_transition;
|
||||
transitions[state_index][cast(TransitionClass.single_quote: Int) + 1] := default_transition;
|
||||
transitions[state_index][cast(TransitionClass.double_quote: Int) + 1] := default_transition;
|
||||
transitions[state_index][cast(TransitionClass.greater: Int) + 1] := default_transition;
|
||||
transitions[state_index][cast(TransitionClass.less: Int) + 1] := default_transition;
|
||||
transitions[state_index][cast(TransitionClass.other: Int) + 1] := default_transition;
|
||||
|
||||
return state_index
|
||||
end;
|
||||
|
||||
(*
|
||||
* The transition table describes transitions from one state to another, given
|
||||
* a symbol (character class).
|
||||
*
|
||||
* The table has m rows and n columns, where m is the amount of states and n is
|
||||
* the amount of classes. So given the current state and a classified character
|
||||
* the table can be used to look up the next state.
|
||||
*
|
||||
* Each cell is a word long.
|
||||
* - The least significant byte of the word is a row number (beginning with 0).
|
||||
* It specifies the target state. "ff" means that this is an end state and no
|
||||
* transition is possible.
|
||||
* - The next byte is the action that should be performed when transitioning.
|
||||
* For the meaning of actions see labels in the lex_next function, which
|
||||
* handles each action.
|
||||
*)
|
||||
proc initialize_transitions();
|
||||
var
|
||||
state_index: Int;
|
||||
begin
|
||||
(* Start state. *)
|
||||
state_index := cast(TransitionState.start: Int) + 1;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil;
|
||||
transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.decimal;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.identifier;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.space: Int) + 1].action := transition_action_skip;
|
||||
transitions[state_index][cast(TransitionClass.space: Int) + 1].next_state := TransitionState.start;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.colon: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.colon: Int) + 1].next_state := TransitionState.colon;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_single;
|
||||
transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.left_paren: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.left_paren: Int) + 1].next_state := TransitionState.left_paren;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].action := transition_action_single;
|
||||
transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_single;
|
||||
transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.identifier;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.single: Int) + 1].action := transition_action_single;
|
||||
transitions[state_index][cast(TransitionClass.single: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.identifier;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.leading_zero;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.identifier;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := transition_action_eof;
|
||||
transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.dot: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.dot: Int) + 1].next_state := TransitionState.dot;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.minus: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.minus: Int) + 1].next_state := TransitionState.minus;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].next_state := TransitionState.character;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].next_state := TransitionState.string;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.greater;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.less: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.less: Int) + 1].next_state := TransitionState.less;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.other: Int) + 1].action := nil;
|
||||
transitions[state_index][cast(TransitionClass.other: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
(* Colon state. *)
|
||||
state_index := set_default_transition(TransitionState.colon, transition_action_finalize, TransitionState.finish);
|
||||
|
||||
transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite;
|
||||
transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
(* Identifier state. *)
|
||||
state_index := set_default_transition(TransitionState.identifier, transition_action_key_id, TransitionState.finish);
|
||||
|
||||
transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.identifier;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.identifier;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.identifier;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.identifier;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.identifier;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.identifier;
|
||||
|
||||
(* Decimal state. *)
|
||||
state_index := set_default_transition(TransitionState.decimal, transition_action_integer, TransitionState.finish);
|
||||
|
||||
transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.decimal;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.decimal_suffix;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := nil;
|
||||
transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.decimal_suffix;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.decimal;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.decimal_suffix;
|
||||
|
||||
(* Greater state. *)
|
||||
state_index := set_default_transition(TransitionState.greater, transition_action_finalize, TransitionState.finish);
|
||||
|
||||
transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite;
|
||||
transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
(* Minus state. *)
|
||||
state_index := set_default_transition(TransitionState.minus, transition_action_finalize, TransitionState.finish);
|
||||
|
||||
transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_composite;
|
||||
transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
(* Left paren state. *)
|
||||
state_index := set_default_transition(TransitionState.left_paren, transition_action_finalize, TransitionState.finish);
|
||||
|
||||
transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.comment;
|
||||
|
||||
(* Less state. *)
|
||||
state_index := set_default_transition(TransitionState.less, transition_action_finalize, TransitionState.finish);
|
||||
|
||||
transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite;
|
||||
transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_composite;
|
||||
transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
(* Hexadecimal after 0x. *)
|
||||
state_index := set_default_transition(TransitionState.dot, transition_action_finalize, TransitionState.finish);
|
||||
|
||||
transitions[state_index][cast(TransitionClass.dot: Int) + 1].action := transition_action_composite;
|
||||
transitions[state_index][cast(TransitionClass.dot: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
(* Comment. *)
|
||||
state_index := set_default_transition(TransitionState.comment, transition_action_accumulate, TransitionState.comment);
|
||||
|
||||
transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.closing_comment;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil;
|
||||
transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
(* Closing comment. *)
|
||||
state_index := set_default_transition(TransitionState.closing_comment, transition_action_accumulate, TransitionState.comment);
|
||||
|
||||
transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil;
|
||||
transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].action := transition_action_delimited;
|
||||
transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate;
|
||||
transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.closing_comment;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil;
|
||||
transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
(* Character. *)
|
||||
state_index := set_default_transition(TransitionState.character, transition_action_accumulate, TransitionState.character);
|
||||
|
||||
transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil;
|
||||
transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil;
|
||||
transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].action := transition_action_delimited;
|
||||
transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
(* String. *)
|
||||
state_index := set_default_transition(TransitionState.string, transition_action_accumulate, TransitionState.string);
|
||||
|
||||
transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil;
|
||||
transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil;
|
||||
transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].action := transition_action_delimited;
|
||||
transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
(* Leading zero. *)
|
||||
state_index := set_default_transition(TransitionState.leading_zero, transition_action_integer, TransitionState.finish);
|
||||
|
||||
transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := nil;
|
||||
transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := nil;
|
||||
transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := nil;
|
||||
transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := nil;
|
||||
transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := nil;
|
||||
transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.x: Int) + 1].action := nil;
|
||||
transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
(* Digit with a character suffix. *)
|
||||
state_index := set_default_transition(TransitionState.decimal_suffix, transition_action_integer, TransitionState.finish);
|
||||
|
||||
transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := nil;
|
||||
transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := nil;
|
||||
transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := nil;
|
||||
transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := nil;
|
||||
transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.finish;
|
||||
|
||||
transitions[state_index][cast(TransitionClass.x: Int) + 1].action := nil;
|
||||
transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.finish
|
||||
end;
|
||||
|
||||
proc lexer_make*(lexer: ^Lexer, input: ^FILE);
|
||||
begin
|
||||
lexer^.input := input;
|
||||
lexer^.length := 0u;
|
||||
|
||||
lexer^.buffer := cast(malloc(CHUNK_SIZE): ^Char);
|
||||
memset(cast(lexer^.buffer: Pointer), 0, CHUNK_SIZE);
|
||||
lexer^.size := CHUNK_SIZE
|
||||
end;
|
||||
|
||||
(* Returns the last read token. *)
|
||||
proc lexer_current*(lexer: ^Lexer) -> LexerToken;
|
||||
var
|
||||
current_class: TransitionClass;
|
||||
current_state: TransitionState;
|
||||
current_transition: Transition;
|
||||
result: LexerToken;
|
||||
index1: Word;
|
||||
index2: Word;
|
||||
begin
|
||||
lexer^.current := lexer^.start;
|
||||
current_state := TransitionState.start;
|
||||
|
||||
while current_state <> TransitionState.finish do
|
||||
index1 := cast(lexer^.current.iterator^: Word) + 1u;
|
||||
current_class := classification[index1];
|
||||
|
||||
index1 := cast(current_state: Word) + 1u;
|
||||
index2 := cast(current_class: Word) + 1u;
|
||||
|
||||
current_transition := transitions[index1][index2];
|
||||
if current_transition.action <> nil then
|
||||
current_transition.action(lexer, @result)
|
||||
end;
|
||||
current_state := current_transition.next_state
|
||||
end;
|
||||
result.start_location := lexer^.start.location;
|
||||
result.end_location := lexer^.current.location;
|
||||
|
||||
return result
|
||||
end;
|
||||
|
||||
(* Read and return the next token. *)
|
||||
proc lexer_lex*(lexer: ^Lexer) -> LexerToken;
|
||||
var
|
||||
result: LexerToken;
|
||||
begin
|
||||
if lexer^.length = 0u then
|
||||
lexer^.length := fread(cast(lexer^.buffer: Pointer), CHUNK_SIZE, 1u, lexer^.input);
|
||||
lexer^.current.location.column := 1u;
|
||||
lexer^.current.location.line := 1u;
|
||||
lexer^.current.iterator := lexer^.buffer
|
||||
end;
|
||||
lexer^.start := lexer^.current;
|
||||
|
||||
result := lexer_current(lexer);
|
||||
return result
|
||||
end;
|
||||
|
||||
proc lexer_destroy*(lexer: ^Lexer);
|
||||
begin
|
||||
free(cast(lexer^.buffer: Pointer))
|
||||
end;
|
||||
|
||||
proc lexer_initialize();
|
||||
begin
|
||||
initialize_classification();
|
||||
initialize_transitions()
|
||||
end;
|
||||
|
||||
end.
|
||||
841
source/main.elna
Normal file
841
source/main.elna
Normal file
@@ -0,0 +1,841 @@
|
||||
(* This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
obtain one at https://mozilla.org/MPL/2.0/. *)
|
||||
program;
|
||||
|
||||
import cstdio, cctype, common, command_line_interface, lexer;
|
||||
|
||||
type
|
||||
SourceFile* = record
|
||||
buffer: [1024]Char;
|
||||
handle: ^FILE;
|
||||
size: Word;
|
||||
index: Word
|
||||
end;
|
||||
StringBuffer* = record
|
||||
data: Pointer;
|
||||
size: Word;
|
||||
capacity: Word
|
||||
end;
|
||||
SourceCode = record
|
||||
position: TextLocation;
|
||||
|
||||
input: Pointer;
|
||||
empty: proc(Pointer) -> Bool;
|
||||
advance: proc(Pointer);
|
||||
head: proc(Pointer) -> Char
|
||||
end;
|
||||
Token* = record
|
||||
kind: LexerKind;
|
||||
value: union
|
||||
int_value: Int;
|
||||
string: String;
|
||||
boolean_value: Bool;
|
||||
char_value: Char
|
||||
end
|
||||
end;
|
||||
Tokenizer* = record
|
||||
length: Word;
|
||||
data: ^Token
|
||||
end;
|
||||
|
||||
(*
|
||||
Standard procedures.
|
||||
*)
|
||||
proc reallocarray(ptr: Pointer, n: Word, size: Word) -> Pointer;
|
||||
return realloc(ptr, n * size)
|
||||
end;
|
||||
|
||||
proc substring(string: String, start: Word, count: Word) -> String;
|
||||
return String(string.ptr + start, count)
|
||||
end;
|
||||
|
||||
proc open_substring(string: String, start: Word) -> String;
|
||||
return substring(string, start, string.length - start)
|
||||
end;
|
||||
|
||||
proc string_dup(origin: String) -> String;
|
||||
var
|
||||
copy: ^Char;
|
||||
begin
|
||||
copy := cast(malloc(origin.length): ^Char);
|
||||
strncpy(copy, origin.ptr, origin.length);
|
||||
|
||||
return String(copy, origin.length)
|
||||
end;
|
||||
|
||||
proc string_buffer_new() -> StringBuffer;
|
||||
var
|
||||
result: StringBuffer;
|
||||
begin
|
||||
result.capacity := 64u;
|
||||
result.data := malloc(result.capacity);
|
||||
result.size := 0u;
|
||||
|
||||
return result
|
||||
end;
|
||||
|
||||
proc string_buffer_push(buffer: ^StringBuffer, char: Char);
|
||||
begin
|
||||
if buffer^.size >= buffer^.capacity then
|
||||
buffer^.capacity := buffer^.capacity + 1024u;
|
||||
buffer^.data := realloc(buffer^.data, buffer^.capacity)
|
||||
end;
|
||||
cast(buffer^.data + buffer^.size: ^Char)^ := cast(char: Char);
|
||||
buffer^.size := buffer^.size + 1u
|
||||
end;
|
||||
|
||||
proc string_buffer_pop(buffer: ^StringBuffer, count: Word);
|
||||
begin
|
||||
buffer^.size := buffer^.size - count
|
||||
end;
|
||||
|
||||
proc string_buffer_clear(buffer: ^StringBuffer) -> String;
|
||||
var
|
||||
result: String;
|
||||
begin
|
||||
result := String(cast(buffer^.data: ^Char), buffer^.size);
|
||||
buffer^.size := 0u;
|
||||
return result
|
||||
end;
|
||||
|
||||
(*
|
||||
Source code stream procedures.
|
||||
*)
|
||||
|
||||
proc read_source(filename: ^Char) -> ^SourceFile;
|
||||
var
|
||||
result: ^SourceFile;
|
||||
file_handle: ^FILE;
|
||||
begin
|
||||
file_handle := fopen(filename, "rb\0".ptr);
|
||||
|
||||
if file_handle <> nil then
|
||||
result := cast(malloc(#size(SourceFile)): ^SourceFile);
|
||||
result^.handle := file_handle;
|
||||
result^.size := 0u;
|
||||
result^.index := 1u
|
||||
end;
|
||||
return result
|
||||
end;
|
||||
|
||||
proc source_file_empty(source_input: Pointer) -> Bool;
|
||||
var
|
||||
source_file: ^SourceFile;
|
||||
begin
|
||||
source_file := cast(source_input: ^SourceFile);
|
||||
|
||||
if source_file^.index > source_file^.size then
|
||||
source_file^.size := fread(cast(@source_file^.buffer: Pointer), 1u, 1024u, source_file^.handle);
|
||||
source_file^.index := 1u
|
||||
end;
|
||||
|
||||
return source_file^.size = 0u
|
||||
end;
|
||||
|
||||
proc source_file_head(source_input: Pointer) -> Char;
|
||||
var
|
||||
source_file: ^SourceFile;
|
||||
begin
|
||||
source_file := cast(source_input: ^SourceFile);
|
||||
|
||||
return source_file^.buffer[source_file^.index]
|
||||
end;
|
||||
|
||||
proc source_file_advance(source_input: Pointer);
|
||||
var
|
||||
source_file: ^SourceFile;
|
||||
begin
|
||||
source_file := cast(source_input: ^SourceFile);
|
||||
|
||||
source_file^.index := source_file^.index + 1u
|
||||
end;
|
||||
|
||||
proc source_code_empty(source_code: ^SourceCode) -> Bool;
|
||||
return source_code^.empty(source_code^.input)
|
||||
end;
|
||||
|
||||
proc source_code_head(source_code: SourceCode) -> Char;
|
||||
return source_code.head(source_code.input)
|
||||
end;
|
||||
|
||||
proc source_code_advance(source_code: ^SourceCode);
|
||||
begin
|
||||
source_code^.advance(source_code^.input);
|
||||
source_code^.position.column := source_code^.position.column
|
||||
end;
|
||||
|
||||
proc source_code_break(source_code: ^SourceCode);
|
||||
begin
|
||||
source_code^.position.line := source_code^.position.line + 1u;
|
||||
source_code^.position.column := 0u
|
||||
end;
|
||||
|
||||
proc source_code_expect(source_code: ^SourceCode, expected: Char) -> Bool;
|
||||
return ~source_code_empty(source_code) & source_code_head(source_code^) = expected
|
||||
end;
|
||||
|
||||
(*
|
||||
Token procedures.
|
||||
*)
|
||||
|
||||
proc lexer_escape(escape: Char, result: ^Char) -> Bool;
|
||||
var
|
||||
successful: Bool;
|
||||
begin
|
||||
case escape of
|
||||
'n':
|
||||
result^ := '\n';
|
||||
successful := true
|
||||
| 'a':
|
||||
result^ := '\a';
|
||||
successful := true
|
||||
| 'b':
|
||||
result^ := '\b';
|
||||
successful := true
|
||||
| 't':
|
||||
result^ := '\t';
|
||||
successful := true
|
||||
| 'f':
|
||||
result^ := '\f';
|
||||
successful := true
|
||||
| 'r':
|
||||
result^ := '\r';
|
||||
successful := true
|
||||
| 'v':
|
||||
result^ := '\v';
|
||||
successful := true
|
||||
| '\\':
|
||||
result^ := '\\';
|
||||
successful := true
|
||||
| '\'':
|
||||
result^ := '\'';
|
||||
successful := true
|
||||
| '"':
|
||||
result^ := '"';
|
||||
successful := true
|
||||
| '?':
|
||||
result^ := '\?';
|
||||
successful := true
|
||||
| '0':
|
||||
result^ := '\0';
|
||||
successful := true
|
||||
else
|
||||
successful := false
|
||||
end;
|
||||
return successful
|
||||
end;
|
||||
|
||||
(* Skip spaces. *)
|
||||
proc lexer_spaces(source_code: ^SourceCode);
|
||||
var
|
||||
current: Char;
|
||||
begin
|
||||
while ~source_code_empty(source_code) & isspace(cast(source_code_head(source_code^): Int)) <> 0 do
|
||||
current := source_code_head(source_code^);
|
||||
|
||||
if current = '\n' then
|
||||
source_code_break(source_code)
|
||||
end;
|
||||
source_code_advance(source_code)
|
||||
end
|
||||
end;
|
||||
|
||||
(* Checker whether the character is allowed in an identificator. *)
|
||||
proc lexer_is_ident(char: Char) -> Bool;
|
||||
return isalnum(cast(char: Int)) <> 0 or char = '_'
|
||||
end;
|
||||
|
||||
proc lexer_identifier(source_code: ^SourceCode, token_content: ^StringBuffer);
|
||||
var
|
||||
content_length: Word;
|
||||
begin
|
||||
while ~source_code_empty(source_code) & lexer_is_ident(source_code_head(source_code^)) do
|
||||
string_buffer_push(token_content, source_code_head(source_code^));
|
||||
source_code_advance(source_code)
|
||||
end
|
||||
end;
|
||||
|
||||
proc lexer_comment(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool;
|
||||
var
|
||||
trailing: Word;
|
||||
begin
|
||||
trailing := 0u;
|
||||
|
||||
while ~source_code_empty(source_code) & trailing < 2u do
|
||||
if source_code_head(source_code^) = '*' then
|
||||
string_buffer_push(token_content, '*');
|
||||
trailing := 1u
|
||||
elsif source_code_head(source_code^) = ')' & trailing = 1u then
|
||||
string_buffer_pop(token_content, 1u);
|
||||
trailing := 2u
|
||||
else
|
||||
string_buffer_push(token_content, source_code_head(source_code^));
|
||||
trailing := 0u
|
||||
end;
|
||||
source_code_advance(source_code)
|
||||
end;
|
||||
|
||||
return trailing = 2u
|
||||
end;
|
||||
|
||||
proc lexer_character(source_code: ^SourceCode, token_content: ^Char) -> Bool;
|
||||
var
|
||||
successful: Bool;
|
||||
begin
|
||||
successful := ~source_code_empty(source_code);
|
||||
|
||||
if successful then
|
||||
if source_code_head(source_code^) = '\\' then
|
||||
source_code_advance(source_code);
|
||||
|
||||
successful := ~source_code_empty(source_code) & lexer_escape(source_code_head(source_code^), token_content)
|
||||
else
|
||||
token_content^ := source_code_head(source_code^);
|
||||
successful := true
|
||||
end
|
||||
end;
|
||||
if successful then
|
||||
source_code_advance(source_code)
|
||||
end;
|
||||
return successful
|
||||
end;
|
||||
|
||||
proc lexer_string(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool;
|
||||
var
|
||||
token_end, constructed_string: ^Char;
|
||||
token_length: Word;
|
||||
is_valid: Bool := true;
|
||||
next_char: Char;
|
||||
begin
|
||||
while is_valid & ~source_code_empty(source_code) & source_code_head(source_code^) <> '"' do
|
||||
is_valid := lexer_character(source_code, @next_char);
|
||||
|
||||
if is_valid then
|
||||
string_buffer_push(token_content, next_char)
|
||||
end
|
||||
end;
|
||||
|
||||
if is_valid & source_code_expect(source_code, '"') then
|
||||
source_code_advance(source_code)
|
||||
else
|
||||
is_valid := false
|
||||
end;
|
||||
return is_valid
|
||||
end;
|
||||
|
||||
proc lexer_number(source_code: ^SourceCode, token_content: ^Int);
|
||||
begin
|
||||
token_content^ := 0;
|
||||
|
||||
while ~source_code_empty(source_code) & isdigit(cast(source_code_head(source_code^): Int)) <> 0 do
|
||||
token_content^ := token_content^ * 10 + (cast(source_code_head(source_code^): Int) - cast('0': Int));
|
||||
|
||||
source_code_advance(source_code)
|
||||
end
|
||||
end;
|
||||
|
||||
(* Categorize an identifier. *)
|
||||
proc lexer_categorize(token_content: String) -> Token;
|
||||
var
|
||||
current_token: Token;
|
||||
begin
|
||||
if token_content = "if" then
|
||||
current_token.kind := LexerKind._if
|
||||
elsif token_content = "then" then
|
||||
current_token.kind := LexerKind._then
|
||||
elsif token_content = "else" then
|
||||
current_token.kind := LexerKind._else
|
||||
elsif token_content = "elsif" then
|
||||
current_token.kind := LexerKind._elsif
|
||||
elsif token_content = "while" then
|
||||
current_token.kind := LexerKind._while
|
||||
elsif token_content = "do" then
|
||||
current_token.kind := LexerKind._do
|
||||
elsif token_content = "proc" then
|
||||
current_token.kind := LexerKind._proc
|
||||
elsif token_content = "begin" then
|
||||
current_token.kind := LexerKind._begin
|
||||
elsif token_content = "end" then
|
||||
current_token.kind := LexerKind._end
|
||||
elsif token_content = "extern" then
|
||||
current_token.kind := LexerKind._extern
|
||||
elsif token_content = "const" then
|
||||
current_token.kind := LexerKind._const
|
||||
elsif token_content = "var" then
|
||||
current_token.kind := LexerKind._var
|
||||
elsif token_content = "case" then
|
||||
current_token.kind := LexerKind._case
|
||||
elsif token_content = "of" then
|
||||
current_token.kind := LexerKind._of
|
||||
elsif token_content = "type" then
|
||||
current_token.kind := LexerKind._type
|
||||
elsif token_content = "record" then
|
||||
current_token.kind := LexerKind._record
|
||||
elsif token_content = "union" then
|
||||
current_token.kind := LexerKind._union
|
||||
elsif token_content = "true" then
|
||||
current_token.kind := LexerKind.boolean;
|
||||
current_token.value.boolean_value := true
|
||||
elsif token_content = "false" then
|
||||
current_token.kind := LexerKind.boolean;
|
||||
current_token.value.boolean_value := false
|
||||
elsif token_content = "nil" then
|
||||
current_token.kind := LexerKind.null
|
||||
elsif token_content = "or" then
|
||||
current_token.kind := LexerKind._or
|
||||
elsif token_content = "return" then
|
||||
current_token.kind := LexerKind._return
|
||||
elsif token_content = "cast" then
|
||||
current_token.kind := LexerKind._cast
|
||||
elsif token_content = "defer" then
|
||||
current_token.kind := LexerKind._defer
|
||||
elsif token_content = "program" then
|
||||
current_token.kind := LexerKind._program
|
||||
elsif token_content = "module" then
|
||||
current_token.kind := LexerKind._module
|
||||
elsif token_content = "import" then
|
||||
current_token.kind := LexerKind._import
|
||||
else
|
||||
current_token.kind := LexerKind.identifier;
|
||||
current_token.value.string := string_dup(token_content)
|
||||
end;
|
||||
|
||||
return current_token
|
||||
end;
|
||||
|
||||
proc lexer_add_token(lexer: ^Tokenizer, token: Token);
|
||||
var
|
||||
new_length: Word;
|
||||
begin
|
||||
new_length := lexer^.length + 1u;
|
||||
lexer^.data := cast(reallocarray(cast(lexer^.data: Pointer), new_length, #size(Token)): ^Token);
|
||||
(lexer^.data + lexer^.length)^ := token;
|
||||
lexer^.length := new_length
|
||||
end;
|
||||
|
||||
(* Read the next token from the input. *)
|
||||
proc lexer_next(source_code: SourceCode, token_buffer: ^StringBuffer) -> Token;
|
||||
var
|
||||
current_token: Token;
|
||||
first_char: Char;
|
||||
begin
|
||||
current_token.kind := LexerKind.unknown;
|
||||
|
||||
first_char := source_code_head(source_code);
|
||||
|
||||
if isalpha(cast(first_char: Int)) <> 0 or first_char = '_' then
|
||||
lexer_identifier(@source_code, token_buffer);
|
||||
current_token := lexer_categorize(string_buffer_clear(token_buffer))
|
||||
elsif first_char = '#' then
|
||||
source_code_advance(@source_code);
|
||||
lexer_identifier(@source_code, token_buffer);
|
||||
|
||||
current_token.kind := LexerKind.trait;
|
||||
current_token.value.string := string_dup(string_buffer_clear(token_buffer))
|
||||
elsif isdigit(cast(first_char: Int)) <> 0 then
|
||||
lexer_number(@source_code, @current_token.value.int_value);
|
||||
|
||||
if source_code_expect(@source_code, 'u') then
|
||||
current_token.kind := LexerKind.word;
|
||||
source_code_advance(@source_code)
|
||||
else
|
||||
current_token.kind := LexerKind.integer
|
||||
end
|
||||
elsif first_char = '(' then
|
||||
source_code_advance(@source_code);
|
||||
|
||||
if source_code_empty(@source_code) then
|
||||
current_token.kind := LexerKind.left_paren
|
||||
elsif source_code_head(source_code) = '*' then
|
||||
source_code_advance(@source_code);
|
||||
|
||||
if lexer_comment(@source_code, token_buffer) then
|
||||
current_token.value.string := string_dup(string_buffer_clear(token_buffer));
|
||||
current_token.kind := LexerKind.comment
|
||||
else
|
||||
current_token.kind := LexerKind.unknown
|
||||
end
|
||||
else
|
||||
current_token.kind := LexerKind.left_paren
|
||||
end
|
||||
elsif first_char = ')' then
|
||||
current_token.kind := LexerKind.right_paren;
|
||||
source_code_advance(@source_code)
|
||||
elsif first_char = '\'' then
|
||||
source_code_advance(@source_code);
|
||||
|
||||
if lexer_character(@source_code, @current_token.value.char_value) & source_code_expect(@source_code, '\'') then
|
||||
current_token.kind := LexerKind.character;
|
||||
source_code_advance(@source_code)
|
||||
else
|
||||
current_token.kind := LexerKind.unknown
|
||||
end
|
||||
elsif first_char = '"' then
|
||||
source_code_advance(@source_code);
|
||||
|
||||
if lexer_string(@source_code, token_buffer) then
|
||||
current_token.kind := LexerKind.string;
|
||||
current_token.value.string := string_dup(string_buffer_clear(token_buffer))
|
||||
else
|
||||
current_token.kind := LexerKind.unknown
|
||||
end
|
||||
elsif first_char = '[' then
|
||||
current_token.kind := LexerKind.left_square;
|
||||
source_code_advance(@source_code)
|
||||
elsif first_char = ']' then
|
||||
current_token.kind := LexerKind.right_square;
|
||||
source_code_advance(@source_code)
|
||||
elsif first_char = '>' then
|
||||
source_code_advance(@source_code);
|
||||
|
||||
if source_code_empty(@source_code) then
|
||||
current_token.kind := LexerKind.greater_than
|
||||
elsif source_code_head(source_code) = '=' then
|
||||
current_token.kind := LexerKind.greater_equal;
|
||||
source_code_advance(@source_code)
|
||||
elsif source_code_head(source_code) = '>' then
|
||||
current_token.kind := LexerKind.shift_right;
|
||||
source_code_advance(@source_code)
|
||||
else
|
||||
current_token.kind := LexerKind.greater_than
|
||||
end
|
||||
elsif first_char = '<' then
|
||||
source_code_advance(@source_code);
|
||||
|
||||
if source_code_empty(@source_code) then
|
||||
current_token.kind := LexerKind.less_than
|
||||
elsif source_code_head(source_code) = '=' then
|
||||
current_token.kind := LexerKind.less_equal;
|
||||
source_code_advance(@source_code)
|
||||
elsif source_code_head(source_code) = '<' then
|
||||
current_token.kind := LexerKind.shift_left;
|
||||
source_code_advance(@source_code)
|
||||
elsif source_code_head(source_code) = '>' then
|
||||
current_token.kind := LexerKind.not_equal;
|
||||
source_code_advance(@source_code)
|
||||
else
|
||||
current_token.kind := LexerKind.less_than
|
||||
end
|
||||
elsif first_char = '=' then
|
||||
current_token.kind := LexerKind.equal;
|
||||
source_code_advance(@source_code)
|
||||
elsif first_char = ';' then
|
||||
current_token.kind := LexerKind.semicolon;
|
||||
source_code_advance(@source_code)
|
||||
elsif first_char = '.' then
|
||||
current_token.kind := LexerKind.dot;
|
||||
source_code_advance(@source_code)
|
||||
elsif first_char = ',' then
|
||||
current_token.kind := LexerKind.comma;
|
||||
source_code_advance(@source_code)
|
||||
elsif first_char = '+' then
|
||||
current_token.kind := LexerKind.plus;
|
||||
source_code_advance(@source_code)
|
||||
elsif first_char = '-' then
|
||||
source_code_advance(@source_code);
|
||||
|
||||
if source_code_empty(@source_code) then
|
||||
current_token.kind := LexerKind.minus
|
||||
elsif source_code_head(source_code) = '>' then
|
||||
current_token.kind := LexerKind.arrow;
|
||||
source_code_advance(@source_code)
|
||||
else
|
||||
current_token.kind := LexerKind.minus
|
||||
end
|
||||
elsif first_char = '*' then
|
||||
current_token.kind := LexerKind.multiplication;
|
||||
source_code_advance(@source_code)
|
||||
elsif first_char = '/' then
|
||||
current_token.kind := LexerKind.division;
|
||||
source_code_advance(@source_code)
|
||||
elsif first_char = '%' then
|
||||
current_token.kind := LexerKind.remainder;
|
||||
source_code_advance(@source_code)
|
||||
elsif first_char = ':' then
|
||||
source_code_advance(@source_code);
|
||||
|
||||
if source_code_empty(@source_code) then
|
||||
current_token.kind := LexerKind.colon
|
||||
elsif source_code_head(source_code) = '=' then
|
||||
current_token.kind := LexerKind.assignment;
|
||||
source_code_advance(@source_code)
|
||||
else
|
||||
current_token.kind := LexerKind.colon
|
||||
end
|
||||
elsif first_char = '^' then
|
||||
current_token.kind := LexerKind.hat;
|
||||
source_code_advance(@source_code)
|
||||
elsif first_char = '@' then
|
||||
current_token.kind := LexerKind.at;
|
||||
source_code_advance(@source_code)
|
||||
elsif first_char = '!' then
|
||||
current_token.kind := LexerKind.exclamation;
|
||||
source_code_advance(@source_code)
|
||||
elsif first_char = '&' then
|
||||
current_token.kind := LexerKind.and;
|
||||
source_code_advance(@source_code)
|
||||
elsif first_char = '~' then
|
||||
current_token.kind := LexerKind.not;
|
||||
source_code_advance(@source_code)
|
||||
elsif first_char = '|' then
|
||||
current_token.kind := LexerKind.pipe;
|
||||
source_code_advance(@source_code)
|
||||
else
|
||||
current_token.kind := LexerKind.unknown;
|
||||
source_code_advance(@source_code)
|
||||
end;
|
||||
|
||||
return current_token
|
||||
end;
|
||||
|
||||
(* Split the source text into tokens. *)
|
||||
proc lexer_text(source_code: SourceCode) -> Tokenizer;
|
||||
var
|
||||
current_token: Token;
|
||||
token_buffer: StringBuffer;
|
||||
lexer: Tokenizer;
|
||||
begin
|
||||
lexer := Tokenizer(0u, nil);
|
||||
token_buffer := string_buffer_new();
|
||||
|
||||
lexer_spaces(@source_code);
|
||||
|
||||
while ~source_code_empty(@source_code) do
|
||||
current_token := lexer_next(source_code, @token_buffer);
|
||||
|
||||
if current_token.kind <> LexerKind.unknown then
|
||||
lexer_add_token(@lexer, current_token);
|
||||
lexer_spaces(@source_code)
|
||||
else
|
||||
write_s("Lexical analysis error on \"");
|
||||
write_c(source_code_head(source_code));
|
||||
write_s("\".\n")
|
||||
end
|
||||
end;
|
||||
|
||||
return lexer
|
||||
end;
|
||||
|
||||
(*
|
||||
Parser.
|
||||
*)
|
||||
|
||||
proc parse(tokens: ^Token, tokens_size: Word);
|
||||
var
|
||||
current_token: ^Token;
|
||||
i: Word := 0u;
|
||||
begin
|
||||
while i < tokens_size do
|
||||
current_token := tokens + i;
|
||||
|
||||
case current_token^.kind of
|
||||
LexerKind._if:
|
||||
write_s("IF")
|
||||
| LexerKind._then:
|
||||
write_s("THEN")
|
||||
| LexerKind._else:
|
||||
write_s("ELSE")
|
||||
| LexerKind._elsif:
|
||||
write_s("ELSIF")
|
||||
| LexerKind._while:
|
||||
write_s("WHILE")
|
||||
| LexerKind._do:
|
||||
write_s("DO")
|
||||
| LexerKind._proc:
|
||||
write_s("PROC")
|
||||
| LexerKind._begin:
|
||||
write_s("BEGIN")
|
||||
| LexerKind._end:
|
||||
write_s("END")
|
||||
| LexerKind._extern:
|
||||
write_s("EXTERN")
|
||||
| LexerKind._const:
|
||||
write_s("CONST")
|
||||
| LexerKind._var:
|
||||
write_s("VAR")
|
||||
| LexerKind._case:
|
||||
write_s("CASE")
|
||||
| LexerKind._of:
|
||||
write_s("OF")
|
||||
| LexerKind._type:
|
||||
write_s("TYPE")
|
||||
| LexerKind._record:
|
||||
write_s("RECORD")
|
||||
| LexerKind._union:
|
||||
write_s("UNION")
|
||||
| LexerKind.pipe:
|
||||
write_s("|")
|
||||
| LexerKind.to:
|
||||
write_s("TO")
|
||||
| LexerKind.boolean:
|
||||
write_s("BOOLEAN<");
|
||||
write_b(current_token^.value.boolean_value);
|
||||
write_c('>')
|
||||
| LexerKind.null:
|
||||
write_s("NIL")
|
||||
| LexerKind.and:
|
||||
write_s("&")
|
||||
| LexerKind._or:
|
||||
write_s("OR")
|
||||
| LexerKind.not:
|
||||
write_s("~")
|
||||
| LexerKind._return:
|
||||
write_s("RETURN")
|
||||
| LexerKind._cast:
|
||||
write_s("CAST")
|
||||
| LexerKind.shift_left:
|
||||
write_s("<<")
|
||||
| LexerKind.shift_right:
|
||||
write_s(">>")
|
||||
| LexerKind.identifier:
|
||||
write_c('<');
|
||||
write_s(current_token^.value.string);
|
||||
write_c('>')
|
||||
| LexerKind.trait:
|
||||
write_c('#');
|
||||
write_s(current_token^.value.string)
|
||||
| LexerKind.left_paren:
|
||||
write_s("(")
|
||||
| LexerKind.right_paren:
|
||||
write_s(")")
|
||||
| LexerKind.left_square:
|
||||
write_s("[")
|
||||
| LexerKind.right_square:
|
||||
write_s("]")
|
||||
| LexerKind.greater_equal:
|
||||
write_s(">=")
|
||||
| LexerKind.less_equal:
|
||||
write_s("<=")
|
||||
| LexerKind.greater_than:
|
||||
write_s(">")
|
||||
| LexerKind.less_than:
|
||||
write_s("<")
|
||||
| LexerKind.equal:
|
||||
write_s("=")
|
||||
| LexerKind.not_equal:
|
||||
write_s("<>")
|
||||
| LexerKind.semicolon:
|
||||
write_c(';')
|
||||
| LexerKind.dot:
|
||||
write_c('.')
|
||||
| LexerKind.comma:
|
||||
write_c(',')
|
||||
| LexerKind.plus:
|
||||
write_c('+')
|
||||
| LexerKind.minus:
|
||||
write_c('-')
|
||||
| LexerKind.multiplication:
|
||||
write_c('*')
|
||||
| LexerKind.division:
|
||||
write_c('/')
|
||||
| LexerKind.remainder:
|
||||
write_c('%')
|
||||
| LexerKind.assignment:
|
||||
write_s(":=")
|
||||
| LexerKind.colon:
|
||||
write_c(':')
|
||||
| LexerKind.hat:
|
||||
write_c('^')
|
||||
| LexerKind.at:
|
||||
write_c('@')
|
||||
| LexerKind.comment:
|
||||
write_s("(* COMMENT *)")
|
||||
| LexerKind.integer:
|
||||
write_c('<');
|
||||
write_i(current_token^.value.int_value);
|
||||
write_c('>')
|
||||
| LexerKind.word:
|
||||
write_c('<');
|
||||
write_i(current_token^.value.int_value);
|
||||
write_s("u>")
|
||||
| LexerKind.character:
|
||||
write_c('<');
|
||||
write_i(cast(current_token^.value.char_value: Int));
|
||||
write_s("c>")
|
||||
| LexerKind.string:
|
||||
write_s("\"...\"")
|
||||
| LexerKind._defer:
|
||||
write_s("DEFER")
|
||||
| LexerKind.exclamation:
|
||||
write_c('!')
|
||||
| LexerKind.arrow:
|
||||
write_s("->")
|
||||
| LexerKind._program:
|
||||
write_s("PROGRAM")
|
||||
| LexerKind._module:
|
||||
write_s("MODULE")
|
||||
| LexerKind._import:
|
||||
write_s("IMPORT")
|
||||
else
|
||||
write_s("UNKNOWN<");
|
||||
write_i(cast(current_token^.kind: Int));
|
||||
write_c('>')
|
||||
end;
|
||||
write_c(' ');
|
||||
|
||||
i := i + 1u
|
||||
end;
|
||||
write_c('\n')
|
||||
end;
|
||||
|
||||
(*
|
||||
Compilation entry.
|
||||
*)
|
||||
|
||||
proc compile_in_stages(command_line: ^CommandLine, source_code: SourceCode) -> Int;
|
||||
var
|
||||
return_code: Int := 0;
|
||||
lexer: Tokenizer;
|
||||
begin
|
||||
if command_line^.lex or command_line^.parse then
|
||||
lexer := lexer_text(source_code)
|
||||
end;
|
||||
if command_line^.parse then
|
||||
parse(lexer.data, lexer.length)
|
||||
end;
|
||||
|
||||
return return_code
|
||||
end;
|
||||
|
||||
proc process(argc: Int, argv: ^^Char) -> Int;
|
||||
var
|
||||
tokens: ^Token;
|
||||
tokens_size: Word;
|
||||
source_code: SourceCode;
|
||||
command_line: ^CommandLine;
|
||||
return_code: Int := 0;
|
||||
source_file: ^SourceFile;
|
||||
begin
|
||||
command_line := parse_command_line(argc, argv);
|
||||
if command_line = nil then
|
||||
return_code := 2
|
||||
end;
|
||||
|
||||
if return_code = 0 then
|
||||
source_file := read_source(command_line^.input);
|
||||
|
||||
if source_file = nil then
|
||||
perror(command_line^.input);
|
||||
return_code := 3
|
||||
end
|
||||
end;
|
||||
|
||||
if return_code = 0 then
|
||||
defer
|
||||
fclose(source_file^.handle)
|
||||
end;
|
||||
|
||||
source_code.position := TextLocation(1u, 1u);
|
||||
source_code.input := cast(source_file: Pointer);
|
||||
source_code.empty := source_file_empty;
|
||||
source_code.head := source_file_head;
|
||||
source_code.advance := source_file_advance;
|
||||
|
||||
return_code := compile_in_stages(command_line, source_code)
|
||||
end;
|
||||
return return_code
|
||||
end;
|
||||
|
||||
return process(count, parameters)
|
||||
end.
|
||||
Reference in New Issue
Block a user