Merge GCC frontend into the branch

This commit is contained in:
2025-12-02 10:22:06 +01:00
parent 5f7d839741
commit 23b6f074c7
53 changed files with 13330 additions and 0 deletions

View File

@@ -0,0 +1,16 @@
DEFINITION MODULE CommandLineInterface;
FROM Common IMPORT ShortString;
TYPE
CommandLine = RECORD
input: ShortString;
output: ShortString;
lex: BOOLEAN;
parse: BOOLEAN
END;
PCommandLine = POINTER TO CommandLine;
PROCEDURE parse_command_line(): PCommandLine;
END CommandLineInterface.

12
source/Common.def Normal file
View File

@@ -0,0 +1,12 @@
DEFINITION MODULE Common;
TYPE
ShortString = ARRAY[1..256] OF CHAR;
Identifier = ARRAY[1..256] OF CHAR;
PIdentifier = POINTER TO Identifier;
TextLocation = RECORD
line: CARDINAL;
column: CARDINAL
END;
END Common.

107
source/Lexer.def Normal file
View File

@@ -0,0 +1,107 @@
DEFINITION MODULE Lexer;
FROM FIO IMPORT File;
FROM Common IMPORT Identifier, ShortString, TextLocation;
TYPE
PLexerBuffer = POINTER TO CHAR;
BufferPosition = RECORD
iterator: PLexerBuffer;
location: TextLocation
END;
PBufferPosition = POINTER TO BufferPosition;
Lexer = RECORD
input: File;
buffer: PLexerBuffer;
size: CARDINAL;
length: CARDINAL;
start: BufferPosition;
current: BufferPosition
END;
PLexer = POINTER TO Lexer;
LexerKind = (
lexerKindEof,
lexerKindIdentifier,
lexerKindIf,
lexerKindThen,
lexerKindElse,
lexerKindElsif,
lexerKindWhile,
lexerKindDo,
lexerKindProc,
lexerKindBegin,
lexerKindEnd,
lexerKindXor,
lexerKindConst,
lexerKindVar,
lexerKindCase,
lexerKindOf,
lexerKindType,
lexerKindRecord,
lexerKindUnion,
lexerKindPipe,
lexerKindTo,
lexerKindBoolean,
lexerKindNull,
lexerKindAnd,
lexerKindOr,
lexerKindTilde,
lexerKindReturn,
lexerKindDefer,
lexerKindRange,
lexerKindLeftParen,
lexerKindRightParen,
lexerKindLeftSquare,
lexerKindRightSquare,
lexerKindGreaterEqual,
lexerKindLessEqual,
lexerKindGreaterThan,
lexerKindLessThan,
lexerKindNotEqual,
lexerKindEqual,
lexerKindSemicolon,
lexerKindDot,
lexerKindComma,
lexerKindPlus,
lexerKindMinus,
lexerKindAsterisk,
lexerKindDivision,
lexerKindRemainder,
lexerKindAssignment,
lexerKindColon,
lexerKindHat,
lexerKindAt,
lexerKindComment,
lexerKindInteger,
lexerKindWord,
lexerKindCharacter,
lexerKindString,
lexerKindFrom,
lexerKindPointer,
lexerKindArray,
lexerKindArrow,
lexerKindProgram,
lexerKindModule,
lexerKindImport
);
LexerToken = RECORD
CASE kind: LexerKind OF
lexerKindBoolean: booleanKind: BOOLEAN |
lexerKindIdentifier: identifierKind: Identifier |
lexerKindInteger: integerKind: INTEGER |
lexerKindString: stringKind: ShortString
END;
start_location: TextLocation;
end_location: TextLocation
END;
PLexerToken = POINTER TO LexerToken;
PROCEDURE lexer_initialize(lexer: PLexer; input: File);
PROCEDURE lexer_destroy(lexer: PLexer);
(* Returns the last read token. *)
PROCEDURE lexer_current(lexer: PLexer): LexerToken;
(* Read and return the next token. *)
PROCEDURE lexer_lex(lexer: PLexer): LexerToken;
END Lexer.

200
source/Parser.def Normal file
View File

@@ -0,0 +1,200 @@
DEFINITION MODULE Parser;
FROM Common IMPORT Identifier, PIdentifier, ShortString;
FROM Lexer IMPORT PLexer;
TYPE
Parser = RECORD
lexer: PLexer
END;
PParser = POINTER TO Parser;
AstLiteralKind = (
astLiteralKindInteger,
astLiteralKindString,
astLiteralKindNull,
astLiteralKindBoolean
);
AstLiteral = RECORD
CASE kind: AstLiteralKind OF
astLiteralKindInteger: integer: INTEGER |
astLiteralKindString: string: ShortString |
astLiteralKindNull: |
astLiteralKindBoolean: boolean: BOOLEAN
END
END;
PAstLiteral = POINTER TO AstLiteral;
AstUnaryOperator = (
astUnaryOperatorReference,
astUnaryOperatorNot,
astUnaryOperatorMinus
);
AstBinaryOperator = (
astBinaryOperatorSum,
astBinaryOperatorSubtraction,
astBinaryOperatorMultiplication,
astBinaryOperatorDivision,
astBinaryOperatorRemainder,
astBinaryOperatorEquals,
astBinaryOperatorNotEquals,
astBinaryOperatorLess,
astBinaryOperatorGreater,
astBinaryOperatorLessEqual,
astBinaryOperatorGreaterEqual,
astBinaryOperatorDisjunction,
astBinaryOperatorConjunction,
astBinaryOperatorExclusiveDisjunction,
astBinaryOperatorShiftLeft,
astBinaryOperatorShiftRight
);
AstExpressionKind = (
astExpressionKindLiteral,
astExpressionKindIdentifier,
astExpressionKindArrayAccess,
astExpressionKindDereference,
astExpressionKindFieldAccess,
astExpressionKindUnary,
astExpressionKindBinary,
astExpressionKindCall
);
AstExpression = RECORD
CASE kind: AstExpressionKind OF
astExpressionKindLiteral: literal: PAstLiteral |
astExpressionKindIdentifier: identifier: Identifier |
astExpressionKindDereference: reference: PAstExpression |
astExpressionKindArrayAccess:
array: PAstExpression;
index: PAstExpression |
astExpressionKindFieldAccess:
aggregate: PAstExpression;
field: Identifier |
astExpressionKindUnary:
unary_operator: AstUnaryOperator;
unary_operand: PAstExpression |
astExpressionKindBinary:
binary_operator: AstBinaryOperator;
lhs: PAstExpression;
rhs: PAstExpression |
astExpressionKindCall:
callable: PAstExpression;
argument_count: CARDINAL;
arguments: PPAstExpression
END
END;
PAstExpression = POINTER TO AstExpression;
PPAstExpression = POINTER TO PAstExpression;
AstStatementKind = (
astStatementKindIf,
astStatementKindWhile,
astStatementKindAssignment,
astStatementKindReturn,
astStatementKindCall
);
AstStatement = RECORD
CASE kind: AstStatementKind OF
astStatementKindIf:
if_condition: PAstExpression;
if_branch: AstCompoundStatement |
astStatementKindWhile:
while_condition: PAstExpression;
while_body: AstCompoundStatement |
astStatementKindAssignment:
assignee: PAstExpression;
assignment: PAstExpression |
astStatementKindReturn: returned: PAstExpression |
astStatementKindCall: call: PAstExpression
END
END;
PAstStatement = POINTER TO AstStatement;
PPAstStatement = POINTER TO PAstStatement;
AstCompoundStatement = RECORD
count: CARDINAL;
statements: PPAstStatement
END;
AstImportStatement = RECORD
package: Identifier;
symbols: PIdentifier
END;
PAstImportStatement = POINTER TO AstImportStatement;
PPAstImportStatement = POINTER TO PAstImportStatement;
AstConstantDeclaration = RECORD
constant_name: Identifier;
constant_value: INTEGER
END;
PAstConstantDeclaration = POINTER TO AstConstantDeclaration;
PPAstConstantDeclaration = POINTER TO PAstConstantDeclaration;
AstFieldDeclaration = RECORD
field_name: Identifier;
field_type: PAstTypeExpression
END;
PAstFieldDeclaration = POINTER TO AstFieldDeclaration;
AstTypeExpressionKind = (
astTypeExpressionKindNamed,
astTypeExpressionKindRecord,
astTypeExpressionKindEnumeration,
astTypeExpressionKindArray,
astTypeExpressionKindPointer,
astTypeExpressionKindProcedure
);
AstTypeExpression = RECORD
CASE kind: AstTypeExpressionKind OF
astTypeExpressionKindNamed: name: Identifier |
astTypeExpressionKindEnumeration: cases: PIdentifier |
astTypeExpressionKindPointer: target: PAstTypeExpression |
astTypeExpressionKindRecord: fields: PAstFieldDeclaration |
astTypeExpressionKindArray:
base: PAstTypeExpression;
length: CARDINAL |
astTypeExpressionKindProcedure: parameters: PPAstTypeExpression
END
END;
PAstTypeExpression = POINTER TO AstTypeExpression;
PPAstTypeExpression = POINTER TO PAstTypeExpression;
AstTypedDeclaration = RECORD
identifier: Identifier;
type_expression: PAstTypeExpression
END;
PAstTypedDeclaration = POINTER TO AstTypedDeclaration;
PPAstTypedDeclaration = POINTER TO PAstTypedDeclaration;
AstVariableDeclaration = RECORD
variable_name: Identifier;
variable_type: PAstTypeExpression
END;
PAstVariableDeclaration = POINTER TO AstVariableDeclaration;
PPAstVariableDeclaration = POINTER TO PAstVariableDeclaration;
AstProcedureDeclaration = RECORD
name: Identifier;
parameter_count: CARDINAL;
parameters: PAstTypedDeclaration;
return_type: PAstTypeExpression;
constants: PPAstConstantDeclaration;
variables: PPAstVariableDeclaration;
statements: AstCompoundStatement
END;
PAstProcedureDeclaration = POINTER TO AstProcedureDeclaration;
PPAstProcedureDeclaration = POINTER TO PAstProcedureDeclaration;
AstModule = RECORD
main: BOOLEAN;
imports: PPAstImportStatement;
constants: PPAstConstantDeclaration;
types: PPAstTypedDeclaration;
variables: PPAstVariableDeclaration;
procedures: PPAstProcedureDeclaration;
statements: AstCompoundStatement
END;
PAstModule = POINTER TO AstModule;
PROCEDURE parse(lexer: PLexer): PAstModule;
END Parser.

1174
source/Parser.elna Normal file

File diff suppressed because it is too large Load Diff

20
source/Transpiler.def Normal file
View File

@@ -0,0 +1,20 @@
DEFINITION MODULE Transpiler;
FROM FIO IMPORT File;
FROM Common IMPORT ShortString;
FROM Lexer IMPORT PLexer, Lexer;
FROM Parser IMPORT PAstModule;
TYPE
TranspilerContext = RECORD
input_name: ShortString;
output: File;
definition: File;
indentation: CARDINAL
END;
PTranspilerContext = POINTER TO TranspilerContext;
PROCEDURE transpile(ast_module: PAstModule; output: File; definition: File; input_name: ShortString);
END Transpiler.

631
source/Transpiler.elna Normal file
View File

@@ -0,0 +1,631 @@
(* This Source Code Form is subject to the terms of the Mozilla Public License,
v. 2.0. If a copy of the MPL was not distributed with this file, You can
obtain one at https://mozilla.org/MPL/2.0/. *)
module;
from FIO import File, WriteNBytes, WriteLine, WriteChar, WriteString;
from NumberIO import IntToStr;
import common, Parser;
type
TranspilerContext* = record
input_name: String;
output: File;
definition: File;
indentation: Word
end;
proc indent(context: ^TranspilerContext);
var
count: Word;
begin
count := 0;
while count < context^.indentation do
WriteString(context^.output, " ");
count := count + 1u
end
end;
(* Write a semicolon followed by a newline. *)
proc write_semicolon(output: File);
begin
WriteChar(output, ';');
WriteLine(output)
end;
proc transpile_import_statement(context: ^TranspilerContext, import_statement: ^AstImportStatement);
var
current_symbol: ^Identifier;
begin
WriteString(context^.output, "FROM ");
transpile_identifier(context, import_statement^.package);
WriteString(context^.output, " IMPORT ");
current_symbol := import_statement^.symbols;
transpile_identifier(context, current_symbol^);
current_symbol := current_symbol + 1;
while current_symbol^[1] <> '\0' do
WriteString(context^.output, ", ");
transpile_identifier(context, current_symbol^);
current_symbol := current_symbol + 1;
end;
write_semicolon(context^.output)
end;
proc transpile_import_part(context: ^TranspilerContext, imports: ^^AstImportStatement);
var
import_statement: ^AstImportStatement;
begin
while imports^ <> nil do
transpile_import_statement(context, imports^);
imports := imports + 1
end;
WriteLine(context^.output)
end;
proc transpile_constant_declaration(context: ^TranspilerContext, declaration: ^AstConstantDeclaration);
var
buffer: [20]Char;
begin
WriteString(context^.output, " ");
transpile_identifier(context, declaration^.constant_name);
WriteString(context^.output, " = ");
IntToStr(declaration^.constant_value, 0, buffer);
WriteString(context^.output, buffer);
write_semicolon(context^.output)
end;
proc transpile_constant_part(context: ^TranspilerContext, declarations: ^^AstConstantDeclaration, extra_newline: Bool);
var
current_declaration: ^^AstConstantDeclaration;
begin
if declarations^ <> nil then
WriteString(context^.output, "CONST");
WriteLine(context^.output);
current_declaration := declarations;
while current_declaration^ <> nil do
transpile_constant_declaration(context, current_declaration^);
current_declaration := current_declaration + 1
end;
if extra_newline then
WriteLine(context^.output)
end
end
end;
proc transpile_module(context: ^TranspilerContext, result: ^AstModule);
begin
if result^.main = false then
WriteString(context^.output, "IMPLEMENTATION ")
end;
WriteString(context^.output, "MODULE ");
(* Write the module name and end the line with a semicolon and newline. *)
transpile_module_name(context);
write_semicolon(context^.output);
WriteLine(context^.output);
(* Write the module body. *)
transpile_import_part(context, result^.imports);
transpile_constant_part(context, result^.constants, true);
transpile_type_part(context, result^.types);
transpile_variable_part(context, result^.variables, true);
transpile_procedure_part(context, result^.procedures);
transpile_statement_part(context, result^.statements);
WriteString(context^.output, "END ");
transpile_module_name(context);
WriteChar(context^.output, ".");
WriteLine(context^.output)
end;
proc transpile_type_fields(context: ^TranspilerContext, fields: ^AstFieldDeclaration);
var
current_field: ^AstFieldDeclaration;
begin
current_field := fields;
while current_field^.field_name[1] <> '\0' do
WriteString(context^.output, " ");
transpile_identifier(context, current_field^.field_name);
WriteString(context^.output, ": ");
transpile_type_expression(context, current_field^.field_type);
current_field := current_field + 1;
if current_field^.field_name[1] <> '\0' then
WriteChar(context^.output, ';')
end;
WriteLine(context^.output)
end
end;
proc transpile_record_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression);
begin
WriteString(context^.output, "RECORD");
WriteLine(context^.output);
transpile_type_fields(context, type_expression^.fields);
WriteString(context^.output, " END")
end;
proc transpile_pointer_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression);
begin
WriteString(context^.output, "POINTER TO ");
transpile_type_expression(context, type_expression^.target)
end;
proc transpile_array_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression);
var
buffer: [20]Char;
begin
WriteString(context^.output, "ARRAY");
if type_expression^.length <> 0 then
WriteString(context^.output, "[1..");
IntToStr(type_expression^.length, 0, buffer);
WriteString(context^.output, buffer);
WriteChar(context^.output, ']')
end;
WriteString(context^.output, " OF ");
transpile_type_expression(context, type_expression^.base)
end;
proc transpile_enumeration_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression);
var
current_case: ^Identifier;
begin
current_case := type_expression^.cases;
WriteString(context^.output, "(");
WriteLine(context^.output);
WriteString(context^.output, " ");
transpile_identifier(context, current_case^);
current_case := current_case + 1;
while current_case^[1] <> '\0' do
WriteChar(context^.output, ',');
WriteLine(context^.output);
WriteString(context^.output, " ");
transpile_identifier(context, current_case^);
current_case := current_case + 1
end;
WriteLine(context^.output);
WriteString(context^.output, " )")
end;
proc transpile_identifier(context: ^TranspilerContext, identifier: Identifier);
var
written_bytes: Word;
begin
written_bytes := WriteNBytes(context^.output, cast(identifier[1]: Word), @identifier[2])
end;
proc transpile_procedure_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression);
var
result: ^AstTypeExpression;
current_parameter: ^^AstTypeExpression;
parameter_count: Word;
begin
WriteString(context^.output, "PROCEDURE(");
current_parameter := type_expression^.parameters;
while current_parameter^ <> nil do
transpile_type_expression(context, current_parameter^);
current_parameter := current_parameter + 1;
if current_parameter^ <> nil then
WriteString(context^.output, ", ")
end
end;
WriteChar(context^.output, ')')
end;
proc transpile_type_expression(context: ^TranspilerContext, type_expression: ^AstTypeExpression);
begin
if type_expression^.kind = astTypeExpressionKindRecord then
transpile_record_type(context, type_expression)
end;
if type_expression^.kind = astTypeExpressionKindEnumeration then
transpile_enumeration_type(context, type_expression)
end;
if type_expression^.kind = astTypeExpressionKindArray then
transpile_array_type(context, type_expression)
end;
if type_expression^.kind = astTypeExpressionKindPointer then
transpile_pointer_type(context, type_expression)
end;
if type_expression^.kind = astTypeExpressionKindProcedure then
transpile_procedure_type(context, type_expression)
end;
if type_expression^.kind = astTypeExpressionKindNamed then
transpile_identifier(context, type_expression^.name)
end
end;
proc transpile_type_declaration(context: ^TranspilerContext, declaration: ^AstTypedDeclaration);
var
written_bytes: Word;
begin
WriteString(context^.output, " ");
transpile_identifier(context^.output, declaration^.identifier);
WriteString(context^.output, " = ");
transpile_type_expression(context, declaration^.type_expression);
write_semicolon(context^.output)
end;
proc transpile_type_part(context: ^TranspilerContext, declarations: ^^AstTypedDeclaration);
var
current_declaration: ^^AstTypedDeclaration;
begin
if declarations^ <> nil then
WriteString(context^.output, "TYPE");
WriteLine(context^.output);
current_declaration := declarations;
while current_declaration^ <> nil do
transpile_type_declaration(context, current_declaration^);
current_declaration := current_declaration + 1
end;
WriteLine(context^.output)
end
end;
proc transpile_variable_declaration(context: ^TranspilerContext, declaration: ^AstVariableDeclaration);
begin
WriteString(context^.output, " ");
transpile_identifier(context, declaration^.variable_name);
WriteString(context^.output, ": ");
transpile_type_expression(context, declaration^.variable_type);
write_semicolon(context^.output)
end;
proc transpile_variable_part(context: ^TranspilerContext, declarations: ^^AstVariableDeclaration, extra_newline: Bool);
var
current_declaration: ^^AstVariableDeclaration;
begin
if declarations^ <> nil then
WriteString(context^.output, "VAR");
WriteLine(context^.output);
current_declaration := declarations;
while current_declaration^ <> nil do
transpile_variable_declaration(context, current_declaration^);
current_declaration := current_declaration + 1
end;
if extra_newline then
WriteLine(context^.output)
end
end
end;
proc transpile_procedure_heading(context: ^TranspilerContext, declaration: ^AstProcedureDeclaration);
var
parameter_index: Word;
current_parameter: ^AstTypedDeclaration;
begin
WriteString(context^.output, "PROCEDURE ");
transpile_identifier(context, declaration^.name);
WriteChar(context^.output, '(');
parameter_index := 0;
current_parameter := declaration^.parameters;
while parameter_index < declaration^.parameter_count do
transpile_identifier(context, current_parameter^.identifier);
WriteString(context^.output, ": ");
transpile_type_expression(context, current_parameter^.type_expression);
parameter_index := parameter_index + 1u;
current_parameter := current_parameter + 1;
if parameter_index <> declaration^.parameter_count then
WriteString(context^.output, "; ")
end
end;
WriteString(context^.output, ")");
(* Check for the return type and write it. *)
if declaration^.return_type <> nil then
WriteString(context^.output, ": ");
transpile_type_expression(context, declaration^.return_type)
end;
write_semicolon(context^.output)
end;
proc transpile_unary_operator(context: ^TranspilerContext, operator: AstUnaryOperator);
begin
if operator = AstUnaryOperator.minus then
WriteChar(context^.output, '-')
end;
if operator = AstUnaryOperator.not then
WriteChar(context^.output, '~')
end
end;
proc transpile_binary_operator(context: ^TranspilerContext, operator: AstBinaryOperator);
begin
case operator of
AstBinaryOperator.sum: WriteChar(context^.output, '+')
| AstBinaryOperator.subtraction: WriteChar(context^.output, '-')
| AstBinaryOperator.multiplication: WriteChar(context^.output, '*')
| AstBinaryOperator.equals: WriteChar(context^.output, '=')
| AstBinaryOperator.not_equals: WriteChar(context^.output, '#')
| AstBinaryOperator.less: WriteChar(context^.output, '<')
| AstBinaryOperator.greater: WriteChar(context^.output, '>')
| AstBinaryOperator.less_equal: WriteString(context^.output, "<=")
| AstBinaryOperator.greater_equal: WriteString(context^.output, ">=")
| AstBinaryOperator.disjunction: WriteString(context^.output, "OR")
| AstBinaryOperatorConjunction: WriteString(context^.output, "AND")
end
end;
proc transpile_expression(context: ^TranspilerContext, expression: ^AstExpression);
var
literal: ^AstLiteral;
buffer: [20]Char;
argument_index: Word;
current_argument: ^^AstExpression;
begin
if expression^.kind = astExpressionKindLiteral then
literal := expression^.literal;
if literal^.kind = AstLiteralKind.integer then
IntToStr(literal^.integer, 0, buffer);
WriteString(context^.output, buffer)
end;
if literal^.kind = AstLiteralKind.string then
WriteString(context^.output, literal^.string)
end;
if literal^.kind = AstLiteralKind.null then
WriteString(context^.output, "NIL")
end;
if (literal^.kind = AstLiteralKind.boolean) & literal^.boolean then
WriteString(context^.output, "TRUE")
end;
if (literal^.kind = AstLiteralKind.boolean) & (literal^.boolean = false) then
WriteString(context^.output, "FALSE")
end
end;
if expression^.kind = astExpressionKindIdentifier then
transpile_identifier(context, expression^.identifier)
end;
if expression^.kind = astExpressionKindDereference then
transpile_expression(context, expression^.reference);
WriteChar(context^.output, '^')
end;
if expression^.kind = astExpressionKindArrayAccess then
transpile_expression(context, expression^.array);
WriteChar(context^.output, '[');
transpile_expression(context, expression^.index);
WriteChar(context^.output, ']')
end;
if expression^.kind = astExpressionKindFieldAccess then
transpile_expression(context, expression^.aggregate);
WriteChar(context^.output, '.');
transpile_identifier(contextexpression^.field)
end;
if expression^.kind = astExpressionKindUnary then
transpile_unary_operator(context, expression^.unary_operator);
transpile_expression(context, expression^.unary_operand)
end;
if expression^.kind = astExpressionKindBinary then
WriteChar(context^.output, '(');
transpile_expression(context, expression^.lhs);
WriteChar(context^.output, ' ');
transpile_binary_operator(context, expression^.binary_operator);
WriteChar(context^.output, ' ');
transpile_expression(context, expression^.rhs);
WriteChar(context^.output, ')')
end;
if expression^.kind = astExpressionKindCall then
transpile_expression(context, expression^.callable);
WriteChar(context^.output, '(');
current_argument := expression^.arguments;
if expression^.argument_count > 0 then
transpile_expression(context, current_argument^);
argument_index := 1u;
current_argument := current_argument + 1;
while argument_index < expression^.argument_count do
WriteString(context^.output, ", ");
transpile_expression(context, current_argument^);
current_argument := current_argument + 1;
argument_index := argument_index + 1u
end
end;
WriteChar(context^.output, ')')
end
end;
proc transpile_if_statement(context: ^TranspilerContext, statement: ^AstStatement);
begin
WriteString(context^.output, "IF ");
transpile_expression(context, statement^.if_condition);
WriteString(context^.output, " THEN");
WriteLine(context^.output);
context^.indentation := context^.indentation + 1u;
transpile_compound_statement(context, statement^.if_branch);
context^.indentation := context^.indentation - 1u;
indent(context);
WriteString(context^.output, "END")
end;
proc transpile_while_statement(context: ^TranspilerContext, statement: ^AstStatement);
begin
WriteString(context^.output, "WHILE ");
transpile_expression(context, statement^.while_condition);
WriteString(context^.output, " DO");
WriteLine(context^.output);
context^.indentation := context^.indentation + 1u;
transpile_compound_statement(context, statement^.while_body);
context^.indentation := context^.indentation - 1u;
indent(context);
WriteString(context^.output, "END")
end;
proc transpile_assignment_statement(context: ^TranspilerContext, statement: ^AstStatement);
begin
transpile_expression(context, statement^.assignee);
WriteString(context^.output, " := ");
transpile_expression(context, statement^.assignment)
end;
proc transpile_return_statement(context: ^TranspilerContext, statement: ^AstStatement);
begin
WriteString(context^.output, "RETURN ");
transpile_expression(context, statement^.returned)
end;
proc transpile_compound_statement(context: ^TranspilerContext, statement: AstCompoundStatement);
var
current_statement: ^^AstStatement;
index: Word;
begin
index := 0;
current_statement := statement.statements;
while index < statement.count do
transpile_statement(context, current_statement^);
current_statement := current_statement + 1;
index := index + 1u;
if index <> statement.count then
WriteChar(context^.output, ';')
end;
WriteLine(context^.output)
end
end;
proc transpile_statement(context: ^TranspilerContext, statement: ^AstStatement);
begin
indent(context);
if statement^.kind = astStatementKindIf then
transpile_if_statement(context, statement)
end;
if statement^.kind = astStatementKindWhile then
transpile_while_statement(context, statement)
end;
if statement^.kind = astStatementKindReturn then
transpile_return_statement(context, statement)
end;
if statement^.kind = astStatementKindAssignment then
transpile_assignment_statement(context, statement)
end;
if statement^.kind = astStatementKindCall then
transpile_expression(context, statement^.call)
end
end;
proc transpile_statement_part(context: ^TranspilerContext, compound: AstCompoundStatement);
begin
if compound.count > 0 then
WriteString(context^.output, "BEGIN");
WriteLine(context^.output);
context^.indentation := context^.indentation + 1u;
transpile_compound_statement(context, compound);
context^.indentation := context^.indentation - 1u;
end
end;
proc transpile_procedure_declaration(context: ^TranspilerContext, declaration: ^AstProcedureDeclaration);
begin
transpile_procedure_heading(context, declaration);
transpile_constant_part(context, declaration^.constants, false);
transpile_variable_part(context, declaration^.variables, false);
transpile_statement_part(context, declaration^.statements);
WriteString(context^.output, "END ");
transpile_identifier(context^.output, declaration^.name);
write_semicolon(context^.output)
end;
proc transpile_procedure_part(context: ^TranspilerContext, declaration: ^^AstProcedureDeclaration);
begin
while declaration^ <> nil do
transpile_procedure_declaration(context, declaration^);
WriteLine(context^.output);
declaration := declaration + 1
end
end;
proc transpile_module_name(context: ^TranspilerContext);
var
counter: Word;
last_slash: Word;
begin
counter := 1u;
last_slash := 0u;
while context^.input_name[counter] <> '.' & context^.input_name[counter] <> '\0' do
if context^.input_name[counter] = '/' then
last_slash := counter
end;
counter := counter + 1u
end;
if last_slash = 0u then
counter := 1u
end;
if last_slash <> 0u then
counter := last_slash + 1u
end;
while context^.input_name[counter] <> '.' & context^.input_name[counter] <> '\0' do
WriteChar(context^.output, context^.input_name[counter]);
counter := counter + 1u
end
end;
proc transpile*(ast_module: ^AstModule, output: File, definition: File, input_name: String);
var
context: TranspilerContext;
begin
context.input_name := input_name;
context.output := output;
context.definition := definition;
context.indentation := 0u;
transpile_module(@context, ast_module)
end;
end.

14
source/cctype.elna Normal file
View File

@@ -0,0 +1,14 @@
(* This Source Code Form is subject to the terms of the Mozilla Public License,
v. 2.0. If a copy of the MPL was not distributed with this file, You can
obtain one at https://mozilla.org/MPL/2.0/. *)
module;
proc isdigit*(c: Int ) -> Int; extern;
proc isalnum*(c: Int) -> Int; extern;
proc isalpha*(c: Int) -> Int; extern;
proc isspace*(c: Int) -> Int; extern;
proc tolower*(c: Int) -> Int; extern;
proc toupper*(c: Int) -> Int; extern;
end.

View File

@@ -0,0 +1,93 @@
(* This Source Code Form is subject to the terms of the Mozilla Public License,
v. 2.0. If a copy of the MPL was not distributed with this file, You can
obtain one at https://mozilla.org/MPL/2.0/. *)
(*
Command line handling.
*)
module;
import cstdlib, cstring, common;
type
CommandLine* = record
input: ^Char;
output: ^Char;
lex: Bool;
parse: Bool
end;
proc parse_command_line*(argc: Int, argv: ^^Char) -> ^CommandLine;
var
parameter: ^Char;
i: Int;
result: ^CommandLine;
parsed: Bool;
begin
i := 1;
result := cast(malloc(#size(CommandLine)): ^CommandLine);
result^.lex := false;
result^.parse := false;
result^.input := nil;
result^.output := nil;
while i < argc & result <> nil do
parameter := (argv + i)^;
parsed := false;
if strcmp(parameter, "--lex\0".ptr) = 0 then
parsed := true;
result^.lex := true
end;
if strcmp(parameter, "--parse\0".ptr) = 0 then
parsed := true;
result^.parse := true
end;
if strcmp(parameter, "-o\0".ptr) = 0 then
i := i + 1;
if i = argc then
write_s("Fatal error: expecting a file name following -o.");
result := nil
end;
if i < argc then
parameter := (argv + i)^;
result^.output := parameter
end;
parsed := true
end;
if (parameter^ <> '-') & ~parsed then
parsed := true;
if result^.input <> nil then
write_s("Fatal error: only one source file can be compiled at once. First given \"");
write_z(result^.input);
write_s("\", then \"");
write_z(parameter);
write_s("\".\n");
result := nil
end;
if result <> nil then
result^.input := parameter
end
end;
if ~parsed then
write_s("Fatal error: unknown command line options: ");
write_z(parameter);
write_s(".\n");
result := nil
end;
i := i + 1
end;
if result <> nil & result^.input = nil then
write_s("Fatal error: no input files.\n");
result := nil
end;
return result
end;
end.

72
source/common.elna Normal file
View File

@@ -0,0 +1,72 @@
(* This Source Code Form is subject to the terms of the Mozilla Public License,
v. 2.0. If a copy of the MPL was not distributed with this file, You can
obtain one at https://mozilla.org/MPL/2.0/. *)
module;
import cstring, cstdio;
type
Identifier = [256]Char;
TextLocation* = record
line: Word;
column: Word
end;
proc write*(fd: Int, buf: Pointer, Word: Int) -> Int; extern;
proc write_s*(value: String);
begin
(* fwrite(cast(value.ptr: Pointer), value.length, 1u, stdout) *)
write(1, cast(value.ptr: Pointer), cast(value.length: Int))
end;
proc write_z*(value: ^Char);
begin
write(1, cast(value: Pointer), cast(strlen(value): Int))
end;
proc write_b*(value: Bool);
begin
if value then
write_s("true")
else
write_s("false")
end
end;
proc write_c*(value: Char);
begin
putchar(cast(value: Int));
fflush(nil)
end;
proc write_i*(value: Int);
var
digit: Int;
n: Word;
buffer: [10]Char;
begin
n := 10u;
if value = 0 then
write_c('0')
end;
while value <> 0 do
digit := value % 10;
value := value / 10;
buffer[n] := cast(cast('0': Int) + digit: Char);
n := n - 1u
end;
while n < 10u do
n := n + 1u;
write_c(buffer[n])
end
end;
proc write_u*(value: Word);
begin
write_i(cast(value: Int))
end;
end.

29
source/cstdio.elna Normal file
View File

@@ -0,0 +1,29 @@
(* This Source Code Form is subject to the terms of the Mozilla Public License,
v. 2.0. If a copy of the MPL was not distributed with this file, You can
obtain one at https://mozilla.org/MPL/2.0/. *)
module;
type
FILE* = record end;
var
stdin*: ^FILE := extern;
stdout*: ^FILE := extern;
stderr*: ^FILE := extern;
proc fopen*(pathname: ^Char, mode: ^Char) -> ^FILE; extern;
proc fclose*(stream: ^FILE) -> Int; extern;
proc fseek*(stream: ^FILE, off: Int, whence: Int) -> Int; extern;
proc rewind*(stream: ^FILE); extern;
proc ftell*(stream: ^FILE) -> Int; extern;
proc fflush*(stream: ^FILE) -> Int; extern;
proc fread*(ptr: Pointer, size: Word, nmemb: Word, stream: ^FILE) -> Word; extern;
proc fwrite*(ptr: Pointer, size: Word, nitems: Word, stream: ^FILE) -> Word; extern;
proc perror(s: ^Char); extern;
proc puts(s: ^Char) -> Int; extern;
proc putchar(c: Int) -> Int; extern;
end.

15
source/cstdlib.elna Normal file
View File

@@ -0,0 +1,15 @@
(* This Source Code Form is subject to the terms of the Mozilla Public License,
v. 2.0. If a copy of the MPL was not distributed with this file, You can
obtain one at https://mozilla.org/MPL/2.0/. *)
module;
proc malloc(size: Word) -> Pointer; extern;
proc free(ptr: Pointer); extern;
proc calloc(nmemb: Word, size: Word) -> Pointer; extern;
proc realloc(ptr: Pointer, size: Word) -> Pointer; extern;
proc atoi(str: ^Char) -> Int; extern;
proc exit(code: Int) -> !; extern;
end.

15
source/cstring.elna Normal file
View File

@@ -0,0 +1,15 @@
(* This Source Code Form is subject to the terms of the Mozilla Public License,
v. 2.0. If a copy of the MPL was not distributed with this file, You can
obtain one at https://mozilla.org/MPL/2.0/. *)
module;
proc memset(ptr: Pointer, c: Int, n: Word) -> ^Char; extern;
proc memcpy(dst: Pointer, src: Pointer, n: Word); extern;
proc strcmp(s1: ^Char, s2: ^Char) -> Int; extern;
proc strncmp(s1: ^Char, s2: ^Char, n: Word) -> Int; extern;
proc strncpy(dst: ^Char, src: ^Char, dsize: Word) -> ^Char; extern;
proc strcpy(dst: ^Char, src: ^Char) -> ^Char; extern;
proc strlen(ptr: ^Char) -> Word; extern;
end.

952
source/lexer.elna Normal file
View File

@@ -0,0 +1,952 @@
(* This Source Code Form is subject to the terms of the Mozilla Public License,
v. 2.0. If a copy of the MPL was not distributed with this file, You can
obtain one at https://mozilla.org/MPL/2.0/. *)
module;
import cstdio, cstring, cctype, cstdlib, common;
const
CHUNK_SIZE := 85536u;
type
(*
* Classification table assigns each possible character to a group (class). All
* characters of the same group are handled equivalently.
*
* Classification:
*)
TransitionClass = (
invalid,
digit,
alpha,
space,
colon,
equals,
left_paren,
right_paren,
asterisk,
underscore,
single,
hex,
zero,
x,
eof,
dot,
minus,
single_quote,
double_quote,
greater,
less,
other
);
TransitionState = (
start,
colon,
identifier,
decimal,
greater,
minus,
left_paren,
less,
dot,
comment,
closing_comment,
character,
string,
leading_zero,
decimal_suffix,
finish
);
LexerToken = record
kind: LexerKind;
value: union
booleanKind: Bool;
identifierKind: Identifier;
integerKind: Int;
stringKind: String
end;
start_location: TextLocation;
end_location: TextLocation
end;
TransitionAction = proc(^Lexer, ^LexerToken);
Transition = record
action: TransitionAction;
next_state: TransitionState
end;
TransitionClasses = [22]Transition;
BufferPosition* = record
iterator: ^Char;
location: TextLocation
end;
Lexer* = record
input: ^FILE;
buffer: ^Char;
size: Word;
length: Word;
start: BufferPosition;
current: BufferPosition
end;
LexerKind* = (
unknown,
identifier,
_if,
_then,
_else,
_elsif,
_while,
_do,
_proc,
_begin,
_end,
_extern,
_const,
_var,
_case,
_of,
_type,
_record,
_union,
pipe,
to,
boolean,
null,
and,
_or,
_xor,
not,
_return,
_cast,
shift_left,
shift_right,
left_paren,
right_paren,
left_square,
right_square,
greater_equal,
less_equal,
greater_than,
less_than,
not_equal,
equal,
semicolon,
dot,
comma,
plus,
minus,
multiplication,
division,
remainder,
assignment,
colon,
hat,
at,
comment,
integer,
word,
character,
string,
_defer,
exclamation,
arrow,
trait,
_program,
_module,
_import
);
var
classification: [128]TransitionClass;
transitions: [16]TransitionClasses;
proc initialize_classification();
var
i: Word;
begin
classification[1] := TransitionClass.eof; (* NUL *)
classification[2] := TransitionClass.invalid; (* SOH *)
classification[3] := TransitionClass.invalid; (* STX *)
classification[4] := TransitionClass.invalid; (* ETX *)
classification[5] := TransitionClass.invalid; (* EOT *)
classification[6] := TransitionClass.invalid; (* EMQ *)
classification[7] := TransitionClass.invalid; (* ACK *)
classification[8] := TransitionClass.invalid; (* BEL *)
classification[9] := TransitionClass.invalid; (* BS *)
classification[10] := TransitionClass.space; (* HT *)
classification[11] := TransitionClass.space; (* LF *)
classification[12] := TransitionClass.invalid; (* VT *)
classification[13] := TransitionClass.invalid; (* FF *)
classification[14] := TransitionClass.space; (* CR *)
classification[15] := TransitionClass.invalid; (* SO *)
classification[16] := TransitionClass.invalid; (* SI *)
classification[17] := TransitionClass.invalid; (* DLE *)
classification[18] := TransitionClass.invalid; (* DC1 *)
classification[19] := TransitionClass.invalid; (* DC2 *)
classification[20] := TransitionClass.invalid; (* DC3 *)
classification[21] := TransitionClass.invalid; (* DC4 *)
classification[22] := TransitionClass.invalid; (* NAK *)
classification[23] := TransitionClass.invalid; (* SYN *)
classification[24] := TransitionClass.invalid; (* ETB *)
classification[25] := TransitionClass.invalid; (* CAN *)
classification[26] := TransitionClass.invalid; (* EM *)
classification[27] := TransitionClass.invalid; (* SUB *)
classification[28] := TransitionClass.invalid; (* ESC *)
classification[29] := TransitionClass.invalid; (* FS *)
classification[30] := TransitionClass.invalid; (* GS *)
classification[31] := TransitionClass.invalid; (* RS *)
classification[32] := TransitionClass.invalid; (* US *)
classification[33] := TransitionClass.space; (* Space *)
classification[34] := TransitionClass.single; (* ! *)
classification[35] := TransitionClass.double_quote; (* " *)
classification[36] := TransitionClass.other; (* # *)
classification[37] := TransitionClass.other; (* $ *)
classification[38] := TransitionClass.single; (* % *)
classification[39] := TransitionClass.single; (* & *)
classification[40] := TransitionClass.single_quote; (* ' *)
classification[41] := TransitionClass.left_paren; (* ( *)
classification[42] := TransitionClass.right_paren; (* ) *)
classification[43] := TransitionClass.asterisk; (* * *)
classification[44] := TransitionClass.single; (* + *)
classification[45] := TransitionClass.single; (* , *)
classification[46] := TransitionClass.minus; (* - *)
classification[47] := TransitionClass.dot; (* . *)
classification[48] := TransitionClass.single; (* / *)
classification[49] := TransitionClass.zero; (* 0 *)
classification[50] := TransitionClass.digit; (* 1 *)
classification[51] := TransitionClass.digit; (* 2 *)
classification[52] := TransitionClass.digit; (* 3 *)
classification[53] := TransitionClass.digit; (* 4 *)
classification[54] := TransitionClass.digit; (* 5 *)
classification[55] := TransitionClass.digit; (* 6 *)
classification[56] := TransitionClass.digit; (* 7 *)
classification[57] := TransitionClass.digit; (* 8 *)
classification[58] := TransitionClass.digit; (* 9 *)
classification[59] := TransitionClass.colon; (* : *)
classification[60] := TransitionClass.single; (* ; *)
classification[61] := TransitionClass.less; (* < *)
classification[62] := TransitionClass.equals; (* = *)
classification[63] := TransitionClass.greater; (* > *)
classification[64] := TransitionClass.other; (* ? *)
classification[65] := TransitionClass.single; (* @ *)
classification[66] := TransitionClass.alpha; (* A *)
classification[67] := TransitionClass.alpha; (* B *)
classification[68] := TransitionClass.alpha; (* C *)
classification[69] := TransitionClass.alpha; (* D *)
classification[70] := TransitionClass.alpha; (* E *)
classification[71] := TransitionClass.alpha; (* F *)
classification[72] := TransitionClass.alpha; (* G *)
classification[73] := TransitionClass.alpha; (* H *)
classification[74] := TransitionClass.alpha; (* I *)
classification[75] := TransitionClass.alpha; (* J *)
classification[76] := TransitionClass.alpha; (* K *)
classification[77] := TransitionClass.alpha; (* L *)
classification[78] := TransitionClass.alpha; (* M *)
classification[79] := TransitionClass.alpha; (* N *)
classification[80] := TransitionClass.alpha; (* O *)
classification[81] := TransitionClass.alpha; (* P *)
classification[82] := TransitionClass.alpha; (* Q *)
classification[83] := TransitionClass.alpha; (* R *)
classification[84] := TransitionClass.alpha; (* S *)
classification[85] := TransitionClass.alpha; (* T *)
classification[86] := TransitionClass.alpha; (* U *)
classification[87] := TransitionClass.alpha; (* V *)
classification[88] := TransitionClass.alpha; (* W *)
classification[89] := TransitionClass.alpha; (* X *)
classification[90] := TransitionClass.alpha; (* Y *)
classification[91] := TransitionClass.alpha; (* Z *)
classification[92] := TransitionClass.single; (* [ *)
classification[93] := TransitionClass.other; (* \ *)
classification[94] := TransitionClass.single; (* ] *)
classification[95] := TransitionClass.single; (* ^ *)
classification[96] := TransitionClass.underscore; (* _ *)
classification[97] := TransitionClass.other; (* ` *)
classification[98] := TransitionClass.hex; (* a *)
classification[99] := TransitionClass.hex; (* b *)
classification[100] := TransitionClass.hex; (* c *)
classification[101] := TransitionClass.hex; (* d *)
classification[102] := TransitionClass.hex; (* e *)
classification[103] := TransitionClass.hex; (* f *)
classification[104] := TransitionClass.alpha; (* g *)
classification[105] := TransitionClass.alpha; (* h *)
classification[106] := TransitionClass.alpha; (* i *)
classification[107] := TransitionClass.alpha; (* j *)
classification[108] := TransitionClass.alpha; (* k *)
classification[109] := TransitionClass.alpha; (* l *)
classification[110] := TransitionClass.alpha; (* m *)
classification[111] := TransitionClass.alpha; (* n *)
classification[112] := TransitionClass.alpha; (* o *)
classification[113] := TransitionClass.alpha; (* p *)
classification[114] := TransitionClass.alpha; (* q *)
classification[115] := TransitionClass.alpha; (* r *)
classification[116] := TransitionClass.alpha; (* s *)
classification[117] := TransitionClass.alpha; (* t *)
classification[118] := TransitionClass.alpha; (* u *)
classification[119] := TransitionClass.alpha; (* v *)
classification[120] := TransitionClass.alpha; (* w *)
classification[121] := TransitionClass.x; (* x *)
classification[122] := TransitionClass.alpha; (* y *)
classification[123] := TransitionClass.alpha; (* z *)
classification[124] := TransitionClass.other; (* { *)
classification[125] := TransitionClass.single; (* | *)
classification[126] := TransitionClass.other; (* } *)
classification[127] := TransitionClass.single; (* ~ *)
classification[128] := TransitionClass.invalid; (* DEL *)
i := 129u;
while i <= 256u do
classification[i] := TransitionClass.other;
i := i + 1u
end
end;
proc compare_keyword(keyword: String, token_start: BufferPosition, token_end: ^Char) -> Bool;
var
result: Bool;
index: Word;
continue: Bool;
begin
index := 0u;
result := true;
continue := (index < keyword.length) & (token_start.iterator <> token_end);
while continue & result do
result := keyword[index] = token_start.iterator^
or cast(tolower(cast(keyword[index]: Int)): Char) = token_start.iterator^;
token_start.iterator := token_start.iterator + 1;
index := index + 1u;
continue := (index < keyword.length) & (token_start.iterator <> token_end)
end;
result := result & index = keyword.length;
return result & (token_start.iterator = token_end)
end;
(* Reached the end of file. *)
proc transition_action_eof(lexer: ^Lexer, token: ^LexerToken);
begin
token^.kind := LexerKind.unknown
end;
proc increment(position: ^BufferPosition);
begin
position^.iterator := position^.iterator + 1
end;
(* Add the character to the token currently read and advance to the next character. *)
proc transition_action_accumulate(lexer: ^Lexer, token: ^LexerToken);
begin
increment(@lexer^.current)
end;
(* The current character is not a part of the token. Finish the token already
* read. Don't advance to the next character. *)
proc transition_action_finalize(lexer: ^Lexer, token: ^LexerToken);
begin
if lexer^.start.iterator^ = ':' then
token^.kind := LexerKind.colon
end;
if lexer^.start.iterator^ = '>' then
token^.kind := LexerKind.greater_than
end;
if lexer^.start.iterator^ = '<' then
token^.kind := LexerKind.less_than
end;
if lexer^.start.iterator^ = '(' then
token^.kind := LexerKind.left_paren
end;
if lexer^.start.iterator^ = '-' then
token^.kind := LexerKind.minus
end;
if lexer^.start.iterator^ = '.' then
token^.kind := LexerKind.dot
end
end;
(* An action for tokens containing multiple characters. *)
proc transition_action_composite(lexer: ^Lexer, token: ^LexerToken);
begin
if lexer^.start.iterator^ = '<' then
if lexer^.current.iterator^ = '>' then
token^.kind := LexerKind.not_equal
end;
if lexer^.current.iterator^ = '=' then
token^.kind := LexerKind.less_equal
end
end;
if (lexer^.start.iterator^ = '>') & (lexer^.current.iterator^ = '=') then
token^.kind := LexerKind.greater_equal
end;
if (lexer^.start.iterator^ = ':') & (lexer^.current.iterator^ = '=') then
token^.kind := LexerKind.assignment
end;
if (lexer^.start.iterator^ = '-') & (lexer^.current.iterator^ = '>') then
token^.kind := LexerKind.arrow
end;
increment(@lexer^.current)
end;
(* Skip a space. *)
proc transition_action_skip(lexer: ^Lexer, token: ^LexerToken);
begin
increment(@lexer^.start);
if lexer^.start.iterator^ = '\n' then
lexer^.start.location.line := lexer^.start.location.line + 1u;
lexer^.start.location.column := 1u
end;
lexer^.current := lexer^.start
end;
(* Delimited string action. *)
proc transition_action_delimited(lexer: ^Lexer, token: ^LexerToken);
var
text_length: Word;
begin
if lexer^.start.iterator^ = '(' then
token^.kind := LexerKind.comment
end;
if lexer^.start.iterator^ = '"' then
text_length := cast(lexer^.current.iterator - lexer^.start.iterator + 1: Word);
token^.value.stringKind := String(cast(malloc(text_length): ^Char), text_length);
memcpy(cast(token^.value.stringKind.ptr: Pointer), cast(lexer^.start.iterator: Pointer), text_length);
token^.kind := LexerKind.character
end;
if lexer^.start.iterator^ = '\'' then
text_length := cast(lexer^.current.iterator - lexer^.start.iterator + 1: Word);
token^.value.stringKind := String(cast(malloc(text_length): ^Char), text_length);
memcpy(cast(token^.value.stringKind.ptr: Pointer), cast(lexer^.start.iterator: Pointer), text_length);
token^.kind := LexerKind.string
end;
increment(@lexer^.current)
end;
(* Finalize keyword or identifier. *)
proc transition_action_key_id(lexer: ^Lexer, token: ^LexerToken);
begin
token^.kind := LexerKind.identifier;
token^.value.identifierKind[1] := cast(lexer^.current.iterator - lexer^.start.iterator: Char);
memcpy(cast(@token^.value.identifierKind[2]: Pointer), cast(lexer^.start.iterator: Pointer), cast(token^.value.identifierKind[1]: Word));
if compare_keyword("program", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind._program
end;
if compare_keyword("import", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind._import
end;
if compare_keyword("const", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind._const
end;
if compare_keyword("var", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind._var
end;
if compare_keyword("if", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind._if
end;
if compare_keyword("then", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind._then
end;
if compare_keyword("elsif", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind._elsif
end;
if compare_keyword("else", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind._else
end;
if compare_keyword("while", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind._while
end;
if compare_keyword("do", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind._do
end;
if compare_keyword("proc", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind._proc
end;
if compare_keyword("begin", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind._begin
end;
if compare_keyword("end", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind._end
end;
if compare_keyword("type", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind._type
end;
if compare_keyword("record", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind._record
end;
if compare_keyword("union", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind._union
end;
if compare_keyword("NIL", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind.null
end;
if compare_keyword("or", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind._or
end;
if compare_keyword("return", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind._return
end;
if compare_keyword("defer", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind._defer
end;
if compare_keyword("TO", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind.to
end;
if compare_keyword("CASE", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind._case
end;
if compare_keyword("OF", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind._of
end;
if compare_keyword("module", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind._module
end;
if compare_keyword("xor", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind._xor
end;
if compare_keyword("TRUE", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind.boolean;
token^.value.booleanKind := true
end;
if compare_keyword("FALSE", lexer^.start, lexer^.current.iterator) then
token^.kind := LexerKind.boolean;
token^.value.booleanKind := false
end
end;
(* Action for tokens containing only one character. The character cannot be
* followed by other characters forming a composite token. *)
proc transition_action_single(lexer: ^Lexer, token: ^LexerToken);
begin
if lexer^.current.iterator^ = '&' then
token^.kind := LexerKind.and
end;
if lexer^.current.iterator^ = ';' then
token^.kind := LexerKind.semicolon
end;
if lexer^.current.iterator^ = ',' then
token^.kind := LexerKind.comma
end;
if lexer^.current.iterator^ = '~' then
token^.kind := LexerKind.not
end;
if lexer^.current.iterator^ = ')' then
token^.kind := LexerKind.right_paren
end;
if lexer^.current.iterator^ = '[' then
token^.kind := LexerKind.left_square
end;
if lexer^.current.iterator^ = ']' then
token^.kind := LexerKind.right_square
end;
if lexer^.current.iterator^ = '^' then
token^.kind := LexerKind.hat
end;
if lexer^.current.iterator^ = '=' then
token^.kind := LexerKind.equal
end;
if lexer^.current.iterator^ = '+' then
token^.kind := LexerKind.plus
end;
if lexer^.current.iterator^ = '*' then
token^.kind := LexerKind.multiplication
end;
if lexer^.current.iterator^ = '/' then
token^.kind := LexerKind.division
end;
if lexer^.current.iterator^ = '%' then
token^.kind := LexerKind.remainder
end;
if lexer^.current.iterator^ = '@' then
token^.kind := LexerKind.at
end;
if lexer^.current.iterator^ = '|' then
token^.kind := LexerKind.pipe
end;
increment(@lexer^.current)
end;
(* Handle an integer literal. *)
proc transition_action_integer(lexer: ^Lexer, token: ^LexerToken);
var
buffer: String;
integer_length: Word;
found: Bool;
begin
token^.kind := LexerKind.integer;
integer_length := cast(lexer^.current.iterator - lexer^.start.iterator: Word);
memset(cast(token^.value.identifierKind.ptr: Pointer), 0, #size(Identifier));
memcpy(cast(@token^.value.identifierKind[1]: Pointer), cast(lexer^.start.iterator: Pointer), integer_length);
token^.value.identifierKind[cast(token^.value.identifierKind[1]: Int) + 2] := '\0';
token^.value.integerKind := atoi(@token^.value.identifierKind[2])
end;
proc set_default_transition(current_state: TransitionState, default_action: TransitionAction, next_state: TransitionState) -> Int;
var
default_transition: Transition;
state_index: Int;
begin
default_transition.action := default_action;
default_transition.next_state := next_state;
state_index := cast(current_state: Int) + 1;
transitions[state_index][cast(TransitionClass.invalid: Int) + 1] := default_transition;
transitions[state_index][cast(TransitionClass.digit: Int) + 1] := default_transition;
transitions[state_index][cast(TransitionClass.alpha: Int) + 1] := default_transition;
transitions[state_index][cast(TransitionClass.space: Int) + 1] := default_transition;
transitions[state_index][cast(TransitionClass.colon: Int) + 1] := default_transition;
transitions[state_index][cast(TransitionClass.equals: Int) + 1] := default_transition;
transitions[state_index][cast(TransitionClass.left_paren: Int) + 1] := default_transition;
transitions[state_index][cast(TransitionClass.right_paren: Int) + 1] := default_transition;
transitions[state_index][cast(TransitionClass.asterisk: Int) + 1] := default_transition;
transitions[state_index][cast(TransitionClass.underscore: Int) + 1] := default_transition;
transitions[state_index][cast(TransitionClass.single: Int) + 1] := default_transition;
transitions[state_index][cast(TransitionClass.hex: Int) + 1] := default_transition;
transitions[state_index][cast(TransitionClass.zero: Int) + 1] := default_transition;
transitions[state_index][cast(TransitionClass.x: Int) + 1] := default_transition;
transitions[state_index][cast(TransitionClass.eof: Int) + 1] := default_transition;
transitions[state_index][cast(TransitionClass.dot: Int) + 1] := default_transition;
transitions[state_index][cast(TransitionClass.minus: Int) + 1] := default_transition;
transitions[state_index][cast(TransitionClass.single_quote: Int) + 1] := default_transition;
transitions[state_index][cast(TransitionClass.double_quote: Int) + 1] := default_transition;
transitions[state_index][cast(TransitionClass.greater: Int) + 1] := default_transition;
transitions[state_index][cast(TransitionClass.less: Int) + 1] := default_transition;
transitions[state_index][cast(TransitionClass.other: Int) + 1] := default_transition;
return state_index
end;
(*
* The transition table describes transitions from one state to another, given
* a symbol (character class).
*
* The table has m rows and n columns, where m is the amount of states and n is
* the amount of classes. So given the current state and a classified character
* the table can be used to look up the next state.
*
* Each cell is a word long.
* - The least significant byte of the word is a row number (beginning with 0).
* It specifies the target state. "ff" means that this is an end state and no
* transition is possible.
* - The next byte is the action that should be performed when transitioning.
* For the meaning of actions see labels in the lex_next function, which
* handles each action.
*)
proc initialize_transitions();
var
state_index: Int;
begin
(* Start state. *)
state_index := cast(TransitionState.start: Int) + 1;
transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil;
transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish;
transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.decimal;
transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.identifier;
transitions[state_index][cast(TransitionClass.space: Int) + 1].action := transition_action_skip;
transitions[state_index][cast(TransitionClass.space: Int) + 1].next_state := TransitionState.start;
transitions[state_index][cast(TransitionClass.colon: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.colon: Int) + 1].next_state := TransitionState.colon;
transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_single;
transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish;
transitions[state_index][cast(TransitionClass.left_paren: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.left_paren: Int) + 1].next_state := TransitionState.left_paren;
transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].action := transition_action_single;
transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].next_state := TransitionState.finish;
transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_single;
transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.finish;
transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.identifier;
transitions[state_index][cast(TransitionClass.single: Int) + 1].action := transition_action_single;
transitions[state_index][cast(TransitionClass.single: Int) + 1].next_state := TransitionState.finish;
transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.identifier;
transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.leading_zero;
transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.identifier;
transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := transition_action_eof;
transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish;
transitions[state_index][cast(TransitionClass.dot: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.dot: Int) + 1].next_state := TransitionState.dot;
transitions[state_index][cast(TransitionClass.minus: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.minus: Int) + 1].next_state := TransitionState.minus;
transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].next_state := TransitionState.character;
transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].next_state := TransitionState.string;
transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.greater;
transitions[state_index][cast(TransitionClass.less: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.less: Int) + 1].next_state := TransitionState.less;
transitions[state_index][cast(TransitionClass.other: Int) + 1].action := nil;
transitions[state_index][cast(TransitionClass.other: Int) + 1].next_state := TransitionState.finish;
(* Colon state. *)
state_index := set_default_transition(TransitionState.colon, transition_action_finalize, TransitionState.finish);
transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite;
transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish;
(* Identifier state. *)
state_index := set_default_transition(TransitionState.identifier, transition_action_key_id, TransitionState.finish);
transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.identifier;
transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.identifier;
transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.identifier;
transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.identifier;
transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.identifier;
transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.identifier;
(* Decimal state. *)
state_index := set_default_transition(TransitionState.decimal, transition_action_integer, TransitionState.finish);
transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.decimal;
transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.decimal_suffix;
transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := nil;
transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.finish;
transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.decimal_suffix;
transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.decimal;
transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.decimal_suffix;
(* Greater state. *)
state_index := set_default_transition(TransitionState.greater, transition_action_finalize, TransitionState.finish);
transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite;
transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish;
(* Minus state. *)
state_index := set_default_transition(TransitionState.minus, transition_action_finalize, TransitionState.finish);
transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_composite;
transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.finish;
(* Left paren state. *)
state_index := set_default_transition(TransitionState.left_paren, transition_action_finalize, TransitionState.finish);
transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.comment;
(* Less state. *)
state_index := set_default_transition(TransitionState.less, transition_action_finalize, TransitionState.finish);
transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite;
transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish;
transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_composite;
transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.finish;
(* Hexadecimal after 0x. *)
state_index := set_default_transition(TransitionState.dot, transition_action_finalize, TransitionState.finish);
transitions[state_index][cast(TransitionClass.dot: Int) + 1].action := transition_action_composite;
transitions[state_index][cast(TransitionClass.dot: Int) + 1].next_state := TransitionState.finish;
(* Comment. *)
state_index := set_default_transition(TransitionState.comment, transition_action_accumulate, TransitionState.comment);
transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.closing_comment;
transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil;
transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish;
(* Closing comment. *)
state_index := set_default_transition(TransitionState.closing_comment, transition_action_accumulate, TransitionState.comment);
transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil;
transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish;
transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].action := transition_action_delimited;
transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].next_state := TransitionState.finish;
transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate;
transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.closing_comment;
transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil;
transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish;
(* Character. *)
state_index := set_default_transition(TransitionState.character, transition_action_accumulate, TransitionState.character);
transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil;
transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish;
transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil;
transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish;
transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].action := transition_action_delimited;
transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].next_state := TransitionState.finish;
(* String. *)
state_index := set_default_transition(TransitionState.string, transition_action_accumulate, TransitionState.string);
transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil;
transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish;
transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil;
transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish;
transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].action := transition_action_delimited;
transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].next_state := TransitionState.finish;
(* Leading zero. *)
state_index := set_default_transition(TransitionState.leading_zero, transition_action_integer, TransitionState.finish);
transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := nil;
transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.finish;
transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := nil;
transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.finish;
transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := nil;
transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.finish;
transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := nil;
transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.finish;
transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := nil;
transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.finish;
transitions[state_index][cast(TransitionClass.x: Int) + 1].action := nil;
transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.finish;
(* Digit with a character suffix. *)
state_index := set_default_transition(TransitionState.decimal_suffix, transition_action_integer, TransitionState.finish);
transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := nil;
transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.finish;
transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := nil;
transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.finish;
transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := nil;
transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.finish;
transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := nil;
transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.finish;
transitions[state_index][cast(TransitionClass.x: Int) + 1].action := nil;
transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.finish
end;
proc lexer_make*(lexer: ^Lexer, input: ^FILE);
begin
lexer^.input := input;
lexer^.length := 0u;
lexer^.buffer := cast(malloc(CHUNK_SIZE): ^Char);
memset(cast(lexer^.buffer: Pointer), 0, CHUNK_SIZE);
lexer^.size := CHUNK_SIZE
end;
(* Returns the last read token. *)
proc lexer_current*(lexer: ^Lexer) -> LexerToken;
var
current_class: TransitionClass;
current_state: TransitionState;
current_transition: Transition;
result: LexerToken;
index1: Word;
index2: Word;
begin
lexer^.current := lexer^.start;
current_state := TransitionState.start;
while current_state <> TransitionState.finish do
index1 := cast(lexer^.current.iterator^: Word) + 1u;
current_class := classification[index1];
index1 := cast(current_state: Word) + 1u;
index2 := cast(current_class: Word) + 1u;
current_transition := transitions[index1][index2];
if current_transition.action <> nil then
current_transition.action(lexer, @result)
end;
current_state := current_transition.next_state
end;
result.start_location := lexer^.start.location;
result.end_location := lexer^.current.location;
return result
end;
(* Read and return the next token. *)
proc lexer_lex*(lexer: ^Lexer) -> LexerToken;
var
result: LexerToken;
begin
if lexer^.length = 0u then
lexer^.length := fread(cast(lexer^.buffer: Pointer), CHUNK_SIZE, 1u, lexer^.input);
lexer^.current.location.column := 1u;
lexer^.current.location.line := 1u;
lexer^.current.iterator := lexer^.buffer
end;
lexer^.start := lexer^.current;
result := lexer_current(lexer);
return result
end;
proc lexer_destroy*(lexer: ^Lexer);
begin
free(cast(lexer^.buffer: Pointer))
end;
proc lexer_initialize();
begin
initialize_classification();
initialize_transitions()
end;
end.

841
source/main.elna Normal file
View File

@@ -0,0 +1,841 @@
(* This Source Code Form is subject to the terms of the Mozilla Public License,
v. 2.0. If a copy of the MPL was not distributed with this file, You can
obtain one at https://mozilla.org/MPL/2.0/. *)
program;
import cstdio, cctype, common, command_line_interface, lexer;
type
SourceFile* = record
buffer: [1024]Char;
handle: ^FILE;
size: Word;
index: Word
end;
StringBuffer* = record
data: Pointer;
size: Word;
capacity: Word
end;
SourceCode = record
position: TextLocation;
input: Pointer;
empty: proc(Pointer) -> Bool;
advance: proc(Pointer);
head: proc(Pointer) -> Char
end;
Token* = record
kind: LexerKind;
value: union
int_value: Int;
string: String;
boolean_value: Bool;
char_value: Char
end
end;
Tokenizer* = record
length: Word;
data: ^Token
end;
(*
Standard procedures.
*)
proc reallocarray(ptr: Pointer, n: Word, size: Word) -> Pointer;
return realloc(ptr, n * size)
end;
proc substring(string: String, start: Word, count: Word) -> String;
return String(string.ptr + start, count)
end;
proc open_substring(string: String, start: Word) -> String;
return substring(string, start, string.length - start)
end;
proc string_dup(origin: String) -> String;
var
copy: ^Char;
begin
copy := cast(malloc(origin.length): ^Char);
strncpy(copy, origin.ptr, origin.length);
return String(copy, origin.length)
end;
proc string_buffer_new() -> StringBuffer;
var
result: StringBuffer;
begin
result.capacity := 64u;
result.data := malloc(result.capacity);
result.size := 0u;
return result
end;
proc string_buffer_push(buffer: ^StringBuffer, char: Char);
begin
if buffer^.size >= buffer^.capacity then
buffer^.capacity := buffer^.capacity + 1024u;
buffer^.data := realloc(buffer^.data, buffer^.capacity)
end;
cast(buffer^.data + buffer^.size: ^Char)^ := cast(char: Char);
buffer^.size := buffer^.size + 1u
end;
proc string_buffer_pop(buffer: ^StringBuffer, count: Word);
begin
buffer^.size := buffer^.size - count
end;
proc string_buffer_clear(buffer: ^StringBuffer) -> String;
var
result: String;
begin
result := String(cast(buffer^.data: ^Char), buffer^.size);
buffer^.size := 0u;
return result
end;
(*
Source code stream procedures.
*)
proc read_source(filename: ^Char) -> ^SourceFile;
var
result: ^SourceFile;
file_handle: ^FILE;
begin
file_handle := fopen(filename, "rb\0".ptr);
if file_handle <> nil then
result := cast(malloc(#size(SourceFile)): ^SourceFile);
result^.handle := file_handle;
result^.size := 0u;
result^.index := 1u
end;
return result
end;
proc source_file_empty(source_input: Pointer) -> Bool;
var
source_file: ^SourceFile;
begin
source_file := cast(source_input: ^SourceFile);
if source_file^.index > source_file^.size then
source_file^.size := fread(cast(@source_file^.buffer: Pointer), 1u, 1024u, source_file^.handle);
source_file^.index := 1u
end;
return source_file^.size = 0u
end;
proc source_file_head(source_input: Pointer) -> Char;
var
source_file: ^SourceFile;
begin
source_file := cast(source_input: ^SourceFile);
return source_file^.buffer[source_file^.index]
end;
proc source_file_advance(source_input: Pointer);
var
source_file: ^SourceFile;
begin
source_file := cast(source_input: ^SourceFile);
source_file^.index := source_file^.index + 1u
end;
proc source_code_empty(source_code: ^SourceCode) -> Bool;
return source_code^.empty(source_code^.input)
end;
proc source_code_head(source_code: SourceCode) -> Char;
return source_code.head(source_code.input)
end;
proc source_code_advance(source_code: ^SourceCode);
begin
source_code^.advance(source_code^.input);
source_code^.position.column := source_code^.position.column
end;
proc source_code_break(source_code: ^SourceCode);
begin
source_code^.position.line := source_code^.position.line + 1u;
source_code^.position.column := 0u
end;
proc source_code_expect(source_code: ^SourceCode, expected: Char) -> Bool;
return ~source_code_empty(source_code) & source_code_head(source_code^) = expected
end;
(*
Token procedures.
*)
proc lexer_escape(escape: Char, result: ^Char) -> Bool;
var
successful: Bool;
begin
case escape of
'n':
result^ := '\n';
successful := true
| 'a':
result^ := '\a';
successful := true
| 'b':
result^ := '\b';
successful := true
| 't':
result^ := '\t';
successful := true
| 'f':
result^ := '\f';
successful := true
| 'r':
result^ := '\r';
successful := true
| 'v':
result^ := '\v';
successful := true
| '\\':
result^ := '\\';
successful := true
| '\'':
result^ := '\'';
successful := true
| '"':
result^ := '"';
successful := true
| '?':
result^ := '\?';
successful := true
| '0':
result^ := '\0';
successful := true
else
successful := false
end;
return successful
end;
(* Skip spaces. *)
proc lexer_spaces(source_code: ^SourceCode);
var
current: Char;
begin
while ~source_code_empty(source_code) & isspace(cast(source_code_head(source_code^): Int)) <> 0 do
current := source_code_head(source_code^);
if current = '\n' then
source_code_break(source_code)
end;
source_code_advance(source_code)
end
end;
(* Checker whether the character is allowed in an identificator. *)
proc lexer_is_ident(char: Char) -> Bool;
return isalnum(cast(char: Int)) <> 0 or char = '_'
end;
proc lexer_identifier(source_code: ^SourceCode, token_content: ^StringBuffer);
var
content_length: Word;
begin
while ~source_code_empty(source_code) & lexer_is_ident(source_code_head(source_code^)) do
string_buffer_push(token_content, source_code_head(source_code^));
source_code_advance(source_code)
end
end;
proc lexer_comment(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool;
var
trailing: Word;
begin
trailing := 0u;
while ~source_code_empty(source_code) & trailing < 2u do
if source_code_head(source_code^) = '*' then
string_buffer_push(token_content, '*');
trailing := 1u
elsif source_code_head(source_code^) = ')' & trailing = 1u then
string_buffer_pop(token_content, 1u);
trailing := 2u
else
string_buffer_push(token_content, source_code_head(source_code^));
trailing := 0u
end;
source_code_advance(source_code)
end;
return trailing = 2u
end;
proc lexer_character(source_code: ^SourceCode, token_content: ^Char) -> Bool;
var
successful: Bool;
begin
successful := ~source_code_empty(source_code);
if successful then
if source_code_head(source_code^) = '\\' then
source_code_advance(source_code);
successful := ~source_code_empty(source_code) & lexer_escape(source_code_head(source_code^), token_content)
else
token_content^ := source_code_head(source_code^);
successful := true
end
end;
if successful then
source_code_advance(source_code)
end;
return successful
end;
proc lexer_string(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool;
var
token_end, constructed_string: ^Char;
token_length: Word;
is_valid: Bool := true;
next_char: Char;
begin
while is_valid & ~source_code_empty(source_code) & source_code_head(source_code^) <> '"' do
is_valid := lexer_character(source_code, @next_char);
if is_valid then
string_buffer_push(token_content, next_char)
end
end;
if is_valid & source_code_expect(source_code, '"') then
source_code_advance(source_code)
else
is_valid := false
end;
return is_valid
end;
proc lexer_number(source_code: ^SourceCode, token_content: ^Int);
begin
token_content^ := 0;
while ~source_code_empty(source_code) & isdigit(cast(source_code_head(source_code^): Int)) <> 0 do
token_content^ := token_content^ * 10 + (cast(source_code_head(source_code^): Int) - cast('0': Int));
source_code_advance(source_code)
end
end;
(* Categorize an identifier. *)
proc lexer_categorize(token_content: String) -> Token;
var
current_token: Token;
begin
if token_content = "if" then
current_token.kind := LexerKind._if
elsif token_content = "then" then
current_token.kind := LexerKind._then
elsif token_content = "else" then
current_token.kind := LexerKind._else
elsif token_content = "elsif" then
current_token.kind := LexerKind._elsif
elsif token_content = "while" then
current_token.kind := LexerKind._while
elsif token_content = "do" then
current_token.kind := LexerKind._do
elsif token_content = "proc" then
current_token.kind := LexerKind._proc
elsif token_content = "begin" then
current_token.kind := LexerKind._begin
elsif token_content = "end" then
current_token.kind := LexerKind._end
elsif token_content = "extern" then
current_token.kind := LexerKind._extern
elsif token_content = "const" then
current_token.kind := LexerKind._const
elsif token_content = "var" then
current_token.kind := LexerKind._var
elsif token_content = "case" then
current_token.kind := LexerKind._case
elsif token_content = "of" then
current_token.kind := LexerKind._of
elsif token_content = "type" then
current_token.kind := LexerKind._type
elsif token_content = "record" then
current_token.kind := LexerKind._record
elsif token_content = "union" then
current_token.kind := LexerKind._union
elsif token_content = "true" then
current_token.kind := LexerKind.boolean;
current_token.value.boolean_value := true
elsif token_content = "false" then
current_token.kind := LexerKind.boolean;
current_token.value.boolean_value := false
elsif token_content = "nil" then
current_token.kind := LexerKind.null
elsif token_content = "or" then
current_token.kind := LexerKind._or
elsif token_content = "return" then
current_token.kind := LexerKind._return
elsif token_content = "cast" then
current_token.kind := LexerKind._cast
elsif token_content = "defer" then
current_token.kind := LexerKind._defer
elsif token_content = "program" then
current_token.kind := LexerKind._program
elsif token_content = "module" then
current_token.kind := LexerKind._module
elsif token_content = "import" then
current_token.kind := LexerKind._import
else
current_token.kind := LexerKind.identifier;
current_token.value.string := string_dup(token_content)
end;
return current_token
end;
proc lexer_add_token(lexer: ^Tokenizer, token: Token);
var
new_length: Word;
begin
new_length := lexer^.length + 1u;
lexer^.data := cast(reallocarray(cast(lexer^.data: Pointer), new_length, #size(Token)): ^Token);
(lexer^.data + lexer^.length)^ := token;
lexer^.length := new_length
end;
(* Read the next token from the input. *)
proc lexer_next(source_code: SourceCode, token_buffer: ^StringBuffer) -> Token;
var
current_token: Token;
first_char: Char;
begin
current_token.kind := LexerKind.unknown;
first_char := source_code_head(source_code);
if isalpha(cast(first_char: Int)) <> 0 or first_char = '_' then
lexer_identifier(@source_code, token_buffer);
current_token := lexer_categorize(string_buffer_clear(token_buffer))
elsif first_char = '#' then
source_code_advance(@source_code);
lexer_identifier(@source_code, token_buffer);
current_token.kind := LexerKind.trait;
current_token.value.string := string_dup(string_buffer_clear(token_buffer))
elsif isdigit(cast(first_char: Int)) <> 0 then
lexer_number(@source_code, @current_token.value.int_value);
if source_code_expect(@source_code, 'u') then
current_token.kind := LexerKind.word;
source_code_advance(@source_code)
else
current_token.kind := LexerKind.integer
end
elsif first_char = '(' then
source_code_advance(@source_code);
if source_code_empty(@source_code) then
current_token.kind := LexerKind.left_paren
elsif source_code_head(source_code) = '*' then
source_code_advance(@source_code);
if lexer_comment(@source_code, token_buffer) then
current_token.value.string := string_dup(string_buffer_clear(token_buffer));
current_token.kind := LexerKind.comment
else
current_token.kind := LexerKind.unknown
end
else
current_token.kind := LexerKind.left_paren
end
elsif first_char = ')' then
current_token.kind := LexerKind.right_paren;
source_code_advance(@source_code)
elsif first_char = '\'' then
source_code_advance(@source_code);
if lexer_character(@source_code, @current_token.value.char_value) & source_code_expect(@source_code, '\'') then
current_token.kind := LexerKind.character;
source_code_advance(@source_code)
else
current_token.kind := LexerKind.unknown
end
elsif first_char = '"' then
source_code_advance(@source_code);
if lexer_string(@source_code, token_buffer) then
current_token.kind := LexerKind.string;
current_token.value.string := string_dup(string_buffer_clear(token_buffer))
else
current_token.kind := LexerKind.unknown
end
elsif first_char = '[' then
current_token.kind := LexerKind.left_square;
source_code_advance(@source_code)
elsif first_char = ']' then
current_token.kind := LexerKind.right_square;
source_code_advance(@source_code)
elsif first_char = '>' then
source_code_advance(@source_code);
if source_code_empty(@source_code) then
current_token.kind := LexerKind.greater_than
elsif source_code_head(source_code) = '=' then
current_token.kind := LexerKind.greater_equal;
source_code_advance(@source_code)
elsif source_code_head(source_code) = '>' then
current_token.kind := LexerKind.shift_right;
source_code_advance(@source_code)
else
current_token.kind := LexerKind.greater_than
end
elsif first_char = '<' then
source_code_advance(@source_code);
if source_code_empty(@source_code) then
current_token.kind := LexerKind.less_than
elsif source_code_head(source_code) = '=' then
current_token.kind := LexerKind.less_equal;
source_code_advance(@source_code)
elsif source_code_head(source_code) = '<' then
current_token.kind := LexerKind.shift_left;
source_code_advance(@source_code)
elsif source_code_head(source_code) = '>' then
current_token.kind := LexerKind.not_equal;
source_code_advance(@source_code)
else
current_token.kind := LexerKind.less_than
end
elsif first_char = '=' then
current_token.kind := LexerKind.equal;
source_code_advance(@source_code)
elsif first_char = ';' then
current_token.kind := LexerKind.semicolon;
source_code_advance(@source_code)
elsif first_char = '.' then
current_token.kind := LexerKind.dot;
source_code_advance(@source_code)
elsif first_char = ',' then
current_token.kind := LexerKind.comma;
source_code_advance(@source_code)
elsif first_char = '+' then
current_token.kind := LexerKind.plus;
source_code_advance(@source_code)
elsif first_char = '-' then
source_code_advance(@source_code);
if source_code_empty(@source_code) then
current_token.kind := LexerKind.minus
elsif source_code_head(source_code) = '>' then
current_token.kind := LexerKind.arrow;
source_code_advance(@source_code)
else
current_token.kind := LexerKind.minus
end
elsif first_char = '*' then
current_token.kind := LexerKind.multiplication;
source_code_advance(@source_code)
elsif first_char = '/' then
current_token.kind := LexerKind.division;
source_code_advance(@source_code)
elsif first_char = '%' then
current_token.kind := LexerKind.remainder;
source_code_advance(@source_code)
elsif first_char = ':' then
source_code_advance(@source_code);
if source_code_empty(@source_code) then
current_token.kind := LexerKind.colon
elsif source_code_head(source_code) = '=' then
current_token.kind := LexerKind.assignment;
source_code_advance(@source_code)
else
current_token.kind := LexerKind.colon
end
elsif first_char = '^' then
current_token.kind := LexerKind.hat;
source_code_advance(@source_code)
elsif first_char = '@' then
current_token.kind := LexerKind.at;
source_code_advance(@source_code)
elsif first_char = '!' then
current_token.kind := LexerKind.exclamation;
source_code_advance(@source_code)
elsif first_char = '&' then
current_token.kind := LexerKind.and;
source_code_advance(@source_code)
elsif first_char = '~' then
current_token.kind := LexerKind.not;
source_code_advance(@source_code)
elsif first_char = '|' then
current_token.kind := LexerKind.pipe;
source_code_advance(@source_code)
else
current_token.kind := LexerKind.unknown;
source_code_advance(@source_code)
end;
return current_token
end;
(* Split the source text into tokens. *)
proc lexer_text(source_code: SourceCode) -> Tokenizer;
var
current_token: Token;
token_buffer: StringBuffer;
lexer: Tokenizer;
begin
lexer := Tokenizer(0u, nil);
token_buffer := string_buffer_new();
lexer_spaces(@source_code);
while ~source_code_empty(@source_code) do
current_token := lexer_next(source_code, @token_buffer);
if current_token.kind <> LexerKind.unknown then
lexer_add_token(@lexer, current_token);
lexer_spaces(@source_code)
else
write_s("Lexical analysis error on \"");
write_c(source_code_head(source_code));
write_s("\".\n")
end
end;
return lexer
end;
(*
Parser.
*)
proc parse(tokens: ^Token, tokens_size: Word);
var
current_token: ^Token;
i: Word := 0u;
begin
while i < tokens_size do
current_token := tokens + i;
case current_token^.kind of
LexerKind._if:
write_s("IF")
| LexerKind._then:
write_s("THEN")
| LexerKind._else:
write_s("ELSE")
| LexerKind._elsif:
write_s("ELSIF")
| LexerKind._while:
write_s("WHILE")
| LexerKind._do:
write_s("DO")
| LexerKind._proc:
write_s("PROC")
| LexerKind._begin:
write_s("BEGIN")
| LexerKind._end:
write_s("END")
| LexerKind._extern:
write_s("EXTERN")
| LexerKind._const:
write_s("CONST")
| LexerKind._var:
write_s("VAR")
| LexerKind._case:
write_s("CASE")
| LexerKind._of:
write_s("OF")
| LexerKind._type:
write_s("TYPE")
| LexerKind._record:
write_s("RECORD")
| LexerKind._union:
write_s("UNION")
| LexerKind.pipe:
write_s("|")
| LexerKind.to:
write_s("TO")
| LexerKind.boolean:
write_s("BOOLEAN<");
write_b(current_token^.value.boolean_value);
write_c('>')
| LexerKind.null:
write_s("NIL")
| LexerKind.and:
write_s("&")
| LexerKind._or:
write_s("OR")
| LexerKind.not:
write_s("~")
| LexerKind._return:
write_s("RETURN")
| LexerKind._cast:
write_s("CAST")
| LexerKind.shift_left:
write_s("<<")
| LexerKind.shift_right:
write_s(">>")
| LexerKind.identifier:
write_c('<');
write_s(current_token^.value.string);
write_c('>')
| LexerKind.trait:
write_c('#');
write_s(current_token^.value.string)
| LexerKind.left_paren:
write_s("(")
| LexerKind.right_paren:
write_s(")")
| LexerKind.left_square:
write_s("[")
| LexerKind.right_square:
write_s("]")
| LexerKind.greater_equal:
write_s(">=")
| LexerKind.less_equal:
write_s("<=")
| LexerKind.greater_than:
write_s(">")
| LexerKind.less_than:
write_s("<")
| LexerKind.equal:
write_s("=")
| LexerKind.not_equal:
write_s("<>")
| LexerKind.semicolon:
write_c(';')
| LexerKind.dot:
write_c('.')
| LexerKind.comma:
write_c(',')
| LexerKind.plus:
write_c('+')
| LexerKind.minus:
write_c('-')
| LexerKind.multiplication:
write_c('*')
| LexerKind.division:
write_c('/')
| LexerKind.remainder:
write_c('%')
| LexerKind.assignment:
write_s(":=")
| LexerKind.colon:
write_c(':')
| LexerKind.hat:
write_c('^')
| LexerKind.at:
write_c('@')
| LexerKind.comment:
write_s("(* COMMENT *)")
| LexerKind.integer:
write_c('<');
write_i(current_token^.value.int_value);
write_c('>')
| LexerKind.word:
write_c('<');
write_i(current_token^.value.int_value);
write_s("u>")
| LexerKind.character:
write_c('<');
write_i(cast(current_token^.value.char_value: Int));
write_s("c>")
| LexerKind.string:
write_s("\"...\"")
| LexerKind._defer:
write_s("DEFER")
| LexerKind.exclamation:
write_c('!')
| LexerKind.arrow:
write_s("->")
| LexerKind._program:
write_s("PROGRAM")
| LexerKind._module:
write_s("MODULE")
| LexerKind._import:
write_s("IMPORT")
else
write_s("UNKNOWN<");
write_i(cast(current_token^.kind: Int));
write_c('>')
end;
write_c(' ');
i := i + 1u
end;
write_c('\n')
end;
(*
Compilation entry.
*)
proc compile_in_stages(command_line: ^CommandLine, source_code: SourceCode) -> Int;
var
return_code: Int := 0;
lexer: Tokenizer;
begin
if command_line^.lex or command_line^.parse then
lexer := lexer_text(source_code)
end;
if command_line^.parse then
parse(lexer.data, lexer.length)
end;
return return_code
end;
proc process(argc: Int, argv: ^^Char) -> Int;
var
tokens: ^Token;
tokens_size: Word;
source_code: SourceCode;
command_line: ^CommandLine;
return_code: Int := 0;
source_file: ^SourceFile;
begin
command_line := parse_command_line(argc, argv);
if command_line = nil then
return_code := 2
end;
if return_code = 0 then
source_file := read_source(command_line^.input);
if source_file = nil then
perror(command_line^.input);
return_code := 3
end
end;
if return_code = 0 then
defer
fclose(source_file^.handle)
end;
source_code.position := TextLocation(1u, 1u);
source_code.input := cast(source_file: Pointer);
source_code.empty := source_file_empty;
source_code.head := source_file_head;
source_code.advance := source_file_advance;
return_code := compile_in_stages(command_line, source_code)
end;
return return_code
end;
return process(count, parameters)
end.