summaryrefslogtreecommitdiff
path: root/source
diff options
context:
space:
mode:
authorEugen Wissner <belka@caraus.de>2026-02-15 04:10:38 +0100
committerEugen Wissner <belka@caraus.de>2026-02-15 04:10:38 +0100
commit5959fbb5524bbeb05a96eb15aba59e961a3efcb7 (patch)
tree811be9bb8fba9bec6ae549c50f9cf92000b259c9 /source
downloadelna-5959fbb5524bbeb05a96eb15aba59e961a3efcb7.tar.gz
Initial commit
Diffstat (limited to 'source')
-rw-r--r--source/Parser.elna1174
-rw-r--r--source/Transpiler.elna631
-rw-r--r--source/cctype.elna14
-rw-r--r--source/command_line_interface.elna93
-rw-r--r--source/common.elna72
-rw-r--r--source/cstdio.elna29
-rw-r--r--source/cstdlib.elna15
-rw-r--r--source/cstring.elna15
-rw-r--r--source/lexer.elna952
-rw-r--r--source/main.elna841
10 files changed, 3836 insertions, 0 deletions
diff --git a/source/Parser.elna b/source/Parser.elna
new file mode 100644
index 0000000..1225750
--- /dev/null
+++ b/source/Parser.elna
@@ -0,0 +1,1174 @@
+(* This Source Code Form is subject to the terms of the Mozilla Public License,
+ v. 2.0. If a copy of the MPL was not distributed with this file, You can
+ obtain one at https://mozilla.org/MPL/2.0/. *)
+module;
+
+import cstdlib, common, Lexer;
+
+type
+ Parser = record
+ lexer: ^Lexer
+ end;
+
+ AstLiteralKind* = (
+ integer,
+ string,
+ null,
+ boolean
+ );
+ AstLiteral* = record
+ kind: AstLiteralKind;
+ value: union
+ integer: Int;
+ string: String;
+ boolean: Bool
+ end
+ end;
+
+ AstUnaryOperator* = (
+ reference,
+ not,
+ minus
+ );
+ AstBinaryOperator* = (
+ sum,
+ subtraction,
+ multiplication,
+ division,
+ remainder,
+ equals,
+ not_equals,
+ less,
+ greater,
+ less_equal,
+ greater_equal,
+ disjunction,
+ conjunction,
+ exclusive_disjunction,
+ shift_left,
+ shift_right
+ );
+
+ AstExpressionKind* = (
+ literal,
+ identifier,
+ array_access,
+ dereference,
+ field_access,
+ unary,
+ binary,
+ call
+ );
+ AstExpression* = record
+ kind: AstExpressionKind
+ value: union
+ literal: ^AstLiteral;
+ identifier: Identifier;
+ reference: ^AstExpression;
+ array_access: record
+ array: ^AstExpression;
+ index: ^AstExpression
+ end;
+ field_access: record
+ aggregate: ^AstExpression;
+ field: Identifier
+ end;
+ unary: record
+ operator: AstUnaryOperator;
+ operand: ^AstExpression
+ end;
+ binary: record
+ operator: AstBinaryOperator;
+ lhs: ^AstExpression;
+ rhs: ^AstExpression
+ end;
+ call: record
+ callable: ^AstExpression;
+ argument_count: Word;
+ arguments: ^^AstExpression
+ end
+ end
+ end;
+
+ ConditionalStatement = record
+ condition: ^AstExpression;
+ branch: AstCompoundStatement
+ end;
+
+ AstStatementKind* = (
+ if_statement,
+ while_statement,
+ assignment_statement,
+ return_statement,
+ call_statement
+ );
+ AstStatement* = record
+ kind: AstStatementKind
+ value: union
+ if_statement: ConditionalStatement;
+ while_statement: ConditionalStatement;
+ assignment_statement: record
+ assignee: ^AstExpression;
+ assignment: ^AstExpression
+ end;
+ return_statement: ^AstExpression;
+ call_statement: ^AstExpression
+ end
+ end;
+ AstCompoundStatement* = record
+ count: Word;
+ statements: ^^AstStatement
+ end;
+
+ AstImportStatement* = record
+ package: Identifier;
+ symbols: ^Identifier
+ end;
+
+ AstConstantDeclaration* = record
+ constant_name: Identifier;
+ constant_value: Int
+ end;
+
+ AstFieldDeclaration* = record
+ field_name: Identifier;
+ field_type: ^AstTypeExpression
+ end;
+
+ AstTypeExpressionKind* = (
+ named_expression,
+ record_expression,
+ enumeration_expression,
+ array_expression,
+ pointer_expression,
+ procedure_expression
+ );
+ AstTypeExpression* = record
+ kind: AstTypeExpressionKind;
+ value: union
+ name: Identifier;
+ cases: ^Identifier;
+ target: ^AstTypeExpression;
+ fields: ^AstFieldDeclaration;
+ array_expression: record
+ base: ^AstTypeExpression;
+ length: Word
+ end;
+ parameters: ^^AstTypeExpression
+ end
+ end;
+
+ AstTypedDeclaration* = record
+ identifier: Identifier;
+ type_expression: ^AstTypeExpression
+ end;
+
+ AstVariableDeclaration* = record
+ variable_name: Identifier;
+ variable_type: ^AstTypeExpression
+ end;
+
+ AstProcedureDeclaration* = record
+ name: Identifier;
+ parameter_count: Word;
+ parameters: ^AstTypedDeclaration;
+ return_type: ^AstTypeExpression;
+ constants: ^^AstConstantDeclaration;
+ variables: ^^AstVariableDeclaration;
+ statements: AstCompoundStatement
+ end;
+
+ AstModule* = record
+ main: Bool;
+ imports: ^^AstImportStatement;
+ constants: ^^AstConstantDeclaration;
+ types: ^^AstTypedDeclaration;
+ variables: ^^AstVariableDeclaration;
+ procedures: ^^AstProcedureDeclaration;
+ statements: AstCompoundStatement
+ end;
+
+(* Calls lexer_lex() but skips the comments. *)
+proc parser_lex(lexer: ^Lexer) -> LexerToken;
+var
+ result: LexerToken;
+begin
+ result := lexer_lex(lexer);
+
+ while result.kind = lexerKindComment do
+ result := lexer_lex(lexer)
+ end;
+
+ return result
+end;
+
+proc parse_type_fields(parser: ^Parser) -> ^AstFieldDeclaration;
+var
+ token: LexerToken;
+ field_declarations: ^AstFieldDeclaration;
+ field_count: Word;
+ current_field: ^AstFieldDeclaration;
+begin
+ field_declarations := malloc(#size(AstFieldDeclaration));
+ token := parser_lex(parser^.lexer);
+ field_count := 0;
+
+ while token.kind <> lexerKindEnd do
+ field_count := field_count + 2u;
+
+ field_declarations := realloc(field_declarations, #size(AstFieldDeclaration) * field_count);
+ field_count := field_count - 1u;
+ current_field := field_declarations + (field_count - 1u);
+
+ token := parser_lex(parser^.lexer);
+
+ current_field^.field_name := token.identifierKind;
+
+ token := parser_lex(parser^.lexer);
+ current_field^.field_type := parse_type_expression(parser);
+ token := parser_lex(parser^.lexer);
+
+ if token.kind = lexerKindSemicolon then
+ token := parser_lex(parser^.lexer)
+ end
+ end;
+ current_field := current_field + 1;
+ memset(current_field, 0, #size(AstFieldDeclaration));
+
+ return field_declarations
+end;
+
+proc parse_record_type(parser: ^Parser) -> ^AstTypeExpression;
+var
+ result: ^AstTypeExpression;
+begin
+ NEW(result);
+ result^.kind := AstTypeExpressionKind.record_expression;
+ result^.fields := parse_type_fields(parser);
+
+ return result
+end;
+
+proc parse_pointer_type(parser: ^Parser) -> ^AstTypeExpression;
+var
+ token: LexerToken;
+ result: ^AstTypeExpression;
+begin
+ NEW(result);
+ result^.kind := AstTypeExpressionKind.pointer_expression;
+
+ token := lexer_current(parser^.lexer);
+
+ if token.kind = lexerKindPointer then
+ token := parser_lex(parser^.lexer)
+ end;
+ token := lexer_current(parser^.lexer);
+ result^.target := parse_type_expression(parser);
+
+ return result
+end;
+
+proc parse_array_type(parser: ^Parser) -> ^AstTypeExpression;
+var
+ token: LexerToken;
+ buffer: [20]Char;
+ result: ^AstTypeExpression;
+begin
+ NEW(result);
+ result^.kind := AstTypeExpressionKind.array_expression;
+ result^.array_expression.length := 0u;
+
+ token := lexer_current(parser^.lexer);
+
+ if token.kind = lexerKindArray then
+ token := parser_lex(parser^.lexer)
+ end;
+ if token.kind <> lexerKindOf then
+ token := parser_lex(parser^.lexer);
+
+ result^.array_expression.length := token.integerKind;
+
+ token := parser_lex(parser^.lexer)
+ end;
+ token := parser_lex(parser^.lexer);
+ result^.array_expression.base := parse_type_expression(parser);
+
+ return result
+end;
+
+proc parse_enumeration_type(parser: ^Parser) -> ^AstTypeExpression;
+var
+ token: LexerToken;
+ result: ^AstTypeExpression;
+ current_case: ^Identifier;
+ case_count: Word;
+begin
+ NEW(result);
+ result^.kind := AstTypeExpressionKind.enumeration_expression;
+
+ case_count := 1u;
+ result^.cases := malloc(#size(Identifier) * 2);
+ token := parser_lex(parser^.lexer);
+ current_case := result^.cases;
+ current_case^ := token.identifierKind;
+
+ token := parser_lex(parser^.lexer);
+
+ while token.kind = lexerKindComma do
+ token := parser_lex(parser^.lexer);
+
+ case_count := case_count + 2u;
+
+ result^.cases := realloc(result^.cases, #size(Identifier) * case_count);
+ case_count := case_count - 1u;
+ current_case := result^.cases + (case_count - 1u);
+ current_case^ := token.identifierKind;
+
+ token := parser_lex(parser^.lexer)
+ end;
+ current_case := current_case + 1;
+ memset(current_case, 0, #size(Identifier));
+
+ return result
+end;
+
+proc parse_named_type(parser: ^Parser) -> ^AstTypeExpression;
+var
+ token: LexerToken;
+ result: ^AstTypeExpression;
+begin
+ token := lexer_current(parser^.lexer);
+ NEW(result);
+
+ result^.kind := AstTypeExpressionKind.named_expression;
+ result^.name := token.identifierKind;
+
+ return result
+end;
+
+proc parse_procedure_type(parser: ^Parser) -> ^AstTypeExpression;
+var
+ token: LexerToken;
+ result: ^AstTypeExpression;
+ current_parameter: ^^AstTypeExpression;
+ parameter_count: Word;
+begin
+ parameter_count := 0u;
+ NEW(result);
+ result^.kind := AstTypeExpressionKind.procedure_expression;
+
+ result^.parameters := malloc(#size(^AstTypeExpression));
+
+ token := parser_lex(parser^.lexer);
+ token := parser_lex(parser^.lexer);
+
+ while token.kind <> lexerKindRightParen do
+ parameter_count := parameter_count + 2u;
+
+ result^.parameters := realloc(result^.parameters, #size(^AstTypeExpression) * parameter_count);
+ parameter_count := parameter_count - 1u;
+ current_parameter := result^.parameters + (parameter_count - 1u);
+
+ current_parameter^ := parse_type_expression(parser);
+
+ token := parser_lex(parser^.lexer);
+ if token.kind = lexerKindComma then
+ token := parser_lex(parser^.lexer)
+ end
+ end;
+ current_parameter := result^.parameters + parameter_count;
+ current_parameter^ := nil;
+
+ return result
+end;
+
+proc parse_type_expression(parser: ^Parser) -> ^AstTypeExpression;
+var
+ token: LexerToken;
+ result: ^AstTypeExpression;
+begin
+ result := nil;
+ token := lexer_current(parser^.lexer);
+
+ if token.kind = lexerKindRecord then
+ result := parse_record_type(parser)
+ end;
+ if token.kind = lexerKindLeftParen then
+ result := parse_enumeration_type(parser)
+ end;
+ if (token.kind = lexerKindArray) or (token.kind = lexerKindLeftSquare) then
+ result := parse_array_type(parser)
+ end;
+ if token.kind = lexerKindHat then
+ result := parse_pointer_type(parser)
+ end;
+ if token.kind = lexerKindProc then
+ result := parse_procedure_type(parser)
+ end;
+ if token.kind = lexerKindIdentifier then
+ result := parse_named_type(parser)
+ end;
+ return result
+end;
+
+proc parse_type_declaration(parser: ^Parser) -> ^AstTypedDeclaration;
+var
+ token: LexerToken;
+ result: ^AstTypedDeclaration;
+begin
+ token := lexer_current(parser^.lexer);
+
+ NEW(result);
+ result^.identifier := token.identifierKind;
+
+ token := parser_lex(parser^.lexer);
+ token := parser_lex(parser^.lexer);
+
+ result^.type_expression := parse_type_expression(parser);
+ token := parser_lex(parser^.lexer);
+
+ return result
+end;
+
+proc parse_type_part(parser: ^Parser) -> ^^AstTypedDeclaration;
+var
+ token: LexerToken;
+ result: ^^AstTypedDeclaration;
+ current_declaration: ^^AstTypedDeclaration;
+ declaration_count: Word;
+begin
+ token := lexer_current(parser^.lexer);
+
+ result := malloc(#size(^AstTypedDeclaration));
+ current_declaration := result;
+ declaration_count := 0u;
+
+ if token.kind = lexerKindType then
+ token := parser_lex(parser^.lexer);
+
+ while token.kind = lexerKindIdentifier do
+ declaration_count := declaration_count + 1u;
+
+ result := realloc(result, #size(^AstTypedDeclaration) * (declaration_count + 1));
+ current_declaration := result + (declaration_count - 1u);
+
+ current_declaration^ := parse_type_declaration(parser);
+ token := parser_lex(parser^.lexer)
+ end
+ end;
+ if declaration_count <> 0u then
+ current_declaration := current_declaration + 1
+ end;
+ current_declaration^ := nil;
+
+ return result
+end;
+
+proc parse_variable_declaration(parser: ^Parser) -> ^AstVariableDeclaration;
+var
+ token: LexerToken;
+ result: ^AstVariableDeclaration;
+begin
+ NEW(result);
+
+ token := lexer_current(parser^.lexer);
+ result^.variable_name := token.identifierKind;
+
+ token := parser_lex(parser^.lexer);
+
+ token := parser_lex(parser^.lexer);
+ result^.variable_type := parse_type_expression(parser);
+
+ token := parser_lex(parser^.lexer);
+ return result
+end;
+
+proc parse_variable_part(parser: ^Parser) -> ^^AstVariableDeclaration;
+var
+ token: LexerToken;
+ result: ^^AstVariableDeclaration;
+ current_declaration: ^^AstVariableDeclaration;
+ declaration_count: Word;
+begin
+ token := lexer_current(parser^.lexer);
+
+ result := malloc(#size(^AstVariableDeclaration));
+ current_declaration := result;
+ declaration_count := 0u;
+
+ if token.kind = lexerKindVar then
+ token := parser_lex(parser^.lexer);
+
+ while token.kind = lexerKindIdentifier do
+ declaration_count := declaration_count + 1u;
+
+ result := realloc(result, #size(^AstVariableDeclaration) * (declaration_count + 1));
+ current_declaration := result + (declaration_count - 1u);
+
+ current_declaration^ := parse_variable_declaration(parser);
+ token := parser_lex(parser^.lexer)
+ end
+ end;
+ if declaration_count <> 0 then
+ current_declaration := current_declaration + 1
+ end;
+ current_declaration^ := nil;
+
+ return result
+end;
+
+proc parse_constant_declaration(parser: ^Parser) -> ^AstConstantDeclaration;
+var
+ token: LexerToken;
+ result: ^AstConstantDeclaration;
+begin
+ NEW(result);
+
+ token := lexer_current(parser^.lexer);
+ result^.constant_name := token.identifierKind;
+
+ token := parser_lex(parser^.lexer);
+
+ token := parser_lex(parser^.lexer);
+ result^.constant_value := token.integerKind;
+
+ token := parser_lex(parser^.lexer);
+
+ return result
+end;
+
+proc parse_constant_part(parser: ^Parser) -> ^^AstConstantDeclaration;
+var
+ token: LexerToken;
+ result: ^^AstConstantDeclaration;
+ current_declaration: ^^AstConstantDeclaration;
+ declaration_count: Word;
+begin
+ token := lexer_current(parser^.lexer);
+
+ result := malloc(#size(^AstConstantDeclaration));
+ current_declaration := result;
+ declaration_count := 0u;
+
+ if token.kind = lexerKindConst then
+ token := parser_lex(parser^.lexer);
+
+ while token.kind = lexerKindIdentifier do
+ declaration_count := declaration_count + 1u;
+
+ result := realloc(result, #size(^AstConstantDeclaration) * (declaration_count + 1u));
+ current_declaration := result + (declaration_count - 1u);
+
+ current_declaration^ := parse_constant_declaration(parser);
+ token := parser_lex(parser^.lexer)
+ end
+ end;
+ if declaration_count <> 0 then
+ current_declaration := current_declaration + 1
+ end;
+ current_declaration^ := nil;
+
+ return result
+end;
+
+proc parse_import_statement(parser: ^Parser) -> ^AstImportStatement;
+var
+ result: ^AstImportStatement;
+ token: LexerToken;
+ symbol_count: Word;
+ current_symbol: ^Identifier;
+begin
+ NEW(result);
+ symbol_count := 1u;
+
+ token := parser_lex(parser^.lexer);
+ result^.package := token.identifierKind;
+
+ token := parser_lex(parser^.lexer);
+ result^.symbols := malloc(#size(Identifier) * 2);
+
+ current_symbol := result^.symbols;
+
+ token := parser_lex(parser^.lexer);
+ current_symbol^ := token.identifierKind;
+
+ token := parser_lex(parser^.lexer);
+ while token.kind <> lexerKindSemicolon do
+ token := parser_lex(parser^.lexer);
+ symbol_count := symbol_count + 1u;
+
+ result^.symbols := realloc(result^.symbols, #size(Identifier) * (symbol_count + 1u));
+ current_symbol := result^.symbols + (symbol_count - 1u);
+
+ current_symbol^ := token.identifierKind;
+ token := parser_lex(parser^.lexer)
+ end;
+ current_symbol := current_symbol + 1;
+ memset(current_symbol, 0, #size(Identifier));
+
+ token := parser_lex(parser^.lexer);
+
+ return result
+end;
+
+proc parse_import_part(parser: ^Parser) -> ^^AstImportStatement;
+var
+ token: LexerToken;
+ import_statement: ^^AstImportStatement;
+ result: ^^AstImportStatement;
+ import_count: Word;
+begin
+ token := lexer_current(parser^.lexer);
+ result := malloc(#size(^AstImportStatement));
+ import_statement := result;
+ import_count := 0u;
+
+ while token.kind = lexerKindFrom do
+ import_count := import_count + 1u;
+
+ result := realloc(result, #size(^AstImportStatement) * (import_count + 1u));
+ import_statement := result + (import_count - 1u);
+
+ import_statement^ := parse_import_statement(parser);
+ token := lexer_current(parser^.lexer)
+ end;
+ if import_count > 0u then
+ import_statement := import_count + 1
+ end;
+ import_statement^ := nil;
+
+ return result
+end;
+
+proc parse_literal(parser: ^Parser) -> ^AstLiteral;
+var
+ literal: ^AstLiteral;
+ token: LexerToken;
+begin
+ literal := nil;
+ token := lexer_current(parser^.lexer);
+
+ if token.kind = lexerKindInteger then
+ NEW(literal);
+
+ literal^.kind := AstLiteralKind.integer;
+ literal^.integer := token.integerKind
+ end;
+ if (token.kind = lexerKindCharacter) or (token.kind = lexerKindString) then
+ NEW(literal);
+
+ literal^.kind := AstLiteralKind.string;
+ literal^.string := token.stringKind
+ end;
+ if token.kind = lexerKindNull then
+ NEW(literal);
+
+ literal^.kind := AstLiteralKind.null
+ end;
+ if token.kind = lexerKindBoolean then
+ NEW(literal);
+
+ literal^.kind := AstLiteralKind.boolean;
+ literal^.boolean := token.booleanKind
+ end;
+ if literal <> nil then
+ token := parser_lex(parser^.lexer)
+ end;
+
+ return literal
+end;
+
+proc parse_factor(parser: ^Parser) -> ^AstExpression;
+var
+ next_token: LexerToken;
+ result: ^AstExpression;
+ literal: ^AstLiteral;
+begin
+ result := nil;
+ next_token := lexer_current(parser^.lexer);
+
+ literal := parse_literal(parser);
+
+ if (result = nil) & (literal <> nil) then
+ NEW(result);
+
+ result^.kind := AstExpressionKind.literal;
+ result^.literal := literal
+ end;
+ if (result = nil) & (next_token.kind = lexerKindMinus) then
+ NEW(result);
+ next_token := parser_lex(parser^.lexer);
+
+ result^.kind := AstExpressionKind.unary;
+ result^.unary.operator := AstUnaryOperator.minus;
+ result^.unary.operand := parse_factor(parser)
+ end;
+ if (result = nil) & (next_token.kind = lexerKindTilde) then
+ NEW(result);
+ next_token := parser_lex(parser^.lexer);
+
+ result^.kind := AstExpressionKind.unary;
+ result^.unary.operator := AstUnaryOperator.not;
+ result^.unary.operand := parse_factor(parser)
+ end;
+ if (result = nil) & (next_token.kind = lexerKindLeftParen) then
+ next_token := parser_lex(parser^.lexer);
+ result := parse_expression(parser);
+ if result <> nil then
+ next_token := parser_lex(parser^.lexer)
+ end
+ end;
+ if (result = nil) & (next_token.kind = lexerKindIdentifier) then
+ NEW(result);
+
+ result^.kind := AstExpressionKind.identifier;
+ result^.identifier := next_token.identifierKind;
+
+ next_token := parser_lex(parser^.lexer)
+ end;
+
+ return result
+end;
+
+proc parse_designator(parser: ^Parser) -> ^AstExpression;
+var
+ next_token: LexerToken;
+ inner_expression: ^AstExpression;
+ designator: ^AstExpression;
+ arguments: ^^AstExpression;
+ handled: Bool;
+begin
+ designator := parse_factor(parser);
+ handled := designator <> nil;
+ next_token := lexer_current(parser^.lexer);
+
+ while handled do
+ inner_expression := designator;
+ handled := false;
+
+ if ~handled & (next_token.kind = lexerKindHat) then
+ NEW(designator);
+
+ designator^.kind := AstExpressionKind.dereference;
+ designator^.reference := inner_expression;
+
+ next_token := parser_lex(parser^.lexer);
+ handled := true
+ end;
+ if ~handled & (next_token.kind = lexerKindLeftSquare) then
+ NEW(designator);
+ next_token := parser_lex(parser^.lexer);
+
+ designator^.kind := AstExpressionKind.array_access;
+ designator^.array_access.array := inner_expression;
+ designator^.array_access.index := parse_expression(parser);
+
+ next_token := parser_lex(parser^.lexer);
+ handled := true
+ end;
+ if ~handled & (next_token.kind = lexerKindDot) then
+ NEW(designator);
+ next_token := parser_lex(parser^.lexer);
+
+ designator^.kind := AstExpressionKind.field_access;
+ designator^.field_access.aggregate := inner_expression;
+ designator^.field_access.field := next_token.identifierKind;
+
+ next_token := parser_lex(parser^.lexer);
+ handled := true
+ end;
+ if ~handled & (next_token.kind = lexerKindLeftParen) then
+ NEW(designator);
+ next_token := parser_lex(parser^.lexer);
+
+ designator^.kind := AstExpressionKind.call;
+ designator^.call.callable := inner_expression;
+ designator^.call.argument_count := 0;
+ designator^.call.arguments := nil;
+
+ if next_token.kind <> lexerKindRightParen then
+ designator^.arguments := malloc(#size(^AstExpression));
+ designator^.argument_count := 1;
+ designator^.arguments^ := parse_expression(parser);
+
+ next_token := lexer_current(parser^.lexer);
+
+ while next_token.kind = lexerKindComma do
+ next_token := parser_lex(parser^.lexer);
+
+ designator^.argument_count := designator^.argument_count + 1;
+ designator^.arguments := realloc(designator^.arguments, #size(^AstExpression) * designator^.argument_count);
+ arguments := designator^.arguments + (designator^.argument_count - 1u);
+ arguments^ := parse_expression(parser);
+
+ next_token := lexer_current(parser^.lexer)
+ end
+ end;
+
+ next_token := parser_lex(parser^.lexer);
+ handled := true
+ end
+ end;
+
+ return designator
+end;
+
+proc parse_binary_expression(parser: ^Parser, left: ^AstExpression, operator: AstBinaryOperator) -> ^AstExpression;
+var
+ next_token: LexerToken;
+ result: ^AstExpression;
+ right: ^AstExpression;
+begin
+ next_token := parser_lex(parser^.lexer);
+ right := parse_designator(parser);
+ result := nil;
+
+ if right <> nil then
+ NEW(result);
+ result^.kind := AstExpressionKind.binary;
+ result^.binary.operator := operator;
+ result^.binary.lhs := left;
+ result^.binary.rhs := right
+ end;
+
+ return result
+end;
+
+proc parse_expression(parser: ^Parser) -> ^AstExpression;
+var
+ next_token: LexerToken;
+ left: ^AstExpression;
+ result: ^AstExpression;
+ written_bytes: Word;
+begin
+ left := parse_designator(parser);
+ result := nil;
+ next_token := lexer_current(parser^.lexer);
+
+ if left <> nil then
+ if (result = nil) & (next_token.kind = lexerKindNotEqual) then
+ result := parse_binary_expression(parser, left, AstBinaryOperator.not_equals)
+ end;
+ if (result = nil) & (next_token.kind = lexerKindEqual) then
+ result := parse_binary_expression(parser, left, AstBinaryOperator.equals)
+ end;
+ if (result = nil) & (next_token.kind = lexerKindGreaterThan) then
+ result := parse_binary_expression(parser, left, AstBinaryOperator.greater)
+ end;
+ if (result = nil) & (next_token.kind = lexerKindLessThan) then
+ result := parse_binary_expression(parser, left, AstBinaryOperator.less)
+ end;
+ if (result = nil) & (next_token.kind = lexerKindGreaterEqual) then
+ result := parse_binary_expression(parser, left, AstBinaryOperator.greater_equal)
+ end;
+ if (result = nil) & (next_token.kind = lexerKindLessEqual) then
+ result := parse_binary_expression(parser, left, AstBinaryOperator.less_equal)
+ end;
+ if (result = nil) & (next_token.kind = lexerKindAnd) then
+ result := parse_binary_expression(parser, left, AstBinaryOperator.conjunction)
+ end;
+ if (result = nil) & (next_token.kind = lexerKindOr) then
+ result := parse_binary_expression(parser, left, AstBinaryOperator.disjunction)
+ end;
+ if (result = nil) & (next_token.kind = lexerKindMinus) then
+ result := parse_binary_expression(parser, left, AstBinaryOperator.subtraction)
+ end;
+ if (result = nil) & (next_token.kind = lexerKindPlus) then
+ result := parse_binary_expression(parser, left, AstBinaryOperator.sum)
+ end;
+ if (result = nil) & (next_token.kind = lexerKindAsterisk) then
+ result := parse_binary_expression(parser, left, AstBinaryOperator.multiplication)
+ end
+ end;
+ if (result = nil) & (left <> nil) then
+ result := left
+ end;
+
+ return result
+end;
+
+proc parse_return_statement(parser: ^Parser) -> ^AstStatement;
+var
+ token: LexerToken;
+ result: ^AstStatement;
+begin
+ NEW(result);
+ result^.kind := AstStatementKind.return_statement;
+
+ token := parser_lex(parser^.lexer);
+ result^.return_statement := parse_expression(parser);
+
+ return result
+end;
+
+proc parse_assignment_statement(parser: ^Parser, assignee: ^AstExpression) -> ^AstStatement;
+var
+ token: LexerToken;
+ result: ^AstStatement;
+begin
+ NEW(result);
+ result^.kind := AstStatementKind.assignment_statement;
+ result^.assignment_statement.assignee := assignee;
+
+ token := parser_lex(parser^.lexer);
+ result^.assignment_statement.assignment := parse_expression(parser);
+
+ return result
+end;
+
+proc parse_call_statement(parser: ^Parser, call: ^AstExpression) -> ^AstStatement;
+var
+ result: ^AstStatement;
+begin
+ NEW(result);
+ result^.kind := AstStatementKind.call_statement;
+ result^.call_statement := call;
+
+ return result
+end;
+
+proc parse_compound_statement(parser: ^Parser) -> AstCompoundStatement;
+var
+ result: AstCompoundStatement;
+ token: LexerToken;
+ current_statement: ^^AstStatement;
+ old_count: Word;
+begin
+ result.count := 0u;
+ result.statements := nil;
+
+ token := lexer_current(parser^.lexer);
+
+ while token.kind <> lexerKindEnd do
+ old_count := result.count;
+ result.count := result.count + 1u;
+
+ result.statements := realloc(result.statements, #size(^AstStatement) * result.count);
+ current_statement := result.statements + old_count;
+
+ current_statement^ := parse_statement(parser);
+
+ token := lexer_current(parser^.lexer)
+ end;
+
+ return result
+end;
+
+proc parse_statement(parser: ^Parser) -> ^AstStatement;
+var
+ token: LexerToken;
+ statement: ^AstStatement;
+ designator: ^AstExpression;
+begin
+ statement := nil;
+ token := parser_lex(parser^.lexer);
+
+ if token.kind = lexerKindIf then
+ statement := parse_if_statement(parser)
+ end;
+ if token.kind = lexerKindWhile then
+ statement := parse_while_statement(parser)
+ end;
+ if token.kind = lexerKindReturn then
+ statement := parse_return_statement(parser)
+ end;
+ if token.kind = lexerKindIdentifier then
+ designator := parse_designator(parser);
+ token := lexer_current(parser^.lexer);
+
+ if token.kind = lexerKindAssignment then
+ statement := parse_assignment_statement(parser, designator)
+ end;
+ if token.kind <> lexerKindAssignment then
+ statement := parse_call_statement(parser, designator)
+ end
+ end;
+ return statement
+end;
+
+proc parse_if_statement(parser: ^Parser) -> ^AstStatement;
+var
+ token: LexerToken;
+ result: ^AstStatement;
+begin
+ NEW(result);
+ result^.kind := AstStatementKind.if_statement;
+
+ token := parser_lex(parser^.lexer);
+ result^.if_statement.condition := parse_expression(parser);
+ result^.if_statement.branch := parse_compound_statement(parser);
+
+ token := parser_lex(parser^.lexer);
+ return result
+end;
+
+proc parse_while_statement(parser: ^Parser) -> ^AstStatement;
+var
+ token: LexerToken;
+ result: ^AstStatement;
+begin
+ NEW(result);
+ result^.kind := AstStatementKind.while_statement;
+
+ token := parser_lex(parser^.lexer);
+ result^.while_statement.condition := parse_expression(parser);
+ result^.while_statement.body := parse_compound_statement(parser);
+
+ token := parser_lex(parser^.lexer);
+ return result
+end;
+
+proc parse_statement_part(parser: ^Parser) -> AstCompoundStatement;
+var
+ token: LexerToken;
+ compound: AstCompoundStatement;
+begin
+ compound.count := 0;
+ compound.statements := nil;
+ token := lexer_current(parser^.lexer);
+
+ if token.kind = lexerKindBegin then
+ compound := parse_compound_statement(parser)
+ end;
+
+ return compound
+end;
+
+proc parse_procedure_heading(parser: ^Parser) -> ^AstProcedureDeclaration;
+var
+ token: LexerToken;
+ declaration: ^AstProcedureDeclaration;
+ parameter_index: Word;
+ current_parameter: ^AstTypedDeclaration;
+begin
+ NEW(declaration);
+
+ token := parser_lex(parser^.lexer);
+ declaration^.name := token.identifierKind;
+
+ token := parser_lex(parser^.lexer);
+
+ declaration^.parameters := nil;
+ declaration^.parameter_count := 0u;
+
+ token := parser_lex(parser^.lexer);
+ while token.kind <> lexerKindRightParen do
+ parameter_index := declaration^.parameter_count;
+ declaration^.parameter_count := declaration^.parameter_count + 1;
+ declaration^.parameters := realloc(declaration^.parameters, #size(AstTypedDeclaration) * declaration^.parameter_count);
+
+ current_parameter := declaration^.parameters + parameter_index;
+
+ current_parameter^.identifier := token.identifierKind;
+
+ token := parser_lex(parser^.lexer);
+ token := parser_lex(parser^.lexer);
+
+ current_parameter^.type_expression := parse_type_expression(parser);
+
+ token := parser_lex(parser^.lexer);
+ if token.kind = lexerKindComma then
+ token := parser_lex(parser^.lexer)
+ end
+ end;
+ token := parser_lex(parser^.lexer);
+ declaration^.return_type := nil;
+
+ (* Check for the return type and write it. *)
+ if token.kind = lexerKindArrow then
+ token := parser_lex(parser^.lexer);
+ declaration^.return_type := parse_type_expression(parser);
+ token := parser_lex(parser^.lexer)
+ end;
+ token := parser_lex(parser^.lexer);
+
+ return declaration
+end;
+
+proc parse_procedure_declaration(parser: ^Parser) -> ^AstProcedureDeclaration;
+var
+ token: LexerToken;
+ declaration: ^AstProcedureDeclaration;
+begin
+ declaration := parse_procedure_heading(parser);
+
+ declaration^.constants := parse_constant_part(parser);
+ declaration^.variables := parse_variable_part(parser);
+ declaration^.statements := parse_statement_part(parser);
+
+ token := parser_lex(parser^.lexer);
+ token := parser_lex(parser^.lexer);
+
+ return declaration
+end;
+
+proc parse_procedure_part(parser: ^Parser) -> ^^AstProcedureDeclaration;
+var
+ token: LexerToken;
+ current_declaration: ^^AstProcedureDeclaration;
+ result: ^^AstProcedureDeclaration;
+ declaration_count: Word;
+ declaration_index: Word;
+begin
+ token := lexer_current(parser^.lexer);
+ declaration_count := 0u;
+ declaration_index := 0u;
+
+ result := malloc(#size(^AstProcedureDeclaration));
+
+ while token.kind = lexerKindProc do
+ declaration_count := declaration_count + 1u;
+ result := realloc(result, #size(^AstProcedureDeclaration) * (declaration_count + 1));
+ current_declaration := result + declaration_index;
+
+ current_declaration^ := parse_procedure_declaration(parser);
+ token := lexer_current(parser^.lexer);
+ declaration_index := declaration_count
+ end;
+ current_declaration := result + declaration_index;
+ current_declaration^ := nil;
+
+ return result
+end;
+
+proc parse_module(parser: ^Parser) -> ^AstModule;
+var
+ token: LexerToken;
+ result: ^AstModule;
+begin
+ NEW(result);
+ token := parser_lex(parser^.lexer);
+ result^.main := true;
+
+ if token.kind = lexerKindModule then
+ result^.main := false
+ end;
+ token := parser_lex(parser^.lexer);
+
+ (* Write the module body. *)
+ token := parser_lex(parser^.lexer);
+
+ result^.imports := parse_import_part(parser);
+ result^.constants := parse_constant_part(parser);
+ result^.types := parse_type_part(parser);
+
+ result^.variables := parse_variable_part(parser);
+ result^.procedures := parse_procedure_part(parser);
+ result^.statements := parse_statement_part(parser);
+
+ token := parser_lex(parser^.lexer);
+ token := parser_lex(parser^.lexer);
+
+ return result
+end;
+
+proc parse*(lexer: ^Lexer) -> ^AstModule;
+var
+ parser: Parser;
+begin
+ parser.lexer := lexer;
+
+ return parse_module(@parser)
+end;
+
+end.
diff --git a/source/Transpiler.elna b/source/Transpiler.elna
new file mode 100644
index 0000000..5a65036
--- /dev/null
+++ b/source/Transpiler.elna
@@ -0,0 +1,631 @@
+(* This Source Code Form is subject to the terms of the Mozilla Public License,
+ v. 2.0. If a copy of the MPL was not distributed with this file, You can
+ obtain one at https://mozilla.org/MPL/2.0/. *)
+module;
+
+from FIO import File, WriteNBytes, WriteLine, WriteChar, WriteString;
+from NumberIO import IntToStr;
+
+import common, Parser;
+
+type
+ TranspilerContext* = record
+ input_name: String;
+ output: File;
+ definition: File;
+ indentation: Word
+ end;
+
+proc indent(context: ^TranspilerContext);
+var
+ count: Word;
+begin
+ count := 0;
+
+ while count < context^.indentation do
+ WriteString(context^.output, " ");
+ count := count + 1u
+ end
+end;
+
+(* Write a semicolon followed by a newline. *)
+proc write_semicolon(output: File);
+begin
+ WriteChar(output, ';');
+ WriteLine(output)
+end;
+
+proc transpile_import_statement(context: ^TranspilerContext, import_statement: ^AstImportStatement);
+var
+ current_symbol: ^Identifier;
+begin
+ WriteString(context^.output, "FROM ");
+ transpile_identifier(context, import_statement^.package);
+
+ WriteString(context^.output, " IMPORT ");
+
+ current_symbol := import_statement^.symbols;
+ transpile_identifier(context, current_symbol^);
+ current_symbol := current_symbol + 1;
+
+ while current_symbol^[1] <> '\0' do
+ WriteString(context^.output, ", ");
+ transpile_identifier(context, current_symbol^);
+ current_symbol := current_symbol + 1;
+ end;
+ write_semicolon(context^.output)
+end;
+
+proc transpile_import_part(context: ^TranspilerContext, imports: ^^AstImportStatement);
+var
+ import_statement: ^AstImportStatement;
+begin
+ while imports^ <> nil do
+ transpile_import_statement(context, imports^);
+ imports := imports + 1
+ end;
+ WriteLine(context^.output)
+end;
+
+proc transpile_constant_declaration(context: ^TranspilerContext, declaration: ^AstConstantDeclaration);
+var
+ buffer: [20]Char;
+begin
+ WriteString(context^.output, " ");
+ transpile_identifier(context, declaration^.constant_name);
+
+ WriteString(context^.output, " = ");
+
+ IntToStr(declaration^.constant_value, 0, buffer);
+ WriteString(context^.output, buffer);
+
+ write_semicolon(context^.output)
+end;
+
+proc transpile_constant_part(context: ^TranspilerContext, declarations: ^^AstConstantDeclaration, extra_newline: Bool);
+var
+ current_declaration: ^^AstConstantDeclaration;
+begin
+ if declarations^ <> nil then
+ WriteString(context^.output, "CONST");
+ WriteLine(context^.output);
+
+ current_declaration := declarations;
+ while current_declaration^ <> nil do
+ transpile_constant_declaration(context, current_declaration^);
+
+ current_declaration := current_declaration + 1
+ end;
+ if extra_newline then
+ WriteLine(context^.output)
+ end
+ end
+end;
+
+proc transpile_module(context: ^TranspilerContext, result: ^AstModule);
+begin
+ if result^.main = false then
+ WriteString(context^.output, "IMPLEMENTATION ")
+ end;
+ WriteString(context^.output, "MODULE ");
+
+ (* Write the module name and end the line with a semicolon and newline. *)
+ transpile_module_name(context);
+
+ write_semicolon(context^.output);
+ WriteLine(context^.output);
+
+ (* Write the module body. *)
+
+ transpile_import_part(context, result^.imports);
+ transpile_constant_part(context, result^.constants, true);
+ transpile_type_part(context, result^.types);
+ transpile_variable_part(context, result^.variables, true);
+ transpile_procedure_part(context, result^.procedures);
+ transpile_statement_part(context, result^.statements);
+
+ WriteString(context^.output, "END ");
+ transpile_module_name(context);
+
+ WriteChar(context^.output, ".");
+ WriteLine(context^.output)
+end;
+
+proc transpile_type_fields(context: ^TranspilerContext, fields: ^AstFieldDeclaration);
+var
+ current_field: ^AstFieldDeclaration;
+begin
+ current_field := fields;
+
+ while current_field^.field_name[1] <> '\0' do
+ WriteString(context^.output, " ");
+ transpile_identifier(context, current_field^.field_name);
+
+ WriteString(context^.output, ": ");
+ transpile_type_expression(context, current_field^.field_type);
+
+ current_field := current_field + 1;
+
+ if current_field^.field_name[1] <> '\0' then
+ WriteChar(context^.output, ';')
+ end;
+ WriteLine(context^.output)
+ end
+end;
+
+proc transpile_record_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression);
+begin
+ WriteString(context^.output, "RECORD");
+ WriteLine(context^.output);
+ transpile_type_fields(context, type_expression^.fields);
+ WriteString(context^.output, " END")
+end;
+
+proc transpile_pointer_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression);
+begin
+ WriteString(context^.output, "POINTER TO ");
+
+ transpile_type_expression(context, type_expression^.target)
+end;
+
+proc transpile_array_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression);
+var
+ buffer: [20]Char;
+begin
+ WriteString(context^.output, "ARRAY");
+
+ if type_expression^.length <> 0 then
+ WriteString(context^.output, "[1..");
+
+ IntToStr(type_expression^.length, 0, buffer);
+ WriteString(context^.output, buffer);
+
+ WriteChar(context^.output, ']')
+ end;
+ WriteString(context^.output, " OF ");
+
+ transpile_type_expression(context, type_expression^.base)
+end;
+
+proc transpile_enumeration_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression);
+var
+ current_case: ^Identifier;
+begin
+ current_case := type_expression^.cases;
+
+ WriteString(context^.output, "(");
+ WriteLine(context^.output);
+ WriteString(context^.output, " ");
+ transpile_identifier(context, current_case^);
+ current_case := current_case + 1;
+
+ while current_case^[1] <> '\0' do
+ WriteChar(context^.output, ',');
+ WriteLine(context^.output);
+ WriteString(context^.output, " ");
+ transpile_identifier(context, current_case^);
+
+ current_case := current_case + 1
+ end;
+ WriteLine(context^.output);
+ WriteString(context^.output, " )")
+end;
+
+proc transpile_identifier(context: ^TranspilerContext, identifier: Identifier);
+var
+ written_bytes: Word;
+begin
+ written_bytes := WriteNBytes(context^.output, cast(identifier[1]: Word), @identifier[2])
+end;
+
+proc transpile_procedure_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression);
+var
+ result: ^AstTypeExpression;
+ current_parameter: ^^AstTypeExpression;
+ parameter_count: Word;
+begin
+ WriteString(context^.output, "PROCEDURE(");
+ current_parameter := type_expression^.parameters;
+
+ while current_parameter^ <> nil do
+ transpile_type_expression(context, current_parameter^);
+
+ current_parameter := current_parameter + 1;
+
+ if current_parameter^ <> nil then
+ WriteString(context^.output, ", ")
+ end
+ end;
+ WriteChar(context^.output, ')')
+end;
+
+proc transpile_type_expression(context: ^TranspilerContext, type_expression: ^AstTypeExpression);
+begin
+ if type_expression^.kind = astTypeExpressionKindRecord then
+ transpile_record_type(context, type_expression)
+ end;
+ if type_expression^.kind = astTypeExpressionKindEnumeration then
+ transpile_enumeration_type(context, type_expression)
+ end;
+ if type_expression^.kind = astTypeExpressionKindArray then
+ transpile_array_type(context, type_expression)
+ end;
+ if type_expression^.kind = astTypeExpressionKindPointer then
+ transpile_pointer_type(context, type_expression)
+ end;
+ if type_expression^.kind = astTypeExpressionKindProcedure then
+ transpile_procedure_type(context, type_expression)
+ end;
+ if type_expression^.kind = astTypeExpressionKindNamed then
+ transpile_identifier(context, type_expression^.name)
+ end
+end;
+
+proc transpile_type_declaration(context: ^TranspilerContext, declaration: ^AstTypedDeclaration);
+var
+ written_bytes: Word;
+begin
+ WriteString(context^.output, " ");
+
+ transpile_identifier(context^.output, declaration^.identifier);
+ WriteString(context^.output, " = ");
+
+ transpile_type_expression(context, declaration^.type_expression);
+ write_semicolon(context^.output)
+end;
+
+proc transpile_type_part(context: ^TranspilerContext, declarations: ^^AstTypedDeclaration);
+var
+ current_declaration: ^^AstTypedDeclaration;
+begin
+ if declarations^ <> nil then
+ WriteString(context^.output, "TYPE");
+ WriteLine(context^.output);
+
+ current_declaration := declarations;
+ while current_declaration^ <> nil do
+ transpile_type_declaration(context, current_declaration^);
+
+ current_declaration := current_declaration + 1
+ end;
+ WriteLine(context^.output)
+ end
+end;
+
+proc transpile_variable_declaration(context: ^TranspilerContext, declaration: ^AstVariableDeclaration);
+begin
+ WriteString(context^.output, " ");
+ transpile_identifier(context, declaration^.variable_name);
+
+ WriteString(context^.output, ": ");
+
+ transpile_type_expression(context, declaration^.variable_type);
+ write_semicolon(context^.output)
+end;
+
+proc transpile_variable_part(context: ^TranspilerContext, declarations: ^^AstVariableDeclaration, extra_newline: Bool);
+var
+ current_declaration: ^^AstVariableDeclaration;
+begin
+ if declarations^ <> nil then
+ WriteString(context^.output, "VAR");
+ WriteLine(context^.output);
+
+ current_declaration := declarations;
+ while current_declaration^ <> nil do
+ transpile_variable_declaration(context, current_declaration^);
+
+ current_declaration := current_declaration + 1
+ end;
+ if extra_newline then
+ WriteLine(context^.output)
+ end
+ end
+end;
+
+proc transpile_procedure_heading(context: ^TranspilerContext, declaration: ^AstProcedureDeclaration);
+var
+ parameter_index: Word;
+ current_parameter: ^AstTypedDeclaration;
+begin
+ WriteString(context^.output, "PROCEDURE ");
+ transpile_identifier(context, declaration^.name);
+ WriteChar(context^.output, '(');
+
+ parameter_index := 0;
+ current_parameter := declaration^.parameters;
+
+ while parameter_index < declaration^.parameter_count do
+ transpile_identifier(context, current_parameter^.identifier);
+ WriteString(context^.output, ": ");
+ transpile_type_expression(context, current_parameter^.type_expression);
+
+ parameter_index := parameter_index + 1u;
+ current_parameter := current_parameter + 1;
+
+ if parameter_index <> declaration^.parameter_count then
+ WriteString(context^.output, "; ")
+ end
+ end;
+
+ WriteString(context^.output, ")");
+
+ (* Check for the return type and write it. *)
+ if declaration^.return_type <> nil then
+ WriteString(context^.output, ": ");
+ transpile_type_expression(context, declaration^.return_type)
+ end;
+ write_semicolon(context^.output)
+end;
+
+proc transpile_unary_operator(context: ^TranspilerContext, operator: AstUnaryOperator);
+begin
+ if operator = AstUnaryOperator.minus then
+ WriteChar(context^.output, '-')
+ end;
+ if operator = AstUnaryOperator.not then
+ WriteChar(context^.output, '~')
+ end
+end;
+
+proc transpile_binary_operator(context: ^TranspilerContext, operator: AstBinaryOperator);
+begin
+ case operator of
+ AstBinaryOperator.sum: WriteChar(context^.output, '+')
+ | AstBinaryOperator.subtraction: WriteChar(context^.output, '-')
+ | AstBinaryOperator.multiplication: WriteChar(context^.output, '*')
+ | AstBinaryOperator.equals: WriteChar(context^.output, '=')
+ | AstBinaryOperator.not_equals: WriteChar(context^.output, '#')
+ | AstBinaryOperator.less: WriteChar(context^.output, '<')
+ | AstBinaryOperator.greater: WriteChar(context^.output, '>')
+ | AstBinaryOperator.less_equal: WriteString(context^.output, "<=")
+ | AstBinaryOperator.greater_equal: WriteString(context^.output, ">=")
+ | AstBinaryOperator.disjunction: WriteString(context^.output, "OR")
+ | AstBinaryOperatorConjunction: WriteString(context^.output, "AND")
+ end
+end;
+
+proc transpile_expression(context: ^TranspilerContext, expression: ^AstExpression);
+var
+ literal: ^AstLiteral;
+ buffer: [20]Char;
+ argument_index: Word;
+ current_argument: ^^AstExpression;
+begin
+ if expression^.kind = astExpressionKindLiteral then
+ literal := expression^.literal;
+
+ if literal^.kind = AstLiteralKind.integer then
+ IntToStr(literal^.integer, 0, buffer);
+ WriteString(context^.output, buffer)
+ end;
+ if literal^.kind = AstLiteralKind.string then
+ WriteString(context^.output, literal^.string)
+ end;
+ if literal^.kind = AstLiteralKind.null then
+ WriteString(context^.output, "NIL")
+ end;
+ if (literal^.kind = AstLiteralKind.boolean) & literal^.boolean then
+ WriteString(context^.output, "TRUE")
+ end;
+ if (literal^.kind = AstLiteralKind.boolean) & (literal^.boolean = false) then
+ WriteString(context^.output, "FALSE")
+ end
+ end;
+ if expression^.kind = astExpressionKindIdentifier then
+ transpile_identifier(context, expression^.identifier)
+ end;
+ if expression^.kind = astExpressionKindDereference then
+ transpile_expression(context, expression^.reference);
+ WriteChar(context^.output, '^')
+ end;
+ if expression^.kind = astExpressionKindArrayAccess then
+ transpile_expression(context, expression^.array);
+ WriteChar(context^.output, '[');
+ transpile_expression(context, expression^.index);
+ WriteChar(context^.output, ']')
+ end;
+ if expression^.kind = astExpressionKindFieldAccess then
+ transpile_expression(context, expression^.aggregate);
+ WriteChar(context^.output, '.');
+ transpile_identifier(contextexpression^.field)
+ end;
+ if expression^.kind = astExpressionKindUnary then
+ transpile_unary_operator(context, expression^.unary_operator);
+ transpile_expression(context, expression^.unary_operand)
+ end;
+ if expression^.kind = astExpressionKindBinary then
+ WriteChar(context^.output, '(');
+ transpile_expression(context, expression^.lhs);
+ WriteChar(context^.output, ' ');
+ transpile_binary_operator(context, expression^.binary_operator);
+ WriteChar(context^.output, ' ');
+ transpile_expression(context, expression^.rhs);
+ WriteChar(context^.output, ')')
+ end;
+ if expression^.kind = astExpressionKindCall then
+ transpile_expression(context, expression^.callable);
+ WriteChar(context^.output, '(');
+
+ current_argument := expression^.arguments;
+ if expression^.argument_count > 0 then
+ transpile_expression(context, current_argument^);
+
+ argument_index := 1u;
+ current_argument := current_argument + 1;
+
+ while argument_index < expression^.argument_count do
+ WriteString(context^.output, ", ");
+
+ transpile_expression(context, current_argument^);
+
+ current_argument := current_argument + 1;
+ argument_index := argument_index + 1u
+ end
+ end;
+ WriteChar(context^.output, ')')
+ end
+end;
+
+proc transpile_if_statement(context: ^TranspilerContext, statement: ^AstStatement);
+begin
+ WriteString(context^.output, "IF ");
+ transpile_expression(context, statement^.if_condition);
+
+ WriteString(context^.output, " THEN");
+ WriteLine(context^.output);
+ context^.indentation := context^.indentation + 1u;
+
+ transpile_compound_statement(context, statement^.if_branch);
+ context^.indentation := context^.indentation - 1u;
+ indent(context);
+ WriteString(context^.output, "END")
+end;
+
+proc transpile_while_statement(context: ^TranspilerContext, statement: ^AstStatement);
+begin
+ WriteString(context^.output, "WHILE ");
+ transpile_expression(context, statement^.while_condition);
+
+ WriteString(context^.output, " DO");
+ WriteLine(context^.output);
+ context^.indentation := context^.indentation + 1u;
+
+ transpile_compound_statement(context, statement^.while_body);
+ context^.indentation := context^.indentation - 1u;
+ indent(context);
+ WriteString(context^.output, "END")
+end;
+
+proc transpile_assignment_statement(context: ^TranspilerContext, statement: ^AstStatement);
+begin
+ transpile_expression(context, statement^.assignee);
+ WriteString(context^.output, " := ");
+ transpile_expression(context, statement^.assignment)
+end;
+
+proc transpile_return_statement(context: ^TranspilerContext, statement: ^AstStatement);
+begin
+ WriteString(context^.output, "RETURN ");
+
+ transpile_expression(context, statement^.returned)
+end;
+
+proc transpile_compound_statement(context: ^TranspilerContext, statement: AstCompoundStatement);
+var
+ current_statement: ^^AstStatement;
+ index: Word;
+begin
+ index := 0;
+ current_statement := statement.statements;
+
+ while index < statement.count do
+ transpile_statement(context, current_statement^);
+
+ current_statement := current_statement + 1;
+ index := index + 1u;
+
+ if index <> statement.count then
+ WriteChar(context^.output, ';')
+ end;
+ WriteLine(context^.output)
+ end
+end;
+
+proc transpile_statement(context: ^TranspilerContext, statement: ^AstStatement);
+begin
+ indent(context);
+
+ if statement^.kind = astStatementKindIf then
+ transpile_if_statement(context, statement)
+ end;
+ if statement^.kind = astStatementKindWhile then
+ transpile_while_statement(context, statement)
+ end;
+ if statement^.kind = astStatementKindReturn then
+ transpile_return_statement(context, statement)
+ end;
+ if statement^.kind = astStatementKindAssignment then
+ transpile_assignment_statement(context, statement)
+ end;
+ if statement^.kind = astStatementKindCall then
+ transpile_expression(context, statement^.call)
+ end
+end;
+
+proc transpile_statement_part(context: ^TranspilerContext, compound: AstCompoundStatement);
+begin
+ if compound.count > 0 then
+ WriteString(context^.output, "BEGIN");
+ WriteLine(context^.output);
+
+ context^.indentation := context^.indentation + 1u;
+ transpile_compound_statement(context, compound);
+ context^.indentation := context^.indentation - 1u;
+ end
+end;
+
+proc transpile_procedure_declaration(context: ^TranspilerContext, declaration: ^AstProcedureDeclaration);
+begin
+ transpile_procedure_heading(context, declaration);
+
+ transpile_constant_part(context, declaration^.constants, false);
+ transpile_variable_part(context, declaration^.variables, false);
+ transpile_statement_part(context, declaration^.statements);
+
+ WriteString(context^.output, "END ");
+ transpile_identifier(context^.output, declaration^.name);
+
+ write_semicolon(context^.output)
+end;
+
+proc transpile_procedure_part(context: ^TranspilerContext, declaration: ^^AstProcedureDeclaration);
+begin
+ while declaration^ <> nil do
+ transpile_procedure_declaration(context, declaration^);
+ WriteLine(context^.output);
+
+ declaration := declaration + 1
+ end
+end;
+
+proc transpile_module_name(context: ^TranspilerContext);
+var
+ counter: Word;
+ last_slash: Word;
+begin
+ counter := 1u;
+ last_slash := 0u;
+
+ while context^.input_name[counter] <> '.' & context^.input_name[counter] <> '\0' do
+ if context^.input_name[counter] = '/' then
+ last_slash := counter
+ end;
+ counter := counter + 1u
+ end;
+
+ if last_slash = 0u then
+ counter := 1u
+ end;
+ if last_slash <> 0u then
+ counter := last_slash + 1u
+ end;
+ while context^.input_name[counter] <> '.' & context^.input_name[counter] <> '\0' do
+ WriteChar(context^.output, context^.input_name[counter]);
+ counter := counter + 1u
+ end
+end;
+
+proc transpile*(ast_module: ^AstModule, output: File, definition: File, input_name: String);
+var
+ context: TranspilerContext;
+begin
+ context.input_name := input_name;
+ context.output := output;
+ context.definition := definition;
+ context.indentation := 0u;
+
+ transpile_module(@context, ast_module)
+end;
+
+end.
diff --git a/source/cctype.elna b/source/cctype.elna
new file mode 100644
index 0000000..3906cd1
--- /dev/null
+++ b/source/cctype.elna
@@ -0,0 +1,14 @@
+(* This Source Code Form is subject to the terms of the Mozilla Public License,
+ v. 2.0. If a copy of the MPL was not distributed with this file, You can
+ obtain one at https://mozilla.org/MPL/2.0/. *)
+module;
+
+proc isdigit*(c: Int ) -> Int; extern;
+proc isalnum*(c: Int) -> Int; extern;
+proc isalpha*(c: Int) -> Int; extern;
+proc isspace*(c: Int) -> Int; extern;
+
+proc tolower*(c: Int) -> Int; extern;
+proc toupper*(c: Int) -> Int; extern;
+
+end.
diff --git a/source/command_line_interface.elna b/source/command_line_interface.elna
new file mode 100644
index 0000000..040fdeb
--- /dev/null
+++ b/source/command_line_interface.elna
@@ -0,0 +1,93 @@
+(* This Source Code Form is subject to the terms of the Mozilla Public License,
+ v. 2.0. If a copy of the MPL was not distributed with this file, You can
+ obtain one at https://mozilla.org/MPL/2.0/. *)
+
+(*
+ Command line handling.
+*)
+module;
+
+import cstdlib, cstring, common;
+
+type
+ CommandLine* = record
+ input: ^Char;
+ output: ^Char;
+ lex: Bool;
+ parse: Bool
+ end;
+
+proc parse_command_line*(argc: Int, argv: ^^Char) -> ^CommandLine;
+var
+ parameter: ^Char;
+ i: Int;
+ result: ^CommandLine;
+ parsed: Bool;
+begin
+ i := 1;
+ result := cast(malloc(#size(CommandLine)): ^CommandLine);
+ result^.lex := false;
+ result^.parse := false;
+ result^.input := nil;
+ result^.output := nil;
+
+ while i < argc & result <> nil do
+ parameter := (argv + i)^;
+ parsed := false;
+
+ if strcmp(parameter, "--lex\0".ptr) = 0 then
+ parsed := true;
+ result^.lex := true
+ end;
+ if strcmp(parameter, "--parse\0".ptr) = 0 then
+ parsed := true;
+ result^.parse := true
+ end;
+ if strcmp(parameter, "-o\0".ptr) = 0 then
+ i := i + 1;
+
+ if i = argc then
+ write_s("Fatal error: expecting a file name following -o.");
+ result := nil
+ end;
+ if i < argc then
+ parameter := (argv + i)^;
+ result^.output := parameter
+ end;
+ parsed := true
+ end;
+ if (parameter^ <> '-') & ~parsed then
+ parsed := true;
+
+ if result^.input <> nil then
+ write_s("Fatal error: only one source file can be compiled at once. First given \"");
+ write_z(result^.input);
+ write_s("\", then \"");
+ write_z(parameter);
+ write_s("\".\n");
+ result := nil
+ end;
+ if result <> nil then
+ result^.input := parameter
+ end
+ end;
+ if ~parsed then
+ write_s("Fatal error: unknown command line options: ");
+
+ write_z(parameter);
+ write_s(".\n");
+
+ result := nil
+ end;
+
+ i := i + 1
+ end;
+ if result <> nil & result^.input = nil then
+ write_s("Fatal error: no input files.\n");
+ result := nil
+ end;
+
+ return result
+end;
+
+end.
diff --git a/source/common.elna b/source/common.elna
new file mode 100644
index 0000000..e7b30ca
--- /dev/null
+++ b/source/common.elna
@@ -0,0 +1,72 @@
+(* This Source Code Form is subject to the terms of the Mozilla Public License,
+ v. 2.0. If a copy of the MPL was not distributed with this file, You can
+ obtain one at https://mozilla.org/MPL/2.0/. *)
+module;
+
+import cstring, cstdio;
+
+type
+ Identifier = [256]Char;
+ TextLocation* = record
+ line: Word;
+ column: Word
+ end;
+
+proc write*(fd: Int, buf: Pointer, Word: Int) -> Int; extern;
+
+proc write_s*(value: String);
+begin
+ (* fwrite(cast(value.ptr: Pointer), value.length, 1u, stdout) *)
+ write(1, cast(value.ptr: Pointer), cast(value.length: Int))
+end;
+
+proc write_z*(value: ^Char);
+begin
+ write(1, cast(value: Pointer), cast(strlen(value): Int))
+end;
+
+proc write_b*(value: Bool);
+begin
+ if value then
+ write_s("true")
+ else
+ write_s("false")
+ end
+end;
+
+proc write_c*(value: Char);
+begin
+ putchar(cast(value: Int));
+ fflush(nil)
+end;
+
+proc write_i*(value: Int);
+var
+ digit: Int;
+ n: Word;
+ buffer: [10]Char;
+begin
+ n := 10u;
+
+ if value = 0 then
+ write_c('0')
+ end;
+ while value <> 0 do
+ digit := value % 10;
+ value := value / 10;
+
+ buffer[n] := cast(cast('0': Int) + digit: Char);
+ n := n - 1u
+ end;
+ while n < 10u do
+ n := n + 1u;
+ write_c(buffer[n])
+ end
+end;
+
+proc write_u*(value: Word);
+begin
+ write_i(cast(value: Int))
+end;
+
+end.
diff --git a/source/cstdio.elna b/source/cstdio.elna
new file mode 100644
index 0000000..c7507ff
--- /dev/null
+++ b/source/cstdio.elna
@@ -0,0 +1,29 @@
+(* This Source Code Form is subject to the terms of the Mozilla Public License,
+ v. 2.0. If a copy of the MPL was not distributed with this file, You can
+ obtain one at https://mozilla.org/MPL/2.0/. *)
+module;
+
+type
+ FILE* = record end;
+
+var
+ stdin*: ^FILE := extern;
+ stdout*: ^FILE := extern;
+ stderr*: ^FILE := extern;
+
+proc fopen*(pathname: ^Char, mode: ^Char) -> ^FILE; extern;
+proc fclose*(stream: ^FILE) -> Int; extern;
+proc fseek*(stream: ^FILE, off: Int, whence: Int) -> Int; extern;
+proc rewind*(stream: ^FILE); extern;
+proc ftell*(stream: ^FILE) -> Int; extern;
+proc fflush*(stream: ^FILE) -> Int; extern;
+
+proc fread*(ptr: Pointer, size: Word, nmemb: Word, stream: ^FILE) -> Word; extern;
+proc fwrite*(ptr: Pointer, size: Word, nitems: Word, stream: ^FILE) -> Word; extern;
+
+proc perror(s: ^Char); extern;
+
+proc puts(s: ^Char) -> Int; extern;
+proc putchar(c: Int) -> Int; extern;
+
+end.
diff --git a/source/cstdlib.elna b/source/cstdlib.elna
new file mode 100644
index 0000000..da2029c
--- /dev/null
+++ b/source/cstdlib.elna
@@ -0,0 +1,15 @@
+(* This Source Code Form is subject to the terms of the Mozilla Public License,
+ v. 2.0. If a copy of the MPL was not distributed with this file, You can
+ obtain one at https://mozilla.org/MPL/2.0/. *)
+module;
+
+proc malloc(size: Word) -> Pointer; extern;
+proc free(ptr: Pointer); extern;
+proc calloc(nmemb: Word, size: Word) -> Pointer; extern;
+proc realloc(ptr: Pointer, size: Word) -> Pointer; extern;
+
+proc atoi(str: ^Char) -> Int; extern;
+
+proc exit(code: Int) -> !; extern;
+
+end.
diff --git a/source/cstring.elna b/source/cstring.elna
new file mode 100644
index 0000000..24d852a
--- /dev/null
+++ b/source/cstring.elna
@@ -0,0 +1,15 @@
+(* This Source Code Form is subject to the terms of the Mozilla Public License,
+ v. 2.0. If a copy of the MPL was not distributed with this file, You can
+ obtain one at https://mozilla.org/MPL/2.0/. *)
+module;
+
+proc memset(ptr: Pointer, c: Int, n: Word) -> ^Char; extern;
+proc memcpy(dst: Pointer, src: Pointer, n: Word); extern;
+
+proc strcmp(s1: ^Char, s2: ^Char) -> Int; extern;
+proc strncmp(s1: ^Char, s2: ^Char, n: Word) -> Int; extern;
+proc strncpy(dst: ^Char, src: ^Char, dsize: Word) -> ^Char; extern;
+proc strcpy(dst: ^Char, src: ^Char) -> ^Char; extern;
+proc strlen(ptr: ^Char) -> Word; extern;
+
+end.
diff --git a/source/lexer.elna b/source/lexer.elna
new file mode 100644
index 0000000..d5f529b
--- /dev/null
+++ b/source/lexer.elna
@@ -0,0 +1,952 @@
+(* This Source Code Form is subject to the terms of the Mozilla Public License,
+ v. 2.0. If a copy of the MPL was not distributed with this file, You can
+ obtain one at https://mozilla.org/MPL/2.0/. *)
+module;
+
+import cstdio, cstring, cctype, cstdlib, common;
+
+const
+ CHUNK_SIZE := 85536u;
+
+type
+ (*
+ * Classification table assigns each possible character to a group (class). All
+ * characters of the same group are handled equivalently.
+ *
+ * Classification:
+ *)
+ TransitionClass = (
+ invalid,
+ digit,
+ alpha,
+ space,
+ colon,
+ equals,
+ left_paren,
+ right_paren,
+ asterisk,
+ underscore,
+ single,
+ hex,
+ zero,
+ x,
+ eof,
+ dot,
+ minus,
+ single_quote,
+ double_quote,
+ greater,
+ less,
+ other
+ );
+ TransitionState = (
+ start,
+ colon,
+ identifier,
+ decimal,
+ greater,
+ minus,
+ left_paren,
+ less,
+ dot,
+ comment,
+ closing_comment,
+ character,
+ string,
+ leading_zero,
+ decimal_suffix,
+ finish
+ );
+ LexerToken = record
+ kind: LexerKind;
+ value: union
+ booleanKind: Bool;
+ identifierKind: Identifier;
+ integerKind: Int;
+ stringKind: String
+ end;
+ start_location: TextLocation;
+ end_location: TextLocation
+ end;
+ TransitionAction = proc(^Lexer, ^LexerToken);
+ Transition = record
+ action: TransitionAction;
+ next_state: TransitionState
+ end;
+ TransitionClasses = [22]Transition;
+
+ BufferPosition* = record
+ iterator: ^Char;
+ location: TextLocation
+ end;
+ Lexer* = record
+ input: ^FILE;
+ buffer: ^Char;
+ size: Word;
+ length: Word;
+ start: BufferPosition;
+ current: BufferPosition
+ end;
+ LexerKind* = (
+ unknown,
+ identifier,
+ _if,
+ _then,
+ _else,
+ _elsif,
+ _while,
+ _do,
+ _proc,
+ _begin,
+ _end,
+ _extern,
+ _const,
+ _var,
+ _case,
+ _of,
+ _type,
+ _record,
+ _union,
+ pipe,
+ to,
+ boolean,
+ null,
+ and,
+ _or,
+ _xor,
+ not,
+ _return,
+ _cast,
+ shift_left,
+ shift_right,
+ left_paren,
+ right_paren,
+ left_square,
+ right_square,
+ greater_equal,
+ less_equal,
+ greater_than,
+ less_than,
+ not_equal,
+ equal,
+ semicolon,
+ dot,
+ comma,
+ plus,
+ minus,
+ multiplication,
+ division,
+ remainder,
+ assignment,
+ colon,
+ hat,
+ at,
+ comment,
+ integer,
+ word,
+ character,
+ string,
+ _defer,
+ exclamation,
+ arrow,
+ trait,
+ _program,
+ _module,
+ _import
+ );
+
+var
+ classification: [128]TransitionClass;
+ transitions: [16]TransitionClasses;
+
+proc initialize_classification();
+var
+ i: Word;
+begin
+ classification[1] := TransitionClass.eof; (* NUL *)
+ classification[2] := TransitionClass.invalid; (* SOH *)
+ classification[3] := TransitionClass.invalid; (* STX *)
+ classification[4] := TransitionClass.invalid; (* ETX *)
+ classification[5] := TransitionClass.invalid; (* EOT *)
+ classification[6] := TransitionClass.invalid; (* EMQ *)
+ classification[7] := TransitionClass.invalid; (* ACK *)
+ classification[8] := TransitionClass.invalid; (* BEL *)
+ classification[9] := TransitionClass.invalid; (* BS *)
+ classification[10] := TransitionClass.space; (* HT *)
+ classification[11] := TransitionClass.space; (* LF *)
+ classification[12] := TransitionClass.invalid; (* VT *)
+ classification[13] := TransitionClass.invalid; (* FF *)
+ classification[14] := TransitionClass.space; (* CR *)
+ classification[15] := TransitionClass.invalid; (* SO *)
+ classification[16] := TransitionClass.invalid; (* SI *)
+ classification[17] := TransitionClass.invalid; (* DLE *)
+ classification[18] := TransitionClass.invalid; (* DC1 *)
+ classification[19] := TransitionClass.invalid; (* DC2 *)
+ classification[20] := TransitionClass.invalid; (* DC3 *)
+ classification[21] := TransitionClass.invalid; (* DC4 *)
+ classification[22] := TransitionClass.invalid; (* NAK *)
+ classification[23] := TransitionClass.invalid; (* SYN *)
+ classification[24] := TransitionClass.invalid; (* ETB *)
+ classification[25] := TransitionClass.invalid; (* CAN *)
+ classification[26] := TransitionClass.invalid; (* EM *)
+ classification[27] := TransitionClass.invalid; (* SUB *)
+ classification[28] := TransitionClass.invalid; (* ESC *)
+ classification[29] := TransitionClass.invalid; (* FS *)
+ classification[30] := TransitionClass.invalid; (* GS *)
+ classification[31] := TransitionClass.invalid; (* RS *)
+ classification[32] := TransitionClass.invalid; (* US *)
+ classification[33] := TransitionClass.space; (* Space *)
+ classification[34] := TransitionClass.single; (* ! *)
+ classification[35] := TransitionClass.double_quote; (* " *)
+ classification[36] := TransitionClass.other; (* # *)
+ classification[37] := TransitionClass.other; (* $ *)
+ classification[38] := TransitionClass.single; (* % *)
+ classification[39] := TransitionClass.single; (* & *)
+ classification[40] := TransitionClass.single_quote; (* ' *)
+ classification[41] := TransitionClass.left_paren; (* ( *)
+ classification[42] := TransitionClass.right_paren; (* ) *)
+ classification[43] := TransitionClass.asterisk; (* * *)
+ classification[44] := TransitionClass.single; (* + *)
+ classification[45] := TransitionClass.single; (* , *)
+ classification[46] := TransitionClass.minus; (* - *)
+ classification[47] := TransitionClass.dot; (* . *)
+ classification[48] := TransitionClass.single; (* / *)
+ classification[49] := TransitionClass.zero; (* 0 *)
+ classification[50] := TransitionClass.digit; (* 1 *)
+ classification[51] := TransitionClass.digit; (* 2 *)
+ classification[52] := TransitionClass.digit; (* 3 *)
+ classification[53] := TransitionClass.digit; (* 4 *)
+ classification[54] := TransitionClass.digit; (* 5 *)
+ classification[55] := TransitionClass.digit; (* 6 *)
+ classification[56] := TransitionClass.digit; (* 7 *)
+ classification[57] := TransitionClass.digit; (* 8 *)
+ classification[58] := TransitionClass.digit; (* 9 *)
+ classification[59] := TransitionClass.colon; (* : *)
+ classification[60] := TransitionClass.single; (* ; *)
+ classification[61] := TransitionClass.less; (* < *)
+ classification[62] := TransitionClass.equals; (* = *)
+ classification[63] := TransitionClass.greater; (* > *)
+ classification[64] := TransitionClass.other; (* ? *)
+ classification[65] := TransitionClass.single; (* @ *)
+ classification[66] := TransitionClass.alpha; (* A *)
+ classification[67] := TransitionClass.alpha; (* B *)
+ classification[68] := TransitionClass.alpha; (* C *)
+ classification[69] := TransitionClass.alpha; (* D *)
+ classification[70] := TransitionClass.alpha; (* E *)
+ classification[71] := TransitionClass.alpha; (* F *)
+ classification[72] := TransitionClass.alpha; (* G *)
+ classification[73] := TransitionClass.alpha; (* H *)
+ classification[74] := TransitionClass.alpha; (* I *)
+ classification[75] := TransitionClass.alpha; (* J *)
+ classification[76] := TransitionClass.alpha; (* K *)
+ classification[77] := TransitionClass.alpha; (* L *)
+ classification[78] := TransitionClass.alpha; (* M *)
+ classification[79] := TransitionClass.alpha; (* N *)
+ classification[80] := TransitionClass.alpha; (* O *)
+ classification[81] := TransitionClass.alpha; (* P *)
+ classification[82] := TransitionClass.alpha; (* Q *)
+ classification[83] := TransitionClass.alpha; (* R *)
+ classification[84] := TransitionClass.alpha; (* S *)
+ classification[85] := TransitionClass.alpha; (* T *)
+ classification[86] := TransitionClass.alpha; (* U *)
+ classification[87] := TransitionClass.alpha; (* V *)
+ classification[88] := TransitionClass.alpha; (* W *)
+ classification[89] := TransitionClass.alpha; (* X *)
+ classification[90] := TransitionClass.alpha; (* Y *)
+ classification[91] := TransitionClass.alpha; (* Z *)
+ classification[92] := TransitionClass.single; (* [ *)
+ classification[93] := TransitionClass.other; (* \ *)
+ classification[94] := TransitionClass.single; (* ] *)
+ classification[95] := TransitionClass.single; (* ^ *)
+ classification[96] := TransitionClass.underscore; (* _ *)
+ classification[97] := TransitionClass.other; (* ` *)
+ classification[98] := TransitionClass.hex; (* a *)
+ classification[99] := TransitionClass.hex; (* b *)
+ classification[100] := TransitionClass.hex; (* c *)
+ classification[101] := TransitionClass.hex; (* d *)
+ classification[102] := TransitionClass.hex; (* e *)
+ classification[103] := TransitionClass.hex; (* f *)
+ classification[104] := TransitionClass.alpha; (* g *)
+ classification[105] := TransitionClass.alpha; (* h *)
+ classification[106] := TransitionClass.alpha; (* i *)
+ classification[107] := TransitionClass.alpha; (* j *)
+ classification[108] := TransitionClass.alpha; (* k *)
+ classification[109] := TransitionClass.alpha; (* l *)
+ classification[110] := TransitionClass.alpha; (* m *)
+ classification[111] := TransitionClass.alpha; (* n *)
+ classification[112] := TransitionClass.alpha; (* o *)
+ classification[113] := TransitionClass.alpha; (* p *)
+ classification[114] := TransitionClass.alpha; (* q *)
+ classification[115] := TransitionClass.alpha; (* r *)
+ classification[116] := TransitionClass.alpha; (* s *)
+ classification[117] := TransitionClass.alpha; (* t *)
+ classification[118] := TransitionClass.alpha; (* u *)
+ classification[119] := TransitionClass.alpha; (* v *)
+ classification[120] := TransitionClass.alpha; (* w *)
+ classification[121] := TransitionClass.x; (* x *)
+ classification[122] := TransitionClass.alpha; (* y *)
+ classification[123] := TransitionClass.alpha; (* z *)
+ classification[124] := TransitionClass.other; (* { *)
+ classification[125] := TransitionClass.single; (* | *)
+ classification[126] := TransitionClass.other; (* } *)
+ classification[127] := TransitionClass.single; (* ~ *)
+ classification[128] := TransitionClass.invalid; (* DEL *)
+
+ i := 129u;
+ while i <= 256u do
+ classification[i] := TransitionClass.other;
+ i := i + 1u
+ end
+end;
+
+proc compare_keyword(keyword: String, token_start: BufferPosition, token_end: ^Char) -> Bool;
+var
+ result: Bool;
+ index: Word;
+ continue: Bool;
+begin
+ index := 0u;
+ result := true;
+ continue := (index < keyword.length) & (token_start.iterator <> token_end);
+
+ while continue & result do
+ result := keyword[index] = token_start.iterator^
+ or cast(tolower(cast(keyword[index]: Int)): Char) = token_start.iterator^;
+ token_start.iterator := token_start.iterator + 1;
+ index := index + 1u;
+ continue := (index < keyword.length) & (token_start.iterator <> token_end)
+ end;
+ result := result & index = keyword.length;
+
+ return result & (token_start.iterator = token_end)
+end;
+
+(* Reached the end of file. *)
+proc transition_action_eof(lexer: ^Lexer, token: ^LexerToken);
+begin
+ token^.kind := LexerKind.unknown
+end;
+
+proc increment(position: ^BufferPosition);
+begin
+ position^.iterator := position^.iterator + 1
+end;
+
+(* Add the character to the token currently read and advance to the next character. *)
+proc transition_action_accumulate(lexer: ^Lexer, token: ^LexerToken);
+begin
+ increment(@lexer^.current)
+end;
+
+(* The current character is not a part of the token. Finish the token already
+ * read. Don't advance to the next character. *)
+proc transition_action_finalize(lexer: ^Lexer, token: ^LexerToken);
+begin
+ if lexer^.start.iterator^ = ':' then
+ token^.kind := LexerKind.colon
+ end;
+ if lexer^.start.iterator^ = '>' then
+ token^.kind := LexerKind.greater_than
+ end;
+ if lexer^.start.iterator^ = '<' then
+ token^.kind := LexerKind.less_than
+ end;
+ if lexer^.start.iterator^ = '(' then
+ token^.kind := LexerKind.left_paren
+ end;
+ if lexer^.start.iterator^ = '-' then
+ token^.kind := LexerKind.minus
+ end;
+ if lexer^.start.iterator^ = '.' then
+ token^.kind := LexerKind.dot
+ end
+end;
+
+(* An action for tokens containing multiple characters. *)
+proc transition_action_composite(lexer: ^Lexer, token: ^LexerToken);
+begin
+ if lexer^.start.iterator^ = '<' then
+ if lexer^.current.iterator^ = '>' then
+ token^.kind := LexerKind.not_equal
+ end;
+ if lexer^.current.iterator^ = '=' then
+ token^.kind := LexerKind.less_equal
+ end
+ end;
+ if (lexer^.start.iterator^ = '>') & (lexer^.current.iterator^ = '=') then
+ token^.kind := LexerKind.greater_equal
+ end;
+ if (lexer^.start.iterator^ = ':') & (lexer^.current.iterator^ = '=') then
+ token^.kind := LexerKind.assignment
+ end;
+ if (lexer^.start.iterator^ = '-') & (lexer^.current.iterator^ = '>') then
+ token^.kind := LexerKind.arrow
+ end;
+ increment(@lexer^.current)
+end;
+
+(* Skip a space. *)
+proc transition_action_skip(lexer: ^Lexer, token: ^LexerToken);
+begin
+ increment(@lexer^.start);
+
+ if lexer^.start.iterator^ = '\n' then
+ lexer^.start.location.line := lexer^.start.location.line + 1u;
+ lexer^.start.location.column := 1u
+ end;
+ lexer^.current := lexer^.start
+end;
+
+(* Delimited string action. *)
+proc transition_action_delimited(lexer: ^Lexer, token: ^LexerToken);
+var
+ text_length: Word;
+begin
+ if lexer^.start.iterator^ = '(' then
+ token^.kind := LexerKind.comment
+ end;
+ if lexer^.start.iterator^ = '"' then
+ text_length := cast(lexer^.current.iterator - lexer^.start.iterator + 1: Word);
+
+ token^.value.stringKind := String(cast(malloc(text_length): ^Char), text_length);
+ memcpy(cast(token^.value.stringKind.ptr: Pointer), cast(lexer^.start.iterator: Pointer), text_length);
+
+ token^.kind := LexerKind.character
+ end;
+ if lexer^.start.iterator^ = '\'' then
+ text_length := cast(lexer^.current.iterator - lexer^.start.iterator + 1: Word);
+
+ token^.value.stringKind := String(cast(malloc(text_length): ^Char), text_length);
+ memcpy(cast(token^.value.stringKind.ptr: Pointer), cast(lexer^.start.iterator: Pointer), text_length);
+
+ token^.kind := LexerKind.string
+ end;
+ increment(@lexer^.current)
+end;
+
+(* Finalize keyword or identifier. *)
+proc transition_action_key_id(lexer: ^Lexer, token: ^LexerToken);
+begin
+ token^.kind := LexerKind.identifier;
+
+ token^.value.identifierKind[1] := cast(lexer^.current.iterator - lexer^.start.iterator: Char);
+ memcpy(cast(@token^.value.identifierKind[2]: Pointer), cast(lexer^.start.iterator: Pointer), cast(token^.value.identifierKind[1]: Word));
+
+ if compare_keyword("program", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind._program
+ end;
+ if compare_keyword("import", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind._import
+ end;
+ if compare_keyword("const", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind._const
+ end;
+ if compare_keyword("var", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind._var
+ end;
+ if compare_keyword("if", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind._if
+ end;
+ if compare_keyword("then", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind._then
+ end;
+ if compare_keyword("elsif", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind._elsif
+ end;
+ if compare_keyword("else", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind._else
+ end;
+ if compare_keyword("while", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind._while
+ end;
+ if compare_keyword("do", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind._do
+ end;
+ if compare_keyword("proc", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind._proc
+ end;
+ if compare_keyword("begin", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind._begin
+ end;
+ if compare_keyword("end", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind._end
+ end;
+ if compare_keyword("type", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind._type
+ end;
+ if compare_keyword("record", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind._record
+ end;
+ if compare_keyword("union", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind._union
+ end;
+ if compare_keyword("NIL", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind.null
+ end;
+ if compare_keyword("or", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind._or
+ end;
+ if compare_keyword("return", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind._return
+ end;
+ if compare_keyword("defer", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind._defer
+ end;
+ if compare_keyword("TO", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind.to
+ end;
+ if compare_keyword("CASE", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind._case
+ end;
+ if compare_keyword("OF", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind._of
+ end;
+ if compare_keyword("module", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind._module
+ end;
+ if compare_keyword("xor", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind._xor
+ end;
+ if compare_keyword("TRUE", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind.boolean;
+ token^.value.booleanKind := true
+ end;
+ if compare_keyword("FALSE", lexer^.start, lexer^.current.iterator) then
+ token^.kind := LexerKind.boolean;
+ token^.value.booleanKind := false
+ end
+end;
+
+(* Action for tokens containing only one character. The character cannot be
+ * followed by other characters forming a composite token. *)
+proc transition_action_single(lexer: ^Lexer, token: ^LexerToken);
+begin
+ if lexer^.current.iterator^ = '&' then
+ token^.kind := LexerKind.and
+ end;
+ if lexer^.current.iterator^ = ';' then
+ token^.kind := LexerKind.semicolon
+ end;
+ if lexer^.current.iterator^ = ',' then
+ token^.kind := LexerKind.comma
+ end;
+ if lexer^.current.iterator^ = '~' then
+ token^.kind := LexerKind.not
+ end;
+ if lexer^.current.iterator^ = ')' then
+ token^.kind := LexerKind.right_paren
+ end;
+ if lexer^.current.iterator^ = '[' then
+ token^.kind := LexerKind.left_square
+ end;
+ if lexer^.current.iterator^ = ']' then
+ token^.kind := LexerKind.right_square
+ end;
+ if lexer^.current.iterator^ = '^' then
+ token^.kind := LexerKind.hat
+ end;
+ if lexer^.current.iterator^ = '=' then
+ token^.kind := LexerKind.equal
+ end;
+ if lexer^.current.iterator^ = '+' then
+ token^.kind := LexerKind.plus
+ end;
+ if lexer^.current.iterator^ = '*' then
+ token^.kind := LexerKind.multiplication
+ end;
+ if lexer^.current.iterator^ = '/' then
+ token^.kind := LexerKind.division
+ end;
+ if lexer^.current.iterator^ = '%' then
+ token^.kind := LexerKind.remainder
+ end;
+ if lexer^.current.iterator^ = '@' then
+ token^.kind := LexerKind.at
+ end;
+ if lexer^.current.iterator^ = '|' then
+ token^.kind := LexerKind.pipe
+ end;
+ increment(@lexer^.current)
+end;
+
+(* Handle an integer literal. *)
+proc transition_action_integer(lexer: ^Lexer, token: ^LexerToken);
+var
+ buffer: String;
+ integer_length: Word;
+ found: Bool;
+begin
+ token^.kind := LexerKind.integer;
+
+ integer_length := cast(lexer^.current.iterator - lexer^.start.iterator: Word);
+ memset(cast(token^.value.identifierKind.ptr: Pointer), 0, #size(Identifier));
+ memcpy(cast(@token^.value.identifierKind[1]: Pointer), cast(lexer^.start.iterator: Pointer), integer_length);
+
+ token^.value.identifierKind[cast(token^.value.identifierKind[1]: Int) + 2] := '\0';
+ token^.value.integerKind := atoi(@token^.value.identifierKind[2])
+end;
+
+proc set_default_transition(current_state: TransitionState, default_action: TransitionAction, next_state: TransitionState) -> Int;
+var
+ default_transition: Transition;
+ state_index: Int;
+begin
+ default_transition.action := default_action;
+ default_transition.next_state := next_state;
+ state_index := cast(current_state: Int) + 1;
+
+ transitions[state_index][cast(TransitionClass.invalid: Int) + 1] := default_transition;
+ transitions[state_index][cast(TransitionClass.digit: Int) + 1] := default_transition;
+ transitions[state_index][cast(TransitionClass.alpha: Int) + 1] := default_transition;
+ transitions[state_index][cast(TransitionClass.space: Int) + 1] := default_transition;
+ transitions[state_index][cast(TransitionClass.colon: Int) + 1] := default_transition;
+ transitions[state_index][cast(TransitionClass.equals: Int) + 1] := default_transition;
+ transitions[state_index][cast(TransitionClass.left_paren: Int) + 1] := default_transition;
+ transitions[state_index][cast(TransitionClass.right_paren: Int) + 1] := default_transition;
+ transitions[state_index][cast(TransitionClass.asterisk: Int) + 1] := default_transition;
+ transitions[state_index][cast(TransitionClass.underscore: Int) + 1] := default_transition;
+ transitions[state_index][cast(TransitionClass.single: Int) + 1] := default_transition;
+ transitions[state_index][cast(TransitionClass.hex: Int) + 1] := default_transition;
+ transitions[state_index][cast(TransitionClass.zero: Int) + 1] := default_transition;
+ transitions[state_index][cast(TransitionClass.x: Int) + 1] := default_transition;
+ transitions[state_index][cast(TransitionClass.eof: Int) + 1] := default_transition;
+ transitions[state_index][cast(TransitionClass.dot: Int) + 1] := default_transition;
+ transitions[state_index][cast(TransitionClass.minus: Int) + 1] := default_transition;
+ transitions[state_index][cast(TransitionClass.single_quote: Int) + 1] := default_transition;
+ transitions[state_index][cast(TransitionClass.double_quote: Int) + 1] := default_transition;
+ transitions[state_index][cast(TransitionClass.greater: Int) + 1] := default_transition;
+ transitions[state_index][cast(TransitionClass.less: Int) + 1] := default_transition;
+ transitions[state_index][cast(TransitionClass.other: Int) + 1] := default_transition;
+
+ return state_index
+end;
+
+(*
+ * The transition table describes transitions from one state to another, given
+ * a symbol (character class).
+ *
+ * The table has m rows and n columns, where m is the amount of states and n is
+ * the amount of classes. So given the current state and a classified character
+ * the table can be used to look up the next state.
+ *
+ * Each cell is a word long.
+ * - The least significant byte of the word is a row number (beginning with 0).
+ * It specifies the target state. "ff" means that this is an end state and no
+ * transition is possible.
+ * - The next byte is the action that should be performed when transitioning.
+ * For the meaning of actions see labels in the lex_next function, which
+ * handles each action.
+ *)
+proc initialize_transitions();
+var
+ state_index: Int;
+begin
+ (* Start state. *)
+ state_index := cast(TransitionState.start: Int) + 1;
+
+ transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil;
+ transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish;
+
+ transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.decimal;
+
+ transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.identifier;
+
+ transitions[state_index][cast(TransitionClass.space: Int) + 1].action := transition_action_skip;
+ transitions[state_index][cast(TransitionClass.space: Int) + 1].next_state := TransitionState.start;
+
+ transitions[state_index][cast(TransitionClass.colon: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.colon: Int) + 1].next_state := TransitionState.colon;
+
+ transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_single;
+ transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish;
+
+ transitions[state_index][cast(TransitionClass.left_paren: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.left_paren: Int) + 1].next_state := TransitionState.left_paren;
+
+ transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].action := transition_action_single;
+ transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].next_state := TransitionState.finish;
+
+ transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_single;
+ transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.finish;
+
+ transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.identifier;
+
+ transitions[state_index][cast(TransitionClass.single: Int) + 1].action := transition_action_single;
+ transitions[state_index][cast(TransitionClass.single: Int) + 1].next_state := TransitionState.finish;
+
+ transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.identifier;
+
+ transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.leading_zero;
+
+ transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.identifier;
+
+ transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := transition_action_eof;
+ transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish;
+
+ transitions[state_index][cast(TransitionClass.dot: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.dot: Int) + 1].next_state := TransitionState.dot;
+
+ transitions[state_index][cast(TransitionClass.minus: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.minus: Int) + 1].next_state := TransitionState.minus;
+
+ transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].next_state := TransitionState.character;
+
+ transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].next_state := TransitionState.string;
+
+ transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.greater;
+
+ transitions[state_index][cast(TransitionClass.less: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.less: Int) + 1].next_state := TransitionState.less;
+
+ transitions[state_index][cast(TransitionClass.other: Int) + 1].action := nil;
+ transitions[state_index][cast(TransitionClass.other: Int) + 1].next_state := TransitionState.finish;
+
+ (* Colon state. *)
+ state_index := set_default_transition(TransitionState.colon, transition_action_finalize, TransitionState.finish);
+
+ transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite;
+ transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish;
+
+ (* Identifier state. *)
+ state_index := set_default_transition(TransitionState.identifier, transition_action_key_id, TransitionState.finish);
+
+ transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.identifier;
+
+ transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.identifier;
+
+ transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.identifier;
+
+ transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.identifier;
+
+ transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.identifier;
+
+ transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.identifier;
+
+ (* Decimal state. *)
+ state_index := set_default_transition(TransitionState.decimal, transition_action_integer, TransitionState.finish);
+
+ transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.decimal;
+
+ transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.decimal_suffix;
+
+ transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := nil;
+ transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.finish;
+
+ transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.decimal_suffix;
+
+ transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.decimal;
+
+ transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.decimal_suffix;
+
+ (* Greater state. *)
+ state_index := set_default_transition(TransitionState.greater, transition_action_finalize, TransitionState.finish);
+
+ transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite;
+ transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish;
+
+ (* Minus state. *)
+ state_index := set_default_transition(TransitionState.minus, transition_action_finalize, TransitionState.finish);
+
+ transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_composite;
+ transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.finish;
+
+ (* Left paren state. *)
+ state_index := set_default_transition(TransitionState.left_paren, transition_action_finalize, TransitionState.finish);
+
+ transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.comment;
+
+ (* Less state. *)
+ state_index := set_default_transition(TransitionState.less, transition_action_finalize, TransitionState.finish);
+
+ transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite;
+ transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish;
+
+ transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_composite;
+ transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.finish;
+
+ (* Hexadecimal after 0x. *)
+ state_index := set_default_transition(TransitionState.dot, transition_action_finalize, TransitionState.finish);
+
+ transitions[state_index][cast(TransitionClass.dot: Int) + 1].action := transition_action_composite;
+ transitions[state_index][cast(TransitionClass.dot: Int) + 1].next_state := TransitionState.finish;
+
+ (* Comment. *)
+ state_index := set_default_transition(TransitionState.comment, transition_action_accumulate, TransitionState.comment);
+
+ transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.closing_comment;
+
+ transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil;
+ transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish;
+
+ (* Closing comment. *)
+ state_index := set_default_transition(TransitionState.closing_comment, transition_action_accumulate, TransitionState.comment);
+
+ transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil;
+ transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish;
+
+ transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].action := transition_action_delimited;
+ transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].next_state := TransitionState.finish;
+
+ transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate;
+ transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.closing_comment;
+
+ transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil;
+ transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish;
+
+ (* Character. *)
+ state_index := set_default_transition(TransitionState.character, transition_action_accumulate, TransitionState.character);
+
+ transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil;
+ transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish;
+
+ transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil;
+ transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish;
+
+ transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].action := transition_action_delimited;
+ transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].next_state := TransitionState.finish;
+
+ (* String. *)
+ state_index := set_default_transition(TransitionState.string, transition_action_accumulate, TransitionState.string);
+
+ transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil;
+ transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish;
+
+ transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil;
+ transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish;
+
+ transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].action := transition_action_delimited;
+ transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].next_state := TransitionState.finish;
+
+ (* Leading zero. *)
+ state_index := set_default_transition(TransitionState.leading_zero, transition_action_integer, TransitionState.finish);
+
+ transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := nil;
+ transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.finish;
+
+ transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := nil;
+ transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.finish;
+
+ transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := nil;
+ transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.finish;
+
+ transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := nil;
+ transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.finish;
+
+ transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := nil;
+ transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.finish;
+
+ transitions[state_index][cast(TransitionClass.x: Int) + 1].action := nil;
+ transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.finish;
+
+ (* Digit with a character suffix. *)
+ state_index := set_default_transition(TransitionState.decimal_suffix, transition_action_integer, TransitionState.finish);
+
+ transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := nil;
+ transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.finish;
+
+ transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := nil;
+ transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.finish;
+
+ transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := nil;
+ transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.finish;
+
+ transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := nil;
+ transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.finish;
+
+ transitions[state_index][cast(TransitionClass.x: Int) + 1].action := nil;
+ transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.finish
+end;
+
+proc lexer_make*(lexer: ^Lexer, input: ^FILE);
+begin
+ lexer^.input := input;
+ lexer^.length := 0u;
+
+ lexer^.buffer := cast(malloc(CHUNK_SIZE): ^Char);
+ memset(cast(lexer^.buffer: Pointer), 0, CHUNK_SIZE);
+ lexer^.size := CHUNK_SIZE
+end;
+
+(* Returns the last read token. *)
+proc lexer_current*(lexer: ^Lexer) -> LexerToken;
+var
+ current_class: TransitionClass;
+ current_state: TransitionState;
+ current_transition: Transition;
+ result: LexerToken;
+ index1: Word;
+ index2: Word;
+begin
+ lexer^.current := lexer^.start;
+ current_state := TransitionState.start;
+
+ while current_state <> TransitionState.finish do
+ index1 := cast(lexer^.current.iterator^: Word) + 1u;
+ current_class := classification[index1];
+
+ index1 := cast(current_state: Word) + 1u;
+ index2 := cast(current_class: Word) + 1u;
+
+ current_transition := transitions[index1][index2];
+ if current_transition.action <> nil then
+ current_transition.action(lexer, @result)
+ end;
+ current_state := current_transition.next_state
+ end;
+ result.start_location := lexer^.start.location;
+ result.end_location := lexer^.current.location;
+
+ return result
+end;
+
+(* Read and return the next token. *)
+proc lexer_lex*(lexer: ^Lexer) -> LexerToken;
+var
+ result: LexerToken;
+begin
+ if lexer^.length = 0u then
+ lexer^.length := fread(cast(lexer^.buffer: Pointer), CHUNK_SIZE, 1u, lexer^.input);
+ lexer^.current.location.column := 1u;
+ lexer^.current.location.line := 1u;
+ lexer^.current.iterator := lexer^.buffer
+ end;
+ lexer^.start := lexer^.current;
+
+ result := lexer_current(lexer);
+ return result
+end;
+
+proc lexer_destroy*(lexer: ^Lexer);
+begin
+ free(cast(lexer^.buffer: Pointer))
+end;
+
+proc lexer_initialize();
+begin
+ initialize_classification();
+ initialize_transitions()
+end;
+
+end.
diff --git a/source/main.elna b/source/main.elna
new file mode 100644
index 0000000..dae045b
--- /dev/null
+++ b/source/main.elna
@@ -0,0 +1,841 @@
+(* This Source Code Form is subject to the terms of the Mozilla Public License,
+ v. 2.0. If a copy of the MPL was not distributed with this file, You can
+ obtain one at https://mozilla.org/MPL/2.0/. *)
+program;
+
+import cstdio, cctype, common, command_line_interface, lexer;
+
+type
+ SourceFile* = record
+ buffer: [1024]Char;
+ handle: ^FILE;
+ size: Word;
+ index: Word
+ end;
+ StringBuffer* = record
+ data: Pointer;
+ size: Word;
+ capacity: Word
+ end;
+ SourceCode = record
+ position: TextLocation;
+
+ input: Pointer;
+ empty: proc(Pointer) -> Bool;
+ advance: proc(Pointer);
+ head: proc(Pointer) -> Char
+ end;
+ Token* = record
+ kind: LexerKind;
+ value: union
+ int_value: Int;
+ string: String;
+ boolean_value: Bool;
+ char_value: Char
+ end
+ end;
+ Tokenizer* = record
+ length: Word;
+ data: ^Token
+ end;
+
+(*
+ Standard procedures.
+*)
+proc reallocarray(ptr: Pointer, n: Word, size: Word) -> Pointer;
+ return realloc(ptr, n * size)
+end;
+
+proc substring(string: String, start: Word, count: Word) -> String;
+ return String(string.ptr + start, count)
+end;
+
+proc open_substring(string: String, start: Word) -> String;
+ return substring(string, start, string.length - start)
+end;
+
+proc string_dup(origin: String) -> String;
+var
+ copy: ^Char;
+begin
+ copy := cast(malloc(origin.length): ^Char);
+ strncpy(copy, origin.ptr, origin.length);
+
+ return String(copy, origin.length)
+end;
+
+proc string_buffer_new() -> StringBuffer;
+var
+ result: StringBuffer;
+begin
+ result.capacity := 64u;
+ result.data := malloc(result.capacity);
+ result.size := 0u;
+
+ return result
+end;
+
+proc string_buffer_push(buffer: ^StringBuffer, char: Char);
+begin
+ if buffer^.size >= buffer^.capacity then
+ buffer^.capacity := buffer^.capacity + 1024u;
+ buffer^.data := realloc(buffer^.data, buffer^.capacity)
+ end;
+ cast(buffer^.data + buffer^.size: ^Char)^ := cast(char: Char);
+ buffer^.size := buffer^.size + 1u
+end;
+
+proc string_buffer_pop(buffer: ^StringBuffer, count: Word);
+begin
+ buffer^.size := buffer^.size - count
+end;
+
+proc string_buffer_clear(buffer: ^StringBuffer) -> String;
+var
+ result: String;
+begin
+ result := String(cast(buffer^.data: ^Char), buffer^.size);
+ buffer^.size := 0u;
+ return result
+end;
+
+(*
+ Source code stream procedures.
+*)
+
+proc read_source(filename: ^Char) -> ^SourceFile;
+var
+ result: ^SourceFile;
+ file_handle: ^FILE;
+begin
+ file_handle := fopen(filename, "rb\0".ptr);
+
+ if file_handle <> nil then
+ result := cast(malloc(#size(SourceFile)): ^SourceFile);
+ result^.handle := file_handle;
+ result^.size := 0u;
+ result^.index := 1u
+ end;
+ return result
+end;
+
+proc source_file_empty(source_input: Pointer) -> Bool;
+var
+ source_file: ^SourceFile;
+begin
+ source_file := cast(source_input: ^SourceFile);
+
+ if source_file^.index > source_file^.size then
+ source_file^.size := fread(cast(@source_file^.buffer: Pointer), 1u, 1024u, source_file^.handle);
+ source_file^.index := 1u
+ end;
+
+ return source_file^.size = 0u
+end;
+
+proc source_file_head(source_input: Pointer) -> Char;
+var
+ source_file: ^SourceFile;
+begin
+ source_file := cast(source_input: ^SourceFile);
+
+ return source_file^.buffer[source_file^.index]
+end;
+
+proc source_file_advance(source_input: Pointer);
+var
+ source_file: ^SourceFile;
+begin
+ source_file := cast(source_input: ^SourceFile);
+
+ source_file^.index := source_file^.index + 1u
+end;
+
+proc source_code_empty(source_code: ^SourceCode) -> Bool;
+ return source_code^.empty(source_code^.input)
+end;
+
+proc source_code_head(source_code: SourceCode) -> Char;
+ return source_code.head(source_code.input)
+end;
+
+proc source_code_advance(source_code: ^SourceCode);
+begin
+ source_code^.advance(source_code^.input);
+ source_code^.position.column := source_code^.position.column
+end;
+
+proc source_code_break(source_code: ^SourceCode);
+begin
+ source_code^.position.line := source_code^.position.line + 1u;
+ source_code^.position.column := 0u
+end;
+
+proc source_code_expect(source_code: ^SourceCode, expected: Char) -> Bool;
+ return ~source_code_empty(source_code) & source_code_head(source_code^) = expected
+end;
+
+(*
+ Token procedures.
+*)
+
+proc lexer_escape(escape: Char, result: ^Char) -> Bool;
+var
+ successful: Bool;
+begin
+ case escape of
+ 'n':
+ result^ := '\n';
+ successful := true
+ | 'a':
+ result^ := '\a';
+ successful := true
+ | 'b':
+ result^ := '\b';
+ successful := true
+ | 't':
+ result^ := '\t';
+ successful := true
+ | 'f':
+ result^ := '\f';
+ successful := true
+ | 'r':
+ result^ := '\r';
+ successful := true
+ | 'v':
+ result^ := '\v';
+ successful := true
+ | '\\':
+ result^ := '\\';
+ successful := true
+ | '\'':
+ result^ := '\'';
+ successful := true
+ | '"':
+ result^ := '"';
+ successful := true
+ | '?':
+ result^ := '\?';
+ successful := true
+ | '0':
+ result^ := '\0';
+ successful := true
+ else
+ successful := false
+ end;
+ return successful
+end;
+
+(* Skip spaces. *)
+proc lexer_spaces(source_code: ^SourceCode);
+var
+ current: Char;
+begin
+ while ~source_code_empty(source_code) & isspace(cast(source_code_head(source_code^): Int)) <> 0 do
+ current := source_code_head(source_code^);
+
+ if current = '\n' then
+ source_code_break(source_code)
+ end;
+ source_code_advance(source_code)
+ end
+end;
+
+(* Checker whether the character is allowed in an identificator. *)
+proc lexer_is_ident(char: Char) -> Bool;
+ return isalnum(cast(char: Int)) <> 0 or char = '_'
+end;
+
+proc lexer_identifier(source_code: ^SourceCode, token_content: ^StringBuffer);
+var
+ content_length: Word;
+begin
+ while ~source_code_empty(source_code) & lexer_is_ident(source_code_head(source_code^)) do
+ string_buffer_push(token_content, source_code_head(source_code^));
+ source_code_advance(source_code)
+ end
+end;
+
+proc lexer_comment(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool;
+var
+ trailing: Word;
+begin
+ trailing := 0u;
+
+ while ~source_code_empty(source_code) & trailing < 2u do
+ if source_code_head(source_code^) = '*' then
+ string_buffer_push(token_content, '*');
+ trailing := 1u
+ elsif source_code_head(source_code^) = ')' & trailing = 1u then
+ string_buffer_pop(token_content, 1u);
+ trailing := 2u
+ else
+ string_buffer_push(token_content, source_code_head(source_code^));
+ trailing := 0u
+ end;
+ source_code_advance(source_code)
+ end;
+
+ return trailing = 2u
+end;
+
+proc lexer_character(source_code: ^SourceCode, token_content: ^Char) -> Bool;
+var
+ successful: Bool;
+begin
+ successful := ~source_code_empty(source_code);
+
+ if successful then
+ if source_code_head(source_code^) = '\\' then
+ source_code_advance(source_code);
+
+ successful := ~source_code_empty(source_code) & lexer_escape(source_code_head(source_code^), token_content)
+ else
+ token_content^ := source_code_head(source_code^);
+ successful := true
+ end
+ end;
+ if successful then
+ source_code_advance(source_code)
+ end;
+ return successful
+end;
+
+proc lexer_string(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool;
+var
+ token_end, constructed_string: ^Char;
+ token_length: Word;
+ is_valid: Bool := true;
+ next_char: Char;
+begin
+ while is_valid & ~source_code_empty(source_code) & source_code_head(source_code^) <> '"' do
+ is_valid := lexer_character(source_code, @next_char);
+
+ if is_valid then
+ string_buffer_push(token_content, next_char)
+ end
+ end;
+
+ if is_valid & source_code_expect(source_code, '"') then
+ source_code_advance(source_code)
+ else
+ is_valid := false
+ end;
+ return is_valid
+end;
+
+proc lexer_number(source_code: ^SourceCode, token_content: ^Int);
+begin
+ token_content^ := 0;
+
+ while ~source_code_empty(source_code) & isdigit(cast(source_code_head(source_code^): Int)) <> 0 do
+ token_content^ := token_content^ * 10 + (cast(source_code_head(source_code^): Int) - cast('0': Int));
+
+ source_code_advance(source_code)
+ end
+end;
+
+(* Categorize an identifier. *)
+proc lexer_categorize(token_content: String) -> Token;
+var
+ current_token: Token;
+begin
+ if token_content = "if" then
+ current_token.kind := LexerKind._if
+ elsif token_content = "then" then
+ current_token.kind := LexerKind._then
+ elsif token_content = "else" then
+ current_token.kind := LexerKind._else
+ elsif token_content = "elsif" then
+ current_token.kind := LexerKind._elsif
+ elsif token_content = "while" then
+ current_token.kind := LexerKind._while
+ elsif token_content = "do" then
+ current_token.kind := LexerKind._do
+ elsif token_content = "proc" then
+ current_token.kind := LexerKind._proc
+ elsif token_content = "begin" then
+ current_token.kind := LexerKind._begin
+ elsif token_content = "end" then
+ current_token.kind := LexerKind._end
+ elsif token_content = "extern" then
+ current_token.kind := LexerKind._extern
+ elsif token_content = "const" then
+ current_token.kind := LexerKind._const
+ elsif token_content = "var" then
+ current_token.kind := LexerKind._var
+ elsif token_content = "case" then
+ current_token.kind := LexerKind._case
+ elsif token_content = "of" then
+ current_token.kind := LexerKind._of
+ elsif token_content = "type" then
+ current_token.kind := LexerKind._type
+ elsif token_content = "record" then
+ current_token.kind := LexerKind._record
+ elsif token_content = "union" then
+ current_token.kind := LexerKind._union
+ elsif token_content = "true" then
+ current_token.kind := LexerKind.boolean;
+ current_token.value.boolean_value := true
+ elsif token_content = "false" then
+ current_token.kind := LexerKind.boolean;
+ current_token.value.boolean_value := false
+ elsif token_content = "nil" then
+ current_token.kind := LexerKind.null
+ elsif token_content = "or" then
+ current_token.kind := LexerKind._or
+ elsif token_content = "return" then
+ current_token.kind := LexerKind._return
+ elsif token_content = "cast" then
+ current_token.kind := LexerKind._cast
+ elsif token_content = "defer" then
+ current_token.kind := LexerKind._defer
+ elsif token_content = "program" then
+ current_token.kind := LexerKind._program
+ elsif token_content = "module" then
+ current_token.kind := LexerKind._module
+ elsif token_content = "import" then
+ current_token.kind := LexerKind._import
+ else
+ current_token.kind := LexerKind.identifier;
+ current_token.value.string := string_dup(token_content)
+ end;
+
+ return current_token
+end;
+
+proc lexer_add_token(lexer: ^Tokenizer, token: Token);
+var
+ new_length: Word;
+begin
+ new_length := lexer^.length + 1u;
+ lexer^.data := cast(reallocarray(cast(lexer^.data: Pointer), new_length, #size(Token)): ^Token);
+ (lexer^.data + lexer^.length)^ := token;
+ lexer^.length := new_length
+end;
+
+(* Read the next token from the input. *)
+proc lexer_next(source_code: SourceCode, token_buffer: ^StringBuffer) -> Token;
+var
+ current_token: Token;
+ first_char: Char;
+begin
+ current_token.kind := LexerKind.unknown;
+
+ first_char := source_code_head(source_code);
+
+ if isalpha(cast(first_char: Int)) <> 0 or first_char = '_' then
+ lexer_identifier(@source_code, token_buffer);
+ current_token := lexer_categorize(string_buffer_clear(token_buffer))
+ elsif first_char = '#' then
+ source_code_advance(@source_code);
+ lexer_identifier(@source_code, token_buffer);
+
+ current_token.kind := LexerKind.trait;
+ current_token.value.string := string_dup(string_buffer_clear(token_buffer))
+ elsif isdigit(cast(first_char: Int)) <> 0 then
+ lexer_number(@source_code, @current_token.value.int_value);
+
+ if source_code_expect(@source_code, 'u') then
+ current_token.kind := LexerKind.word;
+ source_code_advance(@source_code)
+ else
+ current_token.kind := LexerKind.integer
+ end
+ elsif first_char = '(' then
+ source_code_advance(@source_code);
+
+ if source_code_empty(@source_code) then
+ current_token.kind := LexerKind.left_paren
+ elsif source_code_head(source_code) = '*' then
+ source_code_advance(@source_code);
+
+ if lexer_comment(@source_code, token_buffer) then
+ current_token.value.string := string_dup(string_buffer_clear(token_buffer));
+ current_token.kind := LexerKind.comment
+ else
+ current_token.kind := LexerKind.unknown
+ end
+ else
+ current_token.kind := LexerKind.left_paren
+ end
+ elsif first_char = ')' then
+ current_token.kind := LexerKind.right_paren;
+ source_code_advance(@source_code)
+ elsif first_char = '\'' then
+ source_code_advance(@source_code);
+
+ if lexer_character(@source_code, @current_token.value.char_value) & source_code_expect(@source_code, '\'') then
+ current_token.kind := LexerKind.character;
+ source_code_advance(@source_code)
+ else
+ current_token.kind := LexerKind.unknown
+ end
+ elsif first_char = '"' then
+ source_code_advance(@source_code);
+
+ if lexer_string(@source_code, token_buffer) then
+ current_token.kind := LexerKind.string;
+ current_token.value.string := string_dup(string_buffer_clear(token_buffer))
+ else
+ current_token.kind := LexerKind.unknown
+ end
+ elsif first_char = '[' then
+ current_token.kind := LexerKind.left_square;
+ source_code_advance(@source_code)
+ elsif first_char = ']' then
+ current_token.kind := LexerKind.right_square;
+ source_code_advance(@source_code)
+ elsif first_char = '>' then
+ source_code_advance(@source_code);
+
+ if source_code_empty(@source_code) then
+ current_token.kind := LexerKind.greater_than
+ elsif source_code_head(source_code) = '=' then
+ current_token.kind := LexerKind.greater_equal;
+ source_code_advance(@source_code)
+ elsif source_code_head(source_code) = '>' then
+ current_token.kind := LexerKind.shift_right;
+ source_code_advance(@source_code)
+ else
+ current_token.kind := LexerKind.greater_than
+ end
+ elsif first_char = '<' then
+ source_code_advance(@source_code);
+
+ if source_code_empty(@source_code) then
+ current_token.kind := LexerKind.less_than
+ elsif source_code_head(source_code) = '=' then
+ current_token.kind := LexerKind.less_equal;
+ source_code_advance(@source_code)
+ elsif source_code_head(source_code) = '<' then
+ current_token.kind := LexerKind.shift_left;
+ source_code_advance(@source_code)
+ elsif source_code_head(source_code) = '>' then
+ current_token.kind := LexerKind.not_equal;
+ source_code_advance(@source_code)
+ else
+ current_token.kind := LexerKind.less_than
+ end
+ elsif first_char = '=' then
+ current_token.kind := LexerKind.equal;
+ source_code_advance(@source_code)
+ elsif first_char = ';' then
+ current_token.kind := LexerKind.semicolon;
+ source_code_advance(@source_code)
+ elsif first_char = '.' then
+ current_token.kind := LexerKind.dot;
+ source_code_advance(@source_code)
+ elsif first_char = ',' then
+ current_token.kind := LexerKind.comma;
+ source_code_advance(@source_code)
+ elsif first_char = '+' then
+ current_token.kind := LexerKind.plus;
+ source_code_advance(@source_code)
+ elsif first_char = '-' then
+ source_code_advance(@source_code);
+
+ if source_code_empty(@source_code) then
+ current_token.kind := LexerKind.minus
+ elsif source_code_head(source_code) = '>' then
+ current_token.kind := LexerKind.arrow;
+ source_code_advance(@source_code)
+ else
+ current_token.kind := LexerKind.minus
+ end
+ elsif first_char = '*' then
+ current_token.kind := LexerKind.multiplication;
+ source_code_advance(@source_code)
+ elsif first_char = '/' then
+ current_token.kind := LexerKind.division;
+ source_code_advance(@source_code)
+ elsif first_char = '%' then
+ current_token.kind := LexerKind.remainder;
+ source_code_advance(@source_code)
+ elsif first_char = ':' then
+ source_code_advance(@source_code);
+
+ if source_code_empty(@source_code) then
+ current_token.kind := LexerKind.colon
+ elsif source_code_head(source_code) = '=' then
+ current_token.kind := LexerKind.assignment;
+ source_code_advance(@source_code)
+ else
+ current_token.kind := LexerKind.colon
+ end
+ elsif first_char = '^' then
+ current_token.kind := LexerKind.hat;
+ source_code_advance(@source_code)
+ elsif first_char = '@' then
+ current_token.kind := LexerKind.at;
+ source_code_advance(@source_code)
+ elsif first_char = '!' then
+ current_token.kind := LexerKind.exclamation;
+ source_code_advance(@source_code)
+ elsif first_char = '&' then
+ current_token.kind := LexerKind.and;
+ source_code_advance(@source_code)
+ elsif first_char = '~' then
+ current_token.kind := LexerKind.not;
+ source_code_advance(@source_code)
+ elsif first_char = '|' then
+ current_token.kind := LexerKind.pipe;
+ source_code_advance(@source_code)
+ else
+ current_token.kind := LexerKind.unknown;
+ source_code_advance(@source_code)
+ end;
+
+ return current_token
+end;
+
+(* Split the source text into tokens. *)
+proc lexer_text(source_code: SourceCode) -> Tokenizer;
+var
+ current_token: Token;
+ token_buffer: StringBuffer;
+ lexer: Tokenizer;
+begin
+ lexer := Tokenizer(0u, nil);
+ token_buffer := string_buffer_new();
+
+ lexer_spaces(@source_code);
+
+ while ~source_code_empty(@source_code) do
+ current_token := lexer_next(source_code, @token_buffer);
+
+ if current_token.kind <> LexerKind.unknown then
+ lexer_add_token(@lexer, current_token);
+ lexer_spaces(@source_code)
+ else
+ write_s("Lexical analysis error on \"");
+ write_c(source_code_head(source_code));
+ write_s("\".\n")
+ end
+ end;
+
+ return lexer
+end;
+
+(*
+ Parser.
+*)
+
+proc parse(tokens: ^Token, tokens_size: Word);
+var
+ current_token: ^Token;
+ i: Word := 0u;
+begin
+ while i < tokens_size do
+ current_token := tokens + i;
+
+ case current_token^.kind of
+ LexerKind._if:
+ write_s("IF")
+ | LexerKind._then:
+ write_s("THEN")
+ | LexerKind._else:
+ write_s("ELSE")
+ | LexerKind._elsif:
+ write_s("ELSIF")
+ | LexerKind._while:
+ write_s("WHILE")
+ | LexerKind._do:
+ write_s("DO")
+ | LexerKind._proc:
+ write_s("PROC")
+ | LexerKind._begin:
+ write_s("BEGIN")
+ | LexerKind._end:
+ write_s("END")
+ | LexerKind._extern:
+ write_s("EXTERN")
+ | LexerKind._const:
+ write_s("CONST")
+ | LexerKind._var:
+ write_s("VAR")
+ | LexerKind._case:
+ write_s("CASE")
+ | LexerKind._of:
+ write_s("OF")
+ | LexerKind._type:
+ write_s("TYPE")
+ | LexerKind._record:
+ write_s("RECORD")
+ | LexerKind._union:
+ write_s("UNION")
+ | LexerKind.pipe:
+ write_s("|")
+ | LexerKind.to:
+ write_s("TO")
+ | LexerKind.boolean:
+ write_s("BOOLEAN<");
+ write_b(current_token^.value.boolean_value);
+ write_c('>')
+ | LexerKind.null:
+ write_s("NIL")
+ | LexerKind.and:
+ write_s("&")
+ | LexerKind._or:
+ write_s("OR")
+ | LexerKind.not:
+ write_s("~")
+ | LexerKind._return:
+ write_s("RETURN")
+ | LexerKind._cast:
+ write_s("CAST")
+ | LexerKind.shift_left:
+ write_s("<<")
+ | LexerKind.shift_right:
+ write_s(">>")
+ | LexerKind.identifier:
+ write_c('<');
+ write_s(current_token^.value.string);
+ write_c('>')
+ | LexerKind.trait:
+ write_c('#');
+ write_s(current_token^.value.string)
+ | LexerKind.left_paren:
+ write_s("(")
+ | LexerKind.right_paren:
+ write_s(")")
+ | LexerKind.left_square:
+ write_s("[")
+ | LexerKind.right_square:
+ write_s("]")
+ | LexerKind.greater_equal:
+ write_s(">=")
+ | LexerKind.less_equal:
+ write_s("<=")
+ | LexerKind.greater_than:
+ write_s(">")
+ | LexerKind.less_than:
+ write_s("<")
+ | LexerKind.equal:
+ write_s("=")
+ | LexerKind.not_equal:
+ write_s("<>")
+ | LexerKind.semicolon:
+ write_c(';')
+ | LexerKind.dot:
+ write_c('.')
+ | LexerKind.comma:
+ write_c(',')
+ | LexerKind.plus:
+ write_c('+')
+ | LexerKind.minus:
+ write_c('-')
+ | LexerKind.multiplication:
+ write_c('*')
+ | LexerKind.division:
+ write_c('/')
+ | LexerKind.remainder:
+ write_c('%')
+ | LexerKind.assignment:
+ write_s(":=")
+ | LexerKind.colon:
+ write_c(':')
+ | LexerKind.hat:
+ write_c('^')
+ | LexerKind.at:
+ write_c('@')
+ | LexerKind.comment:
+ write_s("(* COMMENT *)")
+ | LexerKind.integer:
+ write_c('<');
+ write_i(current_token^.value.int_value);
+ write_c('>')
+ | LexerKind.word:
+ write_c('<');
+ write_i(current_token^.value.int_value);
+ write_s("u>")
+ | LexerKind.character:
+ write_c('<');
+ write_i(cast(current_token^.value.char_value: Int));
+ write_s("c>")
+ | LexerKind.string:
+ write_s("\"...\"")
+ | LexerKind._defer:
+ write_s("DEFER")
+ | LexerKind.exclamation:
+ write_c('!')
+ | LexerKind.arrow:
+ write_s("->")
+ | LexerKind._program:
+ write_s("PROGRAM")
+ | LexerKind._module:
+ write_s("MODULE")
+ | LexerKind._import:
+ write_s("IMPORT")
+ else
+ write_s("UNKNOWN<");
+ write_i(cast(current_token^.kind: Int));
+ write_c('>')
+ end;
+ write_c(' ');
+
+ i := i + 1u
+ end;
+ write_c('\n')
+end;
+
+(*
+ Compilation entry.
+*)
+
+proc compile_in_stages(command_line: ^CommandLine, source_code: SourceCode) -> Int;
+var
+ return_code: Int := 0;
+ lexer: Tokenizer;
+begin
+ if command_line^.lex or command_line^.parse then
+ lexer := lexer_text(source_code)
+ end;
+ if command_line^.parse then
+ parse(lexer.data, lexer.length)
+ end;
+
+ return return_code
+end;
+
+proc process(argc: Int, argv: ^^Char) -> Int;
+var
+ tokens: ^Token;
+ tokens_size: Word;
+ source_code: SourceCode;
+ command_line: ^CommandLine;
+ return_code: Int := 0;
+ source_file: ^SourceFile;
+begin
+ command_line := parse_command_line(argc, argv);
+ if command_line = nil then
+ return_code := 2
+ end;
+
+ if return_code = 0 then
+ source_file := read_source(command_line^.input);
+
+ if source_file = nil then
+ perror(command_line^.input);
+ return_code := 3
+ end
+ end;
+
+ if return_code = 0 then
+ defer
+ fclose(source_file^.handle)
+ end;
+
+ source_code.position := TextLocation(1u, 1u);
+ source_code.input := cast(source_file: Pointer);
+ source_code.empty := source_file_empty;
+ source_code.head := source_file_head;
+ source_code.advance := source_file_advance;
+
+ return_code := compile_in_stages(command_line, source_code)
+ end;
+ return return_code
+end;
+
+ return process(count, parameters)
+end.