diff options
| author | Eugen Wissner <belka@caraus.de> | 2026-02-15 04:10:38 +0100 |
|---|---|---|
| committer | Eugen Wissner <belka@caraus.de> | 2026-02-15 04:10:38 +0100 |
| commit | 5959fbb5524bbeb05a96eb15aba59e961a3efcb7 (patch) | |
| tree | 811be9bb8fba9bec6ae549c50f9cf92000b259c9 /source | |
| download | elna-5959fbb5524bbeb05a96eb15aba59e961a3efcb7.tar.gz | |
Initial commit
Diffstat (limited to 'source')
| -rw-r--r-- | source/Parser.elna | 1174 | ||||
| -rw-r--r-- | source/Transpiler.elna | 631 | ||||
| -rw-r--r-- | source/cctype.elna | 14 | ||||
| -rw-r--r-- | source/command_line_interface.elna | 93 | ||||
| -rw-r--r-- | source/common.elna | 72 | ||||
| -rw-r--r-- | source/cstdio.elna | 29 | ||||
| -rw-r--r-- | source/cstdlib.elna | 15 | ||||
| -rw-r--r-- | source/cstring.elna | 15 | ||||
| -rw-r--r-- | source/lexer.elna | 952 | ||||
| -rw-r--r-- | source/main.elna | 841 |
10 files changed, 3836 insertions, 0 deletions
diff --git a/source/Parser.elna b/source/Parser.elna new file mode 100644 index 0000000..1225750 --- /dev/null +++ b/source/Parser.elna @@ -0,0 +1,1174 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, + v. 2.0. If a copy of the MPL was not distributed with this file, You can + obtain one at https://mozilla.org/MPL/2.0/. *) +module; + +import cstdlib, common, Lexer; + +type + Parser = record + lexer: ^Lexer + end; + + AstLiteralKind* = ( + integer, + string, + null, + boolean + ); + AstLiteral* = record + kind: AstLiteralKind; + value: union + integer: Int; + string: String; + boolean: Bool + end + end; + + AstUnaryOperator* = ( + reference, + not, + minus + ); + AstBinaryOperator* = ( + sum, + subtraction, + multiplication, + division, + remainder, + equals, + not_equals, + less, + greater, + less_equal, + greater_equal, + disjunction, + conjunction, + exclusive_disjunction, + shift_left, + shift_right + ); + + AstExpressionKind* = ( + literal, + identifier, + array_access, + dereference, + field_access, + unary, + binary, + call + ); + AstExpression* = record + kind: AstExpressionKind + value: union + literal: ^AstLiteral; + identifier: Identifier; + reference: ^AstExpression; + array_access: record + array: ^AstExpression; + index: ^AstExpression + end; + field_access: record + aggregate: ^AstExpression; + field: Identifier + end; + unary: record + operator: AstUnaryOperator; + operand: ^AstExpression + end; + binary: record + operator: AstBinaryOperator; + lhs: ^AstExpression; + rhs: ^AstExpression + end; + call: record + callable: ^AstExpression; + argument_count: Word; + arguments: ^^AstExpression + end + end + end; + + ConditionalStatement = record + condition: ^AstExpression; + branch: AstCompoundStatement + end; + + AstStatementKind* = ( + if_statement, + while_statement, + assignment_statement, + return_statement, + call_statement + ); + AstStatement* = record + kind: AstStatementKind + value: union + if_statement: ConditionalStatement; + while_statement: ConditionalStatement; + assignment_statement: record + assignee: ^AstExpression; + assignment: ^AstExpression + end; + return_statement: ^AstExpression; + call_statement: ^AstExpression + end + end; + AstCompoundStatement* = record + count: Word; + statements: ^^AstStatement + end; + + AstImportStatement* = record + package: Identifier; + symbols: ^Identifier + end; + + AstConstantDeclaration* = record + constant_name: Identifier; + constant_value: Int + end; + + AstFieldDeclaration* = record + field_name: Identifier; + field_type: ^AstTypeExpression + end; + + AstTypeExpressionKind* = ( + named_expression, + record_expression, + enumeration_expression, + array_expression, + pointer_expression, + procedure_expression + ); + AstTypeExpression* = record + kind: AstTypeExpressionKind; + value: union + name: Identifier; + cases: ^Identifier; + target: ^AstTypeExpression; + fields: ^AstFieldDeclaration; + array_expression: record + base: ^AstTypeExpression; + length: Word + end; + parameters: ^^AstTypeExpression + end + end; + + AstTypedDeclaration* = record + identifier: Identifier; + type_expression: ^AstTypeExpression + end; + + AstVariableDeclaration* = record + variable_name: Identifier; + variable_type: ^AstTypeExpression + end; + + AstProcedureDeclaration* = record + name: Identifier; + parameter_count: Word; + parameters: ^AstTypedDeclaration; + return_type: ^AstTypeExpression; + constants: ^^AstConstantDeclaration; + variables: ^^AstVariableDeclaration; + statements: AstCompoundStatement + end; + + AstModule* = record + main: Bool; + imports: ^^AstImportStatement; + constants: ^^AstConstantDeclaration; + types: ^^AstTypedDeclaration; + variables: ^^AstVariableDeclaration; + procedures: ^^AstProcedureDeclaration; + statements: AstCompoundStatement + end; + +(* Calls lexer_lex() but skips the comments. *) +proc parser_lex(lexer: ^Lexer) -> LexerToken; +var + result: LexerToken; +begin + result := lexer_lex(lexer); + + while result.kind = lexerKindComment do + result := lexer_lex(lexer) + end; + + return result +end; + +proc parse_type_fields(parser: ^Parser) -> ^AstFieldDeclaration; +var + token: LexerToken; + field_declarations: ^AstFieldDeclaration; + field_count: Word; + current_field: ^AstFieldDeclaration; +begin + field_declarations := malloc(#size(AstFieldDeclaration)); + token := parser_lex(parser^.lexer); + field_count := 0; + + while token.kind <> lexerKindEnd do + field_count := field_count + 2u; + + field_declarations := realloc(field_declarations, #size(AstFieldDeclaration) * field_count); + field_count := field_count - 1u; + current_field := field_declarations + (field_count - 1u); + + token := parser_lex(parser^.lexer); + + current_field^.field_name := token.identifierKind; + + token := parser_lex(parser^.lexer); + current_field^.field_type := parse_type_expression(parser); + token := parser_lex(parser^.lexer); + + if token.kind = lexerKindSemicolon then + token := parser_lex(parser^.lexer) + end + end; + current_field := current_field + 1; + memset(current_field, 0, #size(AstFieldDeclaration)); + + return field_declarations +end; + +proc parse_record_type(parser: ^Parser) -> ^AstTypeExpression; +var + result: ^AstTypeExpression; +begin + NEW(result); + result^.kind := AstTypeExpressionKind.record_expression; + result^.fields := parse_type_fields(parser); + + return result +end; + +proc parse_pointer_type(parser: ^Parser) -> ^AstTypeExpression; +var + token: LexerToken; + result: ^AstTypeExpression; +begin + NEW(result); + result^.kind := AstTypeExpressionKind.pointer_expression; + + token := lexer_current(parser^.lexer); + + if token.kind = lexerKindPointer then + token := parser_lex(parser^.lexer) + end; + token := lexer_current(parser^.lexer); + result^.target := parse_type_expression(parser); + + return result +end; + +proc parse_array_type(parser: ^Parser) -> ^AstTypeExpression; +var + token: LexerToken; + buffer: [20]Char; + result: ^AstTypeExpression; +begin + NEW(result); + result^.kind := AstTypeExpressionKind.array_expression; + result^.array_expression.length := 0u; + + token := lexer_current(parser^.lexer); + + if token.kind = lexerKindArray then + token := parser_lex(parser^.lexer) + end; + if token.kind <> lexerKindOf then + token := parser_lex(parser^.lexer); + + result^.array_expression.length := token.integerKind; + + token := parser_lex(parser^.lexer) + end; + token := parser_lex(parser^.lexer); + result^.array_expression.base := parse_type_expression(parser); + + return result +end; + +proc parse_enumeration_type(parser: ^Parser) -> ^AstTypeExpression; +var + token: LexerToken; + result: ^AstTypeExpression; + current_case: ^Identifier; + case_count: Word; +begin + NEW(result); + result^.kind := AstTypeExpressionKind.enumeration_expression; + + case_count := 1u; + result^.cases := malloc(#size(Identifier) * 2); + token := parser_lex(parser^.lexer); + current_case := result^.cases; + current_case^ := token.identifierKind; + + token := parser_lex(parser^.lexer); + + while token.kind = lexerKindComma do + token := parser_lex(parser^.lexer); + + case_count := case_count + 2u; + + result^.cases := realloc(result^.cases, #size(Identifier) * case_count); + case_count := case_count - 1u; + current_case := result^.cases + (case_count - 1u); + current_case^ := token.identifierKind; + + token := parser_lex(parser^.lexer) + end; + current_case := current_case + 1; + memset(current_case, 0, #size(Identifier)); + + return result +end; + +proc parse_named_type(parser: ^Parser) -> ^AstTypeExpression; +var + token: LexerToken; + result: ^AstTypeExpression; +begin + token := lexer_current(parser^.lexer); + NEW(result); + + result^.kind := AstTypeExpressionKind.named_expression; + result^.name := token.identifierKind; + + return result +end; + +proc parse_procedure_type(parser: ^Parser) -> ^AstTypeExpression; +var + token: LexerToken; + result: ^AstTypeExpression; + current_parameter: ^^AstTypeExpression; + parameter_count: Word; +begin + parameter_count := 0u; + NEW(result); + result^.kind := AstTypeExpressionKind.procedure_expression; + + result^.parameters := malloc(#size(^AstTypeExpression)); + + token := parser_lex(parser^.lexer); + token := parser_lex(parser^.lexer); + + while token.kind <> lexerKindRightParen do + parameter_count := parameter_count + 2u; + + result^.parameters := realloc(result^.parameters, #size(^AstTypeExpression) * parameter_count); + parameter_count := parameter_count - 1u; + current_parameter := result^.parameters + (parameter_count - 1u); + + current_parameter^ := parse_type_expression(parser); + + token := parser_lex(parser^.lexer); + if token.kind = lexerKindComma then + token := parser_lex(parser^.lexer) + end + end; + current_parameter := result^.parameters + parameter_count; + current_parameter^ := nil; + + return result +end; + +proc parse_type_expression(parser: ^Parser) -> ^AstTypeExpression; +var + token: LexerToken; + result: ^AstTypeExpression; +begin + result := nil; + token := lexer_current(parser^.lexer); + + if token.kind = lexerKindRecord then + result := parse_record_type(parser) + end; + if token.kind = lexerKindLeftParen then + result := parse_enumeration_type(parser) + end; + if (token.kind = lexerKindArray) or (token.kind = lexerKindLeftSquare) then + result := parse_array_type(parser) + end; + if token.kind = lexerKindHat then + result := parse_pointer_type(parser) + end; + if token.kind = lexerKindProc then + result := parse_procedure_type(parser) + end; + if token.kind = lexerKindIdentifier then + result := parse_named_type(parser) + end; + return result +end; + +proc parse_type_declaration(parser: ^Parser) -> ^AstTypedDeclaration; +var + token: LexerToken; + result: ^AstTypedDeclaration; +begin + token := lexer_current(parser^.lexer); + + NEW(result); + result^.identifier := token.identifierKind; + + token := parser_lex(parser^.lexer); + token := parser_lex(parser^.lexer); + + result^.type_expression := parse_type_expression(parser); + token := parser_lex(parser^.lexer); + + return result +end; + +proc parse_type_part(parser: ^Parser) -> ^^AstTypedDeclaration; +var + token: LexerToken; + result: ^^AstTypedDeclaration; + current_declaration: ^^AstTypedDeclaration; + declaration_count: Word; +begin + token := lexer_current(parser^.lexer); + + result := malloc(#size(^AstTypedDeclaration)); + current_declaration := result; + declaration_count := 0u; + + if token.kind = lexerKindType then + token := parser_lex(parser^.lexer); + + while token.kind = lexerKindIdentifier do + declaration_count := declaration_count + 1u; + + result := realloc(result, #size(^AstTypedDeclaration) * (declaration_count + 1)); + current_declaration := result + (declaration_count - 1u); + + current_declaration^ := parse_type_declaration(parser); + token := parser_lex(parser^.lexer) + end + end; + if declaration_count <> 0u then + current_declaration := current_declaration + 1 + end; + current_declaration^ := nil; + + return result +end; + +proc parse_variable_declaration(parser: ^Parser) -> ^AstVariableDeclaration; +var + token: LexerToken; + result: ^AstVariableDeclaration; +begin + NEW(result); + + token := lexer_current(parser^.lexer); + result^.variable_name := token.identifierKind; + + token := parser_lex(parser^.lexer); + + token := parser_lex(parser^.lexer); + result^.variable_type := parse_type_expression(parser); + + token := parser_lex(parser^.lexer); + return result +end; + +proc parse_variable_part(parser: ^Parser) -> ^^AstVariableDeclaration; +var + token: LexerToken; + result: ^^AstVariableDeclaration; + current_declaration: ^^AstVariableDeclaration; + declaration_count: Word; +begin + token := lexer_current(parser^.lexer); + + result := malloc(#size(^AstVariableDeclaration)); + current_declaration := result; + declaration_count := 0u; + + if token.kind = lexerKindVar then + token := parser_lex(parser^.lexer); + + while token.kind = lexerKindIdentifier do + declaration_count := declaration_count + 1u; + + result := realloc(result, #size(^AstVariableDeclaration) * (declaration_count + 1)); + current_declaration := result + (declaration_count - 1u); + + current_declaration^ := parse_variable_declaration(parser); + token := parser_lex(parser^.lexer) + end + end; + if declaration_count <> 0 then + current_declaration := current_declaration + 1 + end; + current_declaration^ := nil; + + return result +end; + +proc parse_constant_declaration(parser: ^Parser) -> ^AstConstantDeclaration; +var + token: LexerToken; + result: ^AstConstantDeclaration; +begin + NEW(result); + + token := lexer_current(parser^.lexer); + result^.constant_name := token.identifierKind; + + token := parser_lex(parser^.lexer); + + token := parser_lex(parser^.lexer); + result^.constant_value := token.integerKind; + + token := parser_lex(parser^.lexer); + + return result +end; + +proc parse_constant_part(parser: ^Parser) -> ^^AstConstantDeclaration; +var + token: LexerToken; + result: ^^AstConstantDeclaration; + current_declaration: ^^AstConstantDeclaration; + declaration_count: Word; +begin + token := lexer_current(parser^.lexer); + + result := malloc(#size(^AstConstantDeclaration)); + current_declaration := result; + declaration_count := 0u; + + if token.kind = lexerKindConst then + token := parser_lex(parser^.lexer); + + while token.kind = lexerKindIdentifier do + declaration_count := declaration_count + 1u; + + result := realloc(result, #size(^AstConstantDeclaration) * (declaration_count + 1u)); + current_declaration := result + (declaration_count - 1u); + + current_declaration^ := parse_constant_declaration(parser); + token := parser_lex(parser^.lexer) + end + end; + if declaration_count <> 0 then + current_declaration := current_declaration + 1 + end; + current_declaration^ := nil; + + return result +end; + +proc parse_import_statement(parser: ^Parser) -> ^AstImportStatement; +var + result: ^AstImportStatement; + token: LexerToken; + symbol_count: Word; + current_symbol: ^Identifier; +begin + NEW(result); + symbol_count := 1u; + + token := parser_lex(parser^.lexer); + result^.package := token.identifierKind; + + token := parser_lex(parser^.lexer); + result^.symbols := malloc(#size(Identifier) * 2); + + current_symbol := result^.symbols; + + token := parser_lex(parser^.lexer); + current_symbol^ := token.identifierKind; + + token := parser_lex(parser^.lexer); + while token.kind <> lexerKindSemicolon do + token := parser_lex(parser^.lexer); + symbol_count := symbol_count + 1u; + + result^.symbols := realloc(result^.symbols, #size(Identifier) * (symbol_count + 1u)); + current_symbol := result^.symbols + (symbol_count - 1u); + + current_symbol^ := token.identifierKind; + token := parser_lex(parser^.lexer) + end; + current_symbol := current_symbol + 1; + memset(current_symbol, 0, #size(Identifier)); + + token := parser_lex(parser^.lexer); + + return result +end; + +proc parse_import_part(parser: ^Parser) -> ^^AstImportStatement; +var + token: LexerToken; + import_statement: ^^AstImportStatement; + result: ^^AstImportStatement; + import_count: Word; +begin + token := lexer_current(parser^.lexer); + result := malloc(#size(^AstImportStatement)); + import_statement := result; + import_count := 0u; + + while token.kind = lexerKindFrom do + import_count := import_count + 1u; + + result := realloc(result, #size(^AstImportStatement) * (import_count + 1u)); + import_statement := result + (import_count - 1u); + + import_statement^ := parse_import_statement(parser); + token := lexer_current(parser^.lexer) + end; + if import_count > 0u then + import_statement := import_count + 1 + end; + import_statement^ := nil; + + return result +end; + +proc parse_literal(parser: ^Parser) -> ^AstLiteral; +var + literal: ^AstLiteral; + token: LexerToken; +begin + literal := nil; + token := lexer_current(parser^.lexer); + + if token.kind = lexerKindInteger then + NEW(literal); + + literal^.kind := AstLiteralKind.integer; + literal^.integer := token.integerKind + end; + if (token.kind = lexerKindCharacter) or (token.kind = lexerKindString) then + NEW(literal); + + literal^.kind := AstLiteralKind.string; + literal^.string := token.stringKind + end; + if token.kind = lexerKindNull then + NEW(literal); + + literal^.kind := AstLiteralKind.null + end; + if token.kind = lexerKindBoolean then + NEW(literal); + + literal^.kind := AstLiteralKind.boolean; + literal^.boolean := token.booleanKind + end; + if literal <> nil then + token := parser_lex(parser^.lexer) + end; + + return literal +end; + +proc parse_factor(parser: ^Parser) -> ^AstExpression; +var + next_token: LexerToken; + result: ^AstExpression; + literal: ^AstLiteral; +begin + result := nil; + next_token := lexer_current(parser^.lexer); + + literal := parse_literal(parser); + + if (result = nil) & (literal <> nil) then + NEW(result); + + result^.kind := AstExpressionKind.literal; + result^.literal := literal + end; + if (result = nil) & (next_token.kind = lexerKindMinus) then + NEW(result); + next_token := parser_lex(parser^.lexer); + + result^.kind := AstExpressionKind.unary; + result^.unary.operator := AstUnaryOperator.minus; + result^.unary.operand := parse_factor(parser) + end; + if (result = nil) & (next_token.kind = lexerKindTilde) then + NEW(result); + next_token := parser_lex(parser^.lexer); + + result^.kind := AstExpressionKind.unary; + result^.unary.operator := AstUnaryOperator.not; + result^.unary.operand := parse_factor(parser) + end; + if (result = nil) & (next_token.kind = lexerKindLeftParen) then + next_token := parser_lex(parser^.lexer); + result := parse_expression(parser); + if result <> nil then + next_token := parser_lex(parser^.lexer) + end + end; + if (result = nil) & (next_token.kind = lexerKindIdentifier) then + NEW(result); + + result^.kind := AstExpressionKind.identifier; + result^.identifier := next_token.identifierKind; + + next_token := parser_lex(parser^.lexer) + end; + + return result +end; + +proc parse_designator(parser: ^Parser) -> ^AstExpression; +var + next_token: LexerToken; + inner_expression: ^AstExpression; + designator: ^AstExpression; + arguments: ^^AstExpression; + handled: Bool; +begin + designator := parse_factor(parser); + handled := designator <> nil; + next_token := lexer_current(parser^.lexer); + + while handled do + inner_expression := designator; + handled := false; + + if ~handled & (next_token.kind = lexerKindHat) then + NEW(designator); + + designator^.kind := AstExpressionKind.dereference; + designator^.reference := inner_expression; + + next_token := parser_lex(parser^.lexer); + handled := true + end; + if ~handled & (next_token.kind = lexerKindLeftSquare) then + NEW(designator); + next_token := parser_lex(parser^.lexer); + + designator^.kind := AstExpressionKind.array_access; + designator^.array_access.array := inner_expression; + designator^.array_access.index := parse_expression(parser); + + next_token := parser_lex(parser^.lexer); + handled := true + end; + if ~handled & (next_token.kind = lexerKindDot) then + NEW(designator); + next_token := parser_lex(parser^.lexer); + + designator^.kind := AstExpressionKind.field_access; + designator^.field_access.aggregate := inner_expression; + designator^.field_access.field := next_token.identifierKind; + + next_token := parser_lex(parser^.lexer); + handled := true + end; + if ~handled & (next_token.kind = lexerKindLeftParen) then + NEW(designator); + next_token := parser_lex(parser^.lexer); + + designator^.kind := AstExpressionKind.call; + designator^.call.callable := inner_expression; + designator^.call.argument_count := 0; + designator^.call.arguments := nil; + + if next_token.kind <> lexerKindRightParen then + designator^.arguments := malloc(#size(^AstExpression)); + designator^.argument_count := 1; + designator^.arguments^ := parse_expression(parser); + + next_token := lexer_current(parser^.lexer); + + while next_token.kind = lexerKindComma do + next_token := parser_lex(parser^.lexer); + + designator^.argument_count := designator^.argument_count + 1; + designator^.arguments := realloc(designator^.arguments, #size(^AstExpression) * designator^.argument_count); + arguments := designator^.arguments + (designator^.argument_count - 1u); + arguments^ := parse_expression(parser); + + next_token := lexer_current(parser^.lexer) + end + end; + + next_token := parser_lex(parser^.lexer); + handled := true + end + end; + + return designator +end; + +proc parse_binary_expression(parser: ^Parser, left: ^AstExpression, operator: AstBinaryOperator) -> ^AstExpression; +var + next_token: LexerToken; + result: ^AstExpression; + right: ^AstExpression; +begin + next_token := parser_lex(parser^.lexer); + right := parse_designator(parser); + result := nil; + + if right <> nil then + NEW(result); + result^.kind := AstExpressionKind.binary; + result^.binary.operator := operator; + result^.binary.lhs := left; + result^.binary.rhs := right + end; + + return result +end; + +proc parse_expression(parser: ^Parser) -> ^AstExpression; +var + next_token: LexerToken; + left: ^AstExpression; + result: ^AstExpression; + written_bytes: Word; +begin + left := parse_designator(parser); + result := nil; + next_token := lexer_current(parser^.lexer); + + if left <> nil then + if (result = nil) & (next_token.kind = lexerKindNotEqual) then + result := parse_binary_expression(parser, left, AstBinaryOperator.not_equals) + end; + if (result = nil) & (next_token.kind = lexerKindEqual) then + result := parse_binary_expression(parser, left, AstBinaryOperator.equals) + end; + if (result = nil) & (next_token.kind = lexerKindGreaterThan) then + result := parse_binary_expression(parser, left, AstBinaryOperator.greater) + end; + if (result = nil) & (next_token.kind = lexerKindLessThan) then + result := parse_binary_expression(parser, left, AstBinaryOperator.less) + end; + if (result = nil) & (next_token.kind = lexerKindGreaterEqual) then + result := parse_binary_expression(parser, left, AstBinaryOperator.greater_equal) + end; + if (result = nil) & (next_token.kind = lexerKindLessEqual) then + result := parse_binary_expression(parser, left, AstBinaryOperator.less_equal) + end; + if (result = nil) & (next_token.kind = lexerKindAnd) then + result := parse_binary_expression(parser, left, AstBinaryOperator.conjunction) + end; + if (result = nil) & (next_token.kind = lexerKindOr) then + result := parse_binary_expression(parser, left, AstBinaryOperator.disjunction) + end; + if (result = nil) & (next_token.kind = lexerKindMinus) then + result := parse_binary_expression(parser, left, AstBinaryOperator.subtraction) + end; + if (result = nil) & (next_token.kind = lexerKindPlus) then + result := parse_binary_expression(parser, left, AstBinaryOperator.sum) + end; + if (result = nil) & (next_token.kind = lexerKindAsterisk) then + result := parse_binary_expression(parser, left, AstBinaryOperator.multiplication) + end + end; + if (result = nil) & (left <> nil) then + result := left + end; + + return result +end; + +proc parse_return_statement(parser: ^Parser) -> ^AstStatement; +var + token: LexerToken; + result: ^AstStatement; +begin + NEW(result); + result^.kind := AstStatementKind.return_statement; + + token := parser_lex(parser^.lexer); + result^.return_statement := parse_expression(parser); + + return result +end; + +proc parse_assignment_statement(parser: ^Parser, assignee: ^AstExpression) -> ^AstStatement; +var + token: LexerToken; + result: ^AstStatement; +begin + NEW(result); + result^.kind := AstStatementKind.assignment_statement; + result^.assignment_statement.assignee := assignee; + + token := parser_lex(parser^.lexer); + result^.assignment_statement.assignment := parse_expression(parser); + + return result +end; + +proc parse_call_statement(parser: ^Parser, call: ^AstExpression) -> ^AstStatement; +var + result: ^AstStatement; +begin + NEW(result); + result^.kind := AstStatementKind.call_statement; + result^.call_statement := call; + + return result +end; + +proc parse_compound_statement(parser: ^Parser) -> AstCompoundStatement; +var + result: AstCompoundStatement; + token: LexerToken; + current_statement: ^^AstStatement; + old_count: Word; +begin + result.count := 0u; + result.statements := nil; + + token := lexer_current(parser^.lexer); + + while token.kind <> lexerKindEnd do + old_count := result.count; + result.count := result.count + 1u; + + result.statements := realloc(result.statements, #size(^AstStatement) * result.count); + current_statement := result.statements + old_count; + + current_statement^ := parse_statement(parser); + + token := lexer_current(parser^.lexer) + end; + + return result +end; + +proc parse_statement(parser: ^Parser) -> ^AstStatement; +var + token: LexerToken; + statement: ^AstStatement; + designator: ^AstExpression; +begin + statement := nil; + token := parser_lex(parser^.lexer); + + if token.kind = lexerKindIf then + statement := parse_if_statement(parser) + end; + if token.kind = lexerKindWhile then + statement := parse_while_statement(parser) + end; + if token.kind = lexerKindReturn then + statement := parse_return_statement(parser) + end; + if token.kind = lexerKindIdentifier then + designator := parse_designator(parser); + token := lexer_current(parser^.lexer); + + if token.kind = lexerKindAssignment then + statement := parse_assignment_statement(parser, designator) + end; + if token.kind <> lexerKindAssignment then + statement := parse_call_statement(parser, designator) + end + end; + return statement +end; + +proc parse_if_statement(parser: ^Parser) -> ^AstStatement; +var + token: LexerToken; + result: ^AstStatement; +begin + NEW(result); + result^.kind := AstStatementKind.if_statement; + + token := parser_lex(parser^.lexer); + result^.if_statement.condition := parse_expression(parser); + result^.if_statement.branch := parse_compound_statement(parser); + + token := parser_lex(parser^.lexer); + return result +end; + +proc parse_while_statement(parser: ^Parser) -> ^AstStatement; +var + token: LexerToken; + result: ^AstStatement; +begin + NEW(result); + result^.kind := AstStatementKind.while_statement; + + token := parser_lex(parser^.lexer); + result^.while_statement.condition := parse_expression(parser); + result^.while_statement.body := parse_compound_statement(parser); + + token := parser_lex(parser^.lexer); + return result +end; + +proc parse_statement_part(parser: ^Parser) -> AstCompoundStatement; +var + token: LexerToken; + compound: AstCompoundStatement; +begin + compound.count := 0; + compound.statements := nil; + token := lexer_current(parser^.lexer); + + if token.kind = lexerKindBegin then + compound := parse_compound_statement(parser) + end; + + return compound +end; + +proc parse_procedure_heading(parser: ^Parser) -> ^AstProcedureDeclaration; +var + token: LexerToken; + declaration: ^AstProcedureDeclaration; + parameter_index: Word; + current_parameter: ^AstTypedDeclaration; +begin + NEW(declaration); + + token := parser_lex(parser^.lexer); + declaration^.name := token.identifierKind; + + token := parser_lex(parser^.lexer); + + declaration^.parameters := nil; + declaration^.parameter_count := 0u; + + token := parser_lex(parser^.lexer); + while token.kind <> lexerKindRightParen do + parameter_index := declaration^.parameter_count; + declaration^.parameter_count := declaration^.parameter_count + 1; + declaration^.parameters := realloc(declaration^.parameters, #size(AstTypedDeclaration) * declaration^.parameter_count); + + current_parameter := declaration^.parameters + parameter_index; + + current_parameter^.identifier := token.identifierKind; + + token := parser_lex(parser^.lexer); + token := parser_lex(parser^.lexer); + + current_parameter^.type_expression := parse_type_expression(parser); + + token := parser_lex(parser^.lexer); + if token.kind = lexerKindComma then + token := parser_lex(parser^.lexer) + end + end; + token := parser_lex(parser^.lexer); + declaration^.return_type := nil; + + (* Check for the return type and write it. *) + if token.kind = lexerKindArrow then + token := parser_lex(parser^.lexer); + declaration^.return_type := parse_type_expression(parser); + token := parser_lex(parser^.lexer) + end; + token := parser_lex(parser^.lexer); + + return declaration +end; + +proc parse_procedure_declaration(parser: ^Parser) -> ^AstProcedureDeclaration; +var + token: LexerToken; + declaration: ^AstProcedureDeclaration; +begin + declaration := parse_procedure_heading(parser); + + declaration^.constants := parse_constant_part(parser); + declaration^.variables := parse_variable_part(parser); + declaration^.statements := parse_statement_part(parser); + + token := parser_lex(parser^.lexer); + token := parser_lex(parser^.lexer); + + return declaration +end; + +proc parse_procedure_part(parser: ^Parser) -> ^^AstProcedureDeclaration; +var + token: LexerToken; + current_declaration: ^^AstProcedureDeclaration; + result: ^^AstProcedureDeclaration; + declaration_count: Word; + declaration_index: Word; +begin + token := lexer_current(parser^.lexer); + declaration_count := 0u; + declaration_index := 0u; + + result := malloc(#size(^AstProcedureDeclaration)); + + while token.kind = lexerKindProc do + declaration_count := declaration_count + 1u; + result := realloc(result, #size(^AstProcedureDeclaration) * (declaration_count + 1)); + current_declaration := result + declaration_index; + + current_declaration^ := parse_procedure_declaration(parser); + token := lexer_current(parser^.lexer); + declaration_index := declaration_count + end; + current_declaration := result + declaration_index; + current_declaration^ := nil; + + return result +end; + +proc parse_module(parser: ^Parser) -> ^AstModule; +var + token: LexerToken; + result: ^AstModule; +begin + NEW(result); + token := parser_lex(parser^.lexer); + result^.main := true; + + if token.kind = lexerKindModule then + result^.main := false + end; + token := parser_lex(parser^.lexer); + + (* Write the module body. *) + token := parser_lex(parser^.lexer); + + result^.imports := parse_import_part(parser); + result^.constants := parse_constant_part(parser); + result^.types := parse_type_part(parser); + + result^.variables := parse_variable_part(parser); + result^.procedures := parse_procedure_part(parser); + result^.statements := parse_statement_part(parser); + + token := parser_lex(parser^.lexer); + token := parser_lex(parser^.lexer); + + return result +end; + +proc parse*(lexer: ^Lexer) -> ^AstModule; +var + parser: Parser; +begin + parser.lexer := lexer; + + return parse_module(@parser) +end; + +end. diff --git a/source/Transpiler.elna b/source/Transpiler.elna new file mode 100644 index 0000000..5a65036 --- /dev/null +++ b/source/Transpiler.elna @@ -0,0 +1,631 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, + v. 2.0. If a copy of the MPL was not distributed with this file, You can + obtain one at https://mozilla.org/MPL/2.0/. *) +module; + +from FIO import File, WriteNBytes, WriteLine, WriteChar, WriteString; +from NumberIO import IntToStr; + +import common, Parser; + +type + TranspilerContext* = record + input_name: String; + output: File; + definition: File; + indentation: Word + end; + +proc indent(context: ^TranspilerContext); +var + count: Word; +begin + count := 0; + + while count < context^.indentation do + WriteString(context^.output, " "); + count := count + 1u + end +end; + +(* Write a semicolon followed by a newline. *) +proc write_semicolon(output: File); +begin + WriteChar(output, ';'); + WriteLine(output) +end; + +proc transpile_import_statement(context: ^TranspilerContext, import_statement: ^AstImportStatement); +var + current_symbol: ^Identifier; +begin + WriteString(context^.output, "FROM "); + transpile_identifier(context, import_statement^.package); + + WriteString(context^.output, " IMPORT "); + + current_symbol := import_statement^.symbols; + transpile_identifier(context, current_symbol^); + current_symbol := current_symbol + 1; + + while current_symbol^[1] <> '\0' do + WriteString(context^.output, ", "); + transpile_identifier(context, current_symbol^); + current_symbol := current_symbol + 1; + end; + write_semicolon(context^.output) +end; + +proc transpile_import_part(context: ^TranspilerContext, imports: ^^AstImportStatement); +var + import_statement: ^AstImportStatement; +begin + while imports^ <> nil do + transpile_import_statement(context, imports^); + imports := imports + 1 + end; + WriteLine(context^.output) +end; + +proc transpile_constant_declaration(context: ^TranspilerContext, declaration: ^AstConstantDeclaration); +var + buffer: [20]Char; +begin + WriteString(context^.output, " "); + transpile_identifier(context, declaration^.constant_name); + + WriteString(context^.output, " = "); + + IntToStr(declaration^.constant_value, 0, buffer); + WriteString(context^.output, buffer); + + write_semicolon(context^.output) +end; + +proc transpile_constant_part(context: ^TranspilerContext, declarations: ^^AstConstantDeclaration, extra_newline: Bool); +var + current_declaration: ^^AstConstantDeclaration; +begin + if declarations^ <> nil then + WriteString(context^.output, "CONST"); + WriteLine(context^.output); + + current_declaration := declarations; + while current_declaration^ <> nil do + transpile_constant_declaration(context, current_declaration^); + + current_declaration := current_declaration + 1 + end; + if extra_newline then + WriteLine(context^.output) + end + end +end; + +proc transpile_module(context: ^TranspilerContext, result: ^AstModule); +begin + if result^.main = false then + WriteString(context^.output, "IMPLEMENTATION ") + end; + WriteString(context^.output, "MODULE "); + + (* Write the module name and end the line with a semicolon and newline. *) + transpile_module_name(context); + + write_semicolon(context^.output); + WriteLine(context^.output); + + (* Write the module body. *) + + transpile_import_part(context, result^.imports); + transpile_constant_part(context, result^.constants, true); + transpile_type_part(context, result^.types); + transpile_variable_part(context, result^.variables, true); + transpile_procedure_part(context, result^.procedures); + transpile_statement_part(context, result^.statements); + + WriteString(context^.output, "END "); + transpile_module_name(context); + + WriteChar(context^.output, "."); + WriteLine(context^.output) +end; + +proc transpile_type_fields(context: ^TranspilerContext, fields: ^AstFieldDeclaration); +var + current_field: ^AstFieldDeclaration; +begin + current_field := fields; + + while current_field^.field_name[1] <> '\0' do + WriteString(context^.output, " "); + transpile_identifier(context, current_field^.field_name); + + WriteString(context^.output, ": "); + transpile_type_expression(context, current_field^.field_type); + + current_field := current_field + 1; + + if current_field^.field_name[1] <> '\0' then + WriteChar(context^.output, ';') + end; + WriteLine(context^.output) + end +end; + +proc transpile_record_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression); +begin + WriteString(context^.output, "RECORD"); + WriteLine(context^.output); + transpile_type_fields(context, type_expression^.fields); + WriteString(context^.output, " END") +end; + +proc transpile_pointer_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression); +begin + WriteString(context^.output, "POINTER TO "); + + transpile_type_expression(context, type_expression^.target) +end; + +proc transpile_array_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression); +var + buffer: [20]Char; +begin + WriteString(context^.output, "ARRAY"); + + if type_expression^.length <> 0 then + WriteString(context^.output, "[1.."); + + IntToStr(type_expression^.length, 0, buffer); + WriteString(context^.output, buffer); + + WriteChar(context^.output, ']') + end; + WriteString(context^.output, " OF "); + + transpile_type_expression(context, type_expression^.base) +end; + +proc transpile_enumeration_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression); +var + current_case: ^Identifier; +begin + current_case := type_expression^.cases; + + WriteString(context^.output, "("); + WriteLine(context^.output); + WriteString(context^.output, " "); + transpile_identifier(context, current_case^); + current_case := current_case + 1; + + while current_case^[1] <> '\0' do + WriteChar(context^.output, ','); + WriteLine(context^.output); + WriteString(context^.output, " "); + transpile_identifier(context, current_case^); + + current_case := current_case + 1 + end; + WriteLine(context^.output); + WriteString(context^.output, " )") +end; + +proc transpile_identifier(context: ^TranspilerContext, identifier: Identifier); +var + written_bytes: Word; +begin + written_bytes := WriteNBytes(context^.output, cast(identifier[1]: Word), @identifier[2]) +end; + +proc transpile_procedure_type(context: ^TranspilerContext, type_expression: ^AstTypeExpression); +var + result: ^AstTypeExpression; + current_parameter: ^^AstTypeExpression; + parameter_count: Word; +begin + WriteString(context^.output, "PROCEDURE("); + current_parameter := type_expression^.parameters; + + while current_parameter^ <> nil do + transpile_type_expression(context, current_parameter^); + + current_parameter := current_parameter + 1; + + if current_parameter^ <> nil then + WriteString(context^.output, ", ") + end + end; + WriteChar(context^.output, ')') +end; + +proc transpile_type_expression(context: ^TranspilerContext, type_expression: ^AstTypeExpression); +begin + if type_expression^.kind = astTypeExpressionKindRecord then + transpile_record_type(context, type_expression) + end; + if type_expression^.kind = astTypeExpressionKindEnumeration then + transpile_enumeration_type(context, type_expression) + end; + if type_expression^.kind = astTypeExpressionKindArray then + transpile_array_type(context, type_expression) + end; + if type_expression^.kind = astTypeExpressionKindPointer then + transpile_pointer_type(context, type_expression) + end; + if type_expression^.kind = astTypeExpressionKindProcedure then + transpile_procedure_type(context, type_expression) + end; + if type_expression^.kind = astTypeExpressionKindNamed then + transpile_identifier(context, type_expression^.name) + end +end; + +proc transpile_type_declaration(context: ^TranspilerContext, declaration: ^AstTypedDeclaration); +var + written_bytes: Word; +begin + WriteString(context^.output, " "); + + transpile_identifier(context^.output, declaration^.identifier); + WriteString(context^.output, " = "); + + transpile_type_expression(context, declaration^.type_expression); + write_semicolon(context^.output) +end; + +proc transpile_type_part(context: ^TranspilerContext, declarations: ^^AstTypedDeclaration); +var + current_declaration: ^^AstTypedDeclaration; +begin + if declarations^ <> nil then + WriteString(context^.output, "TYPE"); + WriteLine(context^.output); + + current_declaration := declarations; + while current_declaration^ <> nil do + transpile_type_declaration(context, current_declaration^); + + current_declaration := current_declaration + 1 + end; + WriteLine(context^.output) + end +end; + +proc transpile_variable_declaration(context: ^TranspilerContext, declaration: ^AstVariableDeclaration); +begin + WriteString(context^.output, " "); + transpile_identifier(context, declaration^.variable_name); + + WriteString(context^.output, ": "); + + transpile_type_expression(context, declaration^.variable_type); + write_semicolon(context^.output) +end; + +proc transpile_variable_part(context: ^TranspilerContext, declarations: ^^AstVariableDeclaration, extra_newline: Bool); +var + current_declaration: ^^AstVariableDeclaration; +begin + if declarations^ <> nil then + WriteString(context^.output, "VAR"); + WriteLine(context^.output); + + current_declaration := declarations; + while current_declaration^ <> nil do + transpile_variable_declaration(context, current_declaration^); + + current_declaration := current_declaration + 1 + end; + if extra_newline then + WriteLine(context^.output) + end + end +end; + +proc transpile_procedure_heading(context: ^TranspilerContext, declaration: ^AstProcedureDeclaration); +var + parameter_index: Word; + current_parameter: ^AstTypedDeclaration; +begin + WriteString(context^.output, "PROCEDURE "); + transpile_identifier(context, declaration^.name); + WriteChar(context^.output, '('); + + parameter_index := 0; + current_parameter := declaration^.parameters; + + while parameter_index < declaration^.parameter_count do + transpile_identifier(context, current_parameter^.identifier); + WriteString(context^.output, ": "); + transpile_type_expression(context, current_parameter^.type_expression); + + parameter_index := parameter_index + 1u; + current_parameter := current_parameter + 1; + + if parameter_index <> declaration^.parameter_count then + WriteString(context^.output, "; ") + end + end; + + WriteString(context^.output, ")"); + + (* Check for the return type and write it. *) + if declaration^.return_type <> nil then + WriteString(context^.output, ": "); + transpile_type_expression(context, declaration^.return_type) + end; + write_semicolon(context^.output) +end; + +proc transpile_unary_operator(context: ^TranspilerContext, operator: AstUnaryOperator); +begin + if operator = AstUnaryOperator.minus then + WriteChar(context^.output, '-') + end; + if operator = AstUnaryOperator.not then + WriteChar(context^.output, '~') + end +end; + +proc transpile_binary_operator(context: ^TranspilerContext, operator: AstBinaryOperator); +begin + case operator of + AstBinaryOperator.sum: WriteChar(context^.output, '+') + | AstBinaryOperator.subtraction: WriteChar(context^.output, '-') + | AstBinaryOperator.multiplication: WriteChar(context^.output, '*') + | AstBinaryOperator.equals: WriteChar(context^.output, '=') + | AstBinaryOperator.not_equals: WriteChar(context^.output, '#') + | AstBinaryOperator.less: WriteChar(context^.output, '<') + | AstBinaryOperator.greater: WriteChar(context^.output, '>') + | AstBinaryOperator.less_equal: WriteString(context^.output, "<=") + | AstBinaryOperator.greater_equal: WriteString(context^.output, ">=") + | AstBinaryOperator.disjunction: WriteString(context^.output, "OR") + | AstBinaryOperatorConjunction: WriteString(context^.output, "AND") + end +end; + +proc transpile_expression(context: ^TranspilerContext, expression: ^AstExpression); +var + literal: ^AstLiteral; + buffer: [20]Char; + argument_index: Word; + current_argument: ^^AstExpression; +begin + if expression^.kind = astExpressionKindLiteral then + literal := expression^.literal; + + if literal^.kind = AstLiteralKind.integer then + IntToStr(literal^.integer, 0, buffer); + WriteString(context^.output, buffer) + end; + if literal^.kind = AstLiteralKind.string then + WriteString(context^.output, literal^.string) + end; + if literal^.kind = AstLiteralKind.null then + WriteString(context^.output, "NIL") + end; + if (literal^.kind = AstLiteralKind.boolean) & literal^.boolean then + WriteString(context^.output, "TRUE") + end; + if (literal^.kind = AstLiteralKind.boolean) & (literal^.boolean = false) then + WriteString(context^.output, "FALSE") + end + end; + if expression^.kind = astExpressionKindIdentifier then + transpile_identifier(context, expression^.identifier) + end; + if expression^.kind = astExpressionKindDereference then + transpile_expression(context, expression^.reference); + WriteChar(context^.output, '^') + end; + if expression^.kind = astExpressionKindArrayAccess then + transpile_expression(context, expression^.array); + WriteChar(context^.output, '['); + transpile_expression(context, expression^.index); + WriteChar(context^.output, ']') + end; + if expression^.kind = astExpressionKindFieldAccess then + transpile_expression(context, expression^.aggregate); + WriteChar(context^.output, '.'); + transpile_identifier(contextexpression^.field) + end; + if expression^.kind = astExpressionKindUnary then + transpile_unary_operator(context, expression^.unary_operator); + transpile_expression(context, expression^.unary_operand) + end; + if expression^.kind = astExpressionKindBinary then + WriteChar(context^.output, '('); + transpile_expression(context, expression^.lhs); + WriteChar(context^.output, ' '); + transpile_binary_operator(context, expression^.binary_operator); + WriteChar(context^.output, ' '); + transpile_expression(context, expression^.rhs); + WriteChar(context^.output, ')') + end; + if expression^.kind = astExpressionKindCall then + transpile_expression(context, expression^.callable); + WriteChar(context^.output, '('); + + current_argument := expression^.arguments; + if expression^.argument_count > 0 then + transpile_expression(context, current_argument^); + + argument_index := 1u; + current_argument := current_argument + 1; + + while argument_index < expression^.argument_count do + WriteString(context^.output, ", "); + + transpile_expression(context, current_argument^); + + current_argument := current_argument + 1; + argument_index := argument_index + 1u + end + end; + WriteChar(context^.output, ')') + end +end; + +proc transpile_if_statement(context: ^TranspilerContext, statement: ^AstStatement); +begin + WriteString(context^.output, "IF "); + transpile_expression(context, statement^.if_condition); + + WriteString(context^.output, " THEN"); + WriteLine(context^.output); + context^.indentation := context^.indentation + 1u; + + transpile_compound_statement(context, statement^.if_branch); + context^.indentation := context^.indentation - 1u; + indent(context); + WriteString(context^.output, "END") +end; + +proc transpile_while_statement(context: ^TranspilerContext, statement: ^AstStatement); +begin + WriteString(context^.output, "WHILE "); + transpile_expression(context, statement^.while_condition); + + WriteString(context^.output, " DO"); + WriteLine(context^.output); + context^.indentation := context^.indentation + 1u; + + transpile_compound_statement(context, statement^.while_body); + context^.indentation := context^.indentation - 1u; + indent(context); + WriteString(context^.output, "END") +end; + +proc transpile_assignment_statement(context: ^TranspilerContext, statement: ^AstStatement); +begin + transpile_expression(context, statement^.assignee); + WriteString(context^.output, " := "); + transpile_expression(context, statement^.assignment) +end; + +proc transpile_return_statement(context: ^TranspilerContext, statement: ^AstStatement); +begin + WriteString(context^.output, "RETURN "); + + transpile_expression(context, statement^.returned) +end; + +proc transpile_compound_statement(context: ^TranspilerContext, statement: AstCompoundStatement); +var + current_statement: ^^AstStatement; + index: Word; +begin + index := 0; + current_statement := statement.statements; + + while index < statement.count do + transpile_statement(context, current_statement^); + + current_statement := current_statement + 1; + index := index + 1u; + + if index <> statement.count then + WriteChar(context^.output, ';') + end; + WriteLine(context^.output) + end +end; + +proc transpile_statement(context: ^TranspilerContext, statement: ^AstStatement); +begin + indent(context); + + if statement^.kind = astStatementKindIf then + transpile_if_statement(context, statement) + end; + if statement^.kind = astStatementKindWhile then + transpile_while_statement(context, statement) + end; + if statement^.kind = astStatementKindReturn then + transpile_return_statement(context, statement) + end; + if statement^.kind = astStatementKindAssignment then + transpile_assignment_statement(context, statement) + end; + if statement^.kind = astStatementKindCall then + transpile_expression(context, statement^.call) + end +end; + +proc transpile_statement_part(context: ^TranspilerContext, compound: AstCompoundStatement); +begin + if compound.count > 0 then + WriteString(context^.output, "BEGIN"); + WriteLine(context^.output); + + context^.indentation := context^.indentation + 1u; + transpile_compound_statement(context, compound); + context^.indentation := context^.indentation - 1u; + end +end; + +proc transpile_procedure_declaration(context: ^TranspilerContext, declaration: ^AstProcedureDeclaration); +begin + transpile_procedure_heading(context, declaration); + + transpile_constant_part(context, declaration^.constants, false); + transpile_variable_part(context, declaration^.variables, false); + transpile_statement_part(context, declaration^.statements); + + WriteString(context^.output, "END "); + transpile_identifier(context^.output, declaration^.name); + + write_semicolon(context^.output) +end; + +proc transpile_procedure_part(context: ^TranspilerContext, declaration: ^^AstProcedureDeclaration); +begin + while declaration^ <> nil do + transpile_procedure_declaration(context, declaration^); + WriteLine(context^.output); + + declaration := declaration + 1 + end +end; + +proc transpile_module_name(context: ^TranspilerContext); +var + counter: Word; + last_slash: Word; +begin + counter := 1u; + last_slash := 0u; + + while context^.input_name[counter] <> '.' & context^.input_name[counter] <> '\0' do + if context^.input_name[counter] = '/' then + last_slash := counter + end; + counter := counter + 1u + end; + + if last_slash = 0u then + counter := 1u + end; + if last_slash <> 0u then + counter := last_slash + 1u + end; + while context^.input_name[counter] <> '.' & context^.input_name[counter] <> '\0' do + WriteChar(context^.output, context^.input_name[counter]); + counter := counter + 1u + end +end; + +proc transpile*(ast_module: ^AstModule, output: File, definition: File, input_name: String); +var + context: TranspilerContext; +begin + context.input_name := input_name; + context.output := output; + context.definition := definition; + context.indentation := 0u; + + transpile_module(@context, ast_module) +end; + +end. diff --git a/source/cctype.elna b/source/cctype.elna new file mode 100644 index 0000000..3906cd1 --- /dev/null +++ b/source/cctype.elna @@ -0,0 +1,14 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, + v. 2.0. If a copy of the MPL was not distributed with this file, You can + obtain one at https://mozilla.org/MPL/2.0/. *) +module; + +proc isdigit*(c: Int ) -> Int; extern; +proc isalnum*(c: Int) -> Int; extern; +proc isalpha*(c: Int) -> Int; extern; +proc isspace*(c: Int) -> Int; extern; + +proc tolower*(c: Int) -> Int; extern; +proc toupper*(c: Int) -> Int; extern; + +end. diff --git a/source/command_line_interface.elna b/source/command_line_interface.elna new file mode 100644 index 0000000..040fdeb --- /dev/null +++ b/source/command_line_interface.elna @@ -0,0 +1,93 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, + v. 2.0. If a copy of the MPL was not distributed with this file, You can + obtain one at https://mozilla.org/MPL/2.0/. *) + +(* + Command line handling. +*) +module; + +import cstdlib, cstring, common; + +type + CommandLine* = record + input: ^Char; + output: ^Char; + lex: Bool; + parse: Bool + end; + +proc parse_command_line*(argc: Int, argv: ^^Char) -> ^CommandLine; +var + parameter: ^Char; + i: Int; + result: ^CommandLine; + parsed: Bool; +begin + i := 1; + result := cast(malloc(#size(CommandLine)): ^CommandLine); + result^.lex := false; + result^.parse := false; + result^.input := nil; + result^.output := nil; + + while i < argc & result <> nil do + parameter := (argv + i)^; + parsed := false; + + if strcmp(parameter, "--lex\0".ptr) = 0 then + parsed := true; + result^.lex := true + end; + if strcmp(parameter, "--parse\0".ptr) = 0 then + parsed := true; + result^.parse := true + end; + if strcmp(parameter, "-o\0".ptr) = 0 then + i := i + 1; + + if i = argc then + write_s("Fatal error: expecting a file name following -o."); + result := nil + end; + if i < argc then + parameter := (argv + i)^; + result^.output := parameter + end; + parsed := true + end; + if (parameter^ <> '-') & ~parsed then + parsed := true; + + if result^.input <> nil then + write_s("Fatal error: only one source file can be compiled at once. First given \""); + write_z(result^.input); + write_s("\", then \""); + write_z(parameter); + write_s("\".\n"); + result := nil + end; + if result <> nil then + result^.input := parameter + end + end; + if ~parsed then + write_s("Fatal error: unknown command line options: "); + + write_z(parameter); + write_s(".\n"); + + result := nil + end; + + i := i + 1 + end; + if result <> nil & result^.input = nil then + write_s("Fatal error: no input files.\n"); + result := nil + end; + + return result +end; + +end. diff --git a/source/common.elna b/source/common.elna new file mode 100644 index 0000000..e7b30ca --- /dev/null +++ b/source/common.elna @@ -0,0 +1,72 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, + v. 2.0. If a copy of the MPL was not distributed with this file, You can + obtain one at https://mozilla.org/MPL/2.0/. *) +module; + +import cstring, cstdio; + +type + Identifier = [256]Char; + TextLocation* = record + line: Word; + column: Word + end; + +proc write*(fd: Int, buf: Pointer, Word: Int) -> Int; extern; + +proc write_s*(value: String); +begin + (* fwrite(cast(value.ptr: Pointer), value.length, 1u, stdout) *) + write(1, cast(value.ptr: Pointer), cast(value.length: Int)) +end; + +proc write_z*(value: ^Char); +begin + write(1, cast(value: Pointer), cast(strlen(value): Int)) +end; + +proc write_b*(value: Bool); +begin + if value then + write_s("true") + else + write_s("false") + end +end; + +proc write_c*(value: Char); +begin + putchar(cast(value: Int)); + fflush(nil) +end; + +proc write_i*(value: Int); +var + digit: Int; + n: Word; + buffer: [10]Char; +begin + n := 10u; + + if value = 0 then + write_c('0') + end; + while value <> 0 do + digit := value % 10; + value := value / 10; + + buffer[n] := cast(cast('0': Int) + digit: Char); + n := n - 1u + end; + while n < 10u do + n := n + 1u; + write_c(buffer[n]) + end +end; + +proc write_u*(value: Word); +begin + write_i(cast(value: Int)) +end; + +end. diff --git a/source/cstdio.elna b/source/cstdio.elna new file mode 100644 index 0000000..c7507ff --- /dev/null +++ b/source/cstdio.elna @@ -0,0 +1,29 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, + v. 2.0. If a copy of the MPL was not distributed with this file, You can + obtain one at https://mozilla.org/MPL/2.0/. *) +module; + +type + FILE* = record end; + +var + stdin*: ^FILE := extern; + stdout*: ^FILE := extern; + stderr*: ^FILE := extern; + +proc fopen*(pathname: ^Char, mode: ^Char) -> ^FILE; extern; +proc fclose*(stream: ^FILE) -> Int; extern; +proc fseek*(stream: ^FILE, off: Int, whence: Int) -> Int; extern; +proc rewind*(stream: ^FILE); extern; +proc ftell*(stream: ^FILE) -> Int; extern; +proc fflush*(stream: ^FILE) -> Int; extern; + +proc fread*(ptr: Pointer, size: Word, nmemb: Word, stream: ^FILE) -> Word; extern; +proc fwrite*(ptr: Pointer, size: Word, nitems: Word, stream: ^FILE) -> Word; extern; + +proc perror(s: ^Char); extern; + +proc puts(s: ^Char) -> Int; extern; +proc putchar(c: Int) -> Int; extern; + +end. diff --git a/source/cstdlib.elna b/source/cstdlib.elna new file mode 100644 index 0000000..da2029c --- /dev/null +++ b/source/cstdlib.elna @@ -0,0 +1,15 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, + v. 2.0. If a copy of the MPL was not distributed with this file, You can + obtain one at https://mozilla.org/MPL/2.0/. *) +module; + +proc malloc(size: Word) -> Pointer; extern; +proc free(ptr: Pointer); extern; +proc calloc(nmemb: Word, size: Word) -> Pointer; extern; +proc realloc(ptr: Pointer, size: Word) -> Pointer; extern; + +proc atoi(str: ^Char) -> Int; extern; + +proc exit(code: Int) -> !; extern; + +end. diff --git a/source/cstring.elna b/source/cstring.elna new file mode 100644 index 0000000..24d852a --- /dev/null +++ b/source/cstring.elna @@ -0,0 +1,15 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, + v. 2.0. If a copy of the MPL was not distributed with this file, You can + obtain one at https://mozilla.org/MPL/2.0/. *) +module; + +proc memset(ptr: Pointer, c: Int, n: Word) -> ^Char; extern; +proc memcpy(dst: Pointer, src: Pointer, n: Word); extern; + +proc strcmp(s1: ^Char, s2: ^Char) -> Int; extern; +proc strncmp(s1: ^Char, s2: ^Char, n: Word) -> Int; extern; +proc strncpy(dst: ^Char, src: ^Char, dsize: Word) -> ^Char; extern; +proc strcpy(dst: ^Char, src: ^Char) -> ^Char; extern; +proc strlen(ptr: ^Char) -> Word; extern; + +end. diff --git a/source/lexer.elna b/source/lexer.elna new file mode 100644 index 0000000..d5f529b --- /dev/null +++ b/source/lexer.elna @@ -0,0 +1,952 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, + v. 2.0. If a copy of the MPL was not distributed with this file, You can + obtain one at https://mozilla.org/MPL/2.0/. *) +module; + +import cstdio, cstring, cctype, cstdlib, common; + +const + CHUNK_SIZE := 85536u; + +type + (* + * Classification table assigns each possible character to a group (class). All + * characters of the same group are handled equivalently. + * + * Classification: + *) + TransitionClass = ( + invalid, + digit, + alpha, + space, + colon, + equals, + left_paren, + right_paren, + asterisk, + underscore, + single, + hex, + zero, + x, + eof, + dot, + minus, + single_quote, + double_quote, + greater, + less, + other + ); + TransitionState = ( + start, + colon, + identifier, + decimal, + greater, + minus, + left_paren, + less, + dot, + comment, + closing_comment, + character, + string, + leading_zero, + decimal_suffix, + finish + ); + LexerToken = record + kind: LexerKind; + value: union + booleanKind: Bool; + identifierKind: Identifier; + integerKind: Int; + stringKind: String + end; + start_location: TextLocation; + end_location: TextLocation + end; + TransitionAction = proc(^Lexer, ^LexerToken); + Transition = record + action: TransitionAction; + next_state: TransitionState + end; + TransitionClasses = [22]Transition; + + BufferPosition* = record + iterator: ^Char; + location: TextLocation + end; + Lexer* = record + input: ^FILE; + buffer: ^Char; + size: Word; + length: Word; + start: BufferPosition; + current: BufferPosition + end; + LexerKind* = ( + unknown, + identifier, + _if, + _then, + _else, + _elsif, + _while, + _do, + _proc, + _begin, + _end, + _extern, + _const, + _var, + _case, + _of, + _type, + _record, + _union, + pipe, + to, + boolean, + null, + and, + _or, + _xor, + not, + _return, + _cast, + shift_left, + shift_right, + left_paren, + right_paren, + left_square, + right_square, + greater_equal, + less_equal, + greater_than, + less_than, + not_equal, + equal, + semicolon, + dot, + comma, + plus, + minus, + multiplication, + division, + remainder, + assignment, + colon, + hat, + at, + comment, + integer, + word, + character, + string, + _defer, + exclamation, + arrow, + trait, + _program, + _module, + _import + ); + +var + classification: [128]TransitionClass; + transitions: [16]TransitionClasses; + +proc initialize_classification(); +var + i: Word; +begin + classification[1] := TransitionClass.eof; (* NUL *) + classification[2] := TransitionClass.invalid; (* SOH *) + classification[3] := TransitionClass.invalid; (* STX *) + classification[4] := TransitionClass.invalid; (* ETX *) + classification[5] := TransitionClass.invalid; (* EOT *) + classification[6] := TransitionClass.invalid; (* EMQ *) + classification[7] := TransitionClass.invalid; (* ACK *) + classification[8] := TransitionClass.invalid; (* BEL *) + classification[9] := TransitionClass.invalid; (* BS *) + classification[10] := TransitionClass.space; (* HT *) + classification[11] := TransitionClass.space; (* LF *) + classification[12] := TransitionClass.invalid; (* VT *) + classification[13] := TransitionClass.invalid; (* FF *) + classification[14] := TransitionClass.space; (* CR *) + classification[15] := TransitionClass.invalid; (* SO *) + classification[16] := TransitionClass.invalid; (* SI *) + classification[17] := TransitionClass.invalid; (* DLE *) + classification[18] := TransitionClass.invalid; (* DC1 *) + classification[19] := TransitionClass.invalid; (* DC2 *) + classification[20] := TransitionClass.invalid; (* DC3 *) + classification[21] := TransitionClass.invalid; (* DC4 *) + classification[22] := TransitionClass.invalid; (* NAK *) + classification[23] := TransitionClass.invalid; (* SYN *) + classification[24] := TransitionClass.invalid; (* ETB *) + classification[25] := TransitionClass.invalid; (* CAN *) + classification[26] := TransitionClass.invalid; (* EM *) + classification[27] := TransitionClass.invalid; (* SUB *) + classification[28] := TransitionClass.invalid; (* ESC *) + classification[29] := TransitionClass.invalid; (* FS *) + classification[30] := TransitionClass.invalid; (* GS *) + classification[31] := TransitionClass.invalid; (* RS *) + classification[32] := TransitionClass.invalid; (* US *) + classification[33] := TransitionClass.space; (* Space *) + classification[34] := TransitionClass.single; (* ! *) + classification[35] := TransitionClass.double_quote; (* " *) + classification[36] := TransitionClass.other; (* # *) + classification[37] := TransitionClass.other; (* $ *) + classification[38] := TransitionClass.single; (* % *) + classification[39] := TransitionClass.single; (* & *) + classification[40] := TransitionClass.single_quote; (* ' *) + classification[41] := TransitionClass.left_paren; (* ( *) + classification[42] := TransitionClass.right_paren; (* ) *) + classification[43] := TransitionClass.asterisk; (* * *) + classification[44] := TransitionClass.single; (* + *) + classification[45] := TransitionClass.single; (* , *) + classification[46] := TransitionClass.minus; (* - *) + classification[47] := TransitionClass.dot; (* . *) + classification[48] := TransitionClass.single; (* / *) + classification[49] := TransitionClass.zero; (* 0 *) + classification[50] := TransitionClass.digit; (* 1 *) + classification[51] := TransitionClass.digit; (* 2 *) + classification[52] := TransitionClass.digit; (* 3 *) + classification[53] := TransitionClass.digit; (* 4 *) + classification[54] := TransitionClass.digit; (* 5 *) + classification[55] := TransitionClass.digit; (* 6 *) + classification[56] := TransitionClass.digit; (* 7 *) + classification[57] := TransitionClass.digit; (* 8 *) + classification[58] := TransitionClass.digit; (* 9 *) + classification[59] := TransitionClass.colon; (* : *) + classification[60] := TransitionClass.single; (* ; *) + classification[61] := TransitionClass.less; (* < *) + classification[62] := TransitionClass.equals; (* = *) + classification[63] := TransitionClass.greater; (* > *) + classification[64] := TransitionClass.other; (* ? *) + classification[65] := TransitionClass.single; (* @ *) + classification[66] := TransitionClass.alpha; (* A *) + classification[67] := TransitionClass.alpha; (* B *) + classification[68] := TransitionClass.alpha; (* C *) + classification[69] := TransitionClass.alpha; (* D *) + classification[70] := TransitionClass.alpha; (* E *) + classification[71] := TransitionClass.alpha; (* F *) + classification[72] := TransitionClass.alpha; (* G *) + classification[73] := TransitionClass.alpha; (* H *) + classification[74] := TransitionClass.alpha; (* I *) + classification[75] := TransitionClass.alpha; (* J *) + classification[76] := TransitionClass.alpha; (* K *) + classification[77] := TransitionClass.alpha; (* L *) + classification[78] := TransitionClass.alpha; (* M *) + classification[79] := TransitionClass.alpha; (* N *) + classification[80] := TransitionClass.alpha; (* O *) + classification[81] := TransitionClass.alpha; (* P *) + classification[82] := TransitionClass.alpha; (* Q *) + classification[83] := TransitionClass.alpha; (* R *) + classification[84] := TransitionClass.alpha; (* S *) + classification[85] := TransitionClass.alpha; (* T *) + classification[86] := TransitionClass.alpha; (* U *) + classification[87] := TransitionClass.alpha; (* V *) + classification[88] := TransitionClass.alpha; (* W *) + classification[89] := TransitionClass.alpha; (* X *) + classification[90] := TransitionClass.alpha; (* Y *) + classification[91] := TransitionClass.alpha; (* Z *) + classification[92] := TransitionClass.single; (* [ *) + classification[93] := TransitionClass.other; (* \ *) + classification[94] := TransitionClass.single; (* ] *) + classification[95] := TransitionClass.single; (* ^ *) + classification[96] := TransitionClass.underscore; (* _ *) + classification[97] := TransitionClass.other; (* ` *) + classification[98] := TransitionClass.hex; (* a *) + classification[99] := TransitionClass.hex; (* b *) + classification[100] := TransitionClass.hex; (* c *) + classification[101] := TransitionClass.hex; (* d *) + classification[102] := TransitionClass.hex; (* e *) + classification[103] := TransitionClass.hex; (* f *) + classification[104] := TransitionClass.alpha; (* g *) + classification[105] := TransitionClass.alpha; (* h *) + classification[106] := TransitionClass.alpha; (* i *) + classification[107] := TransitionClass.alpha; (* j *) + classification[108] := TransitionClass.alpha; (* k *) + classification[109] := TransitionClass.alpha; (* l *) + classification[110] := TransitionClass.alpha; (* m *) + classification[111] := TransitionClass.alpha; (* n *) + classification[112] := TransitionClass.alpha; (* o *) + classification[113] := TransitionClass.alpha; (* p *) + classification[114] := TransitionClass.alpha; (* q *) + classification[115] := TransitionClass.alpha; (* r *) + classification[116] := TransitionClass.alpha; (* s *) + classification[117] := TransitionClass.alpha; (* t *) + classification[118] := TransitionClass.alpha; (* u *) + classification[119] := TransitionClass.alpha; (* v *) + classification[120] := TransitionClass.alpha; (* w *) + classification[121] := TransitionClass.x; (* x *) + classification[122] := TransitionClass.alpha; (* y *) + classification[123] := TransitionClass.alpha; (* z *) + classification[124] := TransitionClass.other; (* { *) + classification[125] := TransitionClass.single; (* | *) + classification[126] := TransitionClass.other; (* } *) + classification[127] := TransitionClass.single; (* ~ *) + classification[128] := TransitionClass.invalid; (* DEL *) + + i := 129u; + while i <= 256u do + classification[i] := TransitionClass.other; + i := i + 1u + end +end; + +proc compare_keyword(keyword: String, token_start: BufferPosition, token_end: ^Char) -> Bool; +var + result: Bool; + index: Word; + continue: Bool; +begin + index := 0u; + result := true; + continue := (index < keyword.length) & (token_start.iterator <> token_end); + + while continue & result do + result := keyword[index] = token_start.iterator^ + or cast(tolower(cast(keyword[index]: Int)): Char) = token_start.iterator^; + token_start.iterator := token_start.iterator + 1; + index := index + 1u; + continue := (index < keyword.length) & (token_start.iterator <> token_end) + end; + result := result & index = keyword.length; + + return result & (token_start.iterator = token_end) +end; + +(* Reached the end of file. *) +proc transition_action_eof(lexer: ^Lexer, token: ^LexerToken); +begin + token^.kind := LexerKind.unknown +end; + +proc increment(position: ^BufferPosition); +begin + position^.iterator := position^.iterator + 1 +end; + +(* Add the character to the token currently read and advance to the next character. *) +proc transition_action_accumulate(lexer: ^Lexer, token: ^LexerToken); +begin + increment(@lexer^.current) +end; + +(* The current character is not a part of the token. Finish the token already + * read. Don't advance to the next character. *) +proc transition_action_finalize(lexer: ^Lexer, token: ^LexerToken); +begin + if lexer^.start.iterator^ = ':' then + token^.kind := LexerKind.colon + end; + if lexer^.start.iterator^ = '>' then + token^.kind := LexerKind.greater_than + end; + if lexer^.start.iterator^ = '<' then + token^.kind := LexerKind.less_than + end; + if lexer^.start.iterator^ = '(' then + token^.kind := LexerKind.left_paren + end; + if lexer^.start.iterator^ = '-' then + token^.kind := LexerKind.minus + end; + if lexer^.start.iterator^ = '.' then + token^.kind := LexerKind.dot + end +end; + +(* An action for tokens containing multiple characters. *) +proc transition_action_composite(lexer: ^Lexer, token: ^LexerToken); +begin + if lexer^.start.iterator^ = '<' then + if lexer^.current.iterator^ = '>' then + token^.kind := LexerKind.not_equal + end; + if lexer^.current.iterator^ = '=' then + token^.kind := LexerKind.less_equal + end + end; + if (lexer^.start.iterator^ = '>') & (lexer^.current.iterator^ = '=') then + token^.kind := LexerKind.greater_equal + end; + if (lexer^.start.iterator^ = ':') & (lexer^.current.iterator^ = '=') then + token^.kind := LexerKind.assignment + end; + if (lexer^.start.iterator^ = '-') & (lexer^.current.iterator^ = '>') then + token^.kind := LexerKind.arrow + end; + increment(@lexer^.current) +end; + +(* Skip a space. *) +proc transition_action_skip(lexer: ^Lexer, token: ^LexerToken); +begin + increment(@lexer^.start); + + if lexer^.start.iterator^ = '\n' then + lexer^.start.location.line := lexer^.start.location.line + 1u; + lexer^.start.location.column := 1u + end; + lexer^.current := lexer^.start +end; + +(* Delimited string action. *) +proc transition_action_delimited(lexer: ^Lexer, token: ^LexerToken); +var + text_length: Word; +begin + if lexer^.start.iterator^ = '(' then + token^.kind := LexerKind.comment + end; + if lexer^.start.iterator^ = '"' then + text_length := cast(lexer^.current.iterator - lexer^.start.iterator + 1: Word); + + token^.value.stringKind := String(cast(malloc(text_length): ^Char), text_length); + memcpy(cast(token^.value.stringKind.ptr: Pointer), cast(lexer^.start.iterator: Pointer), text_length); + + token^.kind := LexerKind.character + end; + if lexer^.start.iterator^ = '\'' then + text_length := cast(lexer^.current.iterator - lexer^.start.iterator + 1: Word); + + token^.value.stringKind := String(cast(malloc(text_length): ^Char), text_length); + memcpy(cast(token^.value.stringKind.ptr: Pointer), cast(lexer^.start.iterator: Pointer), text_length); + + token^.kind := LexerKind.string + end; + increment(@lexer^.current) +end; + +(* Finalize keyword or identifier. *) +proc transition_action_key_id(lexer: ^Lexer, token: ^LexerToken); +begin + token^.kind := LexerKind.identifier; + + token^.value.identifierKind[1] := cast(lexer^.current.iterator - lexer^.start.iterator: Char); + memcpy(cast(@token^.value.identifierKind[2]: Pointer), cast(lexer^.start.iterator: Pointer), cast(token^.value.identifierKind[1]: Word)); + + if compare_keyword("program", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._program + end; + if compare_keyword("import", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._import + end; + if compare_keyword("const", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._const + end; + if compare_keyword("var", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._var + end; + if compare_keyword("if", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._if + end; + if compare_keyword("then", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._then + end; + if compare_keyword("elsif", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._elsif + end; + if compare_keyword("else", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._else + end; + if compare_keyword("while", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._while + end; + if compare_keyword("do", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._do + end; + if compare_keyword("proc", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._proc + end; + if compare_keyword("begin", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._begin + end; + if compare_keyword("end", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._end + end; + if compare_keyword("type", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._type + end; + if compare_keyword("record", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._record + end; + if compare_keyword("union", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._union + end; + if compare_keyword("NIL", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind.null + end; + if compare_keyword("or", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._or + end; + if compare_keyword("return", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._return + end; + if compare_keyword("defer", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._defer + end; + if compare_keyword("TO", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind.to + end; + if compare_keyword("CASE", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._case + end; + if compare_keyword("OF", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._of + end; + if compare_keyword("module", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._module + end; + if compare_keyword("xor", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind._xor + end; + if compare_keyword("TRUE", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind.boolean; + token^.value.booleanKind := true + end; + if compare_keyword("FALSE", lexer^.start, lexer^.current.iterator) then + token^.kind := LexerKind.boolean; + token^.value.booleanKind := false + end +end; + +(* Action for tokens containing only one character. The character cannot be + * followed by other characters forming a composite token. *) +proc transition_action_single(lexer: ^Lexer, token: ^LexerToken); +begin + if lexer^.current.iterator^ = '&' then + token^.kind := LexerKind.and + end; + if lexer^.current.iterator^ = ';' then + token^.kind := LexerKind.semicolon + end; + if lexer^.current.iterator^ = ',' then + token^.kind := LexerKind.comma + end; + if lexer^.current.iterator^ = '~' then + token^.kind := LexerKind.not + end; + if lexer^.current.iterator^ = ')' then + token^.kind := LexerKind.right_paren + end; + if lexer^.current.iterator^ = '[' then + token^.kind := LexerKind.left_square + end; + if lexer^.current.iterator^ = ']' then + token^.kind := LexerKind.right_square + end; + if lexer^.current.iterator^ = '^' then + token^.kind := LexerKind.hat + end; + if lexer^.current.iterator^ = '=' then + token^.kind := LexerKind.equal + end; + if lexer^.current.iterator^ = '+' then + token^.kind := LexerKind.plus + end; + if lexer^.current.iterator^ = '*' then + token^.kind := LexerKind.multiplication + end; + if lexer^.current.iterator^ = '/' then + token^.kind := LexerKind.division + end; + if lexer^.current.iterator^ = '%' then + token^.kind := LexerKind.remainder + end; + if lexer^.current.iterator^ = '@' then + token^.kind := LexerKind.at + end; + if lexer^.current.iterator^ = '|' then + token^.kind := LexerKind.pipe + end; + increment(@lexer^.current) +end; + +(* Handle an integer literal. *) +proc transition_action_integer(lexer: ^Lexer, token: ^LexerToken); +var + buffer: String; + integer_length: Word; + found: Bool; +begin + token^.kind := LexerKind.integer; + + integer_length := cast(lexer^.current.iterator - lexer^.start.iterator: Word); + memset(cast(token^.value.identifierKind.ptr: Pointer), 0, #size(Identifier)); + memcpy(cast(@token^.value.identifierKind[1]: Pointer), cast(lexer^.start.iterator: Pointer), integer_length); + + token^.value.identifierKind[cast(token^.value.identifierKind[1]: Int) + 2] := '\0'; + token^.value.integerKind := atoi(@token^.value.identifierKind[2]) +end; + +proc set_default_transition(current_state: TransitionState, default_action: TransitionAction, next_state: TransitionState) -> Int; +var + default_transition: Transition; + state_index: Int; +begin + default_transition.action := default_action; + default_transition.next_state := next_state; + state_index := cast(current_state: Int) + 1; + + transitions[state_index][cast(TransitionClass.invalid: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.digit: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.alpha: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.space: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.colon: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.equals: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.left_paren: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.right_paren: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.asterisk: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.underscore: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.single: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.hex: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.zero: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.x: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.eof: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.dot: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.minus: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.single_quote: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.double_quote: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.greater: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.less: Int) + 1] := default_transition; + transitions[state_index][cast(TransitionClass.other: Int) + 1] := default_transition; + + return state_index +end; + +(* + * The transition table describes transitions from one state to another, given + * a symbol (character class). + * + * The table has m rows and n columns, where m is the amount of states and n is + * the amount of classes. So given the current state and a classified character + * the table can be used to look up the next state. + * + * Each cell is a word long. + * - The least significant byte of the word is a row number (beginning with 0). + * It specifies the target state. "ff" means that this is an end state and no + * transition is possible. + * - The next byte is the action that should be performed when transitioning. + * For the meaning of actions see labels in the lex_next function, which + * handles each action. + *) +proc initialize_transitions(); +var + state_index: Int; +begin + (* Start state. *) + state_index := cast(TransitionState.start: Int) + 1; + + transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.decimal; + + transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.identifier; + + transitions[state_index][cast(TransitionClass.space: Int) + 1].action := transition_action_skip; + transitions[state_index][cast(TransitionClass.space: Int) + 1].next_state := TransitionState.start; + + transitions[state_index][cast(TransitionClass.colon: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.colon: Int) + 1].next_state := TransitionState.colon; + + transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_single; + transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.left_paren: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.left_paren: Int) + 1].next_state := TransitionState.left_paren; + + transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].action := transition_action_single; + transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_single; + transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.identifier; + + transitions[state_index][cast(TransitionClass.single: Int) + 1].action := transition_action_single; + transitions[state_index][cast(TransitionClass.single: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.identifier; + + transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.leading_zero; + + transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.identifier; + + transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := transition_action_eof; + transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.dot: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.dot: Int) + 1].next_state := TransitionState.dot; + + transitions[state_index][cast(TransitionClass.minus: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.minus: Int) + 1].next_state := TransitionState.minus; + + transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].next_state := TransitionState.character; + + transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].next_state := TransitionState.string; + + transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.greater; + + transitions[state_index][cast(TransitionClass.less: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.less: Int) + 1].next_state := TransitionState.less; + + transitions[state_index][cast(TransitionClass.other: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.other: Int) + 1].next_state := TransitionState.finish; + + (* Colon state. *) + state_index := set_default_transition(TransitionState.colon, transition_action_finalize, TransitionState.finish); + + transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite; + transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish; + + (* Identifier state. *) + state_index := set_default_transition(TransitionState.identifier, transition_action_key_id, TransitionState.finish); + + transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.identifier; + + transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.identifier; + + transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.identifier; + + transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.identifier; + + transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.identifier; + + transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.identifier; + + (* Decimal state. *) + state_index := set_default_transition(TransitionState.decimal, transition_action_integer, TransitionState.finish); + + transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.decimal; + + transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.decimal_suffix; + + transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.decimal_suffix; + + transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.decimal; + + transitions[state_index][cast(TransitionClass.x: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.decimal_suffix; + + (* Greater state. *) + state_index := set_default_transition(TransitionState.greater, transition_action_finalize, TransitionState.finish); + + transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite; + transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish; + + (* Minus state. *) + state_index := set_default_transition(TransitionState.minus, transition_action_finalize, TransitionState.finish); + + transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_composite; + transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.finish; + + (* Left paren state. *) + state_index := set_default_transition(TransitionState.left_paren, transition_action_finalize, TransitionState.finish); + + transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.comment; + + (* Less state. *) + state_index := set_default_transition(TransitionState.less, transition_action_finalize, TransitionState.finish); + + transitions[state_index][cast(TransitionClass.equals: Int) + 1].action := transition_action_composite; + transitions[state_index][cast(TransitionClass.equals: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.greater: Int) + 1].action := transition_action_composite; + transitions[state_index][cast(TransitionClass.greater: Int) + 1].next_state := TransitionState.finish; + + (* Hexadecimal after 0x. *) + state_index := set_default_transition(TransitionState.dot, transition_action_finalize, TransitionState.finish); + + transitions[state_index][cast(TransitionClass.dot: Int) + 1].action := transition_action_composite; + transitions[state_index][cast(TransitionClass.dot: Int) + 1].next_state := TransitionState.finish; + + (* Comment. *) + state_index := set_default_transition(TransitionState.comment, transition_action_accumulate, TransitionState.comment); + + transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.closing_comment; + + transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; + + (* Closing comment. *) + state_index := set_default_transition(TransitionState.closing_comment, transition_action_accumulate, TransitionState.comment); + + transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].action := transition_action_delimited; + transitions[state_index][cast(TransitionClass.right_paren: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].action := transition_action_accumulate; + transitions[state_index][cast(TransitionClass.asterisk: Int) + 1].next_state := TransitionState.closing_comment; + + transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; + + (* Character. *) + state_index := set_default_transition(TransitionState.character, transition_action_accumulate, TransitionState.character); + + transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].action := transition_action_delimited; + transitions[state_index][cast(TransitionClass.single_quote: Int) + 1].next_state := TransitionState.finish; + + (* String. *) + state_index := set_default_transition(TransitionState.string, transition_action_accumulate, TransitionState.string); + + transitions[state_index][cast(TransitionClass.invalid: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.invalid: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.eof: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.eof: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].action := transition_action_delimited; + transitions[state_index][cast(TransitionClass.double_quote: Int) + 1].next_state := TransitionState.finish; + + (* Leading zero. *) + state_index := set_default_transition(TransitionState.leading_zero, transition_action_integer, TransitionState.finish); + + transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.underscore: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.underscore: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.x: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.finish; + + (* Digit with a character suffix. *) + state_index := set_default_transition(TransitionState.decimal_suffix, transition_action_integer, TransitionState.finish); + + transitions[state_index][cast(TransitionClass.alpha: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.alpha: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.digit: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.digit: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.hex: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.hex: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.zero: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.zero: Int) + 1].next_state := TransitionState.finish; + + transitions[state_index][cast(TransitionClass.x: Int) + 1].action := nil; + transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.finish +end; + +proc lexer_make*(lexer: ^Lexer, input: ^FILE); +begin + lexer^.input := input; + lexer^.length := 0u; + + lexer^.buffer := cast(malloc(CHUNK_SIZE): ^Char); + memset(cast(lexer^.buffer: Pointer), 0, CHUNK_SIZE); + lexer^.size := CHUNK_SIZE +end; + +(* Returns the last read token. *) +proc lexer_current*(lexer: ^Lexer) -> LexerToken; +var + current_class: TransitionClass; + current_state: TransitionState; + current_transition: Transition; + result: LexerToken; + index1: Word; + index2: Word; +begin + lexer^.current := lexer^.start; + current_state := TransitionState.start; + + while current_state <> TransitionState.finish do + index1 := cast(lexer^.current.iterator^: Word) + 1u; + current_class := classification[index1]; + + index1 := cast(current_state: Word) + 1u; + index2 := cast(current_class: Word) + 1u; + + current_transition := transitions[index1][index2]; + if current_transition.action <> nil then + current_transition.action(lexer, @result) + end; + current_state := current_transition.next_state + end; + result.start_location := lexer^.start.location; + result.end_location := lexer^.current.location; + + return result +end; + +(* Read and return the next token. *) +proc lexer_lex*(lexer: ^Lexer) -> LexerToken; +var + result: LexerToken; +begin + if lexer^.length = 0u then + lexer^.length := fread(cast(lexer^.buffer: Pointer), CHUNK_SIZE, 1u, lexer^.input); + lexer^.current.location.column := 1u; + lexer^.current.location.line := 1u; + lexer^.current.iterator := lexer^.buffer + end; + lexer^.start := lexer^.current; + + result := lexer_current(lexer); + return result +end; + +proc lexer_destroy*(lexer: ^Lexer); +begin + free(cast(lexer^.buffer: Pointer)) +end; + +proc lexer_initialize(); +begin + initialize_classification(); + initialize_transitions() +end; + +end. diff --git a/source/main.elna b/source/main.elna new file mode 100644 index 0000000..dae045b --- /dev/null +++ b/source/main.elna @@ -0,0 +1,841 @@ +(* This Source Code Form is subject to the terms of the Mozilla Public License, + v. 2.0. If a copy of the MPL was not distributed with this file, You can + obtain one at https://mozilla.org/MPL/2.0/. *) +program; + +import cstdio, cctype, common, command_line_interface, lexer; + +type + SourceFile* = record + buffer: [1024]Char; + handle: ^FILE; + size: Word; + index: Word + end; + StringBuffer* = record + data: Pointer; + size: Word; + capacity: Word + end; + SourceCode = record + position: TextLocation; + + input: Pointer; + empty: proc(Pointer) -> Bool; + advance: proc(Pointer); + head: proc(Pointer) -> Char + end; + Token* = record + kind: LexerKind; + value: union + int_value: Int; + string: String; + boolean_value: Bool; + char_value: Char + end + end; + Tokenizer* = record + length: Word; + data: ^Token + end; + +(* + Standard procedures. +*) +proc reallocarray(ptr: Pointer, n: Word, size: Word) -> Pointer; + return realloc(ptr, n * size) +end; + +proc substring(string: String, start: Word, count: Word) -> String; + return String(string.ptr + start, count) +end; + +proc open_substring(string: String, start: Word) -> String; + return substring(string, start, string.length - start) +end; + +proc string_dup(origin: String) -> String; +var + copy: ^Char; +begin + copy := cast(malloc(origin.length): ^Char); + strncpy(copy, origin.ptr, origin.length); + + return String(copy, origin.length) +end; + +proc string_buffer_new() -> StringBuffer; +var + result: StringBuffer; +begin + result.capacity := 64u; + result.data := malloc(result.capacity); + result.size := 0u; + + return result +end; + +proc string_buffer_push(buffer: ^StringBuffer, char: Char); +begin + if buffer^.size >= buffer^.capacity then + buffer^.capacity := buffer^.capacity + 1024u; + buffer^.data := realloc(buffer^.data, buffer^.capacity) + end; + cast(buffer^.data + buffer^.size: ^Char)^ := cast(char: Char); + buffer^.size := buffer^.size + 1u +end; + +proc string_buffer_pop(buffer: ^StringBuffer, count: Word); +begin + buffer^.size := buffer^.size - count +end; + +proc string_buffer_clear(buffer: ^StringBuffer) -> String; +var + result: String; +begin + result := String(cast(buffer^.data: ^Char), buffer^.size); + buffer^.size := 0u; + return result +end; + +(* + Source code stream procedures. +*) + +proc read_source(filename: ^Char) -> ^SourceFile; +var + result: ^SourceFile; + file_handle: ^FILE; +begin + file_handle := fopen(filename, "rb\0".ptr); + + if file_handle <> nil then + result := cast(malloc(#size(SourceFile)): ^SourceFile); + result^.handle := file_handle; + result^.size := 0u; + result^.index := 1u + end; + return result +end; + +proc source_file_empty(source_input: Pointer) -> Bool; +var + source_file: ^SourceFile; +begin + source_file := cast(source_input: ^SourceFile); + + if source_file^.index > source_file^.size then + source_file^.size := fread(cast(@source_file^.buffer: Pointer), 1u, 1024u, source_file^.handle); + source_file^.index := 1u + end; + + return source_file^.size = 0u +end; + +proc source_file_head(source_input: Pointer) -> Char; +var + source_file: ^SourceFile; +begin + source_file := cast(source_input: ^SourceFile); + + return source_file^.buffer[source_file^.index] +end; + +proc source_file_advance(source_input: Pointer); +var + source_file: ^SourceFile; +begin + source_file := cast(source_input: ^SourceFile); + + source_file^.index := source_file^.index + 1u +end; + +proc source_code_empty(source_code: ^SourceCode) -> Bool; + return source_code^.empty(source_code^.input) +end; + +proc source_code_head(source_code: SourceCode) -> Char; + return source_code.head(source_code.input) +end; + +proc source_code_advance(source_code: ^SourceCode); +begin + source_code^.advance(source_code^.input); + source_code^.position.column := source_code^.position.column +end; + +proc source_code_break(source_code: ^SourceCode); +begin + source_code^.position.line := source_code^.position.line + 1u; + source_code^.position.column := 0u +end; + +proc source_code_expect(source_code: ^SourceCode, expected: Char) -> Bool; + return ~source_code_empty(source_code) & source_code_head(source_code^) = expected +end; + +(* + Token procedures. +*) + +proc lexer_escape(escape: Char, result: ^Char) -> Bool; +var + successful: Bool; +begin + case escape of + 'n': + result^ := '\n'; + successful := true + | 'a': + result^ := '\a'; + successful := true + | 'b': + result^ := '\b'; + successful := true + | 't': + result^ := '\t'; + successful := true + | 'f': + result^ := '\f'; + successful := true + | 'r': + result^ := '\r'; + successful := true + | 'v': + result^ := '\v'; + successful := true + | '\\': + result^ := '\\'; + successful := true + | '\'': + result^ := '\''; + successful := true + | '"': + result^ := '"'; + successful := true + | '?': + result^ := '\?'; + successful := true + | '0': + result^ := '\0'; + successful := true + else + successful := false + end; + return successful +end; + +(* Skip spaces. *) +proc lexer_spaces(source_code: ^SourceCode); +var + current: Char; +begin + while ~source_code_empty(source_code) & isspace(cast(source_code_head(source_code^): Int)) <> 0 do + current := source_code_head(source_code^); + + if current = '\n' then + source_code_break(source_code) + end; + source_code_advance(source_code) + end +end; + +(* Checker whether the character is allowed in an identificator. *) +proc lexer_is_ident(char: Char) -> Bool; + return isalnum(cast(char: Int)) <> 0 or char = '_' +end; + +proc lexer_identifier(source_code: ^SourceCode, token_content: ^StringBuffer); +var + content_length: Word; +begin + while ~source_code_empty(source_code) & lexer_is_ident(source_code_head(source_code^)) do + string_buffer_push(token_content, source_code_head(source_code^)); + source_code_advance(source_code) + end +end; + +proc lexer_comment(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool; +var + trailing: Word; +begin + trailing := 0u; + + while ~source_code_empty(source_code) & trailing < 2u do + if source_code_head(source_code^) = '*' then + string_buffer_push(token_content, '*'); + trailing := 1u + elsif source_code_head(source_code^) = ')' & trailing = 1u then + string_buffer_pop(token_content, 1u); + trailing := 2u + else + string_buffer_push(token_content, source_code_head(source_code^)); + trailing := 0u + end; + source_code_advance(source_code) + end; + + return trailing = 2u +end; + +proc lexer_character(source_code: ^SourceCode, token_content: ^Char) -> Bool; +var + successful: Bool; +begin + successful := ~source_code_empty(source_code); + + if successful then + if source_code_head(source_code^) = '\\' then + source_code_advance(source_code); + + successful := ~source_code_empty(source_code) & lexer_escape(source_code_head(source_code^), token_content) + else + token_content^ := source_code_head(source_code^); + successful := true + end + end; + if successful then + source_code_advance(source_code) + end; + return successful +end; + +proc lexer_string(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool; +var + token_end, constructed_string: ^Char; + token_length: Word; + is_valid: Bool := true; + next_char: Char; +begin + while is_valid & ~source_code_empty(source_code) & source_code_head(source_code^) <> '"' do + is_valid := lexer_character(source_code, @next_char); + + if is_valid then + string_buffer_push(token_content, next_char) + end + end; + + if is_valid & source_code_expect(source_code, '"') then + source_code_advance(source_code) + else + is_valid := false + end; + return is_valid +end; + +proc lexer_number(source_code: ^SourceCode, token_content: ^Int); +begin + token_content^ := 0; + + while ~source_code_empty(source_code) & isdigit(cast(source_code_head(source_code^): Int)) <> 0 do + token_content^ := token_content^ * 10 + (cast(source_code_head(source_code^): Int) - cast('0': Int)); + + source_code_advance(source_code) + end +end; + +(* Categorize an identifier. *) +proc lexer_categorize(token_content: String) -> Token; +var + current_token: Token; +begin + if token_content = "if" then + current_token.kind := LexerKind._if + elsif token_content = "then" then + current_token.kind := LexerKind._then + elsif token_content = "else" then + current_token.kind := LexerKind._else + elsif token_content = "elsif" then + current_token.kind := LexerKind._elsif + elsif token_content = "while" then + current_token.kind := LexerKind._while + elsif token_content = "do" then + current_token.kind := LexerKind._do + elsif token_content = "proc" then + current_token.kind := LexerKind._proc + elsif token_content = "begin" then + current_token.kind := LexerKind._begin + elsif token_content = "end" then + current_token.kind := LexerKind._end + elsif token_content = "extern" then + current_token.kind := LexerKind._extern + elsif token_content = "const" then + current_token.kind := LexerKind._const + elsif token_content = "var" then + current_token.kind := LexerKind._var + elsif token_content = "case" then + current_token.kind := LexerKind._case + elsif token_content = "of" then + current_token.kind := LexerKind._of + elsif token_content = "type" then + current_token.kind := LexerKind._type + elsif token_content = "record" then + current_token.kind := LexerKind._record + elsif token_content = "union" then + current_token.kind := LexerKind._union + elsif token_content = "true" then + current_token.kind := LexerKind.boolean; + current_token.value.boolean_value := true + elsif token_content = "false" then + current_token.kind := LexerKind.boolean; + current_token.value.boolean_value := false + elsif token_content = "nil" then + current_token.kind := LexerKind.null + elsif token_content = "or" then + current_token.kind := LexerKind._or + elsif token_content = "return" then + current_token.kind := LexerKind._return + elsif token_content = "cast" then + current_token.kind := LexerKind._cast + elsif token_content = "defer" then + current_token.kind := LexerKind._defer + elsif token_content = "program" then + current_token.kind := LexerKind._program + elsif token_content = "module" then + current_token.kind := LexerKind._module + elsif token_content = "import" then + current_token.kind := LexerKind._import + else + current_token.kind := LexerKind.identifier; + current_token.value.string := string_dup(token_content) + end; + + return current_token +end; + +proc lexer_add_token(lexer: ^Tokenizer, token: Token); +var + new_length: Word; +begin + new_length := lexer^.length + 1u; + lexer^.data := cast(reallocarray(cast(lexer^.data: Pointer), new_length, #size(Token)): ^Token); + (lexer^.data + lexer^.length)^ := token; + lexer^.length := new_length +end; + +(* Read the next token from the input. *) +proc lexer_next(source_code: SourceCode, token_buffer: ^StringBuffer) -> Token; +var + current_token: Token; + first_char: Char; +begin + current_token.kind := LexerKind.unknown; + + first_char := source_code_head(source_code); + + if isalpha(cast(first_char: Int)) <> 0 or first_char = '_' then + lexer_identifier(@source_code, token_buffer); + current_token := lexer_categorize(string_buffer_clear(token_buffer)) + elsif first_char = '#' then + source_code_advance(@source_code); + lexer_identifier(@source_code, token_buffer); + + current_token.kind := LexerKind.trait; + current_token.value.string := string_dup(string_buffer_clear(token_buffer)) + elsif isdigit(cast(first_char: Int)) <> 0 then + lexer_number(@source_code, @current_token.value.int_value); + + if source_code_expect(@source_code, 'u') then + current_token.kind := LexerKind.word; + source_code_advance(@source_code) + else + current_token.kind := LexerKind.integer + end + elsif first_char = '(' then + source_code_advance(@source_code); + + if source_code_empty(@source_code) then + current_token.kind := LexerKind.left_paren + elsif source_code_head(source_code) = '*' then + source_code_advance(@source_code); + + if lexer_comment(@source_code, token_buffer) then + current_token.value.string := string_dup(string_buffer_clear(token_buffer)); + current_token.kind := LexerKind.comment + else + current_token.kind := LexerKind.unknown + end + else + current_token.kind := LexerKind.left_paren + end + elsif first_char = ')' then + current_token.kind := LexerKind.right_paren; + source_code_advance(@source_code) + elsif first_char = '\'' then + source_code_advance(@source_code); + + if lexer_character(@source_code, @current_token.value.char_value) & source_code_expect(@source_code, '\'') then + current_token.kind := LexerKind.character; + source_code_advance(@source_code) + else + current_token.kind := LexerKind.unknown + end + elsif first_char = '"' then + source_code_advance(@source_code); + + if lexer_string(@source_code, token_buffer) then + current_token.kind := LexerKind.string; + current_token.value.string := string_dup(string_buffer_clear(token_buffer)) + else + current_token.kind := LexerKind.unknown + end + elsif first_char = '[' then + current_token.kind := LexerKind.left_square; + source_code_advance(@source_code) + elsif first_char = ']' then + current_token.kind := LexerKind.right_square; + source_code_advance(@source_code) + elsif first_char = '>' then + source_code_advance(@source_code); + + if source_code_empty(@source_code) then + current_token.kind := LexerKind.greater_than + elsif source_code_head(source_code) = '=' then + current_token.kind := LexerKind.greater_equal; + source_code_advance(@source_code) + elsif source_code_head(source_code) = '>' then + current_token.kind := LexerKind.shift_right; + source_code_advance(@source_code) + else + current_token.kind := LexerKind.greater_than + end + elsif first_char = '<' then + source_code_advance(@source_code); + + if source_code_empty(@source_code) then + current_token.kind := LexerKind.less_than + elsif source_code_head(source_code) = '=' then + current_token.kind := LexerKind.less_equal; + source_code_advance(@source_code) + elsif source_code_head(source_code) = '<' then + current_token.kind := LexerKind.shift_left; + source_code_advance(@source_code) + elsif source_code_head(source_code) = '>' then + current_token.kind := LexerKind.not_equal; + source_code_advance(@source_code) + else + current_token.kind := LexerKind.less_than + end + elsif first_char = '=' then + current_token.kind := LexerKind.equal; + source_code_advance(@source_code) + elsif first_char = ';' then + current_token.kind := LexerKind.semicolon; + source_code_advance(@source_code) + elsif first_char = '.' then + current_token.kind := LexerKind.dot; + source_code_advance(@source_code) + elsif first_char = ',' then + current_token.kind := LexerKind.comma; + source_code_advance(@source_code) + elsif first_char = '+' then + current_token.kind := LexerKind.plus; + source_code_advance(@source_code) + elsif first_char = '-' then + source_code_advance(@source_code); + + if source_code_empty(@source_code) then + current_token.kind := LexerKind.minus + elsif source_code_head(source_code) = '>' then + current_token.kind := LexerKind.arrow; + source_code_advance(@source_code) + else + current_token.kind := LexerKind.minus + end + elsif first_char = '*' then + current_token.kind := LexerKind.multiplication; + source_code_advance(@source_code) + elsif first_char = '/' then + current_token.kind := LexerKind.division; + source_code_advance(@source_code) + elsif first_char = '%' then + current_token.kind := LexerKind.remainder; + source_code_advance(@source_code) + elsif first_char = ':' then + source_code_advance(@source_code); + + if source_code_empty(@source_code) then + current_token.kind := LexerKind.colon + elsif source_code_head(source_code) = '=' then + current_token.kind := LexerKind.assignment; + source_code_advance(@source_code) + else + current_token.kind := LexerKind.colon + end + elsif first_char = '^' then + current_token.kind := LexerKind.hat; + source_code_advance(@source_code) + elsif first_char = '@' then + current_token.kind := LexerKind.at; + source_code_advance(@source_code) + elsif first_char = '!' then + current_token.kind := LexerKind.exclamation; + source_code_advance(@source_code) + elsif first_char = '&' then + current_token.kind := LexerKind.and; + source_code_advance(@source_code) + elsif first_char = '~' then + current_token.kind := LexerKind.not; + source_code_advance(@source_code) + elsif first_char = '|' then + current_token.kind := LexerKind.pipe; + source_code_advance(@source_code) + else + current_token.kind := LexerKind.unknown; + source_code_advance(@source_code) + end; + + return current_token +end; + +(* Split the source text into tokens. *) +proc lexer_text(source_code: SourceCode) -> Tokenizer; +var + current_token: Token; + token_buffer: StringBuffer; + lexer: Tokenizer; +begin + lexer := Tokenizer(0u, nil); + token_buffer := string_buffer_new(); + + lexer_spaces(@source_code); + + while ~source_code_empty(@source_code) do + current_token := lexer_next(source_code, @token_buffer); + + if current_token.kind <> LexerKind.unknown then + lexer_add_token(@lexer, current_token); + lexer_spaces(@source_code) + else + write_s("Lexical analysis error on \""); + write_c(source_code_head(source_code)); + write_s("\".\n") + end + end; + + return lexer +end; + +(* + Parser. +*) + +proc parse(tokens: ^Token, tokens_size: Word); +var + current_token: ^Token; + i: Word := 0u; +begin + while i < tokens_size do + current_token := tokens + i; + + case current_token^.kind of + LexerKind._if: + write_s("IF") + | LexerKind._then: + write_s("THEN") + | LexerKind._else: + write_s("ELSE") + | LexerKind._elsif: + write_s("ELSIF") + | LexerKind._while: + write_s("WHILE") + | LexerKind._do: + write_s("DO") + | LexerKind._proc: + write_s("PROC") + | LexerKind._begin: + write_s("BEGIN") + | LexerKind._end: + write_s("END") + | LexerKind._extern: + write_s("EXTERN") + | LexerKind._const: + write_s("CONST") + | LexerKind._var: + write_s("VAR") + | LexerKind._case: + write_s("CASE") + | LexerKind._of: + write_s("OF") + | LexerKind._type: + write_s("TYPE") + | LexerKind._record: + write_s("RECORD") + | LexerKind._union: + write_s("UNION") + | LexerKind.pipe: + write_s("|") + | LexerKind.to: + write_s("TO") + | LexerKind.boolean: + write_s("BOOLEAN<"); + write_b(current_token^.value.boolean_value); + write_c('>') + | LexerKind.null: + write_s("NIL") + | LexerKind.and: + write_s("&") + | LexerKind._or: + write_s("OR") + | LexerKind.not: + write_s("~") + | LexerKind._return: + write_s("RETURN") + | LexerKind._cast: + write_s("CAST") + | LexerKind.shift_left: + write_s("<<") + | LexerKind.shift_right: + write_s(">>") + | LexerKind.identifier: + write_c('<'); + write_s(current_token^.value.string); + write_c('>') + | LexerKind.trait: + write_c('#'); + write_s(current_token^.value.string) + | LexerKind.left_paren: + write_s("(") + | LexerKind.right_paren: + write_s(")") + | LexerKind.left_square: + write_s("[") + | LexerKind.right_square: + write_s("]") + | LexerKind.greater_equal: + write_s(">=") + | LexerKind.less_equal: + write_s("<=") + | LexerKind.greater_than: + write_s(">") + | LexerKind.less_than: + write_s("<") + | LexerKind.equal: + write_s("=") + | LexerKind.not_equal: + write_s("<>") + | LexerKind.semicolon: + write_c(';') + | LexerKind.dot: + write_c('.') + | LexerKind.comma: + write_c(',') + | LexerKind.plus: + write_c('+') + | LexerKind.minus: + write_c('-') + | LexerKind.multiplication: + write_c('*') + | LexerKind.division: + write_c('/') + | LexerKind.remainder: + write_c('%') + | LexerKind.assignment: + write_s(":=") + | LexerKind.colon: + write_c(':') + | LexerKind.hat: + write_c('^') + | LexerKind.at: + write_c('@') + | LexerKind.comment: + write_s("(* COMMENT *)") + | LexerKind.integer: + write_c('<'); + write_i(current_token^.value.int_value); + write_c('>') + | LexerKind.word: + write_c('<'); + write_i(current_token^.value.int_value); + write_s("u>") + | LexerKind.character: + write_c('<'); + write_i(cast(current_token^.value.char_value: Int)); + write_s("c>") + | LexerKind.string: + write_s("\"...\"") + | LexerKind._defer: + write_s("DEFER") + | LexerKind.exclamation: + write_c('!') + | LexerKind.arrow: + write_s("->") + | LexerKind._program: + write_s("PROGRAM") + | LexerKind._module: + write_s("MODULE") + | LexerKind._import: + write_s("IMPORT") + else + write_s("UNKNOWN<"); + write_i(cast(current_token^.kind: Int)); + write_c('>') + end; + write_c(' '); + + i := i + 1u + end; + write_c('\n') +end; + +(* + Compilation entry. +*) + +proc compile_in_stages(command_line: ^CommandLine, source_code: SourceCode) -> Int; +var + return_code: Int := 0; + lexer: Tokenizer; +begin + if command_line^.lex or command_line^.parse then + lexer := lexer_text(source_code) + end; + if command_line^.parse then + parse(lexer.data, lexer.length) + end; + + return return_code +end; + +proc process(argc: Int, argv: ^^Char) -> Int; +var + tokens: ^Token; + tokens_size: Word; + source_code: SourceCode; + command_line: ^CommandLine; + return_code: Int := 0; + source_file: ^SourceFile; +begin + command_line := parse_command_line(argc, argv); + if command_line = nil then + return_code := 2 + end; + + if return_code = 0 then + source_file := read_source(command_line^.input); + + if source_file = nil then + perror(command_line^.input); + return_code := 3 + end + end; + + if return_code = 0 then + defer + fclose(source_file^.handle) + end; + + source_code.position := TextLocation(1u, 1u); + source_code.input := cast(source_file: Pointer); + source_code.empty := source_file_empty; + source_code.head := source_file_head; + source_code.advance := source_file_advance; + + return_code := compile_in_stages(command_line, source_code) + end; + return return_code +end; + + return process(count, parameters) +end. |
