diff --git a/source/Parser.elna b/source/Parser.elna deleted file mode 100644 index 1225750..0000000 --- a/source/Parser.elna +++ /dev/null @@ -1,1174 +0,0 @@ -(* This Source Code Form is subject to the terms of the Mozilla Public License, - v. 2.0. If a copy of the MPL was not distributed with this file, You can - obtain one at https://mozilla.org/MPL/2.0/. *) -module; - -import cstdlib, common, Lexer; - -type - Parser = record - lexer: ^Lexer - end; - - AstLiteralKind* = ( - integer, - string, - null, - boolean - ); - AstLiteral* = record - kind: AstLiteralKind; - value: union - integer: Int; - string: String; - boolean: Bool - end - end; - - AstUnaryOperator* = ( - reference, - not, - minus - ); - AstBinaryOperator* = ( - sum, - subtraction, - multiplication, - division, - remainder, - equals, - not_equals, - less, - greater, - less_equal, - greater_equal, - disjunction, - conjunction, - exclusive_disjunction, - shift_left, - shift_right - ); - - AstExpressionKind* = ( - literal, - identifier, - array_access, - dereference, - field_access, - unary, - binary, - call - ); - AstExpression* = record - kind: AstExpressionKind - value: union - literal: ^AstLiteral; - identifier: Identifier; - reference: ^AstExpression; - array_access: record - array: ^AstExpression; - index: ^AstExpression - end; - field_access: record - aggregate: ^AstExpression; - field: Identifier - end; - unary: record - operator: AstUnaryOperator; - operand: ^AstExpression - end; - binary: record - operator: AstBinaryOperator; - lhs: ^AstExpression; - rhs: ^AstExpression - end; - call: record - callable: ^AstExpression; - argument_count: Word; - arguments: ^^AstExpression - end - end - end; - - ConditionalStatement = record - condition: ^AstExpression; - branch: AstCompoundStatement - end; - - AstStatementKind* = ( - if_statement, - while_statement, - assignment_statement, - return_statement, - call_statement - ); - AstStatement* = record - kind: AstStatementKind - value: union - if_statement: ConditionalStatement; - while_statement: ConditionalStatement; - assignment_statement: record - assignee: ^AstExpression; - assignment: ^AstExpression - end; - return_statement: ^AstExpression; - call_statement: ^AstExpression - end - end; - AstCompoundStatement* = record - count: Word; - statements: ^^AstStatement - end; - - AstImportStatement* = record - package: Identifier; - symbols: ^Identifier - end; - - AstConstantDeclaration* = record - constant_name: Identifier; - constant_value: Int - end; - - AstFieldDeclaration* = record - field_name: Identifier; - field_type: ^AstTypeExpression - end; - - AstTypeExpressionKind* = ( - named_expression, - record_expression, - enumeration_expression, - array_expression, - pointer_expression, - procedure_expression - ); - AstTypeExpression* = record - kind: AstTypeExpressionKind; - value: union - name: Identifier; - cases: ^Identifier; - target: ^AstTypeExpression; - fields: ^AstFieldDeclaration; - array_expression: record - base: ^AstTypeExpression; - length: Word - end; - parameters: ^^AstTypeExpression - end - end; - - AstTypedDeclaration* = record - identifier: Identifier; - type_expression: ^AstTypeExpression - end; - - AstVariableDeclaration* = record - variable_name: Identifier; - variable_type: ^AstTypeExpression - end; - - AstProcedureDeclaration* = record - name: Identifier; - parameter_count: Word; - parameters: ^AstTypedDeclaration; - return_type: ^AstTypeExpression; - constants: ^^AstConstantDeclaration; - variables: ^^AstVariableDeclaration; - statements: AstCompoundStatement - end; - - AstModule* = record - main: Bool; - imports: ^^AstImportStatement; - constants: ^^AstConstantDeclaration; - types: ^^AstTypedDeclaration; - variables: ^^AstVariableDeclaration; - procedures: ^^AstProcedureDeclaration; - statements: AstCompoundStatement - end; - -(* Calls lexer_lex() but skips the comments. *) -proc parser_lex(lexer: ^Lexer) -> LexerToken; -var - result: LexerToken; -begin - result := lexer_lex(lexer); - - while result.kind = lexerKindComment do - result := lexer_lex(lexer) - end; - - return result -end; - -proc parse_type_fields(parser: ^Parser) -> ^AstFieldDeclaration; -var - token: LexerToken; - field_declarations: ^AstFieldDeclaration; - field_count: Word; - current_field: ^AstFieldDeclaration; -begin - field_declarations := malloc(#size(AstFieldDeclaration)); - token := parser_lex(parser^.lexer); - field_count := 0; - - while token.kind <> lexerKindEnd do - field_count := field_count + 2u; - - field_declarations := realloc(field_declarations, #size(AstFieldDeclaration) * field_count); - field_count := field_count - 1u; - current_field := field_declarations + (field_count - 1u); - - token := parser_lex(parser^.lexer); - - current_field^.field_name := token.identifierKind; - - token := parser_lex(parser^.lexer); - current_field^.field_type := parse_type_expression(parser); - token := parser_lex(parser^.lexer); - - if token.kind = lexerKindSemicolon then - token := parser_lex(parser^.lexer) - end - end; - current_field := current_field + 1; - memset(current_field, 0, #size(AstFieldDeclaration)); - - return field_declarations -end; - -proc parse_record_type(parser: ^Parser) -> ^AstTypeExpression; -var - result: ^AstTypeExpression; -begin - NEW(result); - result^.kind := AstTypeExpressionKind.record_expression; - result^.fields := parse_type_fields(parser); - - return result -end; - -proc parse_pointer_type(parser: ^Parser) -> ^AstTypeExpression; -var - token: LexerToken; - result: ^AstTypeExpression; -begin - NEW(result); - result^.kind := AstTypeExpressionKind.pointer_expression; - - token := lexer_current(parser^.lexer); - - if token.kind = lexerKindPointer then - token := parser_lex(parser^.lexer) - end; - token := lexer_current(parser^.lexer); - result^.target := parse_type_expression(parser); - - return result -end; - -proc parse_array_type(parser: ^Parser) -> ^AstTypeExpression; -var - token: LexerToken; - buffer: [20]Char; - result: ^AstTypeExpression; -begin - NEW(result); - result^.kind := AstTypeExpressionKind.array_expression; - result^.array_expression.length := 0u; - - token := lexer_current(parser^.lexer); - - if token.kind = lexerKindArray then - token := parser_lex(parser^.lexer) - end; - if token.kind <> lexerKindOf then - token := parser_lex(parser^.lexer); - - result^.array_expression.length := token.integerKind; - - token := parser_lex(parser^.lexer) - end; - token := parser_lex(parser^.lexer); - result^.array_expression.base := parse_type_expression(parser); - - return result -end; - -proc parse_enumeration_type(parser: ^Parser) -> ^AstTypeExpression; -var - token: LexerToken; - result: ^AstTypeExpression; - current_case: ^Identifier; - case_count: Word; -begin - NEW(result); - result^.kind := AstTypeExpressionKind.enumeration_expression; - - case_count := 1u; - result^.cases := malloc(#size(Identifier) * 2); - token := parser_lex(parser^.lexer); - current_case := result^.cases; - current_case^ := token.identifierKind; - - token := parser_lex(parser^.lexer); - - while token.kind = lexerKindComma do - token := parser_lex(parser^.lexer); - - case_count := case_count + 2u; - - result^.cases := realloc(result^.cases, #size(Identifier) * case_count); - case_count := case_count - 1u; - current_case := result^.cases + (case_count - 1u); - current_case^ := token.identifierKind; - - token := parser_lex(parser^.lexer) - end; - current_case := current_case + 1; - memset(current_case, 0, #size(Identifier)); - - return result -end; - -proc parse_named_type(parser: ^Parser) -> ^AstTypeExpression; -var - token: LexerToken; - result: ^AstTypeExpression; -begin - token := lexer_current(parser^.lexer); - NEW(result); - - result^.kind := AstTypeExpressionKind.named_expression; - result^.name := token.identifierKind; - - return result -end; - -proc parse_procedure_type(parser: ^Parser) -> ^AstTypeExpression; -var - token: LexerToken; - result: ^AstTypeExpression; - current_parameter: ^^AstTypeExpression; - parameter_count: Word; -begin - parameter_count := 0u; - NEW(result); - result^.kind := AstTypeExpressionKind.procedure_expression; - - result^.parameters := malloc(#size(^AstTypeExpression)); - - token := parser_lex(parser^.lexer); - token := parser_lex(parser^.lexer); - - while token.kind <> lexerKindRightParen do - parameter_count := parameter_count + 2u; - - result^.parameters := realloc(result^.parameters, #size(^AstTypeExpression) * parameter_count); - parameter_count := parameter_count - 1u; - current_parameter := result^.parameters + (parameter_count - 1u); - - current_parameter^ := parse_type_expression(parser); - - token := parser_lex(parser^.lexer); - if token.kind = lexerKindComma then - token := parser_lex(parser^.lexer) - end - end; - current_parameter := result^.parameters + parameter_count; - current_parameter^ := nil; - - return result -end; - -proc parse_type_expression(parser: ^Parser) -> ^AstTypeExpression; -var - token: LexerToken; - result: ^AstTypeExpression; -begin - result := nil; - token := lexer_current(parser^.lexer); - - if token.kind = lexerKindRecord then - result := parse_record_type(parser) - end; - if token.kind = lexerKindLeftParen then - result := parse_enumeration_type(parser) - end; - if (token.kind = lexerKindArray) or (token.kind = lexerKindLeftSquare) then - result := parse_array_type(parser) - end; - if token.kind = lexerKindHat then - result := parse_pointer_type(parser) - end; - if token.kind = lexerKindProc then - result := parse_procedure_type(parser) - end; - if token.kind = lexerKindIdentifier then - result := parse_named_type(parser) - end; - return result -end; - -proc parse_type_declaration(parser: ^Parser) -> ^AstTypedDeclaration; -var - token: LexerToken; - result: ^AstTypedDeclaration; -begin - token := lexer_current(parser^.lexer); - - NEW(result); - result^.identifier := token.identifierKind; - - token := parser_lex(parser^.lexer); - token := parser_lex(parser^.lexer); - - result^.type_expression := parse_type_expression(parser); - token := parser_lex(parser^.lexer); - - return result -end; - -proc parse_type_part(parser: ^Parser) -> ^^AstTypedDeclaration; -var - token: LexerToken; - result: ^^AstTypedDeclaration; - current_declaration: ^^AstTypedDeclaration; - declaration_count: Word; -begin - token := lexer_current(parser^.lexer); - - result := malloc(#size(^AstTypedDeclaration)); - current_declaration := result; - declaration_count := 0u; - - if token.kind = lexerKindType then - token := parser_lex(parser^.lexer); - - while token.kind = lexerKindIdentifier do - declaration_count := declaration_count + 1u; - - result := realloc(result, #size(^AstTypedDeclaration) * (declaration_count + 1)); - current_declaration := result + (declaration_count - 1u); - - current_declaration^ := parse_type_declaration(parser); - token := parser_lex(parser^.lexer) - end - end; - if declaration_count <> 0u then - current_declaration := current_declaration + 1 - end; - current_declaration^ := nil; - - return result -end; - -proc parse_variable_declaration(parser: ^Parser) -> ^AstVariableDeclaration; -var - token: LexerToken; - result: ^AstVariableDeclaration; -begin - NEW(result); - - token := lexer_current(parser^.lexer); - result^.variable_name := token.identifierKind; - - token := parser_lex(parser^.lexer); - - token := parser_lex(parser^.lexer); - result^.variable_type := parse_type_expression(parser); - - token := parser_lex(parser^.lexer); - return result -end; - -proc parse_variable_part(parser: ^Parser) -> ^^AstVariableDeclaration; -var - token: LexerToken; - result: ^^AstVariableDeclaration; - current_declaration: ^^AstVariableDeclaration; - declaration_count: Word; -begin - token := lexer_current(parser^.lexer); - - result := malloc(#size(^AstVariableDeclaration)); - current_declaration := result; - declaration_count := 0u; - - if token.kind = lexerKindVar then - token := parser_lex(parser^.lexer); - - while token.kind = lexerKindIdentifier do - declaration_count := declaration_count + 1u; - - result := realloc(result, #size(^AstVariableDeclaration) * (declaration_count + 1)); - current_declaration := result + (declaration_count - 1u); - - current_declaration^ := parse_variable_declaration(parser); - token := parser_lex(parser^.lexer) - end - end; - if declaration_count <> 0 then - current_declaration := current_declaration + 1 - end; - current_declaration^ := nil; - - return result -end; - -proc parse_constant_declaration(parser: ^Parser) -> ^AstConstantDeclaration; -var - token: LexerToken; - result: ^AstConstantDeclaration; -begin - NEW(result); - - token := lexer_current(parser^.lexer); - result^.constant_name := token.identifierKind; - - token := parser_lex(parser^.lexer); - - token := parser_lex(parser^.lexer); - result^.constant_value := token.integerKind; - - token := parser_lex(parser^.lexer); - - return result -end; - -proc parse_constant_part(parser: ^Parser) -> ^^AstConstantDeclaration; -var - token: LexerToken; - result: ^^AstConstantDeclaration; - current_declaration: ^^AstConstantDeclaration; - declaration_count: Word; -begin - token := lexer_current(parser^.lexer); - - result := malloc(#size(^AstConstantDeclaration)); - current_declaration := result; - declaration_count := 0u; - - if token.kind = lexerKindConst then - token := parser_lex(parser^.lexer); - - while token.kind = lexerKindIdentifier do - declaration_count := declaration_count + 1u; - - result := realloc(result, #size(^AstConstantDeclaration) * (declaration_count + 1u)); - current_declaration := result + (declaration_count - 1u); - - current_declaration^ := parse_constant_declaration(parser); - token := parser_lex(parser^.lexer) - end - end; - if declaration_count <> 0 then - current_declaration := current_declaration + 1 - end; - current_declaration^ := nil; - - return result -end; - -proc parse_import_statement(parser: ^Parser) -> ^AstImportStatement; -var - result: ^AstImportStatement; - token: LexerToken; - symbol_count: Word; - current_symbol: ^Identifier; -begin - NEW(result); - symbol_count := 1u; - - token := parser_lex(parser^.lexer); - result^.package := token.identifierKind; - - token := parser_lex(parser^.lexer); - result^.symbols := malloc(#size(Identifier) * 2); - - current_symbol := result^.symbols; - - token := parser_lex(parser^.lexer); - current_symbol^ := token.identifierKind; - - token := parser_lex(parser^.lexer); - while token.kind <> lexerKindSemicolon do - token := parser_lex(parser^.lexer); - symbol_count := symbol_count + 1u; - - result^.symbols := realloc(result^.symbols, #size(Identifier) * (symbol_count + 1u)); - current_symbol := result^.symbols + (symbol_count - 1u); - - current_symbol^ := token.identifierKind; - token := parser_lex(parser^.lexer) - end; - current_symbol := current_symbol + 1; - memset(current_symbol, 0, #size(Identifier)); - - token := parser_lex(parser^.lexer); - - return result -end; - -proc parse_import_part(parser: ^Parser) -> ^^AstImportStatement; -var - token: LexerToken; - import_statement: ^^AstImportStatement; - result: ^^AstImportStatement; - import_count: Word; -begin - token := lexer_current(parser^.lexer); - result := malloc(#size(^AstImportStatement)); - import_statement := result; - import_count := 0u; - - while token.kind = lexerKindFrom do - import_count := import_count + 1u; - - result := realloc(result, #size(^AstImportStatement) * (import_count + 1u)); - import_statement := result + (import_count - 1u); - - import_statement^ := parse_import_statement(parser); - token := lexer_current(parser^.lexer) - end; - if import_count > 0u then - import_statement := import_count + 1 - end; - import_statement^ := nil; - - return result -end; - -proc parse_literal(parser: ^Parser) -> ^AstLiteral; -var - literal: ^AstLiteral; - token: LexerToken; -begin - literal := nil; - token := lexer_current(parser^.lexer); - - if token.kind = lexerKindInteger then - NEW(literal); - - literal^.kind := AstLiteralKind.integer; - literal^.integer := token.integerKind - end; - if (token.kind = lexerKindCharacter) or (token.kind = lexerKindString) then - NEW(literal); - - literal^.kind := AstLiteralKind.string; - literal^.string := token.stringKind - end; - if token.kind = lexerKindNull then - NEW(literal); - - literal^.kind := AstLiteralKind.null - end; - if token.kind = lexerKindBoolean then - NEW(literal); - - literal^.kind := AstLiteralKind.boolean; - literal^.boolean := token.booleanKind - end; - if literal <> nil then - token := parser_lex(parser^.lexer) - end; - - return literal -end; - -proc parse_factor(parser: ^Parser) -> ^AstExpression; -var - next_token: LexerToken; - result: ^AstExpression; - literal: ^AstLiteral; -begin - result := nil; - next_token := lexer_current(parser^.lexer); - - literal := parse_literal(parser); - - if (result = nil) & (literal <> nil) then - NEW(result); - - result^.kind := AstExpressionKind.literal; - result^.literal := literal - end; - if (result = nil) & (next_token.kind = lexerKindMinus) then - NEW(result); - next_token := parser_lex(parser^.lexer); - - result^.kind := AstExpressionKind.unary; - result^.unary.operator := AstUnaryOperator.minus; - result^.unary.operand := parse_factor(parser) - end; - if (result = nil) & (next_token.kind = lexerKindTilde) then - NEW(result); - next_token := parser_lex(parser^.lexer); - - result^.kind := AstExpressionKind.unary; - result^.unary.operator := AstUnaryOperator.not; - result^.unary.operand := parse_factor(parser) - end; - if (result = nil) & (next_token.kind = lexerKindLeftParen) then - next_token := parser_lex(parser^.lexer); - result := parse_expression(parser); - if result <> nil then - next_token := parser_lex(parser^.lexer) - end - end; - if (result = nil) & (next_token.kind = lexerKindIdentifier) then - NEW(result); - - result^.kind := AstExpressionKind.identifier; - result^.identifier := next_token.identifierKind; - - next_token := parser_lex(parser^.lexer) - end; - - return result -end; - -proc parse_designator(parser: ^Parser) -> ^AstExpression; -var - next_token: LexerToken; - inner_expression: ^AstExpression; - designator: ^AstExpression; - arguments: ^^AstExpression; - handled: Bool; -begin - designator := parse_factor(parser); - handled := designator <> nil; - next_token := lexer_current(parser^.lexer); - - while handled do - inner_expression := designator; - handled := false; - - if ~handled & (next_token.kind = lexerKindHat) then - NEW(designator); - - designator^.kind := AstExpressionKind.dereference; - designator^.reference := inner_expression; - - next_token := parser_lex(parser^.lexer); - handled := true - end; - if ~handled & (next_token.kind = lexerKindLeftSquare) then - NEW(designator); - next_token := parser_lex(parser^.lexer); - - designator^.kind := AstExpressionKind.array_access; - designator^.array_access.array := inner_expression; - designator^.array_access.index := parse_expression(parser); - - next_token := parser_lex(parser^.lexer); - handled := true - end; - if ~handled & (next_token.kind = lexerKindDot) then - NEW(designator); - next_token := parser_lex(parser^.lexer); - - designator^.kind := AstExpressionKind.field_access; - designator^.field_access.aggregate := inner_expression; - designator^.field_access.field := next_token.identifierKind; - - next_token := parser_lex(parser^.lexer); - handled := true - end; - if ~handled & (next_token.kind = lexerKindLeftParen) then - NEW(designator); - next_token := parser_lex(parser^.lexer); - - designator^.kind := AstExpressionKind.call; - designator^.call.callable := inner_expression; - designator^.call.argument_count := 0; - designator^.call.arguments := nil; - - if next_token.kind <> lexerKindRightParen then - designator^.arguments := malloc(#size(^AstExpression)); - designator^.argument_count := 1; - designator^.arguments^ := parse_expression(parser); - - next_token := lexer_current(parser^.lexer); - - while next_token.kind = lexerKindComma do - next_token := parser_lex(parser^.lexer); - - designator^.argument_count := designator^.argument_count + 1; - designator^.arguments := realloc(designator^.arguments, #size(^AstExpression) * designator^.argument_count); - arguments := designator^.arguments + (designator^.argument_count - 1u); - arguments^ := parse_expression(parser); - - next_token := lexer_current(parser^.lexer) - end - end; - - next_token := parser_lex(parser^.lexer); - handled := true - end - end; - - return designator -end; - -proc parse_binary_expression(parser: ^Parser, left: ^AstExpression, operator: AstBinaryOperator) -> ^AstExpression; -var - next_token: LexerToken; - result: ^AstExpression; - right: ^AstExpression; -begin - next_token := parser_lex(parser^.lexer); - right := parse_designator(parser); - result := nil; - - if right <> nil then - NEW(result); - result^.kind := AstExpressionKind.binary; - result^.binary.operator := operator; - result^.binary.lhs := left; - result^.binary.rhs := right - end; - - return result -end; - -proc parse_expression(parser: ^Parser) -> ^AstExpression; -var - next_token: LexerToken; - left: ^AstExpression; - result: ^AstExpression; - written_bytes: Word; -begin - left := parse_designator(parser); - result := nil; - next_token := lexer_current(parser^.lexer); - - if left <> nil then - if (result = nil) & (next_token.kind = lexerKindNotEqual) then - result := parse_binary_expression(parser, left, AstBinaryOperator.not_equals) - end; - if (result = nil) & (next_token.kind = lexerKindEqual) then - result := parse_binary_expression(parser, left, AstBinaryOperator.equals) - end; - if (result = nil) & (next_token.kind = lexerKindGreaterThan) then - result := parse_binary_expression(parser, left, AstBinaryOperator.greater) - end; - if (result = nil) & (next_token.kind = lexerKindLessThan) then - result := parse_binary_expression(parser, left, AstBinaryOperator.less) - end; - if (result = nil) & (next_token.kind = lexerKindGreaterEqual) then - result := parse_binary_expression(parser, left, AstBinaryOperator.greater_equal) - end; - if (result = nil) & (next_token.kind = lexerKindLessEqual) then - result := parse_binary_expression(parser, left, AstBinaryOperator.less_equal) - end; - if (result = nil) & (next_token.kind = lexerKindAnd) then - result := parse_binary_expression(parser, left, AstBinaryOperator.conjunction) - end; - if (result = nil) & (next_token.kind = lexerKindOr) then - result := parse_binary_expression(parser, left, AstBinaryOperator.disjunction) - end; - if (result = nil) & (next_token.kind = lexerKindMinus) then - result := parse_binary_expression(parser, left, AstBinaryOperator.subtraction) - end; - if (result = nil) & (next_token.kind = lexerKindPlus) then - result := parse_binary_expression(parser, left, AstBinaryOperator.sum) - end; - if (result = nil) & (next_token.kind = lexerKindAsterisk) then - result := parse_binary_expression(parser, left, AstBinaryOperator.multiplication) - end - end; - if (result = nil) & (left <> nil) then - result := left - end; - - return result -end; - -proc parse_return_statement(parser: ^Parser) -> ^AstStatement; -var - token: LexerToken; - result: ^AstStatement; -begin - NEW(result); - result^.kind := AstStatementKind.return_statement; - - token := parser_lex(parser^.lexer); - result^.return_statement := parse_expression(parser); - - return result -end; - -proc parse_assignment_statement(parser: ^Parser, assignee: ^AstExpression) -> ^AstStatement; -var - token: LexerToken; - result: ^AstStatement; -begin - NEW(result); - result^.kind := AstStatementKind.assignment_statement; - result^.assignment_statement.assignee := assignee; - - token := parser_lex(parser^.lexer); - result^.assignment_statement.assignment := parse_expression(parser); - - return result -end; - -proc parse_call_statement(parser: ^Parser, call: ^AstExpression) -> ^AstStatement; -var - result: ^AstStatement; -begin - NEW(result); - result^.kind := AstStatementKind.call_statement; - result^.call_statement := call; - - return result -end; - -proc parse_compound_statement(parser: ^Parser) -> AstCompoundStatement; -var - result: AstCompoundStatement; - token: LexerToken; - current_statement: ^^AstStatement; - old_count: Word; -begin - result.count := 0u; - result.statements := nil; - - token := lexer_current(parser^.lexer); - - while token.kind <> lexerKindEnd do - old_count := result.count; - result.count := result.count + 1u; - - result.statements := realloc(result.statements, #size(^AstStatement) * result.count); - current_statement := result.statements + old_count; - - current_statement^ := parse_statement(parser); - - token := lexer_current(parser^.lexer) - end; - - return result -end; - -proc parse_statement(parser: ^Parser) -> ^AstStatement; -var - token: LexerToken; - statement: ^AstStatement; - designator: ^AstExpression; -begin - statement := nil; - token := parser_lex(parser^.lexer); - - if token.kind = lexerKindIf then - statement := parse_if_statement(parser) - end; - if token.kind = lexerKindWhile then - statement := parse_while_statement(parser) - end; - if token.kind = lexerKindReturn then - statement := parse_return_statement(parser) - end; - if token.kind = lexerKindIdentifier then - designator := parse_designator(parser); - token := lexer_current(parser^.lexer); - - if token.kind = lexerKindAssignment then - statement := parse_assignment_statement(parser, designator) - end; - if token.kind <> lexerKindAssignment then - statement := parse_call_statement(parser, designator) - end - end; - return statement -end; - -proc parse_if_statement(parser: ^Parser) -> ^AstStatement; -var - token: LexerToken; - result: ^AstStatement; -begin - NEW(result); - result^.kind := AstStatementKind.if_statement; - - token := parser_lex(parser^.lexer); - result^.if_statement.condition := parse_expression(parser); - result^.if_statement.branch := parse_compound_statement(parser); - - token := parser_lex(parser^.lexer); - return result -end; - -proc parse_while_statement(parser: ^Parser) -> ^AstStatement; -var - token: LexerToken; - result: ^AstStatement; -begin - NEW(result); - result^.kind := AstStatementKind.while_statement; - - token := parser_lex(parser^.lexer); - result^.while_statement.condition := parse_expression(parser); - result^.while_statement.body := parse_compound_statement(parser); - - token := parser_lex(parser^.lexer); - return result -end; - -proc parse_statement_part(parser: ^Parser) -> AstCompoundStatement; -var - token: LexerToken; - compound: AstCompoundStatement; -begin - compound.count := 0; - compound.statements := nil; - token := lexer_current(parser^.lexer); - - if token.kind = lexerKindBegin then - compound := parse_compound_statement(parser) - end; - - return compound -end; - -proc parse_procedure_heading(parser: ^Parser) -> ^AstProcedureDeclaration; -var - token: LexerToken; - declaration: ^AstProcedureDeclaration; - parameter_index: Word; - current_parameter: ^AstTypedDeclaration; -begin - NEW(declaration); - - token := parser_lex(parser^.lexer); - declaration^.name := token.identifierKind; - - token := parser_lex(parser^.lexer); - - declaration^.parameters := nil; - declaration^.parameter_count := 0u; - - token := parser_lex(parser^.lexer); - while token.kind <> lexerKindRightParen do - parameter_index := declaration^.parameter_count; - declaration^.parameter_count := declaration^.parameter_count + 1; - declaration^.parameters := realloc(declaration^.parameters, #size(AstTypedDeclaration) * declaration^.parameter_count); - - current_parameter := declaration^.parameters + parameter_index; - - current_parameter^.identifier := token.identifierKind; - - token := parser_lex(parser^.lexer); - token := parser_lex(parser^.lexer); - - current_parameter^.type_expression := parse_type_expression(parser); - - token := parser_lex(parser^.lexer); - if token.kind = lexerKindComma then - token := parser_lex(parser^.lexer) - end - end; - token := parser_lex(parser^.lexer); - declaration^.return_type := nil; - - (* Check for the return type and write it. *) - if token.kind = lexerKindArrow then - token := parser_lex(parser^.lexer); - declaration^.return_type := parse_type_expression(parser); - token := parser_lex(parser^.lexer) - end; - token := parser_lex(parser^.lexer); - - return declaration -end; - -proc parse_procedure_declaration(parser: ^Parser) -> ^AstProcedureDeclaration; -var - token: LexerToken; - declaration: ^AstProcedureDeclaration; -begin - declaration := parse_procedure_heading(parser); - - declaration^.constants := parse_constant_part(parser); - declaration^.variables := parse_variable_part(parser); - declaration^.statements := parse_statement_part(parser); - - token := parser_lex(parser^.lexer); - token := parser_lex(parser^.lexer); - - return declaration -end; - -proc parse_procedure_part(parser: ^Parser) -> ^^AstProcedureDeclaration; -var - token: LexerToken; - current_declaration: ^^AstProcedureDeclaration; - result: ^^AstProcedureDeclaration; - declaration_count: Word; - declaration_index: Word; -begin - token := lexer_current(parser^.lexer); - declaration_count := 0u; - declaration_index := 0u; - - result := malloc(#size(^AstProcedureDeclaration)); - - while token.kind = lexerKindProc do - declaration_count := declaration_count + 1u; - result := realloc(result, #size(^AstProcedureDeclaration) * (declaration_count + 1)); - current_declaration := result + declaration_index; - - current_declaration^ := parse_procedure_declaration(parser); - token := lexer_current(parser^.lexer); - declaration_index := declaration_count - end; - current_declaration := result + declaration_index; - current_declaration^ := nil; - - return result -end; - -proc parse_module(parser: ^Parser) -> ^AstModule; -var - token: LexerToken; - result: ^AstModule; -begin - NEW(result); - token := parser_lex(parser^.lexer); - result^.main := true; - - if token.kind = lexerKindModule then - result^.main := false - end; - token := parser_lex(parser^.lexer); - - (* Write the module body. *) - token := parser_lex(parser^.lexer); - - result^.imports := parse_import_part(parser); - result^.constants := parse_constant_part(parser); - result^.types := parse_type_part(parser); - - result^.variables := parse_variable_part(parser); - result^.procedures := parse_procedure_part(parser); - result^.statements := parse_statement_part(parser); - - token := parser_lex(parser^.lexer); - token := parser_lex(parser^.lexer); - - return result -end; - -proc parse*(lexer: ^Lexer) -> ^AstModule; -var - parser: Parser; -begin - parser.lexer := lexer; - - return parse_module(@parser) -end; - -end. diff --git a/source/cctype.elna b/source/cctype.elna deleted file mode 100644 index 3906cd1..0000000 --- a/source/cctype.elna +++ /dev/null @@ -1,14 +0,0 @@ -(* This Source Code Form is subject to the terms of the Mozilla Public License, - v. 2.0. If a copy of the MPL was not distributed with this file, You can - obtain one at https://mozilla.org/MPL/2.0/. *) -module; - -proc isdigit*(c: Int ) -> Int; extern; -proc isalnum*(c: Int) -> Int; extern; -proc isalpha*(c: Int) -> Int; extern; -proc isspace*(c: Int) -> Int; extern; - -proc tolower*(c: Int) -> Int; extern; -proc toupper*(c: Int) -> Int; extern; - -end. diff --git a/source/command_line_interface.elna b/source/command_line_interface.elna deleted file mode 100644 index 040fdeb..0000000 --- a/source/command_line_interface.elna +++ /dev/null @@ -1,93 +0,0 @@ -(* This Source Code Form is subject to the terms of the Mozilla Public License, - v. 2.0. If a copy of the MPL was not distributed with this file, You can - obtain one at https://mozilla.org/MPL/2.0/. *) - -(* - Command line handling. -*) -module; - -import cstdlib, cstring, common; - -type - CommandLine* = record - input: ^Char; - output: ^Char; - lex: Bool; - parse: Bool - end; - -proc parse_command_line*(argc: Int, argv: ^^Char) -> ^CommandLine; -var - parameter: ^Char; - i: Int; - result: ^CommandLine; - parsed: Bool; -begin - i := 1; - result := cast(malloc(#size(CommandLine)): ^CommandLine); - result^.lex := false; - result^.parse := false; - result^.input := nil; - result^.output := nil; - - while i < argc & result <> nil do - parameter := (argv + i)^; - parsed := false; - - if strcmp(parameter, "--lex\0".ptr) = 0 then - parsed := true; - result^.lex := true - end; - if strcmp(parameter, "--parse\0".ptr) = 0 then - parsed := true; - result^.parse := true - end; - if strcmp(parameter, "-o\0".ptr) = 0 then - i := i + 1; - - if i = argc then - write_s("Fatal error: expecting a file name following -o."); - result := nil - end; - if i < argc then - parameter := (argv + i)^; - result^.output := parameter - end; - parsed := true - end; - if (parameter^ <> '-') & ~parsed then - parsed := true; - - if result^.input <> nil then - write_s("Fatal error: only one source file can be compiled at once. First given \""); - write_z(result^.input); - write_s("\", then \""); - write_z(parameter); - write_s("\".\n"); - result := nil - end; - if result <> nil then - result^.input := parameter - end - end; - if ~parsed then - write_s("Fatal error: unknown command line options: "); - - write_z(parameter); - write_s(".\n"); - - result := nil - end; - - i := i + 1 - end; - if result <> nil & result^.input = nil then - write_s("Fatal error: no input files.\n"); - result := nil - end; - - return result -end; - -end. diff --git a/source/common.elna b/source/common.elna deleted file mode 100644 index e7b30ca..0000000 --- a/source/common.elna +++ /dev/null @@ -1,72 +0,0 @@ -(* This Source Code Form is subject to the terms of the Mozilla Public License, - v. 2.0. If a copy of the MPL was not distributed with this file, You can - obtain one at https://mozilla.org/MPL/2.0/. *) -module; - -import cstring, cstdio; - -type - Identifier = [256]Char; - TextLocation* = record - line: Word; - column: Word - end; - -proc write*(fd: Int, buf: Pointer, Word: Int) -> Int; extern; - -proc write_s*(value: String); -begin - (* fwrite(cast(value.ptr: Pointer), value.length, 1u, stdout) *) - write(1, cast(value.ptr: Pointer), cast(value.length: Int)) -end; - -proc write_z*(value: ^Char); -begin - write(1, cast(value: Pointer), cast(strlen(value): Int)) -end; - -proc write_b*(value: Bool); -begin - if value then - write_s("true") - else - write_s("false") - end -end; - -proc write_c*(value: Char); -begin - putchar(cast(value: Int)); - fflush(nil) -end; - -proc write_i*(value: Int); -var - digit: Int; - n: Word; - buffer: [10]Char; -begin - n := 10u; - - if value = 0 then - write_c('0') - end; - while value <> 0 do - digit := value % 10; - value := value / 10; - - buffer[n] := cast(cast('0': Int) + digit: Char); - n := n - 1u - end; - while n < 10u do - n := n + 1u; - write_c(buffer[n]) - end -end; - -proc write_u*(value: Word); -begin - write_i(cast(value: Int)) -end; - -end. diff --git a/source/cstdio.elna b/source/cstdio.elna deleted file mode 100644 index c7507ff..0000000 --- a/source/cstdio.elna +++ /dev/null @@ -1,29 +0,0 @@ -(* This Source Code Form is subject to the terms of the Mozilla Public License, - v. 2.0. If a copy of the MPL was not distributed with this file, You can - obtain one at https://mozilla.org/MPL/2.0/. *) -module; - -type - FILE* = record end; - -var - stdin*: ^FILE := extern; - stdout*: ^FILE := extern; - stderr*: ^FILE := extern; - -proc fopen*(pathname: ^Char, mode: ^Char) -> ^FILE; extern; -proc fclose*(stream: ^FILE) -> Int; extern; -proc fseek*(stream: ^FILE, off: Int, whence: Int) -> Int; extern; -proc rewind*(stream: ^FILE); extern; -proc ftell*(stream: ^FILE) -> Int; extern; -proc fflush*(stream: ^FILE) -> Int; extern; - -proc fread*(ptr: Pointer, size: Word, nmemb: Word, stream: ^FILE) -> Word; extern; -proc fwrite*(ptr: Pointer, size: Word, nitems: Word, stream: ^FILE) -> Word; extern; - -proc perror(s: ^Char); extern; - -proc puts(s: ^Char) -> Int; extern; -proc putchar(c: Int) -> Int; extern; - -end. diff --git a/source/cstdlib.elna b/source/cstdlib.elna deleted file mode 100644 index da2029c..0000000 --- a/source/cstdlib.elna +++ /dev/null @@ -1,15 +0,0 @@ -(* This Source Code Form is subject to the terms of the Mozilla Public License, - v. 2.0. If a copy of the MPL was not distributed with this file, You can - obtain one at https://mozilla.org/MPL/2.0/. *) -module; - -proc malloc(size: Word) -> Pointer; extern; -proc free(ptr: Pointer); extern; -proc calloc(nmemb: Word, size: Word) -> Pointer; extern; -proc realloc(ptr: Pointer, size: Word) -> Pointer; extern; - -proc atoi(str: ^Char) -> Int; extern; - -proc exit(code: Int) -> !; extern; - -end. diff --git a/source/cstring.elna b/source/cstring.elna deleted file mode 100644 index 24d852a..0000000 --- a/source/cstring.elna +++ /dev/null @@ -1,15 +0,0 @@ -(* This Source Code Form is subject to the terms of the Mozilla Public License, - v. 2.0. If a copy of the MPL was not distributed with this file, You can - obtain one at https://mozilla.org/MPL/2.0/. *) -module; - -proc memset(ptr: Pointer, c: Int, n: Word) -> ^Char; extern; -proc memcpy(dst: Pointer, src: Pointer, n: Word); extern; - -proc strcmp(s1: ^Char, s2: ^Char) -> Int; extern; -proc strncmp(s1: ^Char, s2: ^Char, n: Word) -> Int; extern; -proc strncpy(dst: ^Char, src: ^Char, dsize: Word) -> ^Char; extern; -proc strcpy(dst: ^Char, src: ^Char) -> ^Char; extern; -proc strlen(ptr: ^Char) -> Word; extern; - -end. diff --git a/source/main.elna b/source/main.elna deleted file mode 100644 index dae045b..0000000 --- a/source/main.elna +++ /dev/null @@ -1,841 +0,0 @@ -(* This Source Code Form is subject to the terms of the Mozilla Public License, - v. 2.0. If a copy of the MPL was not distributed with this file, You can - obtain one at https://mozilla.org/MPL/2.0/. *) -program; - -import cstdio, cctype, common, command_line_interface, lexer; - -type - SourceFile* = record - buffer: [1024]Char; - handle: ^FILE; - size: Word; - index: Word - end; - StringBuffer* = record - data: Pointer; - size: Word; - capacity: Word - end; - SourceCode = record - position: TextLocation; - - input: Pointer; - empty: proc(Pointer) -> Bool; - advance: proc(Pointer); - head: proc(Pointer) -> Char - end; - Token* = record - kind: LexerKind; - value: union - int_value: Int; - string: String; - boolean_value: Bool; - char_value: Char - end - end; - Tokenizer* = record - length: Word; - data: ^Token - end; - -(* - Standard procedures. -*) -proc reallocarray(ptr: Pointer, n: Word, size: Word) -> Pointer; - return realloc(ptr, n * size) -end; - -proc substring(string: String, start: Word, count: Word) -> String; - return String(string.ptr + start, count) -end; - -proc open_substring(string: String, start: Word) -> String; - return substring(string, start, string.length - start) -end; - -proc string_dup(origin: String) -> String; -var - copy: ^Char; -begin - copy := cast(malloc(origin.length): ^Char); - strncpy(copy, origin.ptr, origin.length); - - return String(copy, origin.length) -end; - -proc string_buffer_new() -> StringBuffer; -var - result: StringBuffer; -begin - result.capacity := 64u; - result.data := malloc(result.capacity); - result.size := 0u; - - return result -end; - -proc string_buffer_push(buffer: ^StringBuffer, char: Char); -begin - if buffer^.size >= buffer^.capacity then - buffer^.capacity := buffer^.capacity + 1024u; - buffer^.data := realloc(buffer^.data, buffer^.capacity) - end; - cast(buffer^.data + buffer^.size: ^Char)^ := cast(char: Char); - buffer^.size := buffer^.size + 1u -end; - -proc string_buffer_pop(buffer: ^StringBuffer, count: Word); -begin - buffer^.size := buffer^.size - count -end; - -proc string_buffer_clear(buffer: ^StringBuffer) -> String; -var - result: String; -begin - result := String(cast(buffer^.data: ^Char), buffer^.size); - buffer^.size := 0u; - return result -end; - -(* - Source code stream procedures. -*) - -proc read_source(filename: ^Char) -> ^SourceFile; -var - result: ^SourceFile; - file_handle: ^FILE; -begin - file_handle := fopen(filename, "rb\0".ptr); - - if file_handle <> nil then - result := cast(malloc(#size(SourceFile)): ^SourceFile); - result^.handle := file_handle; - result^.size := 0u; - result^.index := 1u - end; - return result -end; - -proc source_file_empty(source_input: Pointer) -> Bool; -var - source_file: ^SourceFile; -begin - source_file := cast(source_input: ^SourceFile); - - if source_file^.index > source_file^.size then - source_file^.size := fread(cast(@source_file^.buffer: Pointer), 1u, 1024u, source_file^.handle); - source_file^.index := 1u - end; - - return source_file^.size = 0u -end; - -proc source_file_head(source_input: Pointer) -> Char; -var - source_file: ^SourceFile; -begin - source_file := cast(source_input: ^SourceFile); - - return source_file^.buffer[source_file^.index] -end; - -proc source_file_advance(source_input: Pointer); -var - source_file: ^SourceFile; -begin - source_file := cast(source_input: ^SourceFile); - - source_file^.index := source_file^.index + 1u -end; - -proc source_code_empty(source_code: ^SourceCode) -> Bool; - return source_code^.empty(source_code^.input) -end; - -proc source_code_head(source_code: SourceCode) -> Char; - return source_code.head(source_code.input) -end; - -proc source_code_advance(source_code: ^SourceCode); -begin - source_code^.advance(source_code^.input); - source_code^.position.column := source_code^.position.column -end; - -proc source_code_break(source_code: ^SourceCode); -begin - source_code^.position.line := source_code^.position.line + 1u; - source_code^.position.column := 0u -end; - -proc source_code_expect(source_code: ^SourceCode, expected: Char) -> Bool; - return ~source_code_empty(source_code) & source_code_head(source_code^) = expected -end; - -(* - Token procedures. -*) - -proc lexer_escape(escape: Char, result: ^Char) -> Bool; -var - successful: Bool; -begin - case escape of - 'n': - result^ := '\n'; - successful := true - | 'a': - result^ := '\a'; - successful := true - | 'b': - result^ := '\b'; - successful := true - | 't': - result^ := '\t'; - successful := true - | 'f': - result^ := '\f'; - successful := true - | 'r': - result^ := '\r'; - successful := true - | 'v': - result^ := '\v'; - successful := true - | '\\': - result^ := '\\'; - successful := true - | '\'': - result^ := '\''; - successful := true - | '"': - result^ := '"'; - successful := true - | '?': - result^ := '\?'; - successful := true - | '0': - result^ := '\0'; - successful := true - else - successful := false - end; - return successful -end; - -(* Skip spaces. *) -proc lexer_spaces(source_code: ^SourceCode); -var - current: Char; -begin - while ~source_code_empty(source_code) & isspace(cast(source_code_head(source_code^): Int)) <> 0 do - current := source_code_head(source_code^); - - if current = '\n' then - source_code_break(source_code) - end; - source_code_advance(source_code) - end -end; - -(* Checker whether the character is allowed in an identificator. *) -proc lexer_is_ident(char: Char) -> Bool; - return isalnum(cast(char: Int)) <> 0 or char = '_' -end; - -proc lexer_identifier(source_code: ^SourceCode, token_content: ^StringBuffer); -var - content_length: Word; -begin - while ~source_code_empty(source_code) & lexer_is_ident(source_code_head(source_code^)) do - string_buffer_push(token_content, source_code_head(source_code^)); - source_code_advance(source_code) - end -end; - -proc lexer_comment(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool; -var - trailing: Word; -begin - trailing := 0u; - - while ~source_code_empty(source_code) & trailing < 2u do - if source_code_head(source_code^) = '*' then - string_buffer_push(token_content, '*'); - trailing := 1u - elsif source_code_head(source_code^) = ')' & trailing = 1u then - string_buffer_pop(token_content, 1u); - trailing := 2u - else - string_buffer_push(token_content, source_code_head(source_code^)); - trailing := 0u - end; - source_code_advance(source_code) - end; - - return trailing = 2u -end; - -proc lexer_character(source_code: ^SourceCode, token_content: ^Char) -> Bool; -var - successful: Bool; -begin - successful := ~source_code_empty(source_code); - - if successful then - if source_code_head(source_code^) = '\\' then - source_code_advance(source_code); - - successful := ~source_code_empty(source_code) & lexer_escape(source_code_head(source_code^), token_content) - else - token_content^ := source_code_head(source_code^); - successful := true - end - end; - if successful then - source_code_advance(source_code) - end; - return successful -end; - -proc lexer_string(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool; -var - token_end, constructed_string: ^Char; - token_length: Word; - is_valid: Bool := true; - next_char: Char; -begin - while is_valid & ~source_code_empty(source_code) & source_code_head(source_code^) <> '"' do - is_valid := lexer_character(source_code, @next_char); - - if is_valid then - string_buffer_push(token_content, next_char) - end - end; - - if is_valid & source_code_expect(source_code, '"') then - source_code_advance(source_code) - else - is_valid := false - end; - return is_valid -end; - -proc lexer_number(source_code: ^SourceCode, token_content: ^Int); -begin - token_content^ := 0; - - while ~source_code_empty(source_code) & isdigit(cast(source_code_head(source_code^): Int)) <> 0 do - token_content^ := token_content^ * 10 + (cast(source_code_head(source_code^): Int) - cast('0': Int)); - - source_code_advance(source_code) - end -end; - -(* Categorize an identifier. *) -proc lexer_categorize(token_content: String) -> Token; -var - current_token: Token; -begin - if token_content = "if" then - current_token.kind := LexerKind._if - elsif token_content = "then" then - current_token.kind := LexerKind._then - elsif token_content = "else" then - current_token.kind := LexerKind._else - elsif token_content = "elsif" then - current_token.kind := LexerKind._elsif - elsif token_content = "while" then - current_token.kind := LexerKind._while - elsif token_content = "do" then - current_token.kind := LexerKind._do - elsif token_content = "proc" then - current_token.kind := LexerKind._proc - elsif token_content = "begin" then - current_token.kind := LexerKind._begin - elsif token_content = "end" then - current_token.kind := LexerKind._end - elsif token_content = "extern" then - current_token.kind := LexerKind._extern - elsif token_content = "const" then - current_token.kind := LexerKind._const - elsif token_content = "var" then - current_token.kind := LexerKind._var - elsif token_content = "case" then - current_token.kind := LexerKind._case - elsif token_content = "of" then - current_token.kind := LexerKind._of - elsif token_content = "type" then - current_token.kind := LexerKind._type - elsif token_content = "record" then - current_token.kind := LexerKind._record - elsif token_content = "union" then - current_token.kind := LexerKind._union - elsif token_content = "true" then - current_token.kind := LexerKind.boolean; - current_token.value.boolean_value := true - elsif token_content = "false" then - current_token.kind := LexerKind.boolean; - current_token.value.boolean_value := false - elsif token_content = "nil" then - current_token.kind := LexerKind.null - elsif token_content = "or" then - current_token.kind := LexerKind._or - elsif token_content = "return" then - current_token.kind := LexerKind._return - elsif token_content = "cast" then - current_token.kind := LexerKind._cast - elsif token_content = "defer" then - current_token.kind := LexerKind._defer - elsif token_content = "program" then - current_token.kind := LexerKind._program - elsif token_content = "module" then - current_token.kind := LexerKind._module - elsif token_content = "import" then - current_token.kind := LexerKind._import - else - current_token.kind := LexerKind.identifier; - current_token.value.string := string_dup(token_content) - end; - - return current_token -end; - -proc lexer_add_token(lexer: ^Tokenizer, token: Token); -var - new_length: Word; -begin - new_length := lexer^.length + 1u; - lexer^.data := cast(reallocarray(cast(lexer^.data: Pointer), new_length, #size(Token)): ^Token); - (lexer^.data + lexer^.length)^ := token; - lexer^.length := new_length -end; - -(* Read the next token from the input. *) -proc lexer_next(source_code: SourceCode, token_buffer: ^StringBuffer) -> Token; -var - current_token: Token; - first_char: Char; -begin - current_token.kind := LexerKind.unknown; - - first_char := source_code_head(source_code); - - if isalpha(cast(first_char: Int)) <> 0 or first_char = '_' then - lexer_identifier(@source_code, token_buffer); - current_token := lexer_categorize(string_buffer_clear(token_buffer)) - elsif first_char = '#' then - source_code_advance(@source_code); - lexer_identifier(@source_code, token_buffer); - - current_token.kind := LexerKind.trait; - current_token.value.string := string_dup(string_buffer_clear(token_buffer)) - elsif isdigit(cast(first_char: Int)) <> 0 then - lexer_number(@source_code, @current_token.value.int_value); - - if source_code_expect(@source_code, 'u') then - current_token.kind := LexerKind.word; - source_code_advance(@source_code) - else - current_token.kind := LexerKind.integer - end - elsif first_char = '(' then - source_code_advance(@source_code); - - if source_code_empty(@source_code) then - current_token.kind := LexerKind.left_paren - elsif source_code_head(source_code) = '*' then - source_code_advance(@source_code); - - if lexer_comment(@source_code, token_buffer) then - current_token.value.string := string_dup(string_buffer_clear(token_buffer)); - current_token.kind := LexerKind.comment - else - current_token.kind := LexerKind.unknown - end - else - current_token.kind := LexerKind.left_paren - end - elsif first_char = ')' then - current_token.kind := LexerKind.right_paren; - source_code_advance(@source_code) - elsif first_char = '\'' then - source_code_advance(@source_code); - - if lexer_character(@source_code, @current_token.value.char_value) & source_code_expect(@source_code, '\'') then - current_token.kind := LexerKind.character; - source_code_advance(@source_code) - else - current_token.kind := LexerKind.unknown - end - elsif first_char = '"' then - source_code_advance(@source_code); - - if lexer_string(@source_code, token_buffer) then - current_token.kind := LexerKind.string; - current_token.value.string := string_dup(string_buffer_clear(token_buffer)) - else - current_token.kind := LexerKind.unknown - end - elsif first_char = '[' then - current_token.kind := LexerKind.left_square; - source_code_advance(@source_code) - elsif first_char = ']' then - current_token.kind := LexerKind.right_square; - source_code_advance(@source_code) - elsif first_char = '>' then - source_code_advance(@source_code); - - if source_code_empty(@source_code) then - current_token.kind := LexerKind.greater_than - elsif source_code_head(source_code) = '=' then - current_token.kind := LexerKind.greater_equal; - source_code_advance(@source_code) - elsif source_code_head(source_code) = '>' then - current_token.kind := LexerKind.shift_right; - source_code_advance(@source_code) - else - current_token.kind := LexerKind.greater_than - end - elsif first_char = '<' then - source_code_advance(@source_code); - - if source_code_empty(@source_code) then - current_token.kind := LexerKind.less_than - elsif source_code_head(source_code) = '=' then - current_token.kind := LexerKind.less_equal; - source_code_advance(@source_code) - elsif source_code_head(source_code) = '<' then - current_token.kind := LexerKind.shift_left; - source_code_advance(@source_code) - elsif source_code_head(source_code) = '>' then - current_token.kind := LexerKind.not_equal; - source_code_advance(@source_code) - else - current_token.kind := LexerKind.less_than - end - elsif first_char = '=' then - current_token.kind := LexerKind.equal; - source_code_advance(@source_code) - elsif first_char = ';' then - current_token.kind := LexerKind.semicolon; - source_code_advance(@source_code) - elsif first_char = '.' then - current_token.kind := LexerKind.dot; - source_code_advance(@source_code) - elsif first_char = ',' then - current_token.kind := LexerKind.comma; - source_code_advance(@source_code) - elsif first_char = '+' then - current_token.kind := LexerKind.plus; - source_code_advance(@source_code) - elsif first_char = '-' then - source_code_advance(@source_code); - - if source_code_empty(@source_code) then - current_token.kind := LexerKind.minus - elsif source_code_head(source_code) = '>' then - current_token.kind := LexerKind.arrow; - source_code_advance(@source_code) - else - current_token.kind := LexerKind.minus - end - elsif first_char = '*' then - current_token.kind := LexerKind.multiplication; - source_code_advance(@source_code) - elsif first_char = '/' then - current_token.kind := LexerKind.division; - source_code_advance(@source_code) - elsif first_char = '%' then - current_token.kind := LexerKind.remainder; - source_code_advance(@source_code) - elsif first_char = ':' then - source_code_advance(@source_code); - - if source_code_empty(@source_code) then - current_token.kind := LexerKind.colon - elsif source_code_head(source_code) = '=' then - current_token.kind := LexerKind.assignment; - source_code_advance(@source_code) - else - current_token.kind := LexerKind.colon - end - elsif first_char = '^' then - current_token.kind := LexerKind.hat; - source_code_advance(@source_code) - elsif first_char = '@' then - current_token.kind := LexerKind.at; - source_code_advance(@source_code) - elsif first_char = '!' then - current_token.kind := LexerKind.exclamation; - source_code_advance(@source_code) - elsif first_char = '&' then - current_token.kind := LexerKind.and; - source_code_advance(@source_code) - elsif first_char = '~' then - current_token.kind := LexerKind.not; - source_code_advance(@source_code) - elsif first_char = '|' then - current_token.kind := LexerKind.pipe; - source_code_advance(@source_code) - else - current_token.kind := LexerKind.unknown; - source_code_advance(@source_code) - end; - - return current_token -end; - -(* Split the source text into tokens. *) -proc lexer_text(source_code: SourceCode) -> Tokenizer; -var - current_token: Token; - token_buffer: StringBuffer; - lexer: Tokenizer; -begin - lexer := Tokenizer(0u, nil); - token_buffer := string_buffer_new(); - - lexer_spaces(@source_code); - - while ~source_code_empty(@source_code) do - current_token := lexer_next(source_code, @token_buffer); - - if current_token.kind <> LexerKind.unknown then - lexer_add_token(@lexer, current_token); - lexer_spaces(@source_code) - else - write_s("Lexical analysis error on \""); - write_c(source_code_head(source_code)); - write_s("\".\n") - end - end; - - return lexer -end; - -(* - Parser. -*) - -proc parse(tokens: ^Token, tokens_size: Word); -var - current_token: ^Token; - i: Word := 0u; -begin - while i < tokens_size do - current_token := tokens + i; - - case current_token^.kind of - LexerKind._if: - write_s("IF") - | LexerKind._then: - write_s("THEN") - | LexerKind._else: - write_s("ELSE") - | LexerKind._elsif: - write_s("ELSIF") - | LexerKind._while: - write_s("WHILE") - | LexerKind._do: - write_s("DO") - | LexerKind._proc: - write_s("PROC") - | LexerKind._begin: - write_s("BEGIN") - | LexerKind._end: - write_s("END") - | LexerKind._extern: - write_s("EXTERN") - | LexerKind._const: - write_s("CONST") - | LexerKind._var: - write_s("VAR") - | LexerKind._case: - write_s("CASE") - | LexerKind._of: - write_s("OF") - | LexerKind._type: - write_s("TYPE") - | LexerKind._record: - write_s("RECORD") - | LexerKind._union: - write_s("UNION") - | LexerKind.pipe: - write_s("|") - | LexerKind.to: - write_s("TO") - | LexerKind.boolean: - write_s("BOOLEAN<"); - write_b(current_token^.value.boolean_value); - write_c('>') - | LexerKind.null: - write_s("NIL") - | LexerKind.and: - write_s("&") - | LexerKind._or: - write_s("OR") - | LexerKind.not: - write_s("~") - | LexerKind._return: - write_s("RETURN") - | LexerKind._cast: - write_s("CAST") - | LexerKind.shift_left: - write_s("<<") - | LexerKind.shift_right: - write_s(">>") - | LexerKind.identifier: - write_c('<'); - write_s(current_token^.value.string); - write_c('>') - | LexerKind.trait: - write_c('#'); - write_s(current_token^.value.string) - | LexerKind.left_paren: - write_s("(") - | LexerKind.right_paren: - write_s(")") - | LexerKind.left_square: - write_s("[") - | LexerKind.right_square: - write_s("]") - | LexerKind.greater_equal: - write_s(">=") - | LexerKind.less_equal: - write_s("<=") - | LexerKind.greater_than: - write_s(">") - | LexerKind.less_than: - write_s("<") - | LexerKind.equal: - write_s("=") - | LexerKind.not_equal: - write_s("<>") - | LexerKind.semicolon: - write_c(';') - | LexerKind.dot: - write_c('.') - | LexerKind.comma: - write_c(',') - | LexerKind.plus: - write_c('+') - | LexerKind.minus: - write_c('-') - | LexerKind.multiplication: - write_c('*') - | LexerKind.division: - write_c('/') - | LexerKind.remainder: - write_c('%') - | LexerKind.assignment: - write_s(":=") - | LexerKind.colon: - write_c(':') - | LexerKind.hat: - write_c('^') - | LexerKind.at: - write_c('@') - | LexerKind.comment: - write_s("(* COMMENT *)") - | LexerKind.integer: - write_c('<'); - write_i(current_token^.value.int_value); - write_c('>') - | LexerKind.word: - write_c('<'); - write_i(current_token^.value.int_value); - write_s("u>") - | LexerKind.character: - write_c('<'); - write_i(cast(current_token^.value.char_value: Int)); - write_s("c>") - | LexerKind.string: - write_s("\"...\"") - | LexerKind._defer: - write_s("DEFER") - | LexerKind.exclamation: - write_c('!') - | LexerKind.arrow: - write_s("->") - | LexerKind._program: - write_s("PROGRAM") - | LexerKind._module: - write_s("MODULE") - | LexerKind._import: - write_s("IMPORT") - else - write_s("UNKNOWN<"); - write_i(cast(current_token^.kind: Int)); - write_c('>') - end; - write_c(' '); - - i := i + 1u - end; - write_c('\n') -end; - -(* - Compilation entry. -*) - -proc compile_in_stages(command_line: ^CommandLine, source_code: SourceCode) -> Int; -var - return_code: Int := 0; - lexer: Tokenizer; -begin - if command_line^.lex or command_line^.parse then - lexer := lexer_text(source_code) - end; - if command_line^.parse then - parse(lexer.data, lexer.length) - end; - - return return_code -end; - -proc process(argc: Int, argv: ^^Char) -> Int; -var - tokens: ^Token; - tokens_size: Word; - source_code: SourceCode; - command_line: ^CommandLine; - return_code: Int := 0; - source_file: ^SourceFile; -begin - command_line := parse_command_line(argc, argv); - if command_line = nil then - return_code := 2 - end; - - if return_code = 0 then - source_file := read_source(command_line^.input); - - if source_file = nil then - perror(command_line^.input); - return_code := 3 - end - end; - - if return_code = 0 then - defer - fclose(source_file^.handle) - end; - - source_code.position := TextLocation(1u, 1u); - source_code.input := cast(source_file: Pointer); - source_code.empty := source_file_empty; - source_code.head := source_file_head; - source_code.advance := source_file_advance; - - return_code := compile_in_stages(command_line, source_code) - end; - return return_code -end; - - return process(count, parameters) -end.