From f091344cce6061c5f02406ab420b32aba5f7ee62 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Tue, 25 Feb 2025 23:39:31 +0100 Subject: [PATCH] Replace type expression with traits --- boot/ast.cc | 101 +++---- boot/lexer.ll | 3 + boot/parser.yy | 18 +- gcc/elna-generic.cc | 142 +++++----- include/elna/boot/ast.h | 73 ++---- include/elna/gcc/elna-generic.h | 7 +- source.elna | 452 +++++++++++++++++++------------- 7 files changed, 423 insertions(+), 373 deletions(-) diff --git a/boot/ast.cc b/boot/ast.cc index 8358abb..c565bda 100644 --- a/boot/ast.cc +++ b/boot/ast.cc @@ -44,30 +44,25 @@ namespace boot definition->body().accept(this); } - void empty_visitor::visit(call_expression *expression) + void empty_visitor::visit(procedure_call *call) { - for (struct expression *const argument : expression->arguments) + for (expression *const argument : call->arguments) { argument->accept(this); } } + void empty_visitor::visit(traits_expression *trait) + { + trait->type().accept(this); + } + void empty_visitor::visit(cast_expression *expression) { expression->target().accept(this); expression->value().accept(this); } - void empty_visitor::visit(type_expression *expression) - { - expression->body().accept(this); - } - - void empty_visitor::visit(call_statement *statement) - { - statement->body().accept(this); - } - void empty_visitor::visit(assign_statement *statement) { statement->rvalue().accept(this); @@ -249,13 +244,11 @@ namespace boot return this->source_position; } - statement::statement(const struct position position) - : node(position) + statement::statement() { } - expression::expression(const struct position position) - : node(position) + expression::expression() { } @@ -539,13 +532,12 @@ namespace boot } } - literal::literal(const struct position position) - : expression(position) + literal::literal() { } defer_statement::defer_statement(const struct position position) - : statement(position) + : node(position) { } @@ -562,13 +554,12 @@ namespace boot } } - designator_expression::designator_expression(const struct position position) - : expression(position) + designator_expression::designator_expression() { } variable_expression::variable_expression(const struct position position, const std::string& name) - : designator_expression(position), m_name(name) + : node(position), name(name) { } @@ -577,11 +568,6 @@ namespace boot visitor->visit(this); } - const std::string& variable_expression::name() const - { - return m_name; - } - variable_expression *variable_expression::is_variable() { return this; @@ -589,7 +575,7 @@ namespace boot array_access_expression::array_access_expression(const struct position position, expression *base, expression *index) - : designator_expression(position), m_base(base), m_index(index) + : node(position), m_base(base), m_index(index) { } @@ -621,7 +607,7 @@ namespace boot field_access_expression::field_access_expression(const struct position position, expression *base, const std::string& field) - : designator_expression(position), m_base(base), m_field(field) + : node(position), m_base(base), m_field(field) { } @@ -652,7 +638,7 @@ namespace boot dereference_expression::dereference_expression(const struct position position, expression *base) - : designator_expression(position), m_base(base) + : node(position), m_base(base) { } @@ -678,7 +664,7 @@ namespace boot binary_expression::binary_expression(const struct position position, expression *lhs, expression *rhs, const binary_operator operation) - : expression(position), m_lhs(lhs), m_rhs(rhs), m_operator(operation) + : node(position), m_lhs(lhs), m_rhs(rhs), m_operator(operation) { } @@ -710,7 +696,7 @@ namespace boot unary_expression::unary_expression(const struct position position, expression *operand, const unary_operator operation) - : expression(position), m_operand(std::move(operand)), m_operator(operation) + : node(position), m_operand(std::move(operand)), m_operator(operation) { } @@ -734,22 +720,22 @@ namespace boot delete m_operand; } - call_expression::call_expression(const struct position position, designator_expression *callable) - : expression(position), m_callable(callable) + procedure_call::procedure_call(const struct position position, designator_expression *callable) + : node(position), m_callable(callable) { } - void call_expression::accept(parser_visitor *visitor) + void procedure_call::accept(parser_visitor *visitor) { visitor->visit(this); } - designator_expression& call_expression::callable() + designator_expression& procedure_call::callable() { return *m_callable; } - call_expression::~call_expression() + procedure_call::~procedure_call() { for (expression *const argument : arguments) { @@ -760,7 +746,7 @@ namespace boot cast_expression::cast_expression(const struct position position, std::shared_ptr target, expression *value) - : expression(position), m_target(target), m_value(value) + : node(position), m_target(target), m_value(value) { } @@ -784,39 +770,20 @@ namespace boot delete m_value; } - type_expression::type_expression(const struct position position, std::shared_ptr body) - : expression(position), m_body(body) + traits_expression::traits_expression(const struct position position, + const std::string& name, std::shared_ptr type) + : node(position), m_type(type), name(name) { } - void type_expression::accept(parser_visitor *visitor) + void traits_expression::accept(parser_visitor *visitor) { visitor->visit(this); } - top_type& type_expression::body() + top_type& traits_expression::type() { - return *m_body; - } - - call_statement::call_statement(const struct position position, call_expression *body) - : statement(position), m_body(body) - { - } - - void call_statement::accept(parser_visitor *visitor) - { - visitor->visit(this); - } - - call_expression& call_statement::body() - { - return *m_body; - } - - call_statement::~call_statement() - { - delete m_body; + return *m_type; } conditional_statements::conditional_statements(expression *prerequisite) @@ -839,7 +806,7 @@ namespace boot } return_statement::return_statement(const struct position position, expression *return_expression) - : statement(position), m_return_expression(return_expression) + : node(position), m_return_expression(return_expression) { } @@ -865,7 +832,7 @@ namespace boot assign_statement::assign_statement(const struct position position, designator_expression *lvalue, expression *rvalue) - : statement(position), m_lvalue(lvalue), m_rvalue(rvalue) + : node(position), m_lvalue(lvalue), m_rvalue(rvalue) { } @@ -906,7 +873,7 @@ namespace boot if_statement::if_statement(const struct position position, conditional_statements *body, std::vector *alternative) - : statement(position), m_body(body), m_alternative(alternative) + : node(position), m_body(body), m_alternative(alternative) { } @@ -936,7 +903,7 @@ namespace boot } while_statement::while_statement(const struct position position, conditional_statements *body) - : statement(position), m_body(body) + : node(position), m_body(body) { } diff --git a/boot/lexer.ll b/boot/lexer.ll index cb8837a..bb7ca06 100644 --- a/boot/lexer.ll +++ b/boot/lexer.ll @@ -131,6 +131,9 @@ defer { [A-Za-z_][A-Za-z0-9_]* { return yy::parser::make_IDENTIFIER(yytext, this->location); } +#[A-Za-z_][A-Za-z0-9_]* { + return yy::parser::make_TRAIT(yytext + 1, this->location); + } [0-9]+u { return yy::parser::make_WORD(strtoul(yytext, NULL, 10), this->location); } diff --git a/boot/parser.yy b/boot/parser.yy index 42ea4f2..bbc17fd 100644 --- a/boot/parser.yy +++ b/boot/parser.yy @@ -75,6 +75,7 @@ along with GCC; see the file COPYING3. If not see %start program; %token IDENTIFIER +%token TRAIT %token INTEGER %token WORD %token FLOAT @@ -125,11 +126,12 @@ along with GCC; see the file COPYING3. If not see formal_parameters formal_parameter_list; %type formal_parameter %type > type_expression; +%type traits_expression; %type expression operand unary; %type > expressions actual_parameter_list; %type designator_expression; %type assign_statement; -%type call_expression; +%type call_expression; %type while_statement; %type if_statement; %type return_statement; @@ -227,7 +229,7 @@ assign_statement: designator_expression ":=" expression } call_expression: designator_expression actual_parameter_list { - $$ = new elna::boot::call_expression(elna::boot::make_position(@1), $1); + $$ = new elna::boot::procedure_call(elna::boot::make_position(@1), $1); std::swap($$->arguments, $2); } cast_expression: "cast" "(" expression ":" type_expression ")" @@ -313,10 +315,15 @@ literal: { $$ = new elna::boot::number_literal(elna::boot::make_position(@1), $1); } +traits_expression: + TRAIT "(" type_expression ")" + { + $$ = new elna::boot::traits_expression(elna::boot::make_position(@1), $1, $3); + } operand: literal { $$ = $1; } | designator_expression { $$ = $1; } - | "(" type_expression ")" { $$ = new elna::boot::type_expression(elna::boot::make_position(@1), $2); } + | traits_expression { $$ = $1; } | cast_expression { $$ = $1; } | call_expression { $$ = $1; } | "(" expression ")" { $$ = $2; } @@ -448,10 +455,7 @@ statement: | while_statement { $$ = $1; } | if_statement { $$ = $1; } | return_statement { $$ = $1; } - | call_expression - { - $$ = new elna::boot::call_statement(elna::boot::make_position(@1), $1); - } + | call_expression { $$ = $1; } | defer_statement { $$ = $1; } statements: statement statements diff --git a/gcc/elna-generic.cc b/gcc/elna-generic.cc index 73dcddc..af7c532 100644 --- a/gcc/elna-generic.cc +++ b/gcc/elna-generic.cc @@ -84,11 +84,6 @@ namespace gcc list_length(TYPE_ARG_TYPES(symbol_type)) - 1, arguments.size()); this->current_expression = error_mark_node; } - else if (TREE_TYPE(symbol_type) == void_type_node) - { - append_statement(stmt); - this->current_expression = NULL_TREE; - } else { this->current_expression = stmt; @@ -136,10 +131,10 @@ namespace gcc } } - void generic_visitor::visit(boot::call_expression *expression) + void generic_visitor::visit(boot::procedure_call *call) { - location_t call_location = get_location(&expression->position()); - expression->callable().accept(this); + location_t call_location = get_location(&call->position()); + call->callable().accept(this); tree expression_type = TYPE_P(this->current_expression) ? this->current_expression @@ -147,17 +142,17 @@ namespace gcc if (TYPE_P(this->current_expression) && TREE_CODE(expression_type) == RECORD_TYPE) { - build_record_call(call_location, this->current_expression, expression->arguments); + build_record_call(call_location, this->current_expression, call->arguments); } else if (TREE_CODE(expression_type) == FUNCTION_TYPE) { this->current_expression = build1(ADDR_EXPR, build_pointer_type_for_mode(expression_type, VOIDmode, true), this->current_expression); - build_procedure_call(call_location, this->current_expression, expression->arguments); + build_procedure_call(call_location, this->current_expression, call->arguments); } else if (is_pointer_type(expression_type) && TREE_CODE(TREE_TYPE(expression_type)) == FUNCTION_TYPE) { - build_procedure_call(call_location, this->current_expression, expression->arguments); + build_procedure_call(call_location, this->current_expression, call->arguments); } else { @@ -178,11 +173,6 @@ namespace gcc cast_target, this->current_expression); } - void generic_visitor::visit(boot::type_expression *expression) - { - this->current_expression = build_type(expression->body()); - } - void generic_visitor::visit(boot::program *program) { for (boot::constant_definition *const constant : program->constants) @@ -226,10 +216,7 @@ namespace gcc DECL_ARGUMENTS(fndecl) = chainon(DECL_ARGUMENTS(fndecl), declaration_tree); parameter_type = TREE_CHAIN(parameter_type); } - for (boot::statement *const body_statement : program->body) - { - body_statement->accept(this); - } + visit_statements(program->body); tree set_result = build2(INIT_EXPR, void_type_node, DECL_RESULT(fndecl), build_int_cst_type(integer_type_node, 0)); tree return_stmt = build1(RETURN_EXPR, void_type_node, set_result); @@ -248,6 +235,19 @@ namespace gcc cgraph_node::finalize_function(fndecl, true); } + void generic_visitor::visit(boot::block *block) + { + for (boot::constant_definition *const constant : block->constants) + { + constant->accept(this); + } + for (boot::variable_declaration *const variable : block->variables) + { + variable->accept(this); + } + visit_statements(block->body); + } + void generic_visitor::visit(boot::procedure_definition *definition) { tree declaration_type = build_procedure_type(definition->heading()); @@ -903,13 +903,13 @@ namespace gcc void generic_visitor::visit(boot::variable_expression *expression) { - auto symbol = this->lookup(expression->name()); + auto symbol = this->lookup(expression->name); if (symbol == NULL_TREE) { error_at(get_location(&expression->position()), "symbol '%s' not declared in the current scope", - expression->name().c_str()); + expression->name.c_str()); this->current_expression = error_mark_node; } else @@ -964,40 +964,40 @@ namespace gcc } } + void generic_visitor::visit(boot::traits_expression *trait) + { + tree type_expression = build_type(trait->type()); + + if (trait->name == "size") + { + this->current_expression = build1(CONVERT_EXPR, elna_word_type_node, size_in_bytes(type_expression)); + } + else if (trait->name == "alignment") + { + this->current_expression = build_int_cstu(elna_word_type_node, TYPE_ALIGN_UNIT(type_expression)); + } + else if (trait->name == "min" && is_integral_type(type_expression)) + { + this->current_expression = TYPE_MIN_VALUE(type_expression); + } + else if (trait->name == "max" && is_integral_type(type_expression)) + { + this->current_expression = TYPE_MAX_VALUE(type_expression); + } + else + { + error_at(get_location(&trait->position()), "type '%s' does not have property '%s'", + print_type(type_expression).c_str(), trait->name.c_str()); + this->current_expression = error_mark_node; + } + } + void generic_visitor::visit(boot::field_access_expression *expression) { expression->base().accept(this); location_t expression_location = get_location(&expression->position()); - if (TYPE_P(this->current_expression)) - { - if (expression->field() == "size") - { - this->current_expression = build1(CONVERT_EXPR, elna_word_type_node, - size_in_bytes(this->current_expression)); - } - else if (expression->field() == "alignment") - { - this->current_expression = build_int_cstu(elna_word_type_node, - TYPE_ALIGN_UNIT(this->current_expression)); - } - else if (expression->field() == "min" && is_integral_type(this->current_expression)) - { - this->current_expression = TYPE_MIN_VALUE(this->current_expression); - } - else if (expression->field() == "max" && is_integral_type(this->current_expression)) - { - this->current_expression = TYPE_MAX_VALUE(this->current_expression); - } - else - { - error_at(expression_location, "type '%s' does not have property '%s'", - print_type(this->current_expression).c_str(), expression->field().c_str()); - this->current_expression = error_mark_node; - } - - } - else if (is_aggregate_type(TREE_TYPE(this->current_expression))) + if (is_aggregate_type(TREE_TYPE(this->current_expression))) { tree field_declaration = TYPE_FIELDS(TREE_TYPE(this->current_expression)); @@ -1026,6 +1026,12 @@ namespace gcc field_declaration, NULL_TREE); } } + else + { + error_at(expression_location, "type '%s' does not have a field named '%s'", + print_type(TREE_TYPE(this->current_expression)).c_str(), expression->field().c_str()); + this->current_expression = error_mark_node; + } } void generic_visitor::visit(boot::dereference_expression *expression) @@ -1049,7 +1055,7 @@ namespace gcc if (TREE_CODE(lvalue) == CONST_DECL) { error_at(statement_location, "cannot modify constant '%s'", - statement->lvalue().is_variable()->name().c_str()); + statement->lvalue().is_variable()->name.c_str()); this->current_expression = error_mark_node; } else if (is_assignable_from(TREE_TYPE(lvalue), rvalue)) @@ -1083,10 +1089,7 @@ namespace gcc if (statement->alternative() != nullptr) { enter_scope(); - for (const auto body_statement : *statement->alternative()) - { - body_statement->accept(this); - } + visit_statements(*statement->alternative()); tree mapping = leave_scope(); append_statement(mapping); } @@ -1120,10 +1123,7 @@ namespace gcc append_statement(then_label_expr); enter_scope(); - for (const auto body_statement : branch.statements) - { - body_statement->accept(this); - } + visit_statements(branch.statements); tree mapping = leave_scope(); append_statement(mapping); append_statement(goto_endif); @@ -1160,11 +1160,18 @@ namespace gcc this->current_expression = NULL_TREE; } - void generic_visitor::visit(boot::call_statement *statement) + void generic_visitor::visit_statements(const std::vector& statements) { - statement->body().accept(this); - append_statement(this->current_expression); - this->current_expression = NULL_TREE; + for (boot::statement *const statement : statements) + { + statement->accept(this); + + if (this->current_expression != NULL_TREE && this->current_expression != error_mark_node) + { + append_statement(this->current_expression); + this->current_expression = NULL_TREE; + } + } } void generic_visitor::visit(boot::return_statement *statement) @@ -1181,15 +1188,14 @@ namespace gcc this->current_expression); tree return_stmt = build1(RETURN_EXPR, void_type_node, set_result); append_statement(return_stmt); + + this->current_expression = NULL_TREE; } void generic_visitor::visit(boot::defer_statement *statement) { enter_scope(); - for (boot::statement *const body_statement : statement->statements) - { - body_statement->accept(this); - } + visit_statements(statement->statements); defer(leave_scope()); } } diff --git a/include/elna/boot/ast.h b/include/elna/boot/ast.h index 83f1afb..2e9902f 100644 --- a/include/elna/boot/ast.h +++ b/include/elna/boot/ast.h @@ -58,14 +58,13 @@ namespace boot class constant_definition; class procedure_definition; class type_definition; - class call_expression; + class procedure_call; class cast_expression; - class type_expression; class assign_statement; class if_statement; class while_statement; class return_statement; - class call_statement; + class traits_expression; class block; class program; class binary_expression; @@ -93,10 +92,9 @@ namespace boot virtual void visit(constant_definition *) = 0; virtual void visit(procedure_definition *) = 0; virtual void visit(type_definition *) = 0; - virtual void visit(call_expression *) = 0; + virtual void visit(procedure_call *) = 0; virtual void visit(cast_expression *) = 0; - virtual void visit(type_expression *) = 0; - virtual void visit(call_statement *) = 0; + virtual void visit(traits_expression *) = 0; virtual void visit(assign_statement *) = 0; virtual void visit(if_statement *) = 0; virtual void visit(while_statement *) = 0; @@ -134,10 +132,9 @@ namespace boot virtual void visit(constant_definition *definition) override; virtual void visit(procedure_definition *definition) override; virtual void visit(type_definition *definition) override; - virtual void visit(call_expression *expression) override; + virtual void visit(traits_expression *trait) override; + virtual void visit(procedure_call *call) override; virtual void visit(cast_expression *expression) override; - virtual void visit(type_expression *expression) override; - virtual void visit(call_statement *statement) override; virtual void visit(assign_statement *statement) override; virtual void visit(if_statement *) override; virtual void visit(while_statement *) override; @@ -189,22 +186,16 @@ namespace boot const struct position& position() const; }; - class statement : public node + class statement : public virtual node { protected: - /** - * \param position Source code position. - */ - explicit statement(const struct position position); + statement(); }; - class expression : public node + class expression : public virtual node { protected: - /** - * \param position Source code position. - */ - explicit expression(const struct position position); + expression(); }; /** @@ -331,7 +322,7 @@ namespace boot class literal : public expression { protected: - explicit literal(const struct position position); + literal(); }; /** @@ -435,31 +426,17 @@ namespace boot virtual ~cast_expression() override; }; - /** - * Type inside an expression. - */ - class type_expression : public expression + class traits_expression : public expression { - std::shared_ptr m_body; + std::shared_ptr m_type; public: - type_expression(const struct position position, std::shared_ptr body); + const std::string name; + + traits_expression(const struct position position, const std::string& name, std::shared_ptr type); virtual void accept(parser_visitor *visitor) override; - top_type& body(); - }; - - class call_statement : public statement - { - call_expression *m_body; - - public: - call_statement(const struct position position, call_expression *body); - virtual void accept(parser_visitor *visitor) override; - - call_expression& body(); - - virtual ~call_statement() override; + top_type& type(); }; /** @@ -501,19 +478,17 @@ namespace boot virtual dereference_expression *is_dereference(); protected: - designator_expression(const struct position position); + designator_expression(); }; class variable_expression : public designator_expression { - std::string m_name; - public: + const std::string name; + variable_expression(const struct position position, const std::string& name); virtual void accept(parser_visitor *visitor) override; - const std::string& name() const; - variable_expression *is_variable() override; }; @@ -570,19 +545,19 @@ namespace boot /** * Procedure call expression. */ - class call_expression : public expression + class procedure_call : public expression, public statement { designator_expression *m_callable; public: std::vector arguments; - call_expression(const struct position position, designator_expression *callable); + procedure_call(const struct position position, designator_expression *callable); virtual void accept(parser_visitor *visitor) override; designator_expression& callable(); - virtual ~call_expression() override; + virtual ~procedure_call() override; }; class assign_statement : public statement @@ -676,7 +651,7 @@ namespace boot T value; number_literal(const struct position position, const T& value) - : literal(position), value(value) + : node(position), value(value) { } diff --git a/include/elna/gcc/elna-generic.h b/include/elna/gcc/elna-generic.h index 7fd4115..700ba76 100644 --- a/include/elna/gcc/elna-generic.h +++ b/include/elna/gcc/elna-generic.h @@ -60,15 +60,16 @@ namespace gcc tree procedure_address, const std::vector& arguments); void build_record_call(location_t call_location, tree symbol, const std::vector& arguments); + void visit_statements(const std::vector& statements); public: generic_visitor(std::shared_ptr symbol_table); void visit(boot::program *program) override; void visit(boot::procedure_definition *definition) override; - void visit(boot::call_expression *expression) override; + void visit(boot::procedure_call *call) override; void visit(boot::cast_expression *expression) override; - void visit(boot::type_expression *expression) override; + void visit(boot::traits_expression *trait) override; void visit(boot::number_literal *literal) override; void visit(boot::number_literal *literal) override; void visit(boot::number_literal *literal) override; @@ -85,10 +86,10 @@ namespace gcc void visit(boot::array_access_expression *expression) override; void visit(boot::field_access_expression *expression) override; void visit(boot::dereference_expression *expression) override; + void visit(boot::block *block) override; void visit(boot::assign_statement *statement) override; void visit(boot::if_statement *statement) override; void visit(boot::while_statement *statement) override; - void visit(boot::call_statement *statement) override; void visit(boot::return_statement *statement) override; void visit(boot::defer_statement *statement) override; }; diff --git a/source.elna b/source.elna index 9225a26..8c58017 100644 --- a/source.elna +++ b/source.elna @@ -72,13 +72,28 @@ type first: Position last: Position end + FILE* = record end + SourceFile* = record + buffer: [1024]Char + handle: ^FILE + size: Word + index: Word + end + StringBuffer* = record + data: ^Byte + size: Word + capacity: Word + end SourceCode = record position: Position - text: String + + input: ^Byte + empty: proc(data: ^Byte) -> Bool + advance: proc(data: ^Byte) + head: proc(data: ^Byte) -> Char end TokenValue* = union int_value: Int - string_value: ^Char string: String boolean_value: Bool char_value: Char @@ -88,7 +103,6 @@ type value: TokenValue location: Location end - FILE* = record end CommandLine* = record input: ^Char tokenize: Bool @@ -119,8 +133,6 @@ proc strncpy(dst: ^Char, src: ^Char, dsize: Word) -> ^Char; extern proc strcpy(dst: ^Char, src: ^Char) -> ^Char; extern proc strlen(ptr: ^Char) -> Word; extern -proc strtol(nptr: ^Char, endptr: ^^Char, base: Int) -> Int; extern - proc perror(s: ^Char); extern proc exit(code: Int) -> !; extern @@ -225,6 +237,41 @@ begin return String(copy, origin.length) end +proc string_buffer_new() -> StringBuffer; +var + result: StringBuffer +begin + result.capacity := 64u + result.data := malloc(result.capacity) + result.size := 0u + + return result +end + +proc string_buffer_push(buffer: ^StringBuffer, char: Char); +begin + if buffer^.size >= buffer^.capacity then + buffer^.capacity := buffer^.capacity + 1024u + buffer^.data := realloc(buffer^.data, buffer^.capacity) + end + (buffer^.data + buffer^.size)^ := cast(char: Byte) + buffer^.size := buffer^.size + 1u +end + +proc string_buffer_pop(buffer: ^StringBuffer, count: Word); +begin + buffer^.size := buffer^.size - count +end + +proc string_buffer_clear(buffer: ^StringBuffer) -> String; +var + result: String +begin + result := String(cast(buffer^.data: ^Char), buffer^.size) + buffer^.size := 0u + return result +end + (* End of standard procedures. *) @@ -234,36 +281,20 @@ begin return Position(1u, 1u) end -proc read_source(filename: ^Char, result: ^String) -> Bool; +proc read_source(filename: ^Char) -> ^SourceFile; var - input_file: ^FILE - source_size: Int - input: ^Byte + result: ^SourceFile + file_handle: ^FILE begin - input_file := fopen(filename, "rb\0".ptr) + file_handle := fopen(filename, "rb\0".ptr) - if input_file = nil then - return false + if file_handle <> nil then + result := cast(malloc(#size(SourceFile)): ^SourceFile) + result^.handle := file_handle + result^.size := 0u + result^.index := 1u end - defer - fclose(input_file) - end - if fseek(input_file, 0, SEEK_END) <> 0 then - return false - end - source_size := ftell(input_file) - if source_size < 0 then - return false - end - rewind(input_file) - - input := malloc(cast(source_size: Word)) - if fread(input, cast(source_size: Word), 1u, input_file) <> 1u then - return false - end - result^ := String(cast(input: ^Char), cast(source_size: Word)) - - return true + return result end proc escape_char(escape: Char, result: ^Char) -> Bool; @@ -312,118 +343,169 @@ begin return successful end -proc advance_source(source_code: SourceCode, length: Word) -> SourceCode; +proc source_file_empty(source_input: ^Byte) -> Bool; +var + source_file: ^SourceFile begin - source_code.text := open_substring(source_code.text, length) - source_code.position.column := source_code.position.column + length + source_file := cast(source_input: ^SourceFile) - return source_code + if source_file^.index > source_file^.size then + source_file^.size := fread(cast(@source_file^.buffer: ^Byte), 1u, 1024u, source_file^.handle) + source_file^.index := 1u + end + + return source_file^.size = 0u end -proc skip_spaces(source_code: SourceCode) -> SourceCode; +proc source_file_head(source_input: ^Byte) -> Char; +var + source_file: ^SourceFile begin - while source_code.text.length > 0u and is_space(source_code.text[1u]) do - if source_code.text[1u] = '\n' then - source_code.position.line := source_code.position.line + 1u - source_code.position.column := 1u - else - source_code.position.column := source_code.position.column + 1u + source_file := cast(source_input: ^SourceFile) + + return source_file^.buffer[source_file^.index] +end + +proc source_file_advance(source_input: ^Byte); +var + source_file: ^SourceFile +begin + source_file := cast(source_input: ^SourceFile) + + source_file^.index := source_file^.index + 1u +end + +proc source_code_empty(source_code: ^SourceCode) -> Bool; +begin + return source_code^.empty(source_code^.input) +end + +proc source_code_head(source_code: SourceCode) -> Char; +begin + return source_code.head(source_code.input) +end + +proc source_code_advance(source_code: ^SourceCode); +begin + source_code^.advance(source_code^.input) + source_code^.position.column := source_code^.position.column +end + +proc source_code_break(source_code: ^SourceCode); +begin + source_code^.position.line := source_code^.position.line + 1u + source_code^.position.column := 0u +end + +proc source_code_expect(source_code: ^SourceCode, expected: Char) -> Bool; +begin + return not source_code_empty(source_code) and source_code_head(source_code^) = expected +end + +proc skip_spaces(source_code: ^SourceCode); +begin + while not source_code_empty(source_code) and is_space(source_code_head(source_code^)) do + if source_code_head(source_code^) = '\n' then + source_code_break(source_code) end - source_code.text := open_substring(source_code.text, 1u) + source_code_advance(source_code) end - return source_code end -proc lex_identifier(source_code: ^SourceCode, token_content: ^String); +proc is_ident(char: Char) -> Bool; +begin + return is_alnum(char) or char = '_' +end + +proc lex_identifier(source_code: ^SourceCode, token_content: ^StringBuffer); var content_length: Word begin - content_length := 0u - token_content^ := source_code^.text - - while is_alnum(source_code^.text[1u]) or source_code^.text[1u] = '_' do - content_length := content_length + 1u - source_code^ := advance_source(source_code^, 1u) + while not source_code_empty(source_code) and is_ident(source_code_head(source_code^)) do + string_buffer_push(token_content, source_code_head(source_code^)) + source_code_advance(source_code) end - token_content^ := substring(token_content^, 0u, content_length) end -proc lex_comment(source_code: ^SourceCode, token_content: ^String) -> Bool; +proc lex_comment(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool; var - content_length: Word trailing: Word begin - content_length := 0u - token_content^ := source_code^.text trailing := 0u - while source_code^.text.length > 0u and trailing < 2u do - if source_code^.text[1u] = '*' then - content_length := content_length + trailing + while not source_code_empty(source_code) and trailing < 2u do + if source_code_head(source_code^) = '*' then + string_buffer_push(token_content, '*') trailing := 1u - elsif source_code^.text[1u] = ')' and trailing = 1u then + elsif source_code_head(source_code^) = ')' and trailing = 1u then + string_buffer_pop(token_content, 1u) trailing := 2u else - content_length := content_length + trailing + 1u + string_buffer_push(token_content, source_code_head(source_code^)) trailing := 0u end - source_code^ := advance_source(source_code^, 1u) + source_code_advance(source_code) end return trailing = 2u end -proc lex_character(input: ^Char, current_token: ^Token) -> ^Char; +proc lex_character(source_code: ^SourceCode, token_content: ^Char) -> Bool; +var + successful: Bool begin - if input^ = '\\' then - input := input + 1 - if escape_char(input^, @current_token^.value.char_value) then - input := input + 1 - end - elsif input^ <> '\0' then - current_token^.value.char_value := input^ - input := input + 1 + successful := not source_code_empty(source_code) + + if successful then + if source_code_head(source_code^) = '\\' then + source_code_advance(source_code) + + successful := not source_code_empty(source_code) and escape_char(source_code_head(source_code^), token_content) + else + token_content^ := source_code_head(source_code^) + successful := true + end end - return input + if successful then + source_code_advance(source_code) + end + return successful end -proc lex_string(input: ^Char, current_token: ^Token) -> ^Char; +proc lex_string(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool; var token_end, constructed_string: ^Char token_length: Word is_valid: Bool + next_char: Char begin - token_end := input - - while token_end^ <> '\0' and not ((token_end - 1)^ <> '\\' and token_end^ = '"') do - token_end := token_end + 1 - end - if token_end^ <> '\"' then - return input - end - token_length := cast(token_end - input: Word) - current_token^.value.string_value := cast(calloc(token_length, 1u): ^Char) - is_valid := true - constructed_string := current_token^.value.string_value - while input < token_end and is_valid do - if input^ = '\\' then - input := input + 1 - if escape_char(input^, constructed_string) then - input := input + 1 - else - is_valid := false - end - elsif input^ <> '\0' then - constructed_string^ := input^ - input := input + 1 + while is_valid and not source_code_empty(source_code) and source_code_head(source_code^) <> '"' do + is_valid := lex_character(source_code, @next_char) + + if is_valid then + string_buffer_push(token_content, next_char) end - - constructed_string := constructed_string + 1 end - return token_end + if is_valid and source_code_expect(source_code, '"') then + source_code_advance(source_code) + else + is_valid := false + end + return is_valid +end + +proc lex_number(source_code: ^SourceCode, token_content: ^Int); +begin + token_content^ := 0 + + while not source_code_empty(source_code) and is_digit(source_code_head(source_code^)) do + token_content^ := token_content^ * 10 + (cast(source_code_head(source_code^): Int) - cast('0': Int)) + + source_code_advance(source_code) + end end proc print_tokens(tokens: ^Token, tokens_size: Word); @@ -647,46 +729,43 @@ end proc tokenize(source_code: SourceCode, tokens_size: ^Word) -> ^Token; var - token_end: ^Char tokens, current_token: ^Token - token_length: Word first_char: Char - token_content: String + token_buffer: StringBuffer begin tokens_size^ := 0u tokens := nil - source_code := skip_spaces(source_code) + token_buffer := string_buffer_new() - while source_code.text.length <> 0u do - tokens := cast(reallocarray(cast(tokens: ^Byte), tokens_size^ + 1u, Token.size): ^Token) + skip_spaces(@source_code) + + while not source_code_empty(@source_code) do + tokens := cast(reallocarray(cast(tokens: ^Byte), tokens_size^ + 1u, #size(Token)): ^Token) current_token := tokens + tokens_size^ - first_char := source_code.text[1u] + first_char := source_code_head(source_code) if is_alpha(first_char) or first_char = '_' then - lex_identifier(@source_code, @token_content) - current_token^ := categorize_identifier(token_content) + lex_identifier(@source_code, @token_buffer) + current_token^ := categorize_identifier(string_buffer_clear(@token_buffer)) elsif is_digit(first_char) then - token_end := nil - current_token^.value.int_value := strtol(source_code.text.ptr, @token_end, 10) - token_length := cast(token_end - source_code.text.ptr: Word) + lex_number(@source_code, @current_token^.value.int_value) - if token_end^ = 'u' then + if source_code_expect(@source_code, 'u') then current_token^.kind := TOKEN_WORD - source_code := advance_source(source_code, token_length + 1u) + source_code_advance(@source_code) else current_token^.kind := TOKEN_INTEGER - source_code := advance_source(source_code, token_length) end elsif first_char = '(' then - source_code := advance_source(source_code, 1u) + source_code_advance(@source_code) - if source_code.text.length = 0u then + if source_code_empty(@source_code) then current_token^.kind := TOKEN_LEFT_PAREN - elsif source_code.text[1u] = '*' then - source_code := advance_source(source_code, 1u) + elsif source_code_head(source_code) = '*' then + source_code_advance(@source_code) - if lex_comment(@source_code, @token_content) then - current_token^.value.string := string_dup(token_content) + if lex_comment(@source_code, @token_buffer) then + current_token^.value.string := string_dup(string_buffer_clear(@token_buffer)) current_token^.kind := TOKEN_COMMENT else current_token^.kind := 0 @@ -696,125 +775,125 @@ begin end elsif first_char = ')' then current_token^.kind := TOKEN_RIGHT_PAREN - source_code := advance_source(source_code, 1u) + source_code_advance(@source_code) elsif first_char = '\'' then - token_end := lex_character(source_code.text.ptr + 1, current_token) - token_length := cast(token_end - source_code.text.ptr: Word) + source_code_advance(@source_code) - if token_end^ = '\'' then + if lex_character(@source_code, @current_token^.value.char_value) and source_code_expect(@source_code, '\'') then current_token^.kind := TOKEN_CHARACTER - source_code := advance_source(source_code, token_length + 1u) + source_code_advance(@source_code) else - source_code := advance_source(source_code, 1u) + current_token^.kind := 0 end elsif first_char = '"' then - token_end := lex_string(source_code.text.ptr + 1, current_token) - - if token_end^ = '"' then + source_code_advance(@source_code) + + if lex_string(@source_code, @token_buffer) then current_token^.kind := TOKEN_STRING - token_length := cast(token_end - source_code.text.ptr: Word) - source_code := advance_source(source_code, token_length + 1u) + current_token^.value.string := string_dup(string_buffer_clear(@token_buffer)) + else + current_token^.kind := 0 end elsif first_char = '[' then current_token^.kind := TOKEN_LEFT_SQUARE - source_code := advance_source(source_code, 1u) + source_code_advance(@source_code) elsif first_char = ']' then current_token^.kind := TOKEN_RIGHT_SQUARE - source_code := advance_source(source_code, 1u) + source_code_advance(@source_code) elsif first_char = '>' then - source_code := advance_source(source_code, 1u) + source_code_advance(@source_code) - if source_code.text.length = 0u then + if source_code_empty(@source_code) then current_token^.kind := TOKEN_GREATER_THAN - elsif source_code.text[1u] = '=' then + elsif source_code_head(source_code) = '=' then current_token^.kind := TOKEN_GREATER_EQUAL - source_code := advance_source(source_code, 1u) - elsif source_code.text[1u] = '>' then + source_code_advance(@source_code) + elsif source_code_head(source_code) = '>' then current_token^.kind := TOKEN_SHIFT_RIGHT - source_code := advance_source(source_code, 1u) + source_code_advance(@source_code) else current_token^.kind := TOKEN_GREATER_THAN end elsif first_char = '<' then - source_code := advance_source(source_code, 1u) + source_code_advance(@source_code) - if source_code.text.length = 0u then + if source_code_empty(@source_code) then current_token^.kind := TOKEN_LESS_THAN - elsif source_code.text[1u] = '=' then + elsif source_code_head(source_code) = '=' then current_token^.kind := TOKEN_LESS_EQUAL - source_code := advance_source(source_code, 1u) - elsif source_code.text[1u] = '<' then + source_code_advance(@source_code) + elsif source_code_head(source_code) = '<' then current_token^.kind := TOKEN_SHIFT_LEFT - source_code := advance_source(source_code, 1u) - elsif source_code.text[1u] = '>' then + source_code_advance(@source_code) + elsif source_code_head(source_code) = '>' then current_token^.kind := TOKEN_NOT_EQUAL - source_code := advance_source(source_code, 1u) + source_code_advance(@source_code) else current_token^.kind := TOKEN_LESS_THAN end elsif first_char = '=' then current_token^.kind := TOKEN_EQUAL - source_code := advance_source(source_code, 1u) + source_code_advance(@source_code) elsif first_char = ';' then current_token^.kind := TOKEN_SEMICOLON - source_code := advance_source(source_code, 1u) + source_code_advance(@source_code) elsif first_char = '.' then current_token^.kind := TOKEN_DOT - source_code := advance_source(source_code, 1u) + source_code_advance(@source_code) elsif first_char = ',' then current_token^.kind := TOKEN_COMMA - source_code := advance_source(source_code, 1u) + source_code_advance(@source_code) elsif first_char = '+' then current_token^.kind := TOKEN_PLUS - source_code := advance_source(source_code, 1u) + source_code_advance(@source_code) elsif first_char = '-' then - source_code := advance_source(source_code, 1u) + source_code_advance(@source_code) - if source_code.text.length = 0u then + if source_code_empty(@source_code) then current_token^.kind := TOKEN_MINUS - elsif source_code.text[1u] = '>' then + elsif source_code_head(source_code) = '>' then current_token^.kind := TOKEN_ARROW - source_code := advance_source(source_code, 1u) + source_code_advance(@source_code) else current_token^.kind := TOKEN_MINUS end elsif first_char = '*' then current_token^.kind := TOKEN_MULTIPLICATION - source_code := advance_source(source_code, 1u) + source_code_advance(@source_code) elsif first_char = '/' then current_token^.kind := TOKEN_DIVISION - source_code := advance_source(source_code, 1u) + source_code_advance(@source_code) elsif first_char = '%' then current_token^.kind := TOKEN_REMAINDER - source_code := advance_source(source_code, 1u) + source_code_advance(@source_code) elsif first_char = ':' then - source_code := advance_source(source_code, 1u) + source_code_advance(@source_code) - if source_code.text.length = 0u then + if source_code_empty(@source_code) then current_token^.kind := TOKEN_COLON - elsif source_code.text[1u] = '=' then + elsif source_code_head(source_code) = '=' then current_token^.kind := TOKEN_ASSIGNMENT - source_code := advance_source(source_code, 1u) + source_code_advance(@source_code) else current_token^.kind := TOKEN_COLON end elsif first_char = '^' then current_token^.kind := TOKEN_HAT - source_code := advance_source(source_code, 1u) + source_code_advance(@source_code) elsif first_char = '@' then current_token^.kind := TOKEN_AT - source_code := advance_source(source_code, 1u) + source_code_advance(@source_code) elsif first_char = '!' then current_token^.kind := TOKEN_EXCLAMATION - source_code := advance_source(source_code, 1u) + source_code_advance(@source_code) else current_token^.kind := 0 - source_code := advance_source(source_code, 1u) + source_code_advance(@source_code) end if current_token^.kind <> 0 then tokens_size^ := tokens_size^ + 1u - source_code := skip_spaces(source_code) + skip_spaces(@source_code) else write_s("Lexical analysis error on \"") write_c(first_char) @@ -832,7 +911,7 @@ var result: ^CommandLine begin i := 1 - result := cast(malloc(CommandLine.size): ^CommandLine) + result := cast(malloc(#size(CommandLine)): ^CommandLine) result^.tokenize := false result^.syntax_tree := false result^.input := nil @@ -872,23 +951,38 @@ var tokens_size: Word source_code: SourceCode command_line: ^CommandLine + return_code: Int begin + return_code := 0 + command_line := parse_command_line(argc, argv) if command_line = nil then - return 2 + return_code := 2 end - source_code.position := make_position() - if not read_source(command_line^.input, @source_code.text) then - perror(command_line^.input) - return 3 - end - tokens := tokenize(source_code, @tokens_size) + if return_code = 0 then + source_code.position := make_position() - if command_line^.tokenize then - print_tokens(tokens, tokens_size) + source_code.input := cast(read_source(command_line^.input): ^Byte) + source_code.empty := source_file_empty + source_code.head := source_file_head + source_code.advance := source_file_advance + + if source_code.input = nil then + perror(command_line^.input) + return_code := 3 + end end - return 0 + if return_code = 0 then + tokens := tokenize(source_code, @tokens_size) + + fclose(cast(source_code.input: ^SourceFile)^.handle) + + if command_line^.tokenize then + print_tokens(tokens, tokens_size) + end + end + return return_code end begin