From a7b0c53d23f2d19c63274ce80337058560dca4ca Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Fri, 3 Jan 2025 22:18:35 +0100 Subject: [PATCH] Add string type --- Rakefile | 11 +++ gcc/elna-diagnostic.cc | 4 + gcc/elna-generic.cc | 21 +++++- gcc/elna-tree.cc | 21 +++++- gcc/elna1.cc | 126 ++++++++++++++++++-------------- include/elna/gcc/elna-generic.h | 1 + include/elna/gcc/elna-tree.h | 15 +++- include/elna/source/ast.h | 32 ++++++-- source/ast.cc | 34 +++++++-- source/lexer.ll | 4 + source/parser.yy | 9 ++- 11 files changed, 202 insertions(+), 76 deletions(-) diff --git a/Rakefile b/Rakefile index c5128fd..9eaeb9d 100644 --- a/Rakefile +++ b/Rakefile @@ -1,3 +1,14 @@ +# MacOS: +# --- +# CC=gcc-14 CXX=g++-14 \ +# CFLAGS="-I/opt/homebrew/Cellar/flex/2.6.4_2/include" \ +# CXXFLAGS="-I/opt/homebrew/Cellar/flex/2.6.4_2/include" \ +# ../gcc-14.2.0/configure \ +# --disable-bootstrap \ +# --enable-languages=c,c++,elna \ +# --with-sysroot=/Library/Developer/CommandLineTools/SDKs/MacOSX15.2.sdk \ +# --prefix=$(realpath ../gcc-install) + task :default do sh 'make -C build' sh './build/bin/elna' diff --git a/gcc/elna-diagnostic.cc b/gcc/elna-diagnostic.cc index 8e804ab..e19eea5 100644 --- a/gcc/elna-diagnostic.cc +++ b/gcc/elna-diagnostic.cc @@ -32,6 +32,10 @@ namespace gcc { return "Char"; } + else if (is_string_type(type)) + { + return "String"; + } else { return "<>"; diff --git a/gcc/elna-generic.cc b/gcc/elna-generic.cc index 407d998..07117cb 100644 --- a/gcc/elna-generic.cc +++ b/gcc/elna-generic.cc @@ -25,7 +25,7 @@ namespace gcc if (statement->arguments().size() != 1) { error_at(get_location(&statement->position()), - "procedure '%s' expects 1 argument, %i given", + "procedure '%s' expects 1 argument, %lu given", statement->name().c_str(), statement->arguments().size()); return; } @@ -46,6 +46,10 @@ namespace gcc { format_number = "%c\n"; } + else if (is_string_type(argument_type)) + { + format_number = "%s\n"; + } else { error_at(get_location(&argument->position()), @@ -83,6 +87,7 @@ namespace gcc tree main_fndecl_type = build_function_type_array(integer_type_node, 2, main_fndecl_type_param); this->main_fndecl = build_fn_decl("main", main_fndecl_type); tree resdecl = build_decl(UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, integer_type_node); + DECL_CONTEXT(resdecl) = this->main_fndecl; DECL_RESULT(this->main_fndecl) = resdecl; tree set_result = build2(INIT_EXPR, void_type_node, DECL_RESULT(main_fndecl), build_int_cst_type(integer_type_node, 0)); @@ -139,6 +144,11 @@ namespace gcc this->current_expression = build_int_cstu(elna_char_type_node, character->character()); } + void generic_visitor::visit(source::string_literal *string) + { + this->current_expression = build_string_literal(string->string().size() + 1, string->string().c_str()); + } + void generic_visitor::visit(source::binary_expression *expression) { expression->lhs().accept(this); @@ -180,6 +190,8 @@ namespace gcc operator_code = MULT_EXPR; target_type = left_type; break; + default: + break; } if (operator_code != ERROR_MARK) // An arithmetic operation. { @@ -219,6 +231,8 @@ namespace gcc operator_code = GE_EXPR; target_type = boolean_type_node; break; + default: + break; } gcc_assert(operator_code != ERROR_MARK); gcc_assert(target_type != error_mark_node); @@ -275,6 +289,10 @@ namespace gcc { declaration_type = elna_char_type_node; } + else if (declaration->type().base() == "String") + { + declaration_type = elna_string_type_node; + } else { error_at(get_location(&declaration->type().position()), @@ -286,6 +304,7 @@ namespace gcc get_identifier(declaration->identifier().c_str()), declaration_type); auto result = this->symbol_map.insert({ declaration->identifier(), declaration_tree }); + // DECL_CONTEXT(declaration_tree) = this->main_fndecl; if (result.second) { auto declaration_statement = build1_loc(declaration_location, DECL_EXPR, diff --git a/gcc/elna-tree.cc b/gcc/elna-tree.cc index 690b8e1..210a1df 100644 --- a/gcc/elna-tree.cc +++ b/gcc/elna-tree.cc @@ -4,8 +4,23 @@ tree elna_global_trees[ELNA_TI_MAX]; -void elna_init_ttree(void) +namespace elna { - elna_char_type_node = make_unsigned_type(8); - TYPE_STRING_FLAG(elna_char_type_node) = 1; +namespace gcc +{ + void init_ttree() + { + elna_char_type_node = make_unsigned_type(8); + elna_string_type_node = build_pointer_type( + build_qualified_type(char_type_node, TYPE_QUAL_CONST)); /* const char* */ + TYPE_STRING_FLAG(elna_char_type_node) = 1; + } + + bool is_string_type(tree type) + { + gcc_assert(TYPE_P(type)); + return TREE_CODE(type) == POINTER_TYPE + && TYPE_MAIN_VARIANT(TREE_TYPE(type)) == char_type_node; + } +} } diff --git a/gcc/elna1.cc b/gcc/elna1.cc index d086f3e..fdadfcf 100644 --- a/gcc/elna1.cc +++ b/gcc/elna1.cc @@ -25,14 +25,14 @@ struct GTY (()) lang_type { - char dummy; + char dummy; }; /* Language-dependent contents of a decl. */ struct GTY (()) lang_decl { - char dummy; + char dummy; }; /* Language-dependent contents of an identifier. This must include a @@ -40,7 +40,7 @@ struct GTY (()) lang_decl struct GTY (()) lang_identifier { - struct tree_identifier common; + struct tree_identifier common; }; /* The resulting tree type. */ @@ -50,15 +50,15 @@ union GTY ((desc ("TREE_CODE (&%h.generic) == IDENTIFIER_NODE"), "TS_COMMON) ? ((union lang_tree_node *) TREE_CHAIN " "(&%h.generic)) : NULL"))) lang_tree_node { - union tree_node GTY ((tag ("0"), desc ("tree_node_structure (&%h)"))) generic; - struct lang_identifier GTY ((tag ("1"))) identifier; + union tree_node GTY ((tag ("0"), desc ("tree_node_structure (&%h)"))) generic; + struct lang_identifier GTY ((tag ("1"))) identifier; }; /* We don't use language_function. */ struct GTY (()) language_function { - int dummy; + int dummy; }; /* Language hooks. */ @@ -66,9 +66,8 @@ struct GTY (()) language_function static bool elna_langhook_init(void) { build_common_tree_nodes(false); - elna_init_ttree(); + elna::gcc::init_ttree(); - /* I don't know why this has to be done explicitly. */ void_list_node = build_tree_list(NULL_TREE, void_type_node); build_common_builtin_nodes(); @@ -82,7 +81,7 @@ static void elna_parse_file(const char *filename) if (!file) { - fatal_error(UNKNOWN_LOCATION, "cannot open filename %s: %m", filename); + fatal_error(UNKNOWN_LOCATION, "cannot open filename %s: %m", filename); } elna::source::driver driver{ filename }; @@ -90,7 +89,7 @@ static void elna_parse_file(const char *filename) yy::parser parser(lexer, driver); linemap_add(line_table, LC_ENTER, 0, filename, 1); - if (auto result = parser()) + if (parser()) { for (const auto& error : driver.errors()) { @@ -110,56 +109,77 @@ static void elna_parse_file(const char *filename) static void elna_langhook_parse_file(void) { - for (int i = 0; i < num_in_fnames; i++) + for (unsigned int i = 0; i < num_in_fnames; i++) { - elna_parse_file (in_fnames[i]); + elna_parse_file(in_fnames[i]); } } -static tree -elna_langhook_type_for_mode (enum machine_mode mode, int unsignedp) +static tree elna_langhook_type_for_mode(enum machine_mode mode, int unsignedp) { - if (mode == TYPE_MODE (float_type_node)) - return float_type_node; - - if (mode == TYPE_MODE (double_type_node)) - return double_type_node; - - if (mode == TYPE_MODE (intQI_type_node)) - return unsignedp ? unsigned_intQI_type_node : intQI_type_node; - if (mode == TYPE_MODE (intHI_type_node)) - return unsignedp ? unsigned_intHI_type_node : intHI_type_node; - if (mode == TYPE_MODE (intSI_type_node)) - return unsignedp ? unsigned_intSI_type_node : intSI_type_node; - if (mode == TYPE_MODE (intDI_type_node)) - return unsignedp ? unsigned_intDI_type_node : intDI_type_node; - if (mode == TYPE_MODE (intTI_type_node)) - return unsignedp ? unsigned_intTI_type_node : intTI_type_node; - - if (mode == TYPE_MODE (integer_type_node)) - return unsignedp ? unsigned_type_node : integer_type_node; - - if (mode == TYPE_MODE (long_integer_type_node)) - return unsignedp ? long_unsigned_type_node : long_integer_type_node; - - if (mode == TYPE_MODE (long_long_integer_type_node)) - return unsignedp ? long_long_unsigned_type_node - : long_long_integer_type_node; - - if (COMPLEX_MODE_P (mode)) + if (mode == TYPE_MODE(float_type_node)) { - if (mode == TYPE_MODE (complex_float_type_node)) - return complex_float_type_node; - if (mode == TYPE_MODE (complex_double_type_node)) - return complex_double_type_node; - if (mode == TYPE_MODE (complex_long_double_type_node)) - return complex_long_double_type_node; - if (mode == TYPE_MODE (complex_integer_type_node) && !unsignedp) - return complex_integer_type_node; + return float_type_node; } - - /* gcc_unreachable */ - return NULL; + else if (mode == TYPE_MODE(double_type_node)) + { + return double_type_node; + } + if (mode == TYPE_MODE(intQI_type_node)) + { + return unsignedp ? unsigned_intQI_type_node : intQI_type_node; + } + else if (mode == TYPE_MODE(intHI_type_node)) + { + return unsignedp ? unsigned_intHI_type_node : intHI_type_node; + } + else if (mode == TYPE_MODE(intSI_type_node)) + { + return unsignedp ? unsigned_intSI_type_node : intSI_type_node; + } + else if (mode == TYPE_MODE(intDI_type_node)) + { + return unsignedp ? unsigned_intDI_type_node : intDI_type_node; + } + else if (mode == TYPE_MODE(intTI_type_node)) + { + return unsignedp ? unsigned_intTI_type_node : intTI_type_node; + } + else if (mode == TYPE_MODE(integer_type_node)) + { + return unsignedp ? unsigned_type_node : integer_type_node; + } + else if (mode == TYPE_MODE(long_integer_type_node)) + { + return unsignedp ? long_unsigned_type_node : long_integer_type_node; + } + else if (mode == TYPE_MODE(long_long_integer_type_node)) + { + return unsignedp + ? long_long_unsigned_type_node + : long_long_integer_type_node; + } + if (COMPLEX_MODE_P(mode)) + { + if (mode == TYPE_MODE(complex_float_type_node)) + { + return complex_float_type_node; + } + if (mode == TYPE_MODE(complex_double_type_node)) + { + return complex_double_type_node; + } + if (mode == TYPE_MODE(complex_long_double_type_node)) + { + return complex_long_double_type_node; + } + if (mode == TYPE_MODE(complex_integer_type_node) && !unsignedp) + { + return complex_integer_type_node; + } + } + /* gcc_unreachable */ + return nullptr; } static tree elna_langhook_type_for_size(unsigned int bits ATTRIBUTE_UNUSED, diff --git a/include/elna/gcc/elna-generic.h b/include/elna/gcc/elna-generic.h index 68c6b20..6649251 100644 --- a/include/elna/gcc/elna-generic.h +++ b/include/elna/gcc/elna-generic.h @@ -31,6 +31,7 @@ namespace gcc void visit(source::number_literal *literal) override; void visit(source::number_literal *boolean) override; void visit(source::char_literal *character) override; + void visit(source::string_literal *string) override; void visit(source::binary_expression *expression) override; void visit(source::constant_definition *definition) override; void visit(source::declaration *declaration) override; diff --git a/include/elna/gcc/elna-tree.h b/include/elna/gcc/elna-tree.h index 34cc5ef..73bb402 100644 --- a/include/elna/gcc/elna-tree.h +++ b/include/elna/gcc/elna-tree.h @@ -7,12 +7,21 @@ enum elna_tree_index { - ELNA_TI_CHAR_TYPE, - ELNA_TI_MAX + ELNA_TI_CHAR_TYPE, + ELNA_TI_STRING_TYPE, + ELNA_TI_MAX }; extern GTY(()) tree elna_global_trees[ELNA_TI_MAX]; #define elna_char_type_node elna_global_trees[ELNA_TI_CHAR_TYPE] +#define elna_string_type_node elna_global_trees[ELNA_TI_STRING_TYPE] -void elna_init_ttree(void); +namespace elna +{ +namespace gcc +{ + void init_ttree(); + bool is_string_type(tree type); +} +} diff --git a/include/elna/source/ast.h b/include/elna/source/ast.h index 58687c1..ada06e7 100644 --- a/include/elna/source/ast.h +++ b/include/elna/source/ast.h @@ -5,6 +5,8 @@ #include #include +#include +#include #include "elna/source/result.h" #include "elna/source/types.h" @@ -49,6 +51,7 @@ namespace source template class number_literal; class char_literal; + class string_literal; /** * Interface for AST visitors. @@ -73,6 +76,7 @@ namespace source virtual void visit(number_literal *) = 0; virtual void visit(number_literal *) = 0; virtual void visit(char_literal *) = 0; + virtual void visit(string_literal *) = 0; }; /** @@ -80,7 +84,7 @@ namespace source */ struct empty_visitor : parser_visitor { - virtual void visit(declaration *declaration) override; + virtual void visit(declaration *) override; virtual void visit(constant_definition *definition) override; virtual void visit(procedure_definition *definition) override; virtual void visit(call_statement *statement) override; @@ -92,12 +96,13 @@ namespace source virtual void visit(program *program) override; virtual void visit(binary_expression *expression) override; virtual void visit(unary_expression *expression) override; - virtual void visit(type_expression *variable) override; - virtual void visit(variable_expression *variable) override; - virtual void visit(number_literal *number) override; - virtual void visit(number_literal *number) override; - virtual void visit(number_literal *boolean) override; - virtual void visit(char_literal *character) override; + virtual void visit(type_expression *) override; + virtual void visit(variable_expression *) override; + virtual void visit(number_literal *) override; + virtual void visit(number_literal *) override; + virtual void visit(number_literal *) override; + virtual void visit(char_literal *) override; + virtual void visit(string_literal *) override; }; /** @@ -408,9 +413,9 @@ namespace source class block : public node { - std::unique_ptr m_body; std::vector> m_definitions; std::vector> m_declarations; + std::unique_ptr m_body; public: block(const struct position position, std::vector>&& definitions, @@ -465,6 +470,17 @@ namespace source unsigned char character() const noexcept; }; + class string_literal : public expression + { + std::string m_string; + + public: + string_literal(const struct position position, const std::string& value); + virtual void accept(parser_visitor *visitor) override; + + const std::string& string() const noexcept; + }; + class variable_expression : public expression { std::string m_name; diff --git a/source/ast.cc b/source/ast.cc index 1c9e9ae..f3979d3 100644 --- a/source/ast.cc +++ b/source/ast.cc @@ -7,7 +7,7 @@ namespace elna { namespace source { - void empty_visitor::visit(declaration *declaration) + void empty_visitor::visit(declaration *) { } @@ -87,27 +87,31 @@ namespace source expression->operand().accept(this); } - void empty_visitor::visit(type_expression *variable) + void empty_visitor::visit(type_expression *) { } - void empty_visitor::visit(variable_expression *variable) + void empty_visitor::visit(variable_expression *) { } - void empty_visitor::visit(number_literal *number) + void empty_visitor::visit(number_literal *) { } - void empty_visitor::visit(number_literal *number) + void empty_visitor::visit(number_literal *) { } - void empty_visitor::visit(number_literal *character) + void empty_visitor::visit(number_literal *) { } - void empty_visitor::visit(char_literal *character) + void empty_visitor::visit(char_literal *) + { + } + + void empty_visitor::visit(string_literal *) { } @@ -313,6 +317,21 @@ namespace source return m_character; } + string_literal::string_literal(const struct position position, const std::string& value) + : expression(position), m_string(value) + { + } + + void string_literal::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + const std::string& string_literal::string() const noexcept + { + return m_string; + } + variable_expression::variable_expression(const struct position position, const std::string& name) : expression(position), m_name(name) { @@ -550,6 +569,7 @@ namespace source case binary_operator::greater_equal: return ">="; } + __builtin_unreachable(); }; } } diff --git a/source/lexer.ll b/source/lexer.ll index b518d9a..7bbc760 100644 --- a/source/lexer.ll +++ b/source/lexer.ll @@ -80,6 +80,10 @@ false { return yy::parser::make_CHARACTER( std::string(yytext, 1, strlen(yytext) - 2), this->location); } +\"[^\"]*\" { + return yy::parser::make_STRING( + std::string(yytext, 1, strlen(yytext) - 2), this->location); + } \( { return yy::parser::make_LEFT_PAREN(this->location); } diff --git a/source/parser.yy b/source/parser.yy index 2873a4b..47b342d 100644 --- a/source/parser.yy +++ b/source/parser.yy @@ -60,6 +60,7 @@ %token INTEGER "integer" %token FLOAT "float" %token CHARACTER "character" +%token STRING "string" %token BOOLEAN %token IF WHILE DO %token CONST VAR PROCEDURE @@ -76,6 +77,7 @@ %type >> float_literal; %type >> boolean_literal; %type > character_literal; +%type > string_literal; %type > constant_definition; %type >> constant_definition_part constant_definitions; %type > variable_declaration; @@ -153,7 +155,11 @@ float_literal: FLOAT }; character_literal: CHARACTER { - $$ = std::make_unique(elna::source::make_position(@1), $1[0]); + $$ = std::make_unique(elna::source::make_position(@1), $1.at(0)); + }; +string_literal: STRING + { + $$ = std::make_unique(elna::source::make_position(@1), $1); }; boolean_literal: BOOLEAN { @@ -194,6 +200,7 @@ pointer: | float_literal { $$ = std::move($1); } | boolean_literal { $$ = std::move($1); } | character_literal { $$ = std::move($1); } + | string_literal { $$ = std::move($1); } | variable_expression { $$ = std::move($1); } | LEFT_PAREN expression RIGHT_PAREN { $$ = std::move($2); } summand: