diff --git a/config-lang.in b/config-lang.in index 5e58f8b..c544997 100644 --- a/config-lang.in +++ b/config-lang.in @@ -1,3 +1,20 @@ +# config-lang.in -- Top level configure fragment for gcc Elna frontend. +# Copyright (C) 2025 Free Software Foundation, Inc. + +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. + +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + language="elna" gcc_subdir="elna/gcc" diff --git a/gcc/Make-lang.in b/gcc/Make-lang.in index 0e96ea4..fbd620b 100644 --- a/gcc/Make-lang.in +++ b/gcc/Make-lang.in @@ -1,3 +1,20 @@ +# Make-lang.in -- Top level -*- makefile -*- fragment for the Elna frontend. +# Copyright (C) 2025 Free Software Foundation, Inc. + +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. + +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + ELNA_INSTALL_NAME := $(shell echo gelna|sed '$(program_transform_name)') ELNA_TARGET_INSTALL_NAME := $(target_noncanonical)-$(shell echo gelna|sed '$(program_transform_name)') diff --git a/gcc/config-lang.in b/gcc/config-lang.in index 2d9f57a..3b3e9bc 100644 --- a/gcc/config-lang.in +++ b/gcc/config-lang.in @@ -1,3 +1,27 @@ +# config-lang.in -- Top level configure fragment for gcc Elna frontend. +# Copyright (C) 2025 Free Software Foundation, Inc. + +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. + +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# Configure looks for the existence of this file to auto-config each language. +# We define several parameters used by configure: +# +# language - name of language as it would appear in $(LANGUAGES) +# boot_language - "yes" if we need to build this language in stage1 +# compilers - value to add to $(COMPILERS) + language="elna" gcc_subdir="elna/gcc" diff --git a/gcc/elna-convert.cc b/gcc/elna-convert.cc index d338150..71baf47 100644 --- a/gcc/elna-convert.cc +++ b/gcc/elna-convert.cc @@ -1,3 +1,20 @@ +/* elna-convert.cc -- Data type conversion routines. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + #include "config.h" #include "system.h" #include "coretypes.h" diff --git a/gcc/elna-diagnostic.cc b/gcc/elna-diagnostic.cc index c05321c..1e8a2db 100644 --- a/gcc/elna-diagnostic.cc +++ b/gcc/elna-diagnostic.cc @@ -1,3 +1,20 @@ +/* elna-diagnostic.cc -- Elna frontend specific diagnostic routines. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + #include "elna/gcc/elna-diagnostic.h" #include "elna/gcc/elna-tree.h" @@ -12,7 +29,7 @@ namespace gcc return linemap_position_for_column(line_table, position->column); } - const char *print_type(tree type) + std::string print_type(tree type) { gcc_assert(TYPE_P(type)); @@ -38,7 +55,7 @@ namespace gcc } else if (is_pointer_type(type)) { - return "pointer"; + return std::string("\"pointer to " + print_type(TREE_TYPE(type)) + "\""); } else if (TREE_CODE(type) == ARRAY_TYPE) { diff --git a/gcc/elna-generic.cc b/gcc/elna-generic.cc index db29edf..3cbf86d 100644 --- a/gcc/elna-generic.cc +++ b/gcc/elna-generic.cc @@ -1,3 +1,20 @@ +/* elna-generic.cc -- Visitor generating a GENERIC tree. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + #include #include "elna/gcc/elna-generic.h" @@ -295,6 +312,7 @@ namespace gcc tree index_constant = build_int_cstu(index_type, string->number().size()); tree element_type = this->symbol_map->lookup("Char"); tree string_type = build_array_type(element_type, build_index_type(index_constant)); + tree string_record = this->symbol_map->lookup("String"); tree string_literal = build_string(string->number().size(), string->number().c_str()); TREE_TYPE(string_literal) = string_type; @@ -302,12 +320,11 @@ namespace gcc TREE_READONLY(string_literal) = 1; TREE_STATIC(string_literal) = 1; - string_type = build_pointer_type(element_type); + string_type = TREE_TYPE(TREE_CHAIN(TYPE_FIELDS(string_record))); string_literal = build4(ARRAY_REF, element_type, string_literal, integer_zero_node, NULL_TREE, NULL_TREE); string_literal = build1(ADDR_EXPR, string_type, string_literal); vec *elms = NULL; - tree string_record = this->symbol_map->lookup("String"); CONSTRUCTOR_APPEND_ELT(elms, TYPE_FIELDS(string_record), index_constant); CONSTRUCTOR_APPEND_ELT(elms, TREE_CHAIN(TYPE_FIELDS(string_record)), string_literal); @@ -373,11 +390,11 @@ namespace gcc } return; } - if (left_type != right_type && !are_compatible_pointers(left, right)) + if (left_type != right_type && !are_compatible_pointers(left, right) && !are_compatible_pointers(right, left)) { error_at(expression_location, "invalid operands of type %s and %s for operator %s", - print_type(left_type), print_type(right_type), + print_type(left_type).c_str(), print_type(right_type).c_str(), boot::print_binary_operator(expression->operation())); this->current_expression = error_mark_node; return; @@ -719,7 +736,7 @@ namespace gcc return; } if (TREE_TYPE(this->current_expression) == TREE_TYPE(lvalue) - || (is_pointer_type(TREE_TYPE(lvalue)) && this->current_expression == null_pointer_node)) + || are_compatible_pointers(lvalue, this->current_expression)) { tree assignment = build2_loc(statement_location, MODIFY_EXPR, void_type_node, lvalue, this->current_expression); @@ -731,8 +748,8 @@ namespace gcc { error_at(statement_location, "cannot assign value of type %s to variable of type %s", - print_type(TREE_TYPE(this->current_expression)), - print_type(TREE_TYPE(lvalue))); + print_type(TREE_TYPE(this->current_expression)).c_str(), + print_type(TREE_TYPE(lvalue)).c_str()); this->current_expression = error_mark_node; } } @@ -771,7 +788,7 @@ namespace gcc { error_at(get_location(&branch.prerequisite().position()), "expected expression of boolean type but its type is %s", - print_type(TREE_TYPE(this->current_expression))); + print_type(TREE_TYPE(this->current_expression)).c_str()); this->current_expression = error_mark_node; return; } @@ -818,7 +835,7 @@ namespace gcc { error_at(get_location(&statement->body().prerequisite().position()), "expected expression of boolean type but its type is %s", - print_type(TREE_TYPE(this->current_expression))); + print_type(TREE_TYPE(this->current_expression)).c_str()); this->current_expression = error_mark_node; return; } diff --git a/gcc/elna-spec.cc b/gcc/elna-spec.cc index d23606c..0b5749c 100644 --- a/gcc/elna-spec.cc +++ b/gcc/elna-spec.cc @@ -1,3 +1,20 @@ +/* elna-spec.c -- Specific flags and argument handling of the Elna front end. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + void lang_specific_driver (struct cl_decoded_option ** /* in_decoded_options */, unsigned int * /* in_decoded_options_count */, diff --git a/gcc/elna-tree.cc b/gcc/elna-tree.cc index 6f8a9af..e7945bf 100644 --- a/gcc/elna-tree.cc +++ b/gcc/elna-tree.cc @@ -1,3 +1,20 @@ +/* elna-tree.cc -- Utilities to manipulate GCC trees. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + #include "elna/gcc/elna-tree.h" #include "elna/gcc/elna-diagnostic.h" @@ -24,8 +41,11 @@ namespace gcc bool are_compatible_pointers(tree lhs, tree rhs) { - return (lhs == null_pointer_node || rhs == null_pointer_node) - && is_pointer_type(TREE_TYPE(lhs)) && is_pointer_type(TREE_TYPE(rhs)); + tree lhs_type = TREE_TYPE(lhs); + tree rhs_type = TREE_TYPE(rhs); + + return (is_pointer_type(lhs_type) && rhs == null_pointer_node) + || (is_pointer_type(lhs_type) && lhs_type == rhs_type); } tree tree_chain_base::head() @@ -141,7 +161,7 @@ namespace gcc record_chain.append(build_field(UNKNOWN_LOCATION, string_record, "length", initial_table->lookup("Word"))); record_chain.append(build_field(UNKNOWN_LOCATION, string_record, "ptr", - build_pointer_type(initial_table->lookup("Char")))); + build_pointer_type_for_mode(initial_table->lookup("Char"), VOIDmode, true))); TYPE_FIELDS(string_record) = record_chain.head(); layout_type(string_record); @@ -216,7 +236,7 @@ namespace gcc { error_at(expression_location, "invalid operands of type %s and %s for operator %s", - print_type(left_type), print_type(right_type), + print_type(left_type).c_str(), print_type(right_type).c_str(), elna::boot::print_binary_operator(expression->operation())); return error_mark_node; } diff --git a/gcc/elna1.cc b/gcc/elna1.cc index e3c3487..fe5a817 100644 --- a/gcc/elna1.cc +++ b/gcc/elna1.cc @@ -1,3 +1,20 @@ +/* elna1.cc -- Language-dependent hooks for Elna. + Copyright (C) 2006-2024 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + #include "config.h" #include "system.h" #include "coretypes.h" diff --git a/gcc/lang-specs.h b/gcc/lang-specs.h index e3e0e74..c9743a1 100644 --- a/gcc/lang-specs.h +++ b/gcc/lang-specs.h @@ -1,3 +1,20 @@ +/* lang-specs.h -- GCC driver specs for Elna frontend. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + /* gcc/gcc.cc */ {".elna", "@elna", nullptr, 0, 0}, {"@elna", diff --git a/include/elna/boot/ast.h b/include/elna/boot/ast.h index 97ab5ba..b151f4e 100644 --- a/include/elna/boot/ast.h +++ b/include/elna/boot/ast.h @@ -648,11 +648,11 @@ namespace boot template class number_literal : public literal { - T m_number; + T m_value; public: - number_literal(const struct position position, const T value) - : literal(position), m_number(value) + number_literal(const struct position position, const T& value) + : literal(position), m_value(value) { } @@ -663,7 +663,7 @@ namespace boot const T& number() const { - return m_number; + return m_value; } }; diff --git a/include/elna/gcc/elna-diagnostic.h b/include/elna/gcc/elna-diagnostic.h index 08c56dd..57130bf 100644 --- a/include/elna/gcc/elna-diagnostic.h +++ b/include/elna/gcc/elna-diagnostic.h @@ -1,3 +1,20 @@ +/* elna-diagnostic.h -- Elna frontend specific diagnostic routines. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + #pragma once #include "config.h" @@ -14,6 +31,6 @@ namespace gcc { location_t get_location(const boot::position *position); - const char *print_type(tree type); + std::string print_type(tree type); } } diff --git a/include/elna/gcc/elna-generic.h b/include/elna/gcc/elna-generic.h index 9d85ec4..caa0926 100644 --- a/include/elna/gcc/elna-generic.h +++ b/include/elna/gcc/elna-generic.h @@ -1,3 +1,20 @@ +/* elna-generic.h -- Visitor generating a GENERIC tree. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + #pragma once #include "elna/boot/ast.h" diff --git a/include/elna/gcc/elna-tree.h b/include/elna/gcc/elna-tree.h index 2694376..fe0f70d 100644 --- a/include/elna/gcc/elna-tree.h +++ b/include/elna/gcc/elna-tree.h @@ -1,3 +1,20 @@ +/* elna-tree.h -- Utilities to manipulate GCC trees. + Copyright (C) 2025 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + #pragma once #include @@ -17,6 +34,12 @@ namespace gcc { bool is_pointer_type(tree type); bool is_integral_type(tree type); + + /** + * \param lhs Left hand value. + * \param rhs Right hand value. + * \return Whether rhs can be assigned to lhs. + */ bool are_compatible_pointers(tree lhs, tree rhs); class tree_chain_base diff --git a/source.elna b/source.elna index 6923fc5..4873d2b 100644 --- a/source.elna +++ b/source.elna @@ -25,9 +25,14 @@ type first: Position; last: Position end, + SourceCode = record + position: Position; + text: String + end, TokenValue* = union int_value: Int; string_value: pointer to Char; + string: String; boolean_value: Bool; char_value: Char end, @@ -179,11 +184,32 @@ begin return strncmp(this.ptr, that, length) = 0 end +proc open_substring(string: String, start: Word): String; +begin + string.ptr := string.ptr + start; + string.length := string.length - start; + return string +end + +proc char_at(string: String, position: Word): Char; +begin + return (string.ptr + position)^ +end + (* End of standard procedures. *) -proc read_source(filename: pointer to Char): pointer to Char; +proc make_position(): Position; +var + result: Position; +begin + result.line := 1u; + result.column := 1u; + return result +end + +proc read_source(filename: pointer to Char, result: pointer to String): Bool; var input_file: pointer to FILE, source_size: Int, @@ -192,26 +218,28 @@ begin input_file := fopen(filename, "rb\0".ptr); if input_file = nil then - return nil + return false + end; + defer + fclose(input_file) end; if fseek(input_file, 0, SEEK_END) <> 0 then - fclose(input_file); - return nil + return false end; source_size := ftell(input_file); if source_size < 0 then - fclose(input_file); - return nil + return false end; rewind(input_file); - input := calloc(source_size + 1, 1); + input := malloc(source_size); if fread(input, source_size, 1, input_file) <> 1u then - input := nil + return false end; - fclose(input_file); + result^.length := cast(source_size as Word); + result^.ptr := cast(input as pointer to Char); - return input + return true end proc escape_char(escape: Char, result: pointer to Char): Bool; @@ -257,12 +285,26 @@ begin end end -proc skip_spaces(input: pointer to Char): pointer to Char; +proc advance_source(source_code: SourceCode, length: Word): SourceCode; begin - while is_space(input^) do - input := input + 1 + source_code.text := open_substring(source_code.text, length); + source_code.position.column := source_code.position.column + length; + + return source_code +end + +proc skip_spaces(source_code: SourceCode): SourceCode; +begin + while source_code.text.length > 0u and is_space(char_at(source_code.text, 0)) do + if char_at(source_code.text, 0) = '\n' then + source_code.position.line := source_code.position.line + 1u; + source_code.position.column := 1u + else + source_code.position.column := source_code.position.column + 1u + end; + source_code.text := open_substring(source_code.text, 1u) end; - return input + return source_code end proc lex_identifier(input: pointer to Char): pointer to Char; @@ -273,19 +315,29 @@ begin return input end -proc lex_comment(input: pointer to Char): pointer to Char; +proc lex_comment(source_code: pointer to SourceCode, token_content: pointer to String): Bool; var - next: pointer to Char; + result: pointer to Char; begin - while input^ <> '\0' do - next := input + 1; + token_content^.ptr := source_code^.text.ptr; + token_content^.length := 0u; - if input^ = '*' and next^ = ')' then - return next + 1 + while source_code^.text.length > 1u do + if char_at(source_code^.text, 0) = '*' and char_at(source_code^.text, 1) = ')' then + source_code^ := advance_source(source_code^, 2u); + + result := cast(malloc(token_content^.length) as pointer to Char); + strncpy(result, token_content^.ptr, token_content^.length); + token_content^.ptr := result; + + return true end; - input := next + token_content^.length := token_content^.length + 1u; + source_code^ := advance_source(source_code^, 1) end; - return nil + token_content^.ptr := nil; + token_content^.length := 0u; + return false end proc lex_character(input: pointer to Char, current_token: pointer to Token): pointer to Char; @@ -458,7 +510,7 @@ begin elsif current_token^.kind = TOKEN_AT then write_c('@') elsif current_token^.kind = TOKEN_COMMENT then - write_s("COMMENT") + write_s("(* COMMENT *)") elsif current_token^.kind = TOKEN_INTEGER then write_c('<'); write_i(current_token^.value.int_value); @@ -562,154 +614,167 @@ begin return current_token end -proc tokenize(input_pointer: pointer to Char, tokens_size: pointer to Word): pointer to Token; +proc tokenize(source_code: SourceCode, tokens_size: pointer to Word): pointer to Token; var token_end: pointer to Char, tokens: pointer to Token, current_token: pointer to Token, - token_length: Word; + token_length: Word, + first_char: Char, + token_content: String; begin tokens_size^ := 0u; tokens := nil; + source_code := skip_spaces(source_code); - input_pointer := skip_spaces(input_pointer); - - while input_pointer^ <> '\0' do + while source_code.text.length <> 0u do tokens := cast(reallocarray(tokens, tokens_size^ + 1u, sizeof(Token)) as pointer to Token); current_token := tokens + tokens_size^; + first_char := char_at(source_code.text, 0); - if is_alpha(input_pointer^) or input_pointer^ = '_' then - token_end := lex_identifier(input_pointer + 1); - token_length := cast(token_end as Word) - cast(input_pointer as Word); + if is_alpha(first_char) or first_char = '_' then + token_end := lex_identifier(source_code.text.ptr + 1); + token_length := cast(token_end - source_code.text.ptr as Word); - current_token^ := categorize_identifier(input_pointer, token_length); + current_token^ := categorize_identifier(source_code.text.ptr, token_length); - input_pointer := token_end - elsif is_digit(input_pointer^) then + source_code := advance_source(source_code, token_length) + elsif is_digit(first_char) then token_end := nil; - current_token^.value.int_value := strtol(input_pointer, @token_end, 10); + current_token^.value.int_value := strtol(source_code.text.ptr, @token_end, 10); + token_length := cast(token_end - source_code.text.ptr as Word); if token_end^ = 'u' then current_token^.kind := TOKEN_WORD; - input_pointer := token_end + 1 + source_code := advance_source(source_code, token_length + 1u) else current_token^.kind := TOKEN_INTEGER; - input_pointer := token_end + source_code := advance_source(source_code, token_length) end - elsif input_pointer^ = '(' then - input_pointer := input_pointer + 1; - if input_pointer^ = '*' then - token_end := lex_comment(input_pointer + 1); + elsif first_char = '(' then + source_code := advance_source(source_code, 1u); - if token_end <> nil then - token_length := cast(token_end as Word) - cast(input_pointer as Word); - current_token^.value.string_value := cast(calloc(token_length + 1u, 1) as pointer to Char); - strncpy(current_token^.value.string_value, input_pointer, token_length); - current_token^.kind := TOKEN_COMMENT; + if source_code.text.length = 0u then + current_token^.kind := TOKEN_LEFT_PAREN + elsif char_at(source_code.text, 0u) = '*' then + source_code := advance_source(source_code, 1u); - input_pointer := token_end + if lex_comment(@source_code, @token_content) then + current_token^.value.string := token_content; + current_token^.kind := TOKEN_COMMENT else current_token^.kind := 0 end else current_token^.kind := TOKEN_LEFT_PAREN end - elsif input_pointer^ = ')' then + elsif first_char = ')' then current_token^.kind := TOKEN_RIGHT_PAREN; - input_pointer := input_pointer + 1 - elsif input_pointer^ = '\'' then - token_end := lex_character(input_pointer + 1, current_token); + source_code := advance_source(source_code, 1u) + elsif first_char = '\'' then + token_end := lex_character(source_code.text.ptr + 1, current_token); + token_length := cast(token_end - source_code.text.ptr as Word); if token_end^ = '\'' then current_token^.kind := TOKEN_CHARACTER; - input_pointer := token_end + 1 + source_code := advance_source(source_code, token_length + 1u) else - input_pointer := input_pointer + 1 + source_code := advance_source(source_code, 1u) end - elsif input_pointer^ = '"' then - token_end := lex_string(input_pointer + 1, current_token); + elsif first_char = '"' then + token_end := lex_string(source_code.text.ptr + 1, current_token); if token_end^ = '"' then current_token^.kind := TOKEN_STRING; - input_pointer := token_end + 1 + token_length := cast(token_end - source_code.text.ptr as Word); + source_code := advance_source(source_code, token_length + 1u) end - elsif input_pointer^ = '[' then + elsif first_char = '[' then current_token^.kind := TOKEN_LEFT_SQUARE; - input_pointer := input_pointer + 1 - elsif input_pointer^ = ']' then + source_code := advance_source(source_code, 1u) + elsif first_char = ']' then current_token^.kind := TOKEN_RIGHT_SQUARE; - input_pointer := input_pointer + 1 - elsif input_pointer^ = '>' then - input_pointer := input_pointer + 1; - if input_pointer^ = '=' then + source_code := advance_source(source_code, 1u) + elsif first_char = '>' then + source_code := advance_source(source_code, 1u); + + if source_code.text.length = 0u then + current_token^.kind := TOKEN_GREATER_THAN + elsif char_at(source_code.text, 0) = '=' then current_token^.kind := TOKEN_GREATER_EQUAL; - input_pointer := input_pointer + 1 + source_code := advance_source(source_code, 1u) else current_token^.kind := TOKEN_GREATER_THAN end - elsif input_pointer^ = '<' then - input_pointer := input_pointer + 1; - if input_pointer^ = '=' then + elsif first_char = '<' then + source_code := advance_source(source_code, 1u); + + if source_code.text.length = 0u then + current_token^.kind := TOKEN_LESS_THAN + elsif char_at(source_code.text, 0) = '=' then current_token^.kind := TOKEN_LESS_EQUAL; - input_pointer := input_pointer + 1 - elsif input_pointer^ = '>' then + source_code := advance_source(source_code, 1u) + elsif char_at(source_code.text, 0) = '>' then current_token^.kind := TOKEN_NOT_EQUAL; - input_pointer := input_pointer + 1 + source_code := advance_source(source_code, 1u) else current_token^.kind := TOKEN_LESS_THAN end - elsif input_pointer^ = '=' then + elsif first_char = '=' then current_token^.kind := TOKEN_EQUAL; - input_pointer := input_pointer + 1 - elsif input_pointer^ = ';' then + source_code := advance_source(source_code, 1u) + elsif first_char = ';' then current_token^.kind := TOKEN_SEMICOLON; - input_pointer := input_pointer + 1 - elsif input_pointer^ = '.' then + source_code := advance_source(source_code, 1u) + elsif first_char = '.' then current_token^.kind := TOKEN_DOT; - input_pointer := input_pointer + 1 - elsif input_pointer^ = ',' then + source_code := advance_source(source_code, 1u) + elsif first_char = ',' then current_token^.kind := TOKEN_COMMA; - input_pointer := input_pointer + 1 - elsif input_pointer^ = '+' then + source_code := advance_source(source_code, 1u) + elsif first_char = '+' then current_token^.kind := TOKEN_PLUS; - input_pointer := input_pointer + 1 - elsif input_pointer^ = '-' then + source_code := advance_source(source_code, 1u) + elsif first_char = '-' then current_token^.kind := TOKEN_MINUS; - input_pointer := input_pointer + 1 - elsif input_pointer^ = '*' then + source_code := advance_source(source_code, 1u) + elsif first_char = '*' then current_token^.kind := TOKEN_MULTIPLICATION; - input_pointer := input_pointer + 1 - elsif input_pointer^ = '/' then + source_code := advance_source(source_code, 1u) + elsif first_char = '/' then current_token^.kind := TOKEN_DIVISION; - input_pointer := input_pointer + 1 - elsif input_pointer^ = '%' then + source_code := advance_source(source_code, 1u) + elsif first_char = '%' then current_token^.kind := TOKEN_REMAINDER; - input_pointer := input_pointer + 1 - elsif input_pointer^ = ':' then - input_pointer := input_pointer + 1; - if input_pointer^ = '=' then + source_code := advance_source(source_code, 1u) + elsif first_char = ':' then + source_code := advance_source(source_code, 1u); + + if source_code.text.length = 0u then + current_token^.kind := TOKEN_COLON + elsif char_at(source_code.text, 0) = '=' then current_token^.kind := TOKEN_ASSIGNMENT; - input_pointer := input_pointer + 1 + source_code := advance_source(source_code, 1u) else current_token^.kind := TOKEN_COLON end - elsif input_pointer^ = '^' then + elsif first_char = '^' then current_token^.kind := TOKEN_HAT; - input_pointer := input_pointer + 1 - elsif input_pointer^ = '@' then + source_code := advance_source(source_code, 1u) + elsif first_char = '@' then current_token^.kind := TOKEN_AT; - input_pointer := input_pointer + 1 + source_code := advance_source(source_code, 1u) else current_token^.kind := 0; - input_pointer := input_pointer + 1 + source_code := advance_source(source_code, 1u) end; if current_token^.kind <> 0 then tokens_size^ := tokens_size^ + 1u; - input_pointer := skip_spaces(input_pointer) + source_code := skip_spaces(source_code) else write_s("Lexical analysis error on \""); - write_c(input_pointer^); + write_c(first_char); write_s("\".\n") end end; @@ -819,9 +884,9 @@ end proc process(argc: Int, argv: pointer to pointer to Char): Int; var - input: pointer to Char, tokens: pointer to Token, tokens_size: Word, + source_code: SourceCode, command_line: pointer to CommandLine; begin command_line := parse_command_line(argc, argv); @@ -829,12 +894,12 @@ begin return 2 end; - input := read_source(command_line^.input); - if input = nil then + source_code.position := make_position(); + if not read_source(command_line^.input, @source_code.text) then perror(command_line^.input); return 3 end; - tokens := tokenize(input, @tokens_size); + tokens := tokenize(source_code, @tokens_size); if command_line^.tokenize then print_tokens(tokens, tokens_size)