Make string and char pointers comparable

This commit is contained in:
Eugen Wissner 2025-02-08 23:02:27 +01:00
parent 8a0f282714
commit d88bd652a4
Signed by: belka
GPG Key ID: A27FDC1E8EE902C0
15 changed files with 425 additions and 123 deletions

View File

@ -1,3 +1,20 @@
# config-lang.in -- Top level configure fragment for gcc Elna frontend.
# Copyright (C) 2025 Free Software Foundation, Inc.
# GCC is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
# GCC is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
language="elna" language="elna"
gcc_subdir="elna/gcc" gcc_subdir="elna/gcc"

View File

@ -1,3 +1,20 @@
# Make-lang.in -- Top level -*- makefile -*- fragment for the Elna frontend.
# Copyright (C) 2025 Free Software Foundation, Inc.
# GCC is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
# GCC is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
ELNA_INSTALL_NAME := $(shell echo gelna|sed '$(program_transform_name)') ELNA_INSTALL_NAME := $(shell echo gelna|sed '$(program_transform_name)')
ELNA_TARGET_INSTALL_NAME := $(target_noncanonical)-$(shell echo gelna|sed '$(program_transform_name)') ELNA_TARGET_INSTALL_NAME := $(target_noncanonical)-$(shell echo gelna|sed '$(program_transform_name)')

View File

@ -1,3 +1,27 @@
# config-lang.in -- Top level configure fragment for gcc Elna frontend.
# Copyright (C) 2025 Free Software Foundation, Inc.
# GCC is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
# GCC is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
# Configure looks for the existence of this file to auto-config each language.
# We define several parameters used by configure:
#
# language - name of language as it would appear in $(LANGUAGES)
# boot_language - "yes" if we need to build this language in stage1
# compilers - value to add to $(COMPILERS)
language="elna" language="elna"
gcc_subdir="elna/gcc" gcc_subdir="elna/gcc"

View File

@ -1,3 +1,20 @@
/* elna-convert.cc -- Data type conversion routines.
Copyright (C) 2025 Free Software Foundation, Inc.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#include "config.h" #include "config.h"
#include "system.h" #include "system.h"
#include "coretypes.h" #include "coretypes.h"

View File

@ -1,3 +1,20 @@
/* elna-diagnostic.cc -- Elna frontend specific diagnostic routines.
Copyright (C) 2025 Free Software Foundation, Inc.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#include "elna/gcc/elna-diagnostic.h" #include "elna/gcc/elna-diagnostic.h"
#include "elna/gcc/elna-tree.h" #include "elna/gcc/elna-tree.h"
@ -12,7 +29,7 @@ namespace gcc
return linemap_position_for_column(line_table, position->column); return linemap_position_for_column(line_table, position->column);
} }
const char *print_type(tree type) std::string print_type(tree type)
{ {
gcc_assert(TYPE_P(type)); gcc_assert(TYPE_P(type));
@ -38,7 +55,7 @@ namespace gcc
} }
else if (is_pointer_type(type)) else if (is_pointer_type(type))
{ {
return "pointer"; return std::string("\"pointer to " + print_type(TREE_TYPE(type)) + "\"");
} }
else if (TREE_CODE(type) == ARRAY_TYPE) else if (TREE_CODE(type) == ARRAY_TYPE)
{ {

View File

@ -1,3 +1,20 @@
/* elna-generic.cc -- Visitor generating a GENERIC tree.
Copyright (C) 2025 Free Software Foundation, Inc.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#include <array> #include <array>
#include "elna/gcc/elna-generic.h" #include "elna/gcc/elna-generic.h"
@ -295,6 +312,7 @@ namespace gcc
tree index_constant = build_int_cstu(index_type, string->number().size()); tree index_constant = build_int_cstu(index_type, string->number().size());
tree element_type = this->symbol_map->lookup("Char"); tree element_type = this->symbol_map->lookup("Char");
tree string_type = build_array_type(element_type, build_index_type(index_constant)); tree string_type = build_array_type(element_type, build_index_type(index_constant));
tree string_record = this->symbol_map->lookup("String");
tree string_literal = build_string(string->number().size(), string->number().c_str()); tree string_literal = build_string(string->number().size(), string->number().c_str());
TREE_TYPE(string_literal) = string_type; TREE_TYPE(string_literal) = string_type;
@ -302,12 +320,11 @@ namespace gcc
TREE_READONLY(string_literal) = 1; TREE_READONLY(string_literal) = 1;
TREE_STATIC(string_literal) = 1; TREE_STATIC(string_literal) = 1;
string_type = build_pointer_type(element_type); string_type = TREE_TYPE(TREE_CHAIN(TYPE_FIELDS(string_record)));
string_literal = build4(ARRAY_REF, element_type, string_literal, integer_zero_node, NULL_TREE, NULL_TREE); string_literal = build4(ARRAY_REF, element_type, string_literal, integer_zero_node, NULL_TREE, NULL_TREE);
string_literal = build1(ADDR_EXPR, string_type, string_literal); string_literal = build1(ADDR_EXPR, string_type, string_literal);
vec<constructor_elt, va_gc> *elms = NULL; vec<constructor_elt, va_gc> *elms = NULL;
tree string_record = this->symbol_map->lookup("String");
CONSTRUCTOR_APPEND_ELT(elms, TYPE_FIELDS(string_record), index_constant); CONSTRUCTOR_APPEND_ELT(elms, TYPE_FIELDS(string_record), index_constant);
CONSTRUCTOR_APPEND_ELT(elms, TREE_CHAIN(TYPE_FIELDS(string_record)), string_literal); CONSTRUCTOR_APPEND_ELT(elms, TREE_CHAIN(TYPE_FIELDS(string_record)), string_literal);
@ -373,11 +390,11 @@ namespace gcc
} }
return; return;
} }
if (left_type != right_type && !are_compatible_pointers(left, right)) if (left_type != right_type && !are_compatible_pointers(left, right) && !are_compatible_pointers(right, left))
{ {
error_at(expression_location, error_at(expression_location,
"invalid operands of type %s and %s for operator %s", "invalid operands of type %s and %s for operator %s",
print_type(left_type), print_type(right_type), print_type(left_type).c_str(), print_type(right_type).c_str(),
boot::print_binary_operator(expression->operation())); boot::print_binary_operator(expression->operation()));
this->current_expression = error_mark_node; this->current_expression = error_mark_node;
return; return;
@ -719,7 +736,7 @@ namespace gcc
return; return;
} }
if (TREE_TYPE(this->current_expression) == TREE_TYPE(lvalue) if (TREE_TYPE(this->current_expression) == TREE_TYPE(lvalue)
|| (is_pointer_type(TREE_TYPE(lvalue)) && this->current_expression == null_pointer_node)) || are_compatible_pointers(lvalue, this->current_expression))
{ {
tree assignment = build2_loc(statement_location, MODIFY_EXPR, tree assignment = build2_loc(statement_location, MODIFY_EXPR,
void_type_node, lvalue, this->current_expression); void_type_node, lvalue, this->current_expression);
@ -731,8 +748,8 @@ namespace gcc
{ {
error_at(statement_location, error_at(statement_location,
"cannot assign value of type %s to variable of type %s", "cannot assign value of type %s to variable of type %s",
print_type(TREE_TYPE(this->current_expression)), print_type(TREE_TYPE(this->current_expression)).c_str(),
print_type(TREE_TYPE(lvalue))); print_type(TREE_TYPE(lvalue)).c_str());
this->current_expression = error_mark_node; this->current_expression = error_mark_node;
} }
} }
@ -771,7 +788,7 @@ namespace gcc
{ {
error_at(get_location(&branch.prerequisite().position()), error_at(get_location(&branch.prerequisite().position()),
"expected expression of boolean type but its type is %s", "expected expression of boolean type but its type is %s",
print_type(TREE_TYPE(this->current_expression))); print_type(TREE_TYPE(this->current_expression)).c_str());
this->current_expression = error_mark_node; this->current_expression = error_mark_node;
return; return;
} }
@ -818,7 +835,7 @@ namespace gcc
{ {
error_at(get_location(&statement->body().prerequisite().position()), error_at(get_location(&statement->body().prerequisite().position()),
"expected expression of boolean type but its type is %s", "expected expression of boolean type but its type is %s",
print_type(TREE_TYPE(this->current_expression))); print_type(TREE_TYPE(this->current_expression)).c_str());
this->current_expression = error_mark_node; this->current_expression = error_mark_node;
return; return;
} }

View File

@ -1,3 +1,20 @@
/* elna-spec.c -- Specific flags and argument handling of the Elna front end.
Copyright (C) 2025 Free Software Foundation, Inc.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
void void
lang_specific_driver (struct cl_decoded_option ** /* in_decoded_options */, lang_specific_driver (struct cl_decoded_option ** /* in_decoded_options */,
unsigned int * /* in_decoded_options_count */, unsigned int * /* in_decoded_options_count */,

View File

@ -1,3 +1,20 @@
/* elna-tree.cc -- Utilities to manipulate GCC trees.
Copyright (C) 2025 Free Software Foundation, Inc.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#include "elna/gcc/elna-tree.h" #include "elna/gcc/elna-tree.h"
#include "elna/gcc/elna-diagnostic.h" #include "elna/gcc/elna-diagnostic.h"
@ -24,8 +41,11 @@ namespace gcc
bool are_compatible_pointers(tree lhs, tree rhs) bool are_compatible_pointers(tree lhs, tree rhs)
{ {
return (lhs == null_pointer_node || rhs == null_pointer_node) tree lhs_type = TREE_TYPE(lhs);
&& is_pointer_type(TREE_TYPE(lhs)) && is_pointer_type(TREE_TYPE(rhs)); tree rhs_type = TREE_TYPE(rhs);
return (is_pointer_type(lhs_type) && rhs == null_pointer_node)
|| (is_pointer_type(lhs_type) && lhs_type == rhs_type);
} }
tree tree_chain_base::head() tree tree_chain_base::head()
@ -141,7 +161,7 @@ namespace gcc
record_chain.append(build_field(UNKNOWN_LOCATION, string_record, "length", initial_table->lookup("Word"))); record_chain.append(build_field(UNKNOWN_LOCATION, string_record, "length", initial_table->lookup("Word")));
record_chain.append(build_field(UNKNOWN_LOCATION, string_record, "ptr", record_chain.append(build_field(UNKNOWN_LOCATION, string_record, "ptr",
build_pointer_type(initial_table->lookup("Char")))); build_pointer_type_for_mode(initial_table->lookup("Char"), VOIDmode, true)));
TYPE_FIELDS(string_record) = record_chain.head(); TYPE_FIELDS(string_record) = record_chain.head();
layout_type(string_record); layout_type(string_record);
@ -216,7 +236,7 @@ namespace gcc
{ {
error_at(expression_location, error_at(expression_location,
"invalid operands of type %s and %s for operator %s", "invalid operands of type %s and %s for operator %s",
print_type(left_type), print_type(right_type), print_type(left_type).c_str(), print_type(right_type).c_str(),
elna::boot::print_binary_operator(expression->operation())); elna::boot::print_binary_operator(expression->operation()));
return error_mark_node; return error_mark_node;
} }

View File

@ -1,3 +1,20 @@
/* elna1.cc -- Language-dependent hooks for Elna.
Copyright (C) 2006-2024 Free Software Foundation, Inc.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#include "config.h" #include "config.h"
#include "system.h" #include "system.h"
#include "coretypes.h" #include "coretypes.h"

View File

@ -1,3 +1,20 @@
/* lang-specs.h -- GCC driver specs for Elna frontend.
Copyright (C) 2025 Free Software Foundation, Inc.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
/* gcc/gcc.cc */ /* gcc/gcc.cc */
{".elna", "@elna", nullptr, 0, 0}, {".elna", "@elna", nullptr, 0, 0},
{"@elna", {"@elna",

View File

@ -648,11 +648,11 @@ namespace boot
template<typename T> template<typename T>
class number_literal : public literal class number_literal : public literal
{ {
T m_number; T m_value;
public: public:
number_literal(const struct position position, const T value) number_literal(const struct position position, const T& value)
: literal(position), m_number(value) : literal(position), m_value(value)
{ {
} }
@ -663,7 +663,7 @@ namespace boot
const T& number() const const T& number() const
{ {
return m_number; return m_value;
} }
}; };

View File

@ -1,3 +1,20 @@
/* elna-diagnostic.h -- Elna frontend specific diagnostic routines.
Copyright (C) 2025 Free Software Foundation, Inc.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#pragma once #pragma once
#include "config.h" #include "config.h"
@ -14,6 +31,6 @@ namespace gcc
{ {
location_t get_location(const boot::position *position); location_t get_location(const boot::position *position);
const char *print_type(tree type); std::string print_type(tree type);
} }
} }

View File

@ -1,3 +1,20 @@
/* elna-generic.h -- Visitor generating a GENERIC tree.
Copyright (C) 2025 Free Software Foundation, Inc.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#pragma once #pragma once
#include "elna/boot/ast.h" #include "elna/boot/ast.h"

View File

@ -1,3 +1,20 @@
/* elna-tree.h -- Utilities to manipulate GCC trees.
Copyright (C) 2025 Free Software Foundation, Inc.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#pragma once #pragma once
#include <forward_list> #include <forward_list>
@ -17,6 +34,12 @@ namespace gcc
{ {
bool is_pointer_type(tree type); bool is_pointer_type(tree type);
bool is_integral_type(tree type); bool is_integral_type(tree type);
/**
* \param lhs Left hand value.
* \param rhs Right hand value.
* \return Whether rhs can be assigned to lhs.
*/
bool are_compatible_pointers(tree lhs, tree rhs); bool are_compatible_pointers(tree lhs, tree rhs);
class tree_chain_base class tree_chain_base

View File

@ -25,9 +25,14 @@ type
first: Position; first: Position;
last: Position last: Position
end, end,
SourceCode = record
position: Position;
text: String
end,
TokenValue* = union TokenValue* = union
int_value: Int; int_value: Int;
string_value: pointer to Char; string_value: pointer to Char;
string: String;
boolean_value: Bool; boolean_value: Bool;
char_value: Char char_value: Char
end, end,
@ -179,11 +184,32 @@ begin
return strncmp(this.ptr, that, length) = 0 return strncmp(this.ptr, that, length) = 0
end end
proc open_substring(string: String, start: Word): String;
begin
string.ptr := string.ptr + start;
string.length := string.length - start;
return string
end
proc char_at(string: String, position: Word): Char;
begin
return (string.ptr + position)^
end
(* (*
End of standard procedures. End of standard procedures.
*) *)
proc read_source(filename: pointer to Char): pointer to Char; proc make_position(): Position;
var
result: Position;
begin
result.line := 1u;
result.column := 1u;
return result
end
proc read_source(filename: pointer to Char, result: pointer to String): Bool;
var var
input_file: pointer to FILE, input_file: pointer to FILE,
source_size: Int, source_size: Int,
@ -192,26 +218,28 @@ begin
input_file := fopen(filename, "rb\0".ptr); input_file := fopen(filename, "rb\0".ptr);
if input_file = nil then if input_file = nil then
return nil return false
end;
defer
fclose(input_file)
end; end;
if fseek(input_file, 0, SEEK_END) <> 0 then if fseek(input_file, 0, SEEK_END) <> 0 then
fclose(input_file); return false
return nil
end; end;
source_size := ftell(input_file); source_size := ftell(input_file);
if source_size < 0 then if source_size < 0 then
fclose(input_file); return false
return nil
end; end;
rewind(input_file); rewind(input_file);
input := calloc(source_size + 1, 1); input := malloc(source_size);
if fread(input, source_size, 1, input_file) <> 1u then if fread(input, source_size, 1, input_file) <> 1u then
input := nil return false
end; end;
fclose(input_file); result^.length := cast(source_size as Word);
result^.ptr := cast(input as pointer to Char);
return input return true
end end
proc escape_char(escape: Char, result: pointer to Char): Bool; proc escape_char(escape: Char, result: pointer to Char): Bool;
@ -257,12 +285,26 @@ begin
end end
end end
proc skip_spaces(input: pointer to Char): pointer to Char; proc advance_source(source_code: SourceCode, length: Word): SourceCode;
begin begin
while is_space(input^) do source_code.text := open_substring(source_code.text, length);
input := input + 1 source_code.position.column := source_code.position.column + length;
return source_code
end
proc skip_spaces(source_code: SourceCode): SourceCode;
begin
while source_code.text.length > 0u and is_space(char_at(source_code.text, 0)) do
if char_at(source_code.text, 0) = '\n' then
source_code.position.line := source_code.position.line + 1u;
source_code.position.column := 1u
else
source_code.position.column := source_code.position.column + 1u
end; end;
return input source_code.text := open_substring(source_code.text, 1u)
end;
return source_code
end end
proc lex_identifier(input: pointer to Char): pointer to Char; proc lex_identifier(input: pointer to Char): pointer to Char;
@ -273,19 +315,29 @@ begin
return input return input
end end
proc lex_comment(input: pointer to Char): pointer to Char; proc lex_comment(source_code: pointer to SourceCode, token_content: pointer to String): Bool;
var var
next: pointer to Char; result: pointer to Char;
begin begin
while input^ <> '\0' do token_content^.ptr := source_code^.text.ptr;
next := input + 1; token_content^.length := 0u;
if input^ = '*' and next^ = ')' then while source_code^.text.length > 1u do
return next + 1 if char_at(source_code^.text, 0) = '*' and char_at(source_code^.text, 1) = ')' then
source_code^ := advance_source(source_code^, 2u);
result := cast(malloc(token_content^.length) as pointer to Char);
strncpy(result, token_content^.ptr, token_content^.length);
token_content^.ptr := result;
return true
end; end;
input := next token_content^.length := token_content^.length + 1u;
source_code^ := advance_source(source_code^, 1)
end; end;
return nil token_content^.ptr := nil;
token_content^.length := 0u;
return false
end end
proc lex_character(input: pointer to Char, current_token: pointer to Token): pointer to Char; proc lex_character(input: pointer to Char, current_token: pointer to Token): pointer to Char;
@ -458,7 +510,7 @@ begin
elsif current_token^.kind = TOKEN_AT then elsif current_token^.kind = TOKEN_AT then
write_c('@') write_c('@')
elsif current_token^.kind = TOKEN_COMMENT then elsif current_token^.kind = TOKEN_COMMENT then
write_s("COMMENT") write_s("(* COMMENT *)")
elsif current_token^.kind = TOKEN_INTEGER then elsif current_token^.kind = TOKEN_INTEGER then
write_c('<'); write_c('<');
write_i(current_token^.value.int_value); write_i(current_token^.value.int_value);
@ -562,154 +614,167 @@ begin
return current_token return current_token
end end
proc tokenize(input_pointer: pointer to Char, tokens_size: pointer to Word): pointer to Token; proc tokenize(source_code: SourceCode, tokens_size: pointer to Word): pointer to Token;
var var
token_end: pointer to Char, token_end: pointer to Char,
tokens: pointer to Token, tokens: pointer to Token,
current_token: pointer to Token, current_token: pointer to Token,
token_length: Word; token_length: Word,
first_char: Char,
token_content: String;
begin begin
tokens_size^ := 0u; tokens_size^ := 0u;
tokens := nil; tokens := nil;
source_code := skip_spaces(source_code);
input_pointer := skip_spaces(input_pointer); while source_code.text.length <> 0u do
while input_pointer^ <> '\0' do
tokens := cast(reallocarray(tokens, tokens_size^ + 1u, sizeof(Token)) as pointer to Token); tokens := cast(reallocarray(tokens, tokens_size^ + 1u, sizeof(Token)) as pointer to Token);
current_token := tokens + tokens_size^; current_token := tokens + tokens_size^;
first_char := char_at(source_code.text, 0);
if is_alpha(input_pointer^) or input_pointer^ = '_' then if is_alpha(first_char) or first_char = '_' then
token_end := lex_identifier(input_pointer + 1); token_end := lex_identifier(source_code.text.ptr + 1);
token_length := cast(token_end as Word) - cast(input_pointer as Word); token_length := cast(token_end - source_code.text.ptr as Word);
current_token^ := categorize_identifier(input_pointer, token_length); current_token^ := categorize_identifier(source_code.text.ptr, token_length);
input_pointer := token_end source_code := advance_source(source_code, token_length)
elsif is_digit(input_pointer^) then elsif is_digit(first_char) then
token_end := nil; token_end := nil;
current_token^.value.int_value := strtol(input_pointer, @token_end, 10); current_token^.value.int_value := strtol(source_code.text.ptr, @token_end, 10);
token_length := cast(token_end - source_code.text.ptr as Word);
if token_end^ = 'u' then if token_end^ = 'u' then
current_token^.kind := TOKEN_WORD; current_token^.kind := TOKEN_WORD;
input_pointer := token_end + 1 source_code := advance_source(source_code, token_length + 1u)
else else
current_token^.kind := TOKEN_INTEGER; current_token^.kind := TOKEN_INTEGER;
input_pointer := token_end source_code := advance_source(source_code, token_length)
end end
elsif input_pointer^ = '(' then elsif first_char = '(' then
input_pointer := input_pointer + 1; source_code := advance_source(source_code, 1u);
if input_pointer^ = '*' then
token_end := lex_comment(input_pointer + 1);
if token_end <> nil then if source_code.text.length = 0u then
token_length := cast(token_end as Word) - cast(input_pointer as Word); current_token^.kind := TOKEN_LEFT_PAREN
current_token^.value.string_value := cast(calloc(token_length + 1u, 1) as pointer to Char); elsif char_at(source_code.text, 0u) = '*' then
strncpy(current_token^.value.string_value, input_pointer, token_length); source_code := advance_source(source_code, 1u);
current_token^.kind := TOKEN_COMMENT;
input_pointer := token_end if lex_comment(@source_code, @token_content) then
current_token^.value.string := token_content;
current_token^.kind := TOKEN_COMMENT
else else
current_token^.kind := 0 current_token^.kind := 0
end end
else else
current_token^.kind := TOKEN_LEFT_PAREN current_token^.kind := TOKEN_LEFT_PAREN
end end
elsif input_pointer^ = ')' then elsif first_char = ')' then
current_token^.kind := TOKEN_RIGHT_PAREN; current_token^.kind := TOKEN_RIGHT_PAREN;
input_pointer := input_pointer + 1 source_code := advance_source(source_code, 1u)
elsif input_pointer^ = '\'' then elsif first_char = '\'' then
token_end := lex_character(input_pointer + 1, current_token); token_end := lex_character(source_code.text.ptr + 1, current_token);
token_length := cast(token_end - source_code.text.ptr as Word);
if token_end^ = '\'' then if token_end^ = '\'' then
current_token^.kind := TOKEN_CHARACTER; current_token^.kind := TOKEN_CHARACTER;
input_pointer := token_end + 1 source_code := advance_source(source_code, token_length + 1u)
else else
input_pointer := input_pointer + 1 source_code := advance_source(source_code, 1u)
end end
elsif input_pointer^ = '"' then elsif first_char = '"' then
token_end := lex_string(input_pointer + 1, current_token); token_end := lex_string(source_code.text.ptr + 1, current_token);
if token_end^ = '"' then if token_end^ = '"' then
current_token^.kind := TOKEN_STRING; current_token^.kind := TOKEN_STRING;
input_pointer := token_end + 1 token_length := cast(token_end - source_code.text.ptr as Word);
source_code := advance_source(source_code, token_length + 1u)
end end
elsif input_pointer^ = '[' then elsif first_char = '[' then
current_token^.kind := TOKEN_LEFT_SQUARE; current_token^.kind := TOKEN_LEFT_SQUARE;
input_pointer := input_pointer + 1 source_code := advance_source(source_code, 1u)
elsif input_pointer^ = ']' then elsif first_char = ']' then
current_token^.kind := TOKEN_RIGHT_SQUARE; current_token^.kind := TOKEN_RIGHT_SQUARE;
input_pointer := input_pointer + 1 source_code := advance_source(source_code, 1u)
elsif input_pointer^ = '>' then elsif first_char = '>' then
input_pointer := input_pointer + 1; source_code := advance_source(source_code, 1u);
if input_pointer^ = '=' then
if source_code.text.length = 0u then
current_token^.kind := TOKEN_GREATER_THAN
elsif char_at(source_code.text, 0) = '=' then
current_token^.kind := TOKEN_GREATER_EQUAL; current_token^.kind := TOKEN_GREATER_EQUAL;
input_pointer := input_pointer + 1 source_code := advance_source(source_code, 1u)
else else
current_token^.kind := TOKEN_GREATER_THAN current_token^.kind := TOKEN_GREATER_THAN
end end
elsif input_pointer^ = '<' then elsif first_char = '<' then
input_pointer := input_pointer + 1; source_code := advance_source(source_code, 1u);
if input_pointer^ = '=' then
if source_code.text.length = 0u then
current_token^.kind := TOKEN_LESS_THAN
elsif char_at(source_code.text, 0) = '=' then
current_token^.kind := TOKEN_LESS_EQUAL; current_token^.kind := TOKEN_LESS_EQUAL;
input_pointer := input_pointer + 1 source_code := advance_source(source_code, 1u)
elsif input_pointer^ = '>' then elsif char_at(source_code.text, 0) = '>' then
current_token^.kind := TOKEN_NOT_EQUAL; current_token^.kind := TOKEN_NOT_EQUAL;
input_pointer := input_pointer + 1 source_code := advance_source(source_code, 1u)
else else
current_token^.kind := TOKEN_LESS_THAN current_token^.kind := TOKEN_LESS_THAN
end end
elsif input_pointer^ = '=' then elsif first_char = '=' then
current_token^.kind := TOKEN_EQUAL; current_token^.kind := TOKEN_EQUAL;
input_pointer := input_pointer + 1 source_code := advance_source(source_code, 1u)
elsif input_pointer^ = ';' then elsif first_char = ';' then
current_token^.kind := TOKEN_SEMICOLON; current_token^.kind := TOKEN_SEMICOLON;
input_pointer := input_pointer + 1 source_code := advance_source(source_code, 1u)
elsif input_pointer^ = '.' then elsif first_char = '.' then
current_token^.kind := TOKEN_DOT; current_token^.kind := TOKEN_DOT;
input_pointer := input_pointer + 1 source_code := advance_source(source_code, 1u)
elsif input_pointer^ = ',' then elsif first_char = ',' then
current_token^.kind := TOKEN_COMMA; current_token^.kind := TOKEN_COMMA;
input_pointer := input_pointer + 1 source_code := advance_source(source_code, 1u)
elsif input_pointer^ = '+' then elsif first_char = '+' then
current_token^.kind := TOKEN_PLUS; current_token^.kind := TOKEN_PLUS;
input_pointer := input_pointer + 1 source_code := advance_source(source_code, 1u)
elsif input_pointer^ = '-' then elsif first_char = '-' then
current_token^.kind := TOKEN_MINUS; current_token^.kind := TOKEN_MINUS;
input_pointer := input_pointer + 1 source_code := advance_source(source_code, 1u)
elsif input_pointer^ = '*' then elsif first_char = '*' then
current_token^.kind := TOKEN_MULTIPLICATION; current_token^.kind := TOKEN_MULTIPLICATION;
input_pointer := input_pointer + 1 source_code := advance_source(source_code, 1u)
elsif input_pointer^ = '/' then elsif first_char = '/' then
current_token^.kind := TOKEN_DIVISION; current_token^.kind := TOKEN_DIVISION;
input_pointer := input_pointer + 1 source_code := advance_source(source_code, 1u)
elsif input_pointer^ = '%' then elsif first_char = '%' then
current_token^.kind := TOKEN_REMAINDER; current_token^.kind := TOKEN_REMAINDER;
input_pointer := input_pointer + 1 source_code := advance_source(source_code, 1u)
elsif input_pointer^ = ':' then elsif first_char = ':' then
input_pointer := input_pointer + 1; source_code := advance_source(source_code, 1u);
if input_pointer^ = '=' then
if source_code.text.length = 0u then
current_token^.kind := TOKEN_COLON
elsif char_at(source_code.text, 0) = '=' then
current_token^.kind := TOKEN_ASSIGNMENT; current_token^.kind := TOKEN_ASSIGNMENT;
input_pointer := input_pointer + 1 source_code := advance_source(source_code, 1u)
else else
current_token^.kind := TOKEN_COLON current_token^.kind := TOKEN_COLON
end end
elsif input_pointer^ = '^' then elsif first_char = '^' then
current_token^.kind := TOKEN_HAT; current_token^.kind := TOKEN_HAT;
input_pointer := input_pointer + 1 source_code := advance_source(source_code, 1u)
elsif input_pointer^ = '@' then elsif first_char = '@' then
current_token^.kind := TOKEN_AT; current_token^.kind := TOKEN_AT;
input_pointer := input_pointer + 1 source_code := advance_source(source_code, 1u)
else else
current_token^.kind := 0; current_token^.kind := 0;
input_pointer := input_pointer + 1 source_code := advance_source(source_code, 1u)
end; end;
if current_token^.kind <> 0 then if current_token^.kind <> 0 then
tokens_size^ := tokens_size^ + 1u; tokens_size^ := tokens_size^ + 1u;
input_pointer := skip_spaces(input_pointer) source_code := skip_spaces(source_code)
else else
write_s("Lexical analysis error on \""); write_s("Lexical analysis error on \"");
write_c(input_pointer^); write_c(first_char);
write_s("\".\n") write_s("\".\n")
end end
end; end;
@ -819,9 +884,9 @@ end
proc process(argc: Int, argv: pointer to pointer to Char): Int; proc process(argc: Int, argv: pointer to pointer to Char): Int;
var var
input: pointer to Char,
tokens: pointer to Token, tokens: pointer to Token,
tokens_size: Word, tokens_size: Word,
source_code: SourceCode,
command_line: pointer to CommandLine; command_line: pointer to CommandLine;
begin begin
command_line := parse_command_line(argc, argv); command_line := parse_command_line(argc, argv);
@ -829,12 +894,12 @@ begin
return 2 return 2
end; end;
input := read_source(command_line^.input); source_code.position := make_position();
if input = nil then if not read_source(command_line^.input, @source_code.text) then
perror(command_line^.input); perror(command_line^.input);
return 3 return 3
end; end;
tokens := tokenize(input, @tokens_size); tokens := tokenize(source_code, @tokens_size);
if command_line^.tokenize then if command_line^.tokenize then
print_tokens(tokens, tokens_size) print_tokens(tokens, tokens_size)