diff --git a/config-lang.in b/config-lang.in
index 5e58f8b..c544997 100644
--- a/config-lang.in
+++ b/config-lang.in
@@ -1,3 +1,20 @@
+# config-lang.in -- Top level configure fragment for gcc Elna frontend.
+# Copyright (C) 2025 Free Software Foundation, Inc.
+
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# .
+
language="elna"
gcc_subdir="elna/gcc"
diff --git a/gcc/Make-lang.in b/gcc/Make-lang.in
index 0e96ea4..fbd620b 100644
--- a/gcc/Make-lang.in
+++ b/gcc/Make-lang.in
@@ -1,3 +1,20 @@
+# Make-lang.in -- Top level -*- makefile -*- fragment for the Elna frontend.
+# Copyright (C) 2025 Free Software Foundation, Inc.
+
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# .
+
ELNA_INSTALL_NAME := $(shell echo gelna|sed '$(program_transform_name)')
ELNA_TARGET_INSTALL_NAME := $(target_noncanonical)-$(shell echo gelna|sed '$(program_transform_name)')
diff --git a/gcc/config-lang.in b/gcc/config-lang.in
index 2d9f57a..3b3e9bc 100644
--- a/gcc/config-lang.in
+++ b/gcc/config-lang.in
@@ -1,3 +1,27 @@
+# config-lang.in -- Top level configure fragment for gcc Elna frontend.
+# Copyright (C) 2025 Free Software Foundation, Inc.
+
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# .
+
+# Configure looks for the existence of this file to auto-config each language.
+# We define several parameters used by configure:
+#
+# language - name of language as it would appear in $(LANGUAGES)
+# boot_language - "yes" if we need to build this language in stage1
+# compilers - value to add to $(COMPILERS)
+
language="elna"
gcc_subdir="elna/gcc"
diff --git a/gcc/elna-convert.cc b/gcc/elna-convert.cc
index d338150..71baf47 100644
--- a/gcc/elna-convert.cc
+++ b/gcc/elna-convert.cc
@@ -1,3 +1,20 @@
+/* elna-convert.cc -- Data type conversion routines.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+. */
+
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/elna-diagnostic.cc b/gcc/elna-diagnostic.cc
index c05321c..1e8a2db 100644
--- a/gcc/elna-diagnostic.cc
+++ b/gcc/elna-diagnostic.cc
@@ -1,3 +1,20 @@
+/* elna-diagnostic.cc -- Elna frontend specific diagnostic routines.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+. */
+
#include "elna/gcc/elna-diagnostic.h"
#include "elna/gcc/elna-tree.h"
@@ -12,7 +29,7 @@ namespace gcc
return linemap_position_for_column(line_table, position->column);
}
- const char *print_type(tree type)
+ std::string print_type(tree type)
{
gcc_assert(TYPE_P(type));
@@ -38,7 +55,7 @@ namespace gcc
}
else if (is_pointer_type(type))
{
- return "pointer";
+ return std::string("\"pointer to " + print_type(TREE_TYPE(type)) + "\"");
}
else if (TREE_CODE(type) == ARRAY_TYPE)
{
diff --git a/gcc/elna-generic.cc b/gcc/elna-generic.cc
index db29edf..3cbf86d 100644
--- a/gcc/elna-generic.cc
+++ b/gcc/elna-generic.cc
@@ -1,3 +1,20 @@
+/* elna-generic.cc -- Visitor generating a GENERIC tree.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+. */
+
#include
#include "elna/gcc/elna-generic.h"
@@ -295,6 +312,7 @@ namespace gcc
tree index_constant = build_int_cstu(index_type, string->number().size());
tree element_type = this->symbol_map->lookup("Char");
tree string_type = build_array_type(element_type, build_index_type(index_constant));
+ tree string_record = this->symbol_map->lookup("String");
tree string_literal = build_string(string->number().size(), string->number().c_str());
TREE_TYPE(string_literal) = string_type;
@@ -302,12 +320,11 @@ namespace gcc
TREE_READONLY(string_literal) = 1;
TREE_STATIC(string_literal) = 1;
- string_type = build_pointer_type(element_type);
+ string_type = TREE_TYPE(TREE_CHAIN(TYPE_FIELDS(string_record)));
string_literal = build4(ARRAY_REF, element_type, string_literal, integer_zero_node, NULL_TREE, NULL_TREE);
string_literal = build1(ADDR_EXPR, string_type, string_literal);
vec *elms = NULL;
- tree string_record = this->symbol_map->lookup("String");
CONSTRUCTOR_APPEND_ELT(elms, TYPE_FIELDS(string_record), index_constant);
CONSTRUCTOR_APPEND_ELT(elms, TREE_CHAIN(TYPE_FIELDS(string_record)), string_literal);
@@ -373,11 +390,11 @@ namespace gcc
}
return;
}
- if (left_type != right_type && !are_compatible_pointers(left, right))
+ if (left_type != right_type && !are_compatible_pointers(left, right) && !are_compatible_pointers(right, left))
{
error_at(expression_location,
"invalid operands of type %s and %s for operator %s",
- print_type(left_type), print_type(right_type),
+ print_type(left_type).c_str(), print_type(right_type).c_str(),
boot::print_binary_operator(expression->operation()));
this->current_expression = error_mark_node;
return;
@@ -719,7 +736,7 @@ namespace gcc
return;
}
if (TREE_TYPE(this->current_expression) == TREE_TYPE(lvalue)
- || (is_pointer_type(TREE_TYPE(lvalue)) && this->current_expression == null_pointer_node))
+ || are_compatible_pointers(lvalue, this->current_expression))
{
tree assignment = build2_loc(statement_location, MODIFY_EXPR,
void_type_node, lvalue, this->current_expression);
@@ -731,8 +748,8 @@ namespace gcc
{
error_at(statement_location,
"cannot assign value of type %s to variable of type %s",
- print_type(TREE_TYPE(this->current_expression)),
- print_type(TREE_TYPE(lvalue)));
+ print_type(TREE_TYPE(this->current_expression)).c_str(),
+ print_type(TREE_TYPE(lvalue)).c_str());
this->current_expression = error_mark_node;
}
}
@@ -771,7 +788,7 @@ namespace gcc
{
error_at(get_location(&branch.prerequisite().position()),
"expected expression of boolean type but its type is %s",
- print_type(TREE_TYPE(this->current_expression)));
+ print_type(TREE_TYPE(this->current_expression)).c_str());
this->current_expression = error_mark_node;
return;
}
@@ -818,7 +835,7 @@ namespace gcc
{
error_at(get_location(&statement->body().prerequisite().position()),
"expected expression of boolean type but its type is %s",
- print_type(TREE_TYPE(this->current_expression)));
+ print_type(TREE_TYPE(this->current_expression)).c_str());
this->current_expression = error_mark_node;
return;
}
diff --git a/gcc/elna-spec.cc b/gcc/elna-spec.cc
index d23606c..0b5749c 100644
--- a/gcc/elna-spec.cc
+++ b/gcc/elna-spec.cc
@@ -1,3 +1,20 @@
+/* elna-spec.c -- Specific flags and argument handling of the Elna front end.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+. */
+
void
lang_specific_driver (struct cl_decoded_option ** /* in_decoded_options */,
unsigned int * /* in_decoded_options_count */,
diff --git a/gcc/elna-tree.cc b/gcc/elna-tree.cc
index 6f8a9af..e7945bf 100644
--- a/gcc/elna-tree.cc
+++ b/gcc/elna-tree.cc
@@ -1,3 +1,20 @@
+/* elna-tree.cc -- Utilities to manipulate GCC trees.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+. */
+
#include "elna/gcc/elna-tree.h"
#include "elna/gcc/elna-diagnostic.h"
@@ -24,8 +41,11 @@ namespace gcc
bool are_compatible_pointers(tree lhs, tree rhs)
{
- return (lhs == null_pointer_node || rhs == null_pointer_node)
- && is_pointer_type(TREE_TYPE(lhs)) && is_pointer_type(TREE_TYPE(rhs));
+ tree lhs_type = TREE_TYPE(lhs);
+ tree rhs_type = TREE_TYPE(rhs);
+
+ return (is_pointer_type(lhs_type) && rhs == null_pointer_node)
+ || (is_pointer_type(lhs_type) && lhs_type == rhs_type);
}
tree tree_chain_base::head()
@@ -141,7 +161,7 @@ namespace gcc
record_chain.append(build_field(UNKNOWN_LOCATION, string_record, "length", initial_table->lookup("Word")));
record_chain.append(build_field(UNKNOWN_LOCATION, string_record, "ptr",
- build_pointer_type(initial_table->lookup("Char"))));
+ build_pointer_type_for_mode(initial_table->lookup("Char"), VOIDmode, true)));
TYPE_FIELDS(string_record) = record_chain.head();
layout_type(string_record);
@@ -216,7 +236,7 @@ namespace gcc
{
error_at(expression_location,
"invalid operands of type %s and %s for operator %s",
- print_type(left_type), print_type(right_type),
+ print_type(left_type).c_str(), print_type(right_type).c_str(),
elna::boot::print_binary_operator(expression->operation()));
return error_mark_node;
}
diff --git a/gcc/elna1.cc b/gcc/elna1.cc
index e3c3487..fe5a817 100644
--- a/gcc/elna1.cc
+++ b/gcc/elna1.cc
@@ -1,3 +1,20 @@
+/* elna1.cc -- Language-dependent hooks for Elna.
+ Copyright (C) 2006-2024 Free Software Foundation, Inc.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+. */
+
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/lang-specs.h b/gcc/lang-specs.h
index e3e0e74..c9743a1 100644
--- a/gcc/lang-specs.h
+++ b/gcc/lang-specs.h
@@ -1,3 +1,20 @@
+/* lang-specs.h -- GCC driver specs for Elna frontend.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+. */
+
/* gcc/gcc.cc */
{".elna", "@elna", nullptr, 0, 0},
{"@elna",
diff --git a/include/elna/boot/ast.h b/include/elna/boot/ast.h
index 97ab5ba..b151f4e 100644
--- a/include/elna/boot/ast.h
+++ b/include/elna/boot/ast.h
@@ -648,11 +648,11 @@ namespace boot
template
class number_literal : public literal
{
- T m_number;
+ T m_value;
public:
- number_literal(const struct position position, const T value)
- : literal(position), m_number(value)
+ number_literal(const struct position position, const T& value)
+ : literal(position), m_value(value)
{
}
@@ -663,7 +663,7 @@ namespace boot
const T& number() const
{
- return m_number;
+ return m_value;
}
};
diff --git a/include/elna/gcc/elna-diagnostic.h b/include/elna/gcc/elna-diagnostic.h
index 08c56dd..57130bf 100644
--- a/include/elna/gcc/elna-diagnostic.h
+++ b/include/elna/gcc/elna-diagnostic.h
@@ -1,3 +1,20 @@
+/* elna-diagnostic.h -- Elna frontend specific diagnostic routines.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+. */
+
#pragma once
#include "config.h"
@@ -14,6 +31,6 @@ namespace gcc
{
location_t get_location(const boot::position *position);
- const char *print_type(tree type);
+ std::string print_type(tree type);
}
}
diff --git a/include/elna/gcc/elna-generic.h b/include/elna/gcc/elna-generic.h
index 9d85ec4..caa0926 100644
--- a/include/elna/gcc/elna-generic.h
+++ b/include/elna/gcc/elna-generic.h
@@ -1,3 +1,20 @@
+/* elna-generic.h -- Visitor generating a GENERIC tree.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+. */
+
#pragma once
#include "elna/boot/ast.h"
diff --git a/include/elna/gcc/elna-tree.h b/include/elna/gcc/elna-tree.h
index 2694376..fe0f70d 100644
--- a/include/elna/gcc/elna-tree.h
+++ b/include/elna/gcc/elna-tree.h
@@ -1,3 +1,20 @@
+/* elna-tree.h -- Utilities to manipulate GCC trees.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+. */
+
#pragma once
#include
@@ -17,6 +34,12 @@ namespace gcc
{
bool is_pointer_type(tree type);
bool is_integral_type(tree type);
+
+ /**
+ * \param lhs Left hand value.
+ * \param rhs Right hand value.
+ * \return Whether rhs can be assigned to lhs.
+ */
bool are_compatible_pointers(tree lhs, tree rhs);
class tree_chain_base
diff --git a/source.elna b/source.elna
index 6923fc5..4873d2b 100644
--- a/source.elna
+++ b/source.elna
@@ -25,9 +25,14 @@ type
first: Position;
last: Position
end,
+ SourceCode = record
+ position: Position;
+ text: String
+ end,
TokenValue* = union
int_value: Int;
string_value: pointer to Char;
+ string: String;
boolean_value: Bool;
char_value: Char
end,
@@ -179,11 +184,32 @@ begin
return strncmp(this.ptr, that, length) = 0
end
+proc open_substring(string: String, start: Word): String;
+begin
+ string.ptr := string.ptr + start;
+ string.length := string.length - start;
+ return string
+end
+
+proc char_at(string: String, position: Word): Char;
+begin
+ return (string.ptr + position)^
+end
+
(*
End of standard procedures.
*)
-proc read_source(filename: pointer to Char): pointer to Char;
+proc make_position(): Position;
+var
+ result: Position;
+begin
+ result.line := 1u;
+ result.column := 1u;
+ return result
+end
+
+proc read_source(filename: pointer to Char, result: pointer to String): Bool;
var
input_file: pointer to FILE,
source_size: Int,
@@ -192,26 +218,28 @@ begin
input_file := fopen(filename, "rb\0".ptr);
if input_file = nil then
- return nil
+ return false
+ end;
+ defer
+ fclose(input_file)
end;
if fseek(input_file, 0, SEEK_END) <> 0 then
- fclose(input_file);
- return nil
+ return false
end;
source_size := ftell(input_file);
if source_size < 0 then
- fclose(input_file);
- return nil
+ return false
end;
rewind(input_file);
- input := calloc(source_size + 1, 1);
+ input := malloc(source_size);
if fread(input, source_size, 1, input_file) <> 1u then
- input := nil
+ return false
end;
- fclose(input_file);
+ result^.length := cast(source_size as Word);
+ result^.ptr := cast(input as pointer to Char);
- return input
+ return true
end
proc escape_char(escape: Char, result: pointer to Char): Bool;
@@ -257,12 +285,26 @@ begin
end
end
-proc skip_spaces(input: pointer to Char): pointer to Char;
+proc advance_source(source_code: SourceCode, length: Word): SourceCode;
begin
- while is_space(input^) do
- input := input + 1
+ source_code.text := open_substring(source_code.text, length);
+ source_code.position.column := source_code.position.column + length;
+
+ return source_code
+end
+
+proc skip_spaces(source_code: SourceCode): SourceCode;
+begin
+ while source_code.text.length > 0u and is_space(char_at(source_code.text, 0)) do
+ if char_at(source_code.text, 0) = '\n' then
+ source_code.position.line := source_code.position.line + 1u;
+ source_code.position.column := 1u
+ else
+ source_code.position.column := source_code.position.column + 1u
+ end;
+ source_code.text := open_substring(source_code.text, 1u)
end;
- return input
+ return source_code
end
proc lex_identifier(input: pointer to Char): pointer to Char;
@@ -273,19 +315,29 @@ begin
return input
end
-proc lex_comment(input: pointer to Char): pointer to Char;
+proc lex_comment(source_code: pointer to SourceCode, token_content: pointer to String): Bool;
var
- next: pointer to Char;
+ result: pointer to Char;
begin
- while input^ <> '\0' do
- next := input + 1;
+ token_content^.ptr := source_code^.text.ptr;
+ token_content^.length := 0u;
- if input^ = '*' and next^ = ')' then
- return next + 1
+ while source_code^.text.length > 1u do
+ if char_at(source_code^.text, 0) = '*' and char_at(source_code^.text, 1) = ')' then
+ source_code^ := advance_source(source_code^, 2u);
+
+ result := cast(malloc(token_content^.length) as pointer to Char);
+ strncpy(result, token_content^.ptr, token_content^.length);
+ token_content^.ptr := result;
+
+ return true
end;
- input := next
+ token_content^.length := token_content^.length + 1u;
+ source_code^ := advance_source(source_code^, 1)
end;
- return nil
+ token_content^.ptr := nil;
+ token_content^.length := 0u;
+ return false
end
proc lex_character(input: pointer to Char, current_token: pointer to Token): pointer to Char;
@@ -458,7 +510,7 @@ begin
elsif current_token^.kind = TOKEN_AT then
write_c('@')
elsif current_token^.kind = TOKEN_COMMENT then
- write_s("COMMENT")
+ write_s("(* COMMENT *)")
elsif current_token^.kind = TOKEN_INTEGER then
write_c('<');
write_i(current_token^.value.int_value);
@@ -562,154 +614,167 @@ begin
return current_token
end
-proc tokenize(input_pointer: pointer to Char, tokens_size: pointer to Word): pointer to Token;
+proc tokenize(source_code: SourceCode, tokens_size: pointer to Word): pointer to Token;
var
token_end: pointer to Char,
tokens: pointer to Token,
current_token: pointer to Token,
- token_length: Word;
+ token_length: Word,
+ first_char: Char,
+ token_content: String;
begin
tokens_size^ := 0u;
tokens := nil;
+ source_code := skip_spaces(source_code);
- input_pointer := skip_spaces(input_pointer);
-
- while input_pointer^ <> '\0' do
+ while source_code.text.length <> 0u do
tokens := cast(reallocarray(tokens, tokens_size^ + 1u, sizeof(Token)) as pointer to Token);
current_token := tokens + tokens_size^;
+ first_char := char_at(source_code.text, 0);
- if is_alpha(input_pointer^) or input_pointer^ = '_' then
- token_end := lex_identifier(input_pointer + 1);
- token_length := cast(token_end as Word) - cast(input_pointer as Word);
+ if is_alpha(first_char) or first_char = '_' then
+ token_end := lex_identifier(source_code.text.ptr + 1);
+ token_length := cast(token_end - source_code.text.ptr as Word);
- current_token^ := categorize_identifier(input_pointer, token_length);
+ current_token^ := categorize_identifier(source_code.text.ptr, token_length);
- input_pointer := token_end
- elsif is_digit(input_pointer^) then
+ source_code := advance_source(source_code, token_length)
+ elsif is_digit(first_char) then
token_end := nil;
- current_token^.value.int_value := strtol(input_pointer, @token_end, 10);
+ current_token^.value.int_value := strtol(source_code.text.ptr, @token_end, 10);
+ token_length := cast(token_end - source_code.text.ptr as Word);
if token_end^ = 'u' then
current_token^.kind := TOKEN_WORD;
- input_pointer := token_end + 1
+ source_code := advance_source(source_code, token_length + 1u)
else
current_token^.kind := TOKEN_INTEGER;
- input_pointer := token_end
+ source_code := advance_source(source_code, token_length)
end
- elsif input_pointer^ = '(' then
- input_pointer := input_pointer + 1;
- if input_pointer^ = '*' then
- token_end := lex_comment(input_pointer + 1);
+ elsif first_char = '(' then
+ source_code := advance_source(source_code, 1u);
- if token_end <> nil then
- token_length := cast(token_end as Word) - cast(input_pointer as Word);
- current_token^.value.string_value := cast(calloc(token_length + 1u, 1) as pointer to Char);
- strncpy(current_token^.value.string_value, input_pointer, token_length);
- current_token^.kind := TOKEN_COMMENT;
+ if source_code.text.length = 0u then
+ current_token^.kind := TOKEN_LEFT_PAREN
+ elsif char_at(source_code.text, 0u) = '*' then
+ source_code := advance_source(source_code, 1u);
- input_pointer := token_end
+ if lex_comment(@source_code, @token_content) then
+ current_token^.value.string := token_content;
+ current_token^.kind := TOKEN_COMMENT
else
current_token^.kind := 0
end
else
current_token^.kind := TOKEN_LEFT_PAREN
end
- elsif input_pointer^ = ')' then
+ elsif first_char = ')' then
current_token^.kind := TOKEN_RIGHT_PAREN;
- input_pointer := input_pointer + 1
- elsif input_pointer^ = '\'' then
- token_end := lex_character(input_pointer + 1, current_token);
+ source_code := advance_source(source_code, 1u)
+ elsif first_char = '\'' then
+ token_end := lex_character(source_code.text.ptr + 1, current_token);
+ token_length := cast(token_end - source_code.text.ptr as Word);
if token_end^ = '\'' then
current_token^.kind := TOKEN_CHARACTER;
- input_pointer := token_end + 1
+ source_code := advance_source(source_code, token_length + 1u)
else
- input_pointer := input_pointer + 1
+ source_code := advance_source(source_code, 1u)
end
- elsif input_pointer^ = '"' then
- token_end := lex_string(input_pointer + 1, current_token);
+ elsif first_char = '"' then
+ token_end := lex_string(source_code.text.ptr + 1, current_token);
if token_end^ = '"' then
current_token^.kind := TOKEN_STRING;
- input_pointer := token_end + 1
+ token_length := cast(token_end - source_code.text.ptr as Word);
+ source_code := advance_source(source_code, token_length + 1u)
end
- elsif input_pointer^ = '[' then
+ elsif first_char = '[' then
current_token^.kind := TOKEN_LEFT_SQUARE;
- input_pointer := input_pointer + 1
- elsif input_pointer^ = ']' then
+ source_code := advance_source(source_code, 1u)
+ elsif first_char = ']' then
current_token^.kind := TOKEN_RIGHT_SQUARE;
- input_pointer := input_pointer + 1
- elsif input_pointer^ = '>' then
- input_pointer := input_pointer + 1;
- if input_pointer^ = '=' then
+ source_code := advance_source(source_code, 1u)
+ elsif first_char = '>' then
+ source_code := advance_source(source_code, 1u);
+
+ if source_code.text.length = 0u then
+ current_token^.kind := TOKEN_GREATER_THAN
+ elsif char_at(source_code.text, 0) = '=' then
current_token^.kind := TOKEN_GREATER_EQUAL;
- input_pointer := input_pointer + 1
+ source_code := advance_source(source_code, 1u)
else
current_token^.kind := TOKEN_GREATER_THAN
end
- elsif input_pointer^ = '<' then
- input_pointer := input_pointer + 1;
- if input_pointer^ = '=' then
+ elsif first_char = '<' then
+ source_code := advance_source(source_code, 1u);
+
+ if source_code.text.length = 0u then
+ current_token^.kind := TOKEN_LESS_THAN
+ elsif char_at(source_code.text, 0) = '=' then
current_token^.kind := TOKEN_LESS_EQUAL;
- input_pointer := input_pointer + 1
- elsif input_pointer^ = '>' then
+ source_code := advance_source(source_code, 1u)
+ elsif char_at(source_code.text, 0) = '>' then
current_token^.kind := TOKEN_NOT_EQUAL;
- input_pointer := input_pointer + 1
+ source_code := advance_source(source_code, 1u)
else
current_token^.kind := TOKEN_LESS_THAN
end
- elsif input_pointer^ = '=' then
+ elsif first_char = '=' then
current_token^.kind := TOKEN_EQUAL;
- input_pointer := input_pointer + 1
- elsif input_pointer^ = ';' then
+ source_code := advance_source(source_code, 1u)
+ elsif first_char = ';' then
current_token^.kind := TOKEN_SEMICOLON;
- input_pointer := input_pointer + 1
- elsif input_pointer^ = '.' then
+ source_code := advance_source(source_code, 1u)
+ elsif first_char = '.' then
current_token^.kind := TOKEN_DOT;
- input_pointer := input_pointer + 1
- elsif input_pointer^ = ',' then
+ source_code := advance_source(source_code, 1u)
+ elsif first_char = ',' then
current_token^.kind := TOKEN_COMMA;
- input_pointer := input_pointer + 1
- elsif input_pointer^ = '+' then
+ source_code := advance_source(source_code, 1u)
+ elsif first_char = '+' then
current_token^.kind := TOKEN_PLUS;
- input_pointer := input_pointer + 1
- elsif input_pointer^ = '-' then
+ source_code := advance_source(source_code, 1u)
+ elsif first_char = '-' then
current_token^.kind := TOKEN_MINUS;
- input_pointer := input_pointer + 1
- elsif input_pointer^ = '*' then
+ source_code := advance_source(source_code, 1u)
+ elsif first_char = '*' then
current_token^.kind := TOKEN_MULTIPLICATION;
- input_pointer := input_pointer + 1
- elsif input_pointer^ = '/' then
+ source_code := advance_source(source_code, 1u)
+ elsif first_char = '/' then
current_token^.kind := TOKEN_DIVISION;
- input_pointer := input_pointer + 1
- elsif input_pointer^ = '%' then
+ source_code := advance_source(source_code, 1u)
+ elsif first_char = '%' then
current_token^.kind := TOKEN_REMAINDER;
- input_pointer := input_pointer + 1
- elsif input_pointer^ = ':' then
- input_pointer := input_pointer + 1;
- if input_pointer^ = '=' then
+ source_code := advance_source(source_code, 1u)
+ elsif first_char = ':' then
+ source_code := advance_source(source_code, 1u);
+
+ if source_code.text.length = 0u then
+ current_token^.kind := TOKEN_COLON
+ elsif char_at(source_code.text, 0) = '=' then
current_token^.kind := TOKEN_ASSIGNMENT;
- input_pointer := input_pointer + 1
+ source_code := advance_source(source_code, 1u)
else
current_token^.kind := TOKEN_COLON
end
- elsif input_pointer^ = '^' then
+ elsif first_char = '^' then
current_token^.kind := TOKEN_HAT;
- input_pointer := input_pointer + 1
- elsif input_pointer^ = '@' then
+ source_code := advance_source(source_code, 1u)
+ elsif first_char = '@' then
current_token^.kind := TOKEN_AT;
- input_pointer := input_pointer + 1
+ source_code := advance_source(source_code, 1u)
else
current_token^.kind := 0;
- input_pointer := input_pointer + 1
+ source_code := advance_source(source_code, 1u)
end;
if current_token^.kind <> 0 then
tokens_size^ := tokens_size^ + 1u;
- input_pointer := skip_spaces(input_pointer)
+ source_code := skip_spaces(source_code)
else
write_s("Lexical analysis error on \"");
- write_c(input_pointer^);
+ write_c(first_char);
write_s("\".\n")
end
end;
@@ -819,9 +884,9 @@ end
proc process(argc: Int, argv: pointer to pointer to Char): Int;
var
- input: pointer to Char,
tokens: pointer to Token,
tokens_size: Word,
+ source_code: SourceCode,
command_line: pointer to CommandLine;
begin
command_line := parse_command_line(argc, argv);
@@ -829,12 +894,12 @@ begin
return 2
end;
- input := read_source(command_line^.input);
- if input = nil then
+ source_code.position := make_position();
+ if not read_source(command_line^.input, @source_code.text) then
perror(command_line^.input);
return 3
end;
- tokens := tokenize(input, @tokens_size);
+ tokens := tokenize(source_code, @tokens_size);
if command_line^.tokenize then
print_tokens(tokens, tokens_size)