Replace type expression with traits

This commit is contained in:
Eugen Wissner 2025-02-25 23:39:31 +01:00
parent 85b6843ecf
commit f091344cce
Signed by: belka
GPG Key ID: A27FDC1E8EE902C0
7 changed files with 423 additions and 373 deletions

View File

@ -44,30 +44,25 @@ namespace boot
definition->body().accept(this);
}
void empty_visitor::visit(call_expression *expression)
void empty_visitor::visit(procedure_call *call)
{
for (struct expression *const argument : expression->arguments)
for (expression *const argument : call->arguments)
{
argument->accept(this);
}
}
/**
 * Visits a trait expression by descending into the type it is applied to.
 *
 * \param trait Trait expression node.
 */
void empty_visitor::visit(traits_expression *trait)
{
    auto& operand_type = trait->type();

    operand_type.accept(this);
}
/**
 * Visits a cast expression: the target type is visited first, then the
 * value being cast.
 *
 * \param expression Cast expression node.
 */
void empty_visitor::visit(cast_expression *expression)
{
expression->target().accept(this);
expression->value().accept(this);
}
void empty_visitor::visit(type_expression *expression)
{
expression->body().accept(this);
}
void empty_visitor::visit(call_statement *statement)
{
statement->body().accept(this);
}
void empty_visitor::visit(assign_statement *statement)
{
statement->rvalue().accept(this);
@ -249,13 +244,11 @@ namespace boot
return this->source_position;
}
statement::statement(const struct position position)
: node(position)
statement::statement()
{
}
expression::expression(const struct position position)
: node(position)
expression::expression()
{
}
@ -539,13 +532,12 @@ namespace boot
}
}
literal::literal(const struct position position)
: expression(position)
literal::literal()
{
}
defer_statement::defer_statement(const struct position position)
: statement(position)
: node(position)
{
}
@ -562,13 +554,12 @@ namespace boot
}
}
designator_expression::designator_expression(const struct position position)
: expression(position)
designator_expression::designator_expression()
{
}
variable_expression::variable_expression(const struct position position, const std::string& name)
: designator_expression(position), m_name(name)
: node(position), name(name)
{
}
@ -577,11 +568,6 @@ namespace boot
visitor->visit(this);
}
const std::string& variable_expression::name() const
{
return m_name;
}
variable_expression *variable_expression::is_variable()
{
return this;
@ -589,7 +575,7 @@ namespace boot
array_access_expression::array_access_expression(const struct position position,
expression *base, expression *index)
: designator_expression(position), m_base(base), m_index(index)
: node(position), m_base(base), m_index(index)
{
}
@ -621,7 +607,7 @@ namespace boot
field_access_expression::field_access_expression(const struct position position,
expression *base, const std::string& field)
: designator_expression(position), m_base(base), m_field(field)
: node(position), m_base(base), m_field(field)
{
}
@ -652,7 +638,7 @@ namespace boot
dereference_expression::dereference_expression(const struct position position,
expression *base)
: designator_expression(position), m_base(base)
: node(position), m_base(base)
{
}
@ -678,7 +664,7 @@ namespace boot
binary_expression::binary_expression(const struct position position, expression *lhs,
expression *rhs, const binary_operator operation)
: expression(position), m_lhs(lhs), m_rhs(rhs), m_operator(operation)
: node(position), m_lhs(lhs), m_rhs(rhs), m_operator(operation)
{
}
@ -710,7 +696,7 @@ namespace boot
unary_expression::unary_expression(const struct position position, expression *operand,
const unary_operator operation)
: expression(position), m_operand(std::move(operand)), m_operator(operation)
: node(position), m_operand(std::move(operand)), m_operator(operation)
{
}
@ -734,22 +720,22 @@ namespace boot
delete m_operand;
}
call_expression::call_expression(const struct position position, designator_expression *callable)
: expression(position), m_callable(callable)
procedure_call::procedure_call(const struct position position, designator_expression *callable)
: node(position), m_callable(callable)
{
}
void call_expression::accept(parser_visitor *visitor)
void procedure_call::accept(parser_visitor *visitor)
{
visitor->visit(this);
}
designator_expression& call_expression::callable()
designator_expression& procedure_call::callable()
{
return *m_callable;
}
call_expression::~call_expression()
procedure_call::~procedure_call()
{
for (expression *const argument : arguments)
{
@ -760,7 +746,7 @@ namespace boot
cast_expression::cast_expression(const struct position position,
std::shared_ptr<top_type> target, expression *value)
: expression(position), m_target(target), m_value(value)
: node(position), m_target(target), m_value(value)
{
}
@ -784,39 +770,20 @@ namespace boot
delete m_value;
}
type_expression::type_expression(const struct position position, std::shared_ptr<top_type> body)
: expression(position), m_body(body)
traits_expression::traits_expression(const struct position position,
const std::string& name, std::shared_ptr<top_type> type)
: node(position), m_type(type), name(name)
{
}
void type_expression::accept(parser_visitor *visitor)
void traits_expression::accept(parser_visitor *visitor)
{
visitor->visit(this);
}
top_type& type_expression::body()
top_type& traits_expression::type()
{
return *m_body;
}
call_statement::call_statement(const struct position position, call_expression *body)
: statement(position), m_body(body)
{
}
void call_statement::accept(parser_visitor *visitor)
{
visitor->visit(this);
}
call_expression& call_statement::body()
{
return *m_body;
}
call_statement::~call_statement()
{
delete m_body;
return *m_type;
}
conditional_statements::conditional_statements(expression *prerequisite)
@ -839,7 +806,7 @@ namespace boot
}
return_statement::return_statement(const struct position position, expression *return_expression)
: statement(position), m_return_expression(return_expression)
: node(position), m_return_expression(return_expression)
{
}
@ -865,7 +832,7 @@ namespace boot
assign_statement::assign_statement(const struct position position, designator_expression *lvalue,
expression *rvalue)
: statement(position), m_lvalue(lvalue), m_rvalue(rvalue)
: node(position), m_lvalue(lvalue), m_rvalue(rvalue)
{
}
@ -906,7 +873,7 @@ namespace boot
if_statement::if_statement(const struct position position, conditional_statements *body,
std::vector<statement *> *alternative)
: statement(position), m_body(body), m_alternative(alternative)
: node(position), m_body(body), m_alternative(alternative)
{
}
@ -936,7 +903,7 @@ namespace boot
}
while_statement::while_statement(const struct position position, conditional_statements *body)
: statement(position), m_body(body)
: node(position), m_body(body)
{
}

View File

@ -131,6 +131,9 @@ defer {
[A-Za-z_][A-Za-z0-9_]* {
return yy::parser::make_IDENTIFIER(yytext, this->location);
}
#[A-Za-z_][A-Za-z0-9_]* {
return yy::parser::make_TRAIT(yytext + 1, this->location);
}
[0-9]+u {
return yy::parser::make_WORD(strtoul(yytext, NULL, 10), this->location);
}

View File

@ -75,6 +75,7 @@ along with GCC; see the file COPYING3. If not see
%start program;
%token <std::string> IDENTIFIER
%token <std::string> TRAIT
%token <std::int32_t> INTEGER
%token <std::uint32_t> WORD
%token <float> FLOAT
@ -125,11 +126,12 @@ along with GCC; see the file COPYING3. If not see
formal_parameters formal_parameter_list;
%type <elna::boot::variable_declaration *> formal_parameter
%type <std::shared_ptr<elna::boot::top_type>> type_expression;
%type <elna::boot::traits_expression *> traits_expression;
%type <elna::boot::expression *> expression operand unary;
%type <std::vector<elna::boot::expression *>> expressions actual_parameter_list;
%type <elna::boot::designator_expression *> designator_expression;
%type <elna::boot::assign_statement *> assign_statement;
%type <elna::boot::call_expression *> call_expression;
%type <elna::boot::procedure_call*> call_expression;
%type <elna::boot::while_statement *> while_statement;
%type <elna::boot::if_statement *> if_statement;
%type <elna::boot::return_statement *> return_statement;
@ -227,7 +229,7 @@ assign_statement: designator_expression ":=" expression
}
call_expression: designator_expression actual_parameter_list
{
$$ = new elna::boot::call_expression(elna::boot::make_position(@1), $1);
$$ = new elna::boot::procedure_call(elna::boot::make_position(@1), $1);
std::swap($$->arguments, $2);
}
cast_expression: "cast" "(" expression ":" type_expression ")"
@ -313,10 +315,15 @@ literal:
{
$$ = new elna::boot::number_literal<std::string>(elna::boot::make_position(@1), $1);
}
traits_expression:
TRAIT "(" type_expression ")"
{
$$ = new elna::boot::traits_expression(elna::boot::make_position(@1), $1, $3);
}
operand:
literal { $$ = $1; }
| designator_expression { $$ = $1; }
| "(" type_expression ")" { $$ = new elna::boot::type_expression(elna::boot::make_position(@1), $2); }
| traits_expression { $$ = $1; }
| cast_expression { $$ = $1; }
| call_expression { $$ = $1; }
| "(" expression ")" { $$ = $2; }
@ -448,10 +455,7 @@ statement:
| while_statement { $$ = $1; }
| if_statement { $$ = $1; }
| return_statement { $$ = $1; }
| call_expression
{
$$ = new elna::boot::call_statement(elna::boot::make_position(@1), $1);
}
| call_expression { $$ = $1; }
| defer_statement { $$ = $1; }
statements:
statement statements

View File

@ -84,11 +84,6 @@ namespace gcc
list_length(TYPE_ARG_TYPES(symbol_type)) - 1, arguments.size());
this->current_expression = error_mark_node;
}
else if (TREE_TYPE(symbol_type) == void_type_node)
{
append_statement(stmt);
this->current_expression = NULL_TREE;
}
else
{
this->current_expression = stmt;
@ -136,10 +131,10 @@ namespace gcc
}
}
void generic_visitor::visit(boot::call_expression *expression)
void generic_visitor::visit(boot::procedure_call *call)
{
location_t call_location = get_location(&expression->position());
expression->callable().accept(this);
location_t call_location = get_location(&call->position());
call->callable().accept(this);
tree expression_type = TYPE_P(this->current_expression)
? this->current_expression
@ -147,17 +142,17 @@ namespace gcc
if (TYPE_P(this->current_expression) && TREE_CODE(expression_type) == RECORD_TYPE)
{
build_record_call(call_location, this->current_expression, expression->arguments);
build_record_call(call_location, this->current_expression, call->arguments);
}
else if (TREE_CODE(expression_type) == FUNCTION_TYPE)
{
this->current_expression = build1(ADDR_EXPR,
build_pointer_type_for_mode(expression_type, VOIDmode, true), this->current_expression);
build_procedure_call(call_location, this->current_expression, expression->arguments);
build_procedure_call(call_location, this->current_expression, call->arguments);
}
else if (is_pointer_type(expression_type) && TREE_CODE(TREE_TYPE(expression_type)) == FUNCTION_TYPE)
{
build_procedure_call(call_location, this->current_expression, expression->arguments);
build_procedure_call(call_location, this->current_expression, call->arguments);
}
else
{
@ -178,11 +173,6 @@ namespace gcc
cast_target, this->current_expression);
}
void generic_visitor::visit(boot::type_expression *expression)
{
this->current_expression = build_type(expression->body());
}
void generic_visitor::visit(boot::program *program)
{
for (boot::constant_definition *const constant : program->constants)
@ -226,10 +216,7 @@ namespace gcc
DECL_ARGUMENTS(fndecl) = chainon(DECL_ARGUMENTS(fndecl), declaration_tree);
parameter_type = TREE_CHAIN(parameter_type);
}
for (boot::statement *const body_statement : program->body)
{
body_statement->accept(this);
}
visit_statements(program->body);
tree set_result = build2(INIT_EXPR, void_type_node, DECL_RESULT(fndecl),
build_int_cst_type(integer_type_node, 0));
tree return_stmt = build1(RETURN_EXPR, void_type_node, set_result);
@ -248,6 +235,19 @@ namespace gcc
cgraph_node::finalize_function(fndecl, true);
}
/**
 * Visits a block in declaration order: constants first, then variables,
 * and finally the statements of the body via visit_statements().
 *
 * \param block Block node.
 */
void generic_visitor::visit(boot::block *block)
{
    for (boot::constant_definition *const constant_node : block->constants)
    {
        constant_node->accept(this);
    }

    for (boot::variable_declaration *const variable_node : block->variables)
    {
        variable_node->accept(this);
    }

    visit_statements(block->body);
}
void generic_visitor::visit(boot::procedure_definition *definition)
{
tree declaration_type = build_procedure_type(definition->heading());
@ -903,13 +903,13 @@ namespace gcc
void generic_visitor::visit(boot::variable_expression *expression)
{
auto symbol = this->lookup(expression->name());
auto symbol = this->lookup(expression->name);
if (symbol == NULL_TREE)
{
error_at(get_location(&expression->position()),
"symbol '%s' not declared in the current scope",
expression->name().c_str());
expression->name.c_str());
this->current_expression = error_mark_node;
}
else
@ -964,40 +964,40 @@ namespace gcc
}
}
/**
 * Evaluates a trait applied to a type and stores the result in
 * current_expression.
 *
 * Recognised traits:
 *  - "size":      size_in_bytes() of the type, converted to the word type.
 *  - "alignment": TYPE_ALIGN_UNIT() of the type as a word constant.
 *  - "min"/"max": TYPE_MIN_VALUE()/TYPE_MAX_VALUE(), only when
 *                 is_integral_type() accepts the type.
 *
 * Any other combination reports an error at the trait's position and
 * leaves error_mark_node in current_expression.
 *
 * \param trait Trait expression node.
 */
void generic_visitor::visit(boot::traits_expression *trait)
{
    tree subject_type = build_type(trait->type());

    if (trait->name == "size")
    {
        tree byte_size = size_in_bytes(subject_type);

        this->current_expression = build1(CONVERT_EXPR, elna_word_type_node, byte_size);
        return;
    }
    if (trait->name == "alignment")
    {
        this->current_expression = build_int_cstu(elna_word_type_node, TYPE_ALIGN_UNIT(subject_type));
        return;
    }
    // The integral check is evaluated only after the name matched, as before.
    if (trait->name == "min" && is_integral_type(subject_type))
    {
        this->current_expression = TYPE_MIN_VALUE(subject_type);
        return;
    }
    if (trait->name == "max" && is_integral_type(subject_type))
    {
        this->current_expression = TYPE_MAX_VALUE(subject_type);
        return;
    }

    // Unknown trait, or min/max requested for a non-integral type.
    error_at(get_location(&trait->position()), "type '%s' does not have property '%s'",
        print_type(subject_type).c_str(), trait->name.c_str());
    this->current_expression = error_mark_node;
}
void generic_visitor::visit(boot::field_access_expression *expression)
{
expression->base().accept(this);
location_t expression_location = get_location(&expression->position());
if (TYPE_P(this->current_expression))
{
if (expression->field() == "size")
{
this->current_expression = build1(CONVERT_EXPR, elna_word_type_node,
size_in_bytes(this->current_expression));
}
else if (expression->field() == "alignment")
{
this->current_expression = build_int_cstu(elna_word_type_node,
TYPE_ALIGN_UNIT(this->current_expression));
}
else if (expression->field() == "min" && is_integral_type(this->current_expression))
{
this->current_expression = TYPE_MIN_VALUE(this->current_expression);
}
else if (expression->field() == "max" && is_integral_type(this->current_expression))
{
this->current_expression = TYPE_MAX_VALUE(this->current_expression);
}
else
{
error_at(expression_location, "type '%s' does not have property '%s'",
print_type(this->current_expression).c_str(), expression->field().c_str());
this->current_expression = error_mark_node;
}
}
else if (is_aggregate_type(TREE_TYPE(this->current_expression)))
if (is_aggregate_type(TREE_TYPE(this->current_expression)))
{
tree field_declaration = TYPE_FIELDS(TREE_TYPE(this->current_expression));
@ -1026,6 +1026,12 @@ namespace gcc
field_declaration, NULL_TREE);
}
}
else
{
error_at(expression_location, "type '%s' does not have a field named '%s'",
print_type(TREE_TYPE(this->current_expression)).c_str(), expression->field().c_str());
this->current_expression = error_mark_node;
}
}
void generic_visitor::visit(boot::dereference_expression *expression)
@ -1049,7 +1055,7 @@ namespace gcc
if (TREE_CODE(lvalue) == CONST_DECL)
{
error_at(statement_location, "cannot modify constant '%s'",
statement->lvalue().is_variable()->name().c_str());
statement->lvalue().is_variable()->name.c_str());
this->current_expression = error_mark_node;
}
else if (is_assignable_from(TREE_TYPE(lvalue), rvalue))
@ -1083,10 +1089,7 @@ namespace gcc
if (statement->alternative() != nullptr)
{
enter_scope();
for (const auto body_statement : *statement->alternative())
{
body_statement->accept(this);
}
visit_statements(*statement->alternative());
tree mapping = leave_scope();
append_statement(mapping);
}
@ -1120,10 +1123,7 @@ namespace gcc
append_statement(then_label_expr);
enter_scope();
for (const auto body_statement : branch.statements)
{
body_statement->accept(this);
}
visit_statements(branch.statements);
tree mapping = leave_scope();
append_statement(mapping);
append_statement(goto_endif);
@ -1160,12 +1160,19 @@ namespace gcc
this->current_expression = NULL_TREE;
}
void generic_visitor::visit(boot::call_statement *statement)
void generic_visitor::visit_statements(const std::vector<boot::statement *>& statements)
{
for (boot::statement *const statement : statements)
{
statement->accept(this);
if (this->current_expression != NULL_TREE && this->current_expression != error_mark_node)
{
statement->body().accept(this);
append_statement(this->current_expression);
this->current_expression = NULL_TREE;
}
}
}
void generic_visitor::visit(boot::return_statement *statement)
{
@ -1181,15 +1188,14 @@ namespace gcc
this->current_expression);
tree return_stmt = build1(RETURN_EXPR, void_type_node, set_result);
append_statement(return_stmt);
this->current_expression = NULL_TREE;
}
void generic_visitor::visit(boot::defer_statement *statement)
{
enter_scope();
for (boot::statement *const body_statement : statement->statements)
{
body_statement->accept(this);
}
visit_statements(statement->statements);
defer(leave_scope());
}
}

View File

@ -58,14 +58,13 @@ namespace boot
class constant_definition;
class procedure_definition;
class type_definition;
class call_expression;
class procedure_call;
class cast_expression;
class type_expression;
class assign_statement;
class if_statement;
class while_statement;
class return_statement;
class call_statement;
class traits_expression;
class block;
class program;
class binary_expression;
@ -93,10 +92,9 @@ namespace boot
virtual void visit(constant_definition *) = 0;
virtual void visit(procedure_definition *) = 0;
virtual void visit(type_definition *) = 0;
virtual void visit(call_expression *) = 0;
virtual void visit(procedure_call *) = 0;
virtual void visit(cast_expression *) = 0;
virtual void visit(type_expression *) = 0;
virtual void visit(call_statement *) = 0;
virtual void visit(traits_expression *) = 0;
virtual void visit(assign_statement *) = 0;
virtual void visit(if_statement *) = 0;
virtual void visit(while_statement *) = 0;
@ -134,10 +132,9 @@ namespace boot
virtual void visit(constant_definition *definition) override;
virtual void visit(procedure_definition *definition) override;
virtual void visit(type_definition *definition) override;
virtual void visit(call_expression *expression) override;
virtual void visit(traits_expression *trait) override;
virtual void visit(procedure_call *call) override;
virtual void visit(cast_expression *expression) override;
virtual void visit(type_expression *expression) override;
virtual void visit(call_statement *statement) override;
virtual void visit(assign_statement *statement) override;
virtual void visit(if_statement *) override;
virtual void visit(while_statement *) override;
@ -189,22 +186,16 @@ namespace boot
const struct position& position() const;
};
class statement : public node
class statement : public virtual node
{
protected:
/**
* \param position Source code position.
*/
explicit statement(const struct position position);
statement();
};
class expression : public node
class expression : public virtual node
{
protected:
/**
* \param position Source code position.
*/
explicit expression(const struct position position);
expression();
};
/**
@ -331,7 +322,7 @@ namespace boot
class literal : public expression
{
protected:
explicit literal(const struct position position);
literal();
};
/**
@ -435,31 +426,17 @@ namespace boot
virtual ~cast_expression() override;
};
/**
* Type inside an expression.
*/
class type_expression : public expression
class traits_expression : public expression
{
std::shared_ptr<top_type> m_body;
std::shared_ptr<top_type> m_type;
public:
type_expression(const struct position position, std::shared_ptr<top_type> body);
const std::string name;
traits_expression(const struct position position, const std::string& name, std::shared_ptr<top_type> type);
virtual void accept(parser_visitor *visitor) override;
top_type& body();
};
class call_statement : public statement
{
call_expression *m_body;
public:
call_statement(const struct position position, call_expression *body);
virtual void accept(parser_visitor *visitor) override;
call_expression& body();
virtual ~call_statement() override;
top_type& type();
};
/**
@ -501,19 +478,17 @@ namespace boot
virtual dereference_expression *is_dereference();
protected:
designator_expression(const struct position position);
designator_expression();
};
class variable_expression : public designator_expression
{
std::string m_name;
public:
const std::string name;
variable_expression(const struct position position, const std::string& name);
virtual void accept(parser_visitor *visitor) override;
const std::string& name() const;
variable_expression *is_variable() override;
};
@ -570,19 +545,19 @@ namespace boot
/**
* Procedure call expression.
*/
class call_expression : public expression
class procedure_call : public expression, public statement
{
designator_expression *m_callable;
public:
std::vector<expression *> arguments;
call_expression(const struct position position, designator_expression *callable);
procedure_call(const struct position position, designator_expression *callable);
virtual void accept(parser_visitor *visitor) override;
designator_expression& callable();
virtual ~call_expression() override;
virtual ~procedure_call() override;
};
class assign_statement : public statement
@ -676,7 +651,7 @@ namespace boot
T value;
number_literal(const struct position position, const T& value)
: literal(position), value(value)
: node(position), value(value)
{
}

View File

@ -60,15 +60,16 @@ namespace gcc
tree procedure_address, const std::vector<boot::expression *>& arguments);
void build_record_call(location_t call_location,
tree symbol, const std::vector<boot::expression *>& arguments);
void visit_statements(const std::vector<boot::statement *>& statements);
public:
generic_visitor(std::shared_ptr<symbol_table> symbol_table);
void visit(boot::program *program) override;
void visit(boot::procedure_definition *definition) override;
void visit(boot::call_expression *expression) override;
void visit(boot::procedure_call *call) override;
void visit(boot::cast_expression *expression) override;
void visit(boot::type_expression *expression) override;
void visit(boot::traits_expression *trait) override;
void visit(boot::number_literal<std::int32_t> *literal) override;
void visit(boot::number_literal<std::uint32_t> *literal) override;
void visit(boot::number_literal<double> *literal) override;
@ -85,10 +86,10 @@ namespace gcc
void visit(boot::array_access_expression *expression) override;
void visit(boot::field_access_expression *expression) override;
void visit(boot::dereference_expression *expression) override;
void visit(boot::block *block) override;
void visit(boot::assign_statement *statement) override;
void visit(boot::if_statement *statement) override;
void visit(boot::while_statement *statement) override;
void visit(boot::call_statement *statement) override;
void visit(boot::return_statement *statement) override;
void visit(boot::defer_statement *statement) override;
};

View File

@ -72,13 +72,28 @@ type
first: Position
last: Position
end
FILE* = record end
SourceFile* = record
buffer: [1024]Char
handle: ^FILE
size: Word
index: Word
end
StringBuffer* = record
data: ^Byte
size: Word
capacity: Word
end
SourceCode = record
position: Position
text: String
input: ^Byte
empty: proc(data: ^Byte) -> Bool
advance: proc(data: ^Byte)
head: proc(data: ^Byte) -> Char
end
TokenValue* = union
int_value: Int
string_value: ^Char
string: String
boolean_value: Bool
char_value: Char
@ -88,7 +103,6 @@ type
value: TokenValue
location: Location
end
FILE* = record end
CommandLine* = record
input: ^Char
tokenize: Bool
@ -119,8 +133,6 @@ proc strncpy(dst: ^Char, src: ^Char, dsize: Word) -> ^Char; extern
proc strcpy(dst: ^Char, src: ^Char) -> ^Char; extern
proc strlen(ptr: ^Char) -> Word; extern
proc strtol(nptr: ^Char, endptr: ^^Char, base: Int) -> Int; extern
proc perror(s: ^Char); extern
proc exit(code: Int) -> !; extern
@ -225,6 +237,41 @@ begin
return String(copy, origin.length)
end
(*
  Creates an empty string buffer: capacity is set to 64, the storage is
  obtained from malloc and size starts at 0.
  The result of malloc is not checked here.
*)
proc string_buffer_new() -> StringBuffer;
var
result: StringBuffer
begin
result.capacity := 64u
result.data := malloc(result.capacity)
result.size := 0u
return result
end
(*
  Appends one character to the end of the buffer.
  When size has reached capacity, capacity is increased by 1024 and the
  storage is reallocated before the character is written.
  The result of realloc is not checked here.
*)
proc string_buffer_push(buffer: ^StringBuffer, char: Char);
begin
if buffer^.size >= buffer^.capacity then
buffer^.capacity := buffer^.capacity + 1024u
buffer^.data := realloc(buffer^.data, buffer^.capacity)
end
(buffer^.data + buffer^.size)^ := cast(char: Byte)
buffer^.size := buffer^.size + 1u
end
(*
  Discards the last count bytes by decreasing size; the storage itself is
  left untouched.
  NOTE(review): there is no check that count is not greater than size, so
  the caller has to guarantee it.
*)
proc string_buffer_pop(buffer: ^StringBuffer, count: Word);
begin
buffer^.size := buffer^.size - count
end
(*
  Returns the current contents as a String and resets size to 0.
  The returned String is built from the buffer's own data pointer and the
  size before the reset; nothing is copied and capacity is kept.
  NOTE(review): later pushes write into the same storage, so a caller that
  keeps the result presumably has to copy it first; confirm at call sites.
*)
proc string_buffer_clear(buffer: ^StringBuffer) -> String;
var
result: String
begin
result := String(cast(buffer^.data: ^Char), buffer^.size)
buffer^.size := 0u
return result
end
(*
End of standard procedures.
*)
@ -234,36 +281,20 @@ begin
return Position(1u, 1u)
end
proc read_source(filename: ^Char, result: ^String) -> Bool;
proc read_source(filename: ^Char) -> ^SourceFile;
var
input_file: ^FILE
source_size: Int
input: ^Byte
result: ^SourceFile
file_handle: ^FILE
begin
input_file := fopen(filename, "rb\0".ptr)
file_handle := fopen(filename, "rb\0".ptr)
if input_file = nil then
return false
if file_handle <> nil then
result := cast(malloc(#size(SourceFile)): ^SourceFile)
result^.handle := file_handle
result^.size := 0u
result^.index := 1u
end
defer
fclose(input_file)
end
if fseek(input_file, 0, SEEK_END) <> 0 then
return false
end
source_size := ftell(input_file)
if source_size < 0 then
return false
end
rewind(input_file)
input := malloc(cast(source_size: Word))
if fread(input, cast(source_size: Word), 1u, input_file) <> 1u then
return false
end
result^ := String(cast(input: ^Char), cast(source_size: Word))
return true
return result
end
proc escape_char(escape: Char, result: ^Char) -> Bool;
@ -312,118 +343,169 @@ begin
return successful
end
proc advance_source(source_code: SourceCode, length: Word) -> SourceCode;
proc source_file_empty(source_input: ^Byte) -> Bool;
var
source_file: ^SourceFile
begin
source_code.text := open_substring(source_code.text, length)
source_code.position.column := source_code.position.column + length
source_file := cast(source_input: ^SourceFile)
return source_code
if source_file^.index > source_file^.size then
source_file^.size := fread(cast(@source_file^.buffer: ^Byte), 1u, 1024u, source_file^.handle)
source_file^.index := 1u
end
return source_file^.size = 0u
end
proc skip_spaces(source_code: SourceCode) -> SourceCode;
proc source_file_head(source_input: ^Byte) -> Char;
var
source_file: ^SourceFile
begin
while source_code.text.length > 0u and is_space(source_code.text[1u]) do
if source_code.text[1u] = '\n' then
source_code.position.line := source_code.position.line + 1u
source_code.position.column := 1u
else
source_code.position.column := source_code.position.column + 1u
end
source_code.text := open_substring(source_code.text, 1u)
end
return source_code
source_file := cast(source_input: ^SourceFile)
return source_file^.buffer[source_file^.index]
end
proc lex_identifier(source_code: ^SourceCode, token_content: ^String);
(*
  Moves a file-backed source one character forward.
  source_input is cast to a SourceFile pointer and its index is increased
  by one; no bounds check or buffer refill happens here.
*)
proc source_file_advance(source_input: ^Byte);
var
source_file: ^SourceFile
begin
source_file := cast(source_input: ^SourceFile)
source_file^.index := source_file^.index + 1u
end
(*
  Calls the empty callback stored in the SourceCode record with the
  record's input pointer and returns its result.
*)
proc source_code_empty(source_code: ^SourceCode) -> Bool;
begin
return source_code^.empty(source_code^.input)
end
(*
  Returns the character produced by the head callback of the SourceCode
  record for the record's input pointer. Nothing is advanced here.
  Unlike the neighbouring helpers the record is taken by value.
*)
proc source_code_head(source_code: SourceCode) -> Char;
begin
return source_code.head(source_code.input)
end
(*
  Consumes one character of the source.
  The advance callback of the SourceCode record is called with the
  record's input pointer, then the column of the tracked position is
  moved one step to the right.
  The column was previously assigned to itself and therefore never
  changed, so every reported position stayed in its initial column.
*)
proc source_code_advance(source_code: ^SourceCode);
begin
  source_code^.advance(source_code^.input)
  source_code^.position.column := source_code^.position.column + 1u
end
(*
  Records a line break in the tracked position: the line is increased by
  one and the column is reset to 0.
  No input is consumed here; skip_spaces calls source_code_advance right
  after this procedure.
*)
proc source_code_break(source_code: ^SourceCode);
begin
source_code^.position.line := source_code^.position.line + 1u
source_code^.position.column := 0u
end
(*
  Returns true when the source is not exhausted and its current character
  equals expected. The character is only inspected, not consumed.
*)
proc source_code_expect(source_code: ^SourceCode, expected: Char) -> Bool;
begin
return not source_code_empty(source_code) and source_code_head(source_code^) = expected
end
(*
  Consumes characters for as long as the source is not exhausted and
  is_space accepts the current character.
  For a newline character source_code_break is called before the character
  is consumed, so the line counter is updated first.
*)
proc skip_spaces(source_code: ^SourceCode);
begin
while not source_code_empty(source_code) and is_space(source_code_head(source_code^)) do
if source_code_head(source_code^) = '\n' then
source_code_break(source_code)
end
source_code_advance(source_code)
end
end
(*
  Returns true when char may appear inside an identifier: either is_alnum
  accepts it or it is an underscore.
*)
proc is_ident(char: Char) -> Bool;
begin
return is_alnum(char) or char = '_'
end
proc lex_identifier(source_code: ^SourceCode, token_content: ^StringBuffer);
var
content_length: Word
begin
content_length := 0u
token_content^ := source_code^.text
while is_alnum(source_code^.text[1u]) or source_code^.text[1u] = '_' do
content_length := content_length + 1u
source_code^ := advance_source(source_code^, 1u)
while not source_code_empty(source_code) and is_ident(source_code_head(source_code^)) do
string_buffer_push(token_content, source_code_head(source_code^))
source_code_advance(source_code)
end
token_content^ := substring(token_content^, 0u, content_length)
end
proc lex_comment(source_code: ^SourceCode, token_content: ^String) -> Bool;
proc lex_comment(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool;
var
content_length: Word
trailing: Word
begin
content_length := 0u
token_content^ := source_code^.text
trailing := 0u
while source_code^.text.length > 0u and trailing < 2u do
if source_code^.text[1u] = '*' then
content_length := content_length + trailing
while not source_code_empty(source_code) and trailing < 2u do
if source_code_head(source_code^) = '*' then
string_buffer_push(token_content, '*')
trailing := 1u
elsif source_code^.text[1u] = ')' and trailing = 1u then
elsif source_code_head(source_code^) = ')' and trailing = 1u then
string_buffer_pop(token_content, 1u)
trailing := 2u
else
content_length := content_length + trailing + 1u
string_buffer_push(token_content, source_code_head(source_code^))
trailing := 0u
end
source_code^ := advance_source(source_code^, 1u)
source_code_advance(source_code)
end
return trailing = 2u
end
proc lex_character(input: ^Char, current_token: ^Token) -> ^Char;
proc lex_character(source_code: ^SourceCode, token_content: ^Char) -> Bool;
var
successful: Bool
begin
if input^ = '\\' then
input := input + 1
if escape_char(input^, @current_token^.value.char_value) then
input := input + 1
successful := not source_code_empty(source_code)
if successful then
if source_code_head(source_code^) = '\\' then
source_code_advance(source_code)
successful := not source_code_empty(source_code) and escape_char(source_code_head(source_code^), token_content)
else
token_content^ := source_code_head(source_code^)
successful := true
end
elsif input^ <> '\0' then
current_token^.value.char_value := input^
input := input + 1
end
return input
if successful then
source_code_advance(source_code)
end
return successful
end
proc lex_string(input: ^Char, current_token: ^Token) -> ^Char;
proc lex_string(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool;
var
token_end, constructed_string: ^Char
token_length: Word
is_valid: Bool
next_char: Char
begin
token_end := input
while token_end^ <> '\0' and not ((token_end - 1)^ <> '\\' and token_end^ = '"') do
token_end := token_end + 1
end
if token_end^ <> '\"' then
return input
end
token_length := cast(token_end - input: Word)
current_token^.value.string_value := cast(calloc(token_length, 1u): ^Char)
is_valid := true
constructed_string := current_token^.value.string_value
while input < token_end and is_valid do
if input^ = '\\' then
input := input + 1
if escape_char(input^, constructed_string) then
input := input + 1
while is_valid and not source_code_empty(source_code) and source_code_head(source_code^) <> '"' do
is_valid := lex_character(source_code, @next_char)
if is_valid then
string_buffer_push(token_content, next_char)
end
end
if is_valid and source_code_expect(source_code, '"') then
source_code_advance(source_code)
else
is_valid := false
end
elsif input^ <> '\0' then
constructed_string^ := input^
input := input + 1
end
return is_valid
end
constructed_string := constructed_string + 1
end
proc lex_number(source_code: ^SourceCode, token_content: ^Int);
begin
token_content^ := 0
return token_end
while not source_code_empty(source_code) and is_digit(source_code_head(source_code^)) do
token_content^ := token_content^ * 10 + (cast(source_code_head(source_code^): Int) - cast('0': Int))
source_code_advance(source_code)
end
end
proc print_tokens(tokens: ^Token, tokens_size: Word);
@ -647,46 +729,43 @@ end
proc tokenize(source_code: SourceCode, tokens_size: ^Word) -> ^Token;
var
token_end: ^Char
tokens, current_token: ^Token
token_length: Word
first_char: Char
token_content: String
token_buffer: StringBuffer
begin
tokens_size^ := 0u
tokens := nil
source_code := skip_spaces(source_code)
token_buffer := string_buffer_new()
while source_code.text.length <> 0u do
tokens := cast(reallocarray(cast(tokens: ^Byte), tokens_size^ + 1u, Token.size): ^Token)
skip_spaces(@source_code)
while not source_code_empty(@source_code) do
tokens := cast(reallocarray(cast(tokens: ^Byte), tokens_size^ + 1u, #size(Token)): ^Token)
current_token := tokens + tokens_size^
first_char := source_code.text[1u]
first_char := source_code_head(source_code)
if is_alpha(first_char) or first_char = '_' then
lex_identifier(@source_code, @token_content)
current_token^ := categorize_identifier(token_content)
lex_identifier(@source_code, @token_buffer)
current_token^ := categorize_identifier(string_buffer_clear(@token_buffer))
elsif is_digit(first_char) then
token_end := nil
current_token^.value.int_value := strtol(source_code.text.ptr, @token_end, 10)
token_length := cast(token_end - source_code.text.ptr: Word)
lex_number(@source_code, @current_token^.value.int_value)
if token_end^ = 'u' then
if source_code_expect(@source_code, 'u') then
current_token^.kind := TOKEN_WORD
source_code := advance_source(source_code, token_length + 1u)
source_code_advance(@source_code)
else
current_token^.kind := TOKEN_INTEGER
source_code := advance_source(source_code, token_length)
end
elsif first_char = '(' then
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
if source_code.text.length = 0u then
if source_code_empty(@source_code) then
current_token^.kind := TOKEN_LEFT_PAREN
elsif source_code.text[1u] = '*' then
source_code := advance_source(source_code, 1u)
elsif source_code_head(source_code) = '*' then
source_code_advance(@source_code)
if lex_comment(@source_code, @token_content) then
current_token^.value.string := string_dup(token_content)
if lex_comment(@source_code, @token_buffer) then
current_token^.value.string := string_dup(string_buffer_clear(@token_buffer))
current_token^.kind := TOKEN_COMMENT
else
current_token^.kind := 0
@ -696,125 +775,125 @@ begin
end
elsif first_char = ')' then
current_token^.kind := TOKEN_RIGHT_PAREN
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = '\'' then
token_end := lex_character(source_code.text.ptr + 1, current_token)
token_length := cast(token_end - source_code.text.ptr: Word)
source_code_advance(@source_code)
if token_end^ = '\'' then
if lex_character(@source_code, @current_token^.value.char_value) and source_code_expect(@source_code, '\'') then
current_token^.kind := TOKEN_CHARACTER
source_code := advance_source(source_code, token_length + 1u)
source_code_advance(@source_code)
else
source_code := advance_source(source_code, 1u)
current_token^.kind := 0
end
elsif first_char = '"' then
token_end := lex_string(source_code.text.ptr + 1, current_token)
source_code_advance(@source_code)
if token_end^ = '"' then
if lex_string(@source_code, @token_buffer) then
current_token^.kind := TOKEN_STRING
token_length := cast(token_end - source_code.text.ptr: Word)
source_code := advance_source(source_code, token_length + 1u)
current_token^.value.string := string_dup(string_buffer_clear(@token_buffer))
else
current_token^.kind := 0
end
elsif first_char = '[' then
current_token^.kind := TOKEN_LEFT_SQUARE
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = ']' then
current_token^.kind := TOKEN_RIGHT_SQUARE
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = '>' then
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
if source_code.text.length = 0u then
if source_code_empty(@source_code) then
current_token^.kind := TOKEN_GREATER_THAN
elsif source_code.text[1u] = '=' then
elsif source_code_head(source_code) = '=' then
current_token^.kind := TOKEN_GREATER_EQUAL
source_code := advance_source(source_code, 1u)
elsif source_code.text[1u] = '>' then
source_code_advance(@source_code)
elsif source_code_head(source_code) = '>' then
current_token^.kind := TOKEN_SHIFT_RIGHT
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
else
current_token^.kind := TOKEN_GREATER_THAN
end
elsif first_char = '<' then
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
if source_code.text.length = 0u then
if source_code_empty(@source_code) then
current_token^.kind := TOKEN_LESS_THAN
elsif source_code.text[1u] = '=' then
elsif source_code_head(source_code) = '=' then
current_token^.kind := TOKEN_LESS_EQUAL
source_code := advance_source(source_code, 1u)
elsif source_code.text[1u] = '<' then
source_code_advance(@source_code)
elsif source_code_head(source_code) = '<' then
current_token^.kind := TOKEN_SHIFT_LEFT
source_code := advance_source(source_code, 1u)
elsif source_code.text[1u] = '>' then
source_code_advance(@source_code)
elsif source_code_head(source_code) = '>' then
current_token^.kind := TOKEN_NOT_EQUAL
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
else
current_token^.kind := TOKEN_LESS_THAN
end
elsif first_char = '=' then
current_token^.kind := TOKEN_EQUAL
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = ';' then
current_token^.kind := TOKEN_SEMICOLON
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = '.' then
current_token^.kind := TOKEN_DOT
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = ',' then
current_token^.kind := TOKEN_COMMA
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = '+' then
current_token^.kind := TOKEN_PLUS
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = '-' then
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
if source_code.text.length = 0u then
if source_code_empty(@source_code) then
current_token^.kind := TOKEN_MINUS
elsif source_code.text[1u] = '>' then
elsif source_code_head(source_code) = '>' then
current_token^.kind := TOKEN_ARROW
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
else
current_token^.kind := TOKEN_MINUS
end
elsif first_char = '*' then
current_token^.kind := TOKEN_MULTIPLICATION
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = '/' then
current_token^.kind := TOKEN_DIVISION
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = '%' then
current_token^.kind := TOKEN_REMAINDER
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = ':' then
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
if source_code.text.length = 0u then
if source_code_empty(@source_code) then
current_token^.kind := TOKEN_COLON
elsif source_code.text[1u] = '=' then
elsif source_code_head(source_code) = '=' then
current_token^.kind := TOKEN_ASSIGNMENT
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
else
current_token^.kind := TOKEN_COLON
end
elsif first_char = '^' then
current_token^.kind := TOKEN_HAT
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = '@' then
current_token^.kind := TOKEN_AT
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
elsif first_char = '!' then
current_token^.kind := TOKEN_EXCLAMATION
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
else
current_token^.kind := 0
source_code := advance_source(source_code, 1u)
source_code_advance(@source_code)
end
if current_token^.kind <> 0 then
tokens_size^ := tokens_size^ + 1u
source_code := skip_spaces(source_code)
skip_spaces(@source_code)
else
write_s("Lexical analysis error on \"")
write_c(first_char)
@ -832,7 +911,7 @@ var
result: ^CommandLine
begin
i := 1
result := cast(malloc(CommandLine.size): ^CommandLine)
result := cast(malloc(#size(CommandLine)): ^CommandLine)
result^.tokenize := false
result^.syntax_tree := false
result^.input := nil
@ -872,23 +951,38 @@ var
tokens_size: Word
source_code: SourceCode
command_line: ^CommandLine
return_code: Int
begin
return_code := 0
command_line := parse_command_line(argc, argv)
if command_line = nil then
return 2
return_code := 2
end
if return_code = 0 then
source_code.position := make_position()
if not read_source(command_line^.input, @source_code.text) then
source_code.input := cast(read_source(command_line^.input): ^Byte)
source_code.empty := source_file_empty
source_code.head := source_file_head
source_code.advance := source_file_advance
if source_code.input = nil then
perror(command_line^.input)
return 3
return_code := 3
end
end
if return_code = 0 then
tokens := tokenize(source_code, @tokens_size)
fclose(cast(source_code.input: ^SourceFile)^.handle)
if command_line^.tokenize then
print_tokens(tokens, tokens_size)
end
return 0
end
return return_code
end
begin