Support one hardcoded import

This commit is contained in:
2025-07-10 00:43:17 +02:00
parent 181b19eefe
commit 34abb6b4f5
18 changed files with 396 additions and 312 deletions

View File

@ -17,46 +17,67 @@ along with GCC; see the file COPYING3. If not see
#include "elna/boot/dependency.h" #include "elna/boot/dependency.h"
#include <fstream>
#include <sstream>
#include <string.h>
#include "elna/boot/driver.h" #include "elna/boot/driver.h"
#include "elna/boot/semantic.h"
#include "parser.hh" #include "parser.hh"
namespace elna::boot namespace elna::boot
{ {
dependency_graph::dependency_graph() dependency::dependency(const char *path)
: error_container(path)
{ {
} }
dependency_graph::dependency_graph(error_list&& errors) dependency read_sources(std::istream& entry_point, const char *entry_path)
: m_errors(std::move(errors))
{
}
bool dependency_graph::has_errors() const
{
return !errors().empty();
}
const error_list& dependency_graph::errors() const
{
return m_errors;
}
dependency_graph read_sources(std::istream& entry_point, const char *entry_path)
{ {
driver parse_driver{ entry_path }; driver parse_driver{ entry_path };
lexer tokenizer(entry_point); lexer tokenizer(entry_point);
yy::parser parser(tokenizer, parse_driver); yy::parser parser(tokenizer, parse_driver);
dependency outcome{ entry_path };
if (parser()) if (parser())
{ {
return dependency_graph(std::move(parse_driver.errors())); std::swap(outcome.errors(), parse_driver.errors());
return outcome;
} }
else else
{ {
dependency_graph outcome; std::swap(outcome.tree, parse_driver.tree);
outcome.modules.emplace_back(std::move(parse_driver.tree)); }
declaration_visitor declaration_visitor(entry_path);
outcome.tree->accept(&declaration_visitor);
if (!declaration_visitor.errors().empty())
{
std::swap(outcome.errors(), parse_driver.errors());
}
outcome.unresolved = declaration_visitor.unresolved;
return outcome; return outcome;
} }
/**
 * Builds a relative source file path from the segments of a module name.
 *
 * The segments are joined with the platform's directory separator in the
 * order given, then the extension of the resulting path is set to ".elna".
 *
 * \param segments Components of the module name, outermost first.
 * \return The joined path with the ".elna" extension, or an empty path
 *         (without any extension) if \a segments is empty.
 */
std::filesystem::path build_path(const std::vector<std::string>& segments)
{
std::filesystem::path result;
std::vector<std::string>::const_iterator segment_iterator = std::cbegin(segments);
// No segments: return the empty path as is, do not append an extension.
if (segment_iterator == std::cend(segments))
{
return result;
}
// The first segment is assigned, the remaining ones are appended with "/=".
result = *segment_iterator;
++segment_iterator;
for (; segment_iterator != std::cend(segments); ++segment_iterator)
{
result /= *segment_iterator;
}
// replace_extension() replaces an existing extension of the last segment
// if there is one, otherwise it appends ".elna".
result.replace_extension(".elna");
return result;
} }
} }

View File

@ -182,7 +182,7 @@ program:
} }
| "module" ";" import_part constant_part type_part variable_part procedure_part "end" "." | "module" ";" import_part constant_part type_part variable_part procedure_part "end" "."
{ {
auto tree = new boot::program(boot::make_position(@1)); auto tree = new boot::unit(boot::make_position(@1));
std::swap(tree->imports, $3); std::swap(tree->imports, $3);
std::swap(tree->constants, $4); std::swap(tree->constants, $4);

View File

@ -43,4 +43,9 @@ namespace elna::boot
{ {
return m_errors; return m_errors;
} }
/**
 * Tells whether this container holds any errors.
 *
 * \return true if the error list is not empty, false otherwise.
 */
bool error_container::has_errors() const
{
    const bool is_clean = m_errors.empty();

    return !is_clean;
}
} }

View File

@ -74,9 +74,8 @@ namespace elna::boot
return message; return message;
} }
name_analysis_visitor::name_analysis_visitor(const char *path, std::shared_ptr<symbol_table> symbols, name_analysis_visitor::name_analysis_visitor(const char *path, symbol_bag bag)
std::unordered_map<std::string, std::shared_ptr<alias_type>>&& unresolved) : error_container(path), bag(bag)
: error_container(path), symbols(symbols), unresolved(std::move(unresolved))
{ {
} }
@ -120,20 +119,20 @@ namespace elna::boot
void name_analysis_visitor::visit(type_declaration *definition) void name_analysis_visitor::visit(type_declaration *definition)
{ {
definition->body().accept(this); definition->body().accept(this);
auto unresolved_declaration = this->unresolved.at(definition->identifier.identifier); auto unresolved_declaration = this->bag.unresolved.at(definition->identifier.identifier);
unresolved_declaration->reference = this->current_type; unresolved_declaration->reference = this->current_type;
} }
void name_analysis_visitor::visit(named_type_expression *type_expression) void name_analysis_visitor::visit(named_type_expression *type_expression)
{ {
auto unresolved_alias = this->unresolved.find(type_expression->name); auto unresolved_alias = this->bag.unresolved.find(type_expression->name);
if (unresolved_alias != this->unresolved.end()) if (unresolved_alias != this->bag.unresolved.end())
{ {
this->current_type = type(unresolved_alias->second); this->current_type = type(unresolved_alias->second);
} }
else if (auto from_symbol_table = this->symbols->lookup(type_expression->name)) else if (auto from_symbol_table = this->bag.lookup(type_expression->name))
{ {
this->current_type = from_symbol_table->is_type()->symbol; this->current_type = from_symbol_table->is_type()->symbol;
} }
@ -214,28 +213,24 @@ namespace elna::boot
{ {
declaration->variable_type().accept(this); declaration->variable_type().accept(this);
this->symbols->enter(declaration->identifier.identifier, this->bag.enter(declaration->identifier.identifier, std::make_shared<variable_info>(this->current_type));
std::make_shared<variable_info>(this->current_type));
} }
void name_analysis_visitor::visit(constant_declaration *definition) void name_analysis_visitor::visit(constant_declaration *definition)
{ {
definition->body().accept(this); definition->body().accept(this);
this->symbols->enter(definition->identifier.identifier, this->bag.enter(definition->identifier.identifier, std::make_shared<constant_info>(this->current_literal));
std::make_shared<constant_info>(this->current_literal));
} }
void name_analysis_visitor::visit(procedure_declaration *definition) void name_analysis_visitor::visit(procedure_declaration *definition)
{ {
std::shared_ptr<procedure_info> info; std::shared_ptr<procedure_info> info;
auto heading = build_procedure(definition->heading());
if (definition->body.has_value()) if (definition->body.has_value())
{ {
info = std::make_shared<procedure_info>(build_procedure(definition->heading()), info = std::make_shared<procedure_info>(heading, definition->parameter_names, this->bag.enter());
definition->parameter_names, this->symbols);
this->symbols = info->symbols;
for (constant_declaration *const constant : definition->body.value().constants()) for (constant_declaration *const constant : definition->body.value().constants())
{ {
@ -249,14 +244,13 @@ namespace elna::boot
{ {
statement->accept(this); statement->accept(this);
} }
this->symbols = this->symbols->scope(); this->bag.leave();
} }
else else
{ {
info = std::make_shared<procedure_info>(build_procedure(definition->heading()), info = std::make_shared<procedure_info>(heading, definition->parameter_names);
definition->parameter_names);
} }
this->symbols->enter(definition->identifier.identifier, info); this->bag.enter(definition->identifier.identifier, info);
} }
void name_analysis_visitor::visit(assign_statement *statement) void name_analysis_visitor::visit(assign_statement *statement)
@ -379,14 +373,14 @@ namespace elna::boot
{ {
type->accept(this); type->accept(this);
} }
for (auto& unresolved : this->unresolved) for (auto& unresolved : this->bag.unresolved)
{ {
std::vector<std::string> path; std::vector<std::string> path;
if (check_unresolved_symbol(unresolved.second, path)) if (check_unresolved_symbol(unresolved.second, path))
{ {
auto info = std::make_shared<type_info>(type_info(type(unresolved.second))); auto info = std::make_shared<type_info>(type_info(type(unresolved.second)));
this->symbols->enter(std::move(unresolved.first), info); this->bag.enter(unresolved.first, info);
} }
else else
{ {

View File

@ -313,13 +313,9 @@ namespace elna::boot
} }
procedure_info::procedure_info(const procedure_type symbol, const std::vector<std::string> names, procedure_info::procedure_info(const procedure_type symbol, const std::vector<std::string> names,
std::shared_ptr<symbol_table> parent_table) std::shared_ptr<symbol_table> scope)
: symbol(symbol), names(names) : symbol(symbol), names(names), symbols(scope)
{ {
if (parent_table != nullptr)
{
this->symbols = std::make_shared<symbol_table>(parent_table);
}
} }
std::shared_ptr<procedure_info> procedure_info::is_procedure() std::shared_ptr<procedure_info> procedure_info::is_procedure()
@ -361,4 +357,57 @@ namespace elna::boot
return result; return result;
} }
/**
 * Creates a bag whose current scope is a newly allocated,
 * default-constructed symbol table. The import list and the table of
 * unresolved forward declarations start out empty.
 */
symbol_bag::symbol_bag()
    : symbols(std::make_shared<symbol_table>())
{
}
/**
 * Creates a bag that takes over a table of unresolved forward declarations
 * and uses an existing symbol table as its current scope.
 *
 * \param unresolved Forward declarations; the contents are moved into the
 *                   bag, the argument is left in a moved-from state.
 * \param symbols    Symbol table that becomes the current scope.
 */
symbol_bag::symbol_bag(forward_table&& unresolved, std::shared_ptr<symbol_table> symbols)
    // A named rvalue reference is an lvalue: without std::move the whole map
    // would be copied even though the caller explicitly gave it away.
    : symbols(std::move(symbols)), unresolved(std::move(unresolved))
{
}
/**
 * Searches for a symbol by name.
 *
 * The imported tables are consulted first, in the iteration order of the
 * import list; the first hit wins. Only if no import knows the name, the
 * current scope is asked.
 *
 * \param name Symbol name.
 * \return The first match found in an import, otherwise whatever the lookup
 *         in the current scope yields.
 */
std::shared_ptr<info> symbol_bag::lookup(const std::string& name)
{
    for (const auto& imported_table : this->imports)
    {
        auto found = imported_table->lookup(name);

        if (found)
        {
            return found;
        }
    }
    return this->symbols->lookup(name);
}
bool symbol_bag::enter(const std::string& name, std::shared_ptr<info> entry)
{
return this->symbols->enter(name, entry);
}
/**
 * Opens a new scope: allocates a symbol table constructed from the current
 * one and makes it the current scope.
 *
 * \return The newly created table.
 */
std::shared_ptr<symbol_table> symbol_bag::enter()
{
    auto nested_scope = std::make_shared<symbol_table>(this->symbols);

    this->symbols = nested_scope;
    return nested_scope;
}
void symbol_bag::enter(std::shared_ptr<symbol_table> child)
{
this->symbols = child;
}
/**
 * Closes the current scope: the table returned by scope() of the current
 * table becomes the current scope.
 */
void symbol_bag::leave()
{
    auto enclosing_scope = this->symbols->scope();

    this->symbols = enclosing_scope;
}
void symbol_bag::add_import(std::shared_ptr<symbol_table> table)
{
this->imports.push_front(table);
}
void symbol_bag::add_import(const symbol_bag& bag)
{
add_import(bag.symbols);
}
} }

View File

@ -29,16 +29,14 @@ along with GCC; see the file COPYING3. If not see
#include "stringpool.h" #include "stringpool.h"
#include "diagnostic.h" #include "diagnostic.h"
#include "realmpfr.h" #include "realmpfr.h"
#include "stor-layout.h"
#include "varasm.h" #include "varasm.h"
#include "fold-const.h" #include "fold-const.h"
#include "langhooks.h" #include "langhooks.h"
namespace elna::gcc namespace elna::gcc
{ {
generic_visitor::generic_visitor(std::shared_ptr<symbol_table> symbol_table, generic_visitor::generic_visitor(std::shared_ptr<symbol_table> symbol_table, elna::boot::symbol_bag bag)
std::shared_ptr<boot::symbol_table> info_table) : bag(bag), symbols(symbol_table)
: symbols(symbol_table), info_table(info_table)
{ {
} }
@ -321,7 +319,7 @@ namespace elna::gcc
DECL_STRUCT_FUNCTION(fndecl)->language = ggc_cleared_alloc<language_function>(); DECL_STRUCT_FUNCTION(fndecl)->language = ggc_cleared_alloc<language_function>();
enter_scope(); enter_scope();
this->info_table = this->info_table->lookup(definition->identifier.identifier)->is_procedure()->symbols; this->bag.enter(this->bag.lookup(definition->identifier.identifier)->is_procedure()->symbols);
tree argument_chain = DECL_ARGUMENTS(fndecl); tree argument_chain = DECL_ARGUMENTS(fndecl);
for (; argument_chain != NULL_TREE; argument_chain = TREE_CHAIN(argument_chain)) for (; argument_chain != NULL_TREE; argument_chain = TREE_CHAIN(argument_chain))
@ -339,7 +337,7 @@ namespace elna::gcc
visit_statements(definition->body.value().body()); visit_statements(definition->body.value().body());
tree mapping = leave_scope(); tree mapping = leave_scope();
this->info_table = this->info_table->scope(); this->bag.leave();
BLOCK_SUPERCONTEXT(BIND_EXPR_BLOCK(mapping)) = fndecl; BLOCK_SUPERCONTEXT(BIND_EXPR_BLOCK(mapping)) = fndecl;
DECL_INITIAL(fndecl) = BIND_EXPR_BLOCK(mapping); DECL_INITIAL(fndecl) = BIND_EXPR_BLOCK(mapping);
@ -746,7 +744,7 @@ namespace elna::gcc
void generic_visitor::visit(boot::variable_declaration *declaration) void generic_visitor::visit(boot::variable_declaration *declaration)
{ {
this->current_expression = get_inner_alias( this->current_expression = get_inner_alias(
this->info_table->lookup(declaration->identifier.identifier)->is_variable()->symbol, this->bag.lookup(declaration->identifier.identifier)->is_variable()->symbol,
this->symbols); this->symbols);
location_t declaration_location = get_location(&declaration->position()); location_t declaration_location = get_location(&declaration->position());

View File

@ -62,63 +62,62 @@ static bool elna_langhook_init(void)
return true; return true;
} }
static void elna_parse_file(const char *filename) using dependency_state = elna::boot::dependency_state<std::shared_ptr<elna::gcc::symbol_table>>;
{
std::ifstream file{ filename, std::ios::in };
if (!file) static elna::boot::dependency elna_parse_file(dependency_state& state, const char *filename)
{ {
fatal_error(UNKNOWN_LOCATION, "cannot open filename %s: %m", filename); auto module_table = std::make_shared<elna::boot::symbol_table>(state.globals);
std::ifstream entry_point{ filename, std::ios::in };
if (!entry_point)
{
fatal_error(UNKNOWN_LOCATION, "Cannot open filename %s: %m", filename);
} }
elna::boot::dependency_graph outcome = elna::boot::read_sources(file, filename);
std::shared_ptr<elna::boot::symbol_table> info_table = elna::boot::builtin_symbol_table();
std::shared_ptr<elna::gcc::symbol_table> symbol_table = elna::gcc::builtin_symbol_table();
linemap_add(line_table, LC_ENTER, 0, filename, 1); linemap_add(line_table, LC_ENTER, 0, filename, 1);
elna::boot::dependency outcome = elna::boot::read_sources(entry_point, filename);
if (outcome.has_errors()) if (outcome.has_errors())
{ {
elna::gcc::report_errors(outcome.errors()); elna::gcc::report_errors(outcome.errors());
} }
else elna::boot::symbol_bag outcome_bag = elna::boot::symbol_bag{ std::move(outcome.unresolved), module_table };
{
for (const std::unique_ptr<elna::boot::program>& module_tree : outcome.modules)
{
elna::boot::declaration_visitor declaration_visitor(filename);
declaration_visitor.visit(module_tree.get());
if (declaration_visitor.errors().empty()) for (const auto& sub_tree : outcome.tree->imports)
{ {
elna::boot::name_analysis_visitor name_analysis_visitor(filename, info_table, std::filesystem::path sub_path = "source" / elna::boot::build_path(sub_tree->segments);
std::move(declaration_visitor.unresolved));
name_analysis_visitor.visit(module_tree.get());
if (name_analysis_visitor.errors().empty()) if (state.cache.find(sub_path) == state.cache.end())
{ {
elna::gcc::rewrite_symbol_table(info_table, symbol_table); elna_parse_file(state, state.allocate_path(sub_path));
elna::gcc::generic_visitor generic_visitor{ symbol_table, info_table };
generic_visitor.visit(module_tree.get());
} }
else outcome_bag.add_import(state.cache.find(sub_path)->second);
}
elna::boot::name_analysis_visitor name_analysis_visitor(filename, outcome_bag);
outcome.tree->accept(&name_analysis_visitor);
if (name_analysis_visitor.has_errors())
{ {
elna::gcc::report_errors(name_analysis_visitor.errors()); elna::gcc::report_errors(name_analysis_visitor.errors());
} }
} state.cache.insert({ filename, outcome_bag });
else elna::gcc::rewrite_symbol_table(module_table, state.custom);
{
elna::gcc::report_errors(declaration_visitor.errors());
}
}
}
linemap_add(line_table, LC_LEAVE, 0, NULL, 0); linemap_add(line_table, LC_LEAVE, 0, NULL, 0);
return outcome;
} }
static void elna_langhook_parse_file(void) static void elna_langhook_parse_file(void)
{ {
dependency_state state{ elna::gcc::builtin_symbol_table() };
for (unsigned int i = 0; i < num_in_fnames; i++) for (unsigned int i = 0; i < num_in_fnames; i++)
{ {
elna_parse_file(in_fnames[i]); elna::boot::dependency outcome = elna_parse_file(state, in_fnames[i]);
linemap_add(line_table, LC_ENTER, 0, in_fnames[i], 1);
elna::gcc::generic_visitor generic_visitor{ state.custom, state.cache.find(in_fnames[i])->second };
outcome.tree->accept(&generic_visitor);
linemap_add(line_table, LC_LEAVE, 0, NULL, 0);
} }
} }

View File

@ -17,25 +17,48 @@ along with GCC; see the file COPYING3. If not see
#pragma once #pragma once
#include <filesystem>
#include <forward_list>
#include <fstream> #include <fstream>
#include "elna/boot/result.h" #include "elna/boot/result.h"
#include "elna/boot/ast.h" #include "elna/boot/ast.h"
namespace elna::boot namespace elna::boot
{ {
class dependency_graph class dependency : public error_container
{ {
error_list m_errors; error_list m_errors;
public: public:
std::vector<std::unique_ptr<program>> modules; std::unique_ptr<unit> tree;
std::unordered_map<std::string, std::shared_ptr<alias_type>> unresolved;
bool has_errors() const; explicit dependency(const char *path);
const error_list& errors() const;
dependency_graph();
explicit dependency_graph(error_list&& errors);
}; };
dependency_graph read_sources(std::istream& entry_point, const char *entry_path); dependency read_sources(std::istream& entry_point, const char *entry_path);
std::filesystem::path build_path(const std::vector<std::string>& segments);
/**
 * State shared by all source files handled during one compiler run.
 *
 * \tparam T Type of the additional, caller-defined state kept in \c custom.
 */
template<typename T>
struct dependency_state
{
    /// Symbol table obtained from builtin_symbol_table() at construction.
    const std::shared_ptr<symbol_table> globals;
    /// Caller-supplied state, stored as passed to the constructor.
    T custom;
    /// Symbol bags of the files seen so far, keyed by file path.
    std::unordered_map<std::filesystem::path, elna::boot::symbol_bag> cache;

    /**
     * \param custom Caller-defined state to keep alongside the cache.
     */
    explicit dependency_state(T custom)
        : globals(elna::boot::builtin_symbol_table()), custom(custom)
    {
    }

    /**
     * Stores a copy of the path and returns it as a C string that stays
     * valid for the lifetime of this object.
     *
     * Each path lives in its own list node. Nodes of a std::forward_list are
     * never relocated when further elements are added, so pointers handed
     * out earlier remain valid. Appending all paths to one std::string, as
     * done before, invalidated every previously returned pointer as soon as
     * the string had to grow.
     *
     * \param path Path to store.
     * \return Pointer to the null-terminated copy of \a path.
     */
    const char *allocate_path(const std::filesystem::path path)
    {
        this->allocated_paths.emplace_front(path.string());
        return this->allocated_paths.front().c_str();
    }

private:
    /// Owns the strings returned by allocate_path().
    std::forward_list<std::string> allocated_paths;
};
} }

View File

@ -39,7 +39,7 @@ namespace elna::boot
class driver : public error_container class driver : public error_container
{ {
public: public:
std::unique_ptr<program> tree; std::unique_ptr<unit> tree;
driver(const char *input_file); driver(const char *input_file);
}; };

View File

@ -81,6 +81,8 @@ namespace elna::boot
auto new_error = std::make_unique<T>(arguments...); auto new_error = std::make_unique<T>(arguments...);
m_errors.emplace_back(std::move(new_error)); m_errors.emplace_back(std::move(new_error));
} }
bool has_errors() const;
}; };
/** /**

View File

@ -77,8 +77,7 @@ namespace elna::boot
type current_type; type current_type;
constant_info::variant current_literal; constant_info::variant current_literal;
std::shared_ptr<symbol_table> symbols; symbol_bag bag;
std::unordered_map<std::string, std::shared_ptr<alias_type>> unresolved;
procedure_type build_procedure(procedure_type_expression& type_expression); procedure_type build_procedure(procedure_type_expression& type_expression);
std::vector<type_field> build_composite_type(const std::vector<field_declaration>& fields); std::vector<type_field> build_composite_type(const std::vector<field_declaration>& fields);
@ -87,8 +86,7 @@ namespace elna::boot
std::vector<std::string>& path); std::vector<std::string>& path);
public: public:
explicit name_analysis_visitor(const char *path, std::shared_ptr<symbol_table> symbols, name_analysis_visitor(const char *path, symbol_bag bag);
std::unordered_map<std::string, std::shared_ptr<alias_type>>&& unresolved);
void visit(named_type_expression *type_expression) override; void visit(named_type_expression *type_expression) override;
void visit(array_type_expression *type_expression) override; void visit(array_type_expression *type_expression) override;

View File

@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see
#include <string> #include <string>
#include <memory> #include <memory>
#include <vector> #include <vector>
#include <forward_list>
#include "elna/boot/result.h" #include "elna/boot/result.h"
@ -281,6 +282,7 @@ namespace elna::boot
}; };
using symbol_table = symbol_map<std::shared_ptr<info>, std::nullptr_t, nullptr>; using symbol_table = symbol_map<std::shared_ptr<info>, std::nullptr_t, nullptr>;
using forward_table = std::unordered_map<std::string, std::shared_ptr<alias_type>>;
class type_info : public info class type_info : public info
{ {
@ -299,7 +301,7 @@ namespace elna::boot
std::shared_ptr<symbol_table> symbols; std::shared_ptr<symbol_table> symbols;
procedure_info(const procedure_type symbol, const std::vector<std::string> names, procedure_info(const procedure_type symbol, const std::vector<std::string> names,
std::shared_ptr<symbol_table> parent_table = nullptr); std::shared_ptr<symbol_table> scope = nullptr);
std::shared_ptr<procedure_info> is_procedure() override; std::shared_ptr<procedure_info> is_procedure() override;
}; };
@ -325,4 +327,25 @@ namespace elna::boot
}; };
std::shared_ptr<symbol_table> builtin_symbol_table(); std::shared_ptr<symbol_table> builtin_symbol_table();
/**
 * Bundles the symbol information needed while processing one module: the
 * current scope, the tables of imported modules and the forward declarations
 * that are not resolved yet.
 */
class symbol_bag
{
// Current scope; replaced by the enter() overloads and by leave().
std::shared_ptr<symbol_table> symbols;
// Tables of imported modules; add_import() inserts at the front.
std::forward_list<std::shared_ptr<symbol_table>> imports;
public:
// Forward declarations by name, see forward_table.
forward_table unresolved;
// Creates a bag with a newly allocated symbol table as current scope.
symbol_bag();
// Creates a bag from existing forward declarations and an existing table.
symbol_bag(forward_table&& unresolved, std::shared_ptr<symbol_table> symbols);
// Looks the name up in the imports first, then in the current scope.
std::shared_ptr<info> lookup(const std::string& name);
// Adds a symbol to the current scope.
bool enter(const std::string& name, std::shared_ptr<info> entry);
// Opens a new scope built on the current one and returns its table.
std::shared_ptr<symbol_table> enter();
// Makes the given table the current scope.
void enter(std::shared_ptr<symbol_table> child);
// Switches to the table returned by scope() of the current table.
void leave();
// Adds a table to the front of the import list.
void add_import(std::shared_ptr<symbol_table> table);
// Adds the current table of another bag to the front of the import list.
void add_import(const symbol_bag& bag);
};
} }

View File

@ -36,8 +36,8 @@ namespace elna::gcc
class generic_visitor final : public boot::parser_visitor class generic_visitor final : public boot::parser_visitor
{ {
tree current_expression{ NULL_TREE }; tree current_expression{ NULL_TREE };
elna::boot::symbol_bag bag;
std::shared_ptr<symbol_table> symbols; std::shared_ptr<symbol_table> symbols;
std::shared_ptr<boot::symbol_table> info_table;
void enter_scope(); void enter_scope();
tree leave_scope(); tree leave_scope();
@ -63,7 +63,7 @@ namespace elna::gcc
bool assert_constant(location_t expression_location); bool assert_constant(location_t expression_location);
public: public:
generic_visitor(std::shared_ptr<symbol_table> symbol_table, std::shared_ptr<boot::symbol_table> info_table); generic_visitor(std::shared_ptr<symbol_table> symbol_table, elna::boot::symbol_bag bag);
void visit(boot::program *program) override; void visit(boot::program *program) override;
void visit(boot::procedure_declaration *definition) override; void visit(boot::procedure_declaration *definition) override;

View File

@ -1,12 +0,0 @@
(* Type declarations exported by the Common module. *)
DEFINITION MODULE Common;
TYPE
(* ShortString and Identifier are both arrays of 256 characters. *)
ShortString = ARRAY[1..256] OF CHAR;
Identifier = ARRAY[1..256] OF CHAR;
PIdentifier = POINTER TO Identifier;
(* A position in a text given as line and column. *)
TextLocation = RECORD
line: CARDINAL;
column: CARDINAL
END;
END Common.

View File

@ -1,3 +1,13 @@
module; module;
type
ShortString = [256]Char;
Identifier = [256]Char;
PIdentifier = ^Identifier;
TextLocation* = record
line: Word;
column: Word
end;
FILE* = record end;
end. end.

View File

@ -1,107 +0,0 @@
(* Interface of the lexer: buffer and position types, the lexer state record,
 * the token kinds, the token record and the procedures operating on a lexer. *)
DEFINITION MODULE Lexer;
FROM FIO IMPORT File;
FROM Common IMPORT Identifier, ShortString, TextLocation;
TYPE
PLexerBuffer = POINTER TO CHAR;
(* A pointer into the buffer together with the matching text location. *)
BufferPosition = RECORD
iterator: PLexerBuffer;
location: TextLocation
END;
PBufferPosition = POINTER TO BufferPosition;
(* Lexer state. NOTE(review): size and length are presumably the buffer
 * capacity and the amount of data in it; start and current presumably
 * delimit the token being read - confirm against the implementation. *)
Lexer = RECORD
input: File;
buffer: PLexerBuffer;
size: CARDINAL;
length: CARDINAL;
start: BufferPosition;
current: BufferPosition
END;
PLexer = POINTER TO Lexer;
(* All kinds of tokens the lexer distinguishes. *)
LexerKind = (
lexerKindEof,
lexerKindIdentifier,
lexerKindIf,
lexerKindThen,
lexerKindElse,
lexerKindElsif,
lexerKindWhile,
lexerKindDo,
lexerKindProc,
lexerKindBegin,
lexerKindEnd,
lexerKindXor,
lexerKindConst,
lexerKindVar,
lexerKindCase,
lexerKindOf,
lexerKindType,
lexerKindRecord,
lexerKindUnion,
lexerKindPipe,
lexerKindTo,
lexerKindBoolean,
lexerKindNull,
lexerKindAnd,
lexerKindOr,
lexerKindTilde,
lexerKindReturn,
lexerKindDefer,
lexerKindRange,
lexerKindLeftParen,
lexerKindRightParen,
lexerKindLeftSquare,
lexerKindRightSquare,
lexerKindGreaterEqual,
lexerKindLessEqual,
lexerKindGreaterThan,
lexerKindLessThan,
lexerKindNotEqual,
lexerKindEqual,
lexerKindSemicolon,
lexerKindDot,
lexerKindComma,
lexerKindPlus,
lexerKindMinus,
lexerKindAsterisk,
lexerKindDivision,
lexerKindRemainder,
lexerKindAssignment,
lexerKindColon,
lexerKindHat,
lexerKindAt,
lexerKindComment,
lexerKindInteger,
lexerKindWord,
lexerKindCharacter,
lexerKindString,
lexerKindFrom,
lexerKindPointer,
lexerKindArray,
lexerKindArrow,
lexerKindProgram,
lexerKindModule,
lexerKindImport
);
(* A token. The variant part is selected by kind; only boolean, identifier,
 * integer and string tokens have a variant field. Every token additionally
 * has a start and an end location. *)
LexerToken = RECORD
CASE kind: LexerKind OF
lexerKindBoolean: booleanKind: BOOLEAN |
lexerKindIdentifier: identifierKind: Identifier |
lexerKindInteger: integerKind: INTEGER |
lexerKindString: stringKind: ShortString
END;
start_location: TextLocation;
end_location: TextLocation
END;
PLexerToken = POINTER TO LexerToken;
(* NOTE(review): presumably prepares the lexer for reading from the given
 * input file - confirm against the implementation. *)
PROCEDURE lexer_initialize(lexer: PLexer; input: File);
(* NOTE(review): presumably releases what lexer_initialize acquired -
 * confirm against the implementation. *)
PROCEDURE lexer_destroy(lexer: PLexer);
(* Returns the last read token. *)
PROCEDURE lexer_current(lexer: PLexer): LexerToken;
(* Read and return the next token. *)
PROCEDURE lexer_lex(lexer: PLexer): LexerToken;
END Lexer.

View File

@ -1,17 +1,9 @@
module; module;
from FIO import ReadNBytes; import Common;
from SYSTEM import ADR, TSIZE;
from DynamicStrings import String, InitStringCharStar, KillString;
from StringConvert import StringToInteger;
from Storage import DEALLOCATE, ALLOCATE;
from Strings import Length;
from MemUtils import MemCopy, MemZero;
from StrCase import Lower;
const const
CHUNK_SIZE = 85536; CHUNK_SIZE := 85536;
type type
(* (*
@ -62,20 +54,109 @@ type
transitionStateDecimalSuffix, transitionStateDecimalSuffix,
transitionStateEnd transitionStateEnd
); );
TransitionAction = proc(PLexer, PLexerToken); LexerToken = record
kind: LexerKind;
value: union
booleanKind: Bool;
identifierKind: Identifier;
integerKind: Int;
stringKind: ShortString
end;
start_location: TextLocation;
end_location: TextLocation
end;
TransitionAction = proc(^Lexer, ^LexerToken);
Transition = record Transition = record
action: TransitionAction; action: TransitionAction;
next_state: TransitionState next_state: TransitionState
end; end;
TransitionClasses = [22]Transition; TransitionClasses = [22]Transition;
BufferPosition* = record
iterator: ^Char;
location: TextLocation
end;
Lexer* = record
input: ^FILE;
buffer: ^Char;
size: Word;
length: Word;
start: BufferPosition;
current: BufferPosition
end;
LexerKind* = (
lexerKindEof,
lexerKindIdentifier,
lexerKindIf,
lexerKindThen,
lexerKindElse,
lexerKindElsif,
lexerKindWhile,
lexerKindDo,
lexerKindProc,
lexerKindBegin,
lexerKindEnd,
lexerKindXor,
lexerKindConst,
lexerKindVar,
lexerKindCase,
lexerKindOf,
lexerKindType,
lexerKindRecord,
lexerKindUnion,
lexerKindPipe,
lexerKindTo,
lexerKindBoolean,
lexerKindNull,
lexerKindAnd,
lexerKindOr,
lexerKindTilde,
lexerKindReturn,
lexerKindDefer,
lexerKindRange,
lexerKindLeftParen,
lexerKindRightParen,
lexerKindLeftSquare,
lexerKindRightSquare,
lexerKindGreaterEqual,
lexerKindLessEqual,
lexerKindGreaterThan,
lexerKindLessThan,
lexerKindNotEqual,
lexerKindEqual,
lexerKindSemicolon,
lexerKindDot,
lexerKindComma,
lexerKindPlus,
lexerKindMinus,
lexerKindAsterisk,
lexerKindDivision,
lexerKindRemainder,
lexerKindAssignment,
lexerKindColon,
lexerKindHat,
lexerKindAt,
lexerKindComment,
lexerKindInteger,
lexerKindWord,
lexerKindCharacter,
lexerKindString,
lexerKindFrom,
lexerKindPointer,
lexerKindArray,
lexerKindArrow,
lexerKindProgram,
lexerKindModule,
lexerKindImport
);
var var
classification: [128]TransitionClass; classification: [128]TransitionClass;
transitions: [16]TransitionClasses; transitions: [16]TransitionClasses;
proc initialize_classification(); proc initialize_classification();
var var
i: CARDINAL; i: Word;
begin begin
classification[1] := transitionClassEof; (* NUL *) classification[1] := transitionClassEof; (* NUL *)
classification[2] := transitionClassInvalid; (* SOH *) classification[2] := transitionClassInvalid; (* SOH *)
@ -213,12 +294,12 @@ begin
end end
end; end;
proc compare_keyword(keyword: ARRAY OF CHAR, token_start: BufferPosition, token_end: PLexerBuffer) -> BOOLEAN; proc compare_keyword(keyword: String, token_start: BufferPosition, token_end: ^Char) -> Bool;
var var
result: BOOLEAN; result: Bool;
index: CARDINAL; index: Word;
keyword_length: CARDINAL; keyword_length: Word;
continue: BOOLEAN; continue: Bool;
begin begin
index := 0; index := 0;
result := true; result := true;
@ -237,25 +318,25 @@ begin
end; end;
(* Reached the end of file. *) (* Reached the end of file. *)
proc transition_action_eof(lexer: PLexer, token: PLexerToken); proc transition_action_eof(lexer: ^Lexer, token: ^LexerToken);
begin begin
token^.kind := lexerKindEof token^.kind := lexerKindEof
end; end;
proc increment(position: PBufferPosition); proc increment(position: ^BufferPosition);
begin begin
INC(position^.iterator) INC(position^.iterator)
end; end;
(* Add the character to the token currently read and advance to the next character. *) (* Add the character to the token currently read and advance to the next character. *)
proc transition_action_accumulate(lexer: PLexer, token: PLexerToken); proc transition_action_accumulate(lexer: ^Lexer, token: ^LexerToken);
begin begin
increment(ADR(lexer^.current)) increment(ADR(lexer^.current))
end; end;
(* The current character is not a part of the token. Finish the token already (* The current character is not a part of the token. Finish the token already
* read. Don't advance to the next character. *) * read. Don't advance to the next character. *)
proc transition_action_finalize(lexer: PLexer, token: PLexerToken); proc transition_action_finalize(lexer: ^Lexer, token: ^LexerToken);
begin begin
if lexer^.start.iterator^ = ':' then if lexer^.start.iterator^ = ':' then
token^.kind := lexerKindColon token^.kind := lexerKindColon
@ -278,7 +359,7 @@ begin
end; end;
(* An action for tokens containing multiple characters. *) (* An action for tokens containing multiple characters. *)
proc transition_action_composite(lexer: PLexer, token: PLexerToken); proc transition_action_composite(lexer: ^Lexer, token: ^LexerToken);
begin begin
if lexer^.start.iterator^ = '<' then if lexer^.start.iterator^ = '<' then
if lexer^.current.iterator^ = '>' then if lexer^.current.iterator^ = '>' then
@ -304,7 +385,7 @@ begin
end; end;
(* Skip a space. *) (* Skip a space. *)
proc transition_action_skip(lexer: PLexer, token: PLexerToken); proc transition_action_skip(lexer: ^Lexer, token: ^LexerToken);
begin begin
increment(ADR(lexer^.start)); increment(ADR(lexer^.start));
@ -316,9 +397,9 @@ begin
end; end;
(* Delimited string action. *) (* Delimited string action. *)
proc transition_action_delimited(lexer: PLexer, token: PLexerToken); proc transition_action_delimited(lexer: ^Lexer, token: ^LexerToken);
var var
text_length: CARDINAL; text_length: Word;
begin begin
if lexer^.start.iterator^ = '(' then if lexer^.start.iterator^ = '(' then
token^.kind := lexerKindComment token^.kind := lexerKindComment
@ -347,7 +428,7 @@ begin
end; end;
(* Finalize keyword or identifier. *) (* Finalize keyword or identifier. *)
proc transition_action_key_id(lexer: PLexer, token: PLexerToken); proc transition_action_key_id(lexer: ^Lexer, token: ^LexerToken);
begin begin
token^.kind := lexerKindIdentifier; token^.kind := lexerKindIdentifier;
@ -355,95 +436,95 @@ begin
DEC(token^.identifierKind[1], lexer^.start.iterator); DEC(token^.identifierKind[1], lexer^.start.iterator);
MemCopy(lexer^.start.iterator, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2])); MemCopy(lexer^.start.iterator, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2]));
if compare_keyword('program', lexer^.start, lexer^.current.iterator) then if compare_keyword("program", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindProgram token^.kind := lexerKindProgram
end; end;
if compare_keyword('import', lexer^.start, lexer^.current.iterator) then if compare_keyword("import", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindImport token^.kind := lexerKindImport
end; end;
if compare_keyword('const', lexer^.start, lexer^.current.iterator) then if compare_keyword("const", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindConst token^.kind := lexerKindConst
end; end;
if compare_keyword('var', lexer^.start, lexer^.current.iterator) then if compare_keyword("var", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindVar token^.kind := lexerKindVar
end; end;
if compare_keyword('if', lexer^.start, lexer^.current.iterator) then if compare_keyword("if", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindIf token^.kind := lexerKindIf
end; end;
if compare_keyword('then', lexer^.start, lexer^.current.iterator) then if compare_keyword("then", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindThen token^.kind := lexerKindThen
end; end;
if compare_keyword('elsif', lexer^.start, lexer^.current.iterator) then if compare_keyword("elsif", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindElsif token^.kind := lexerKindElsif
end; end;
if compare_keyword('else', lexer^.start, lexer^.current.iterator) then if compare_keyword("else", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindElse token^.kind := lexerKindElse
end; end;
if compare_keyword('while', lexer^.start, lexer^.current.iterator) then if compare_keyword("while", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindWhile token^.kind := lexerKindWhile
end; end;
if compare_keyword('do', lexer^.start, lexer^.current.iterator) then if compare_keyword("do", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindDo token^.kind := lexerKindDo
end; end;
if compare_keyword('proc', lexer^.start, lexer^.current.iterator) then if compare_keyword("proc", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindProc token^.kind := lexerKindProc
end; end;
if compare_keyword('begin', lexer^.start, lexer^.current.iterator) then if compare_keyword("begin", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindBegin token^.kind := lexerKindBegin
end; end;
if compare_keyword('end', lexer^.start, lexer^.current.iterator) then if compare_keyword("end", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindEnd token^.kind := lexerKindEnd
end; end;
if compare_keyword('type', lexer^.start, lexer^.current.iterator) then if compare_keyword("type", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindType token^.kind := lexerKindType
end; end;
if compare_keyword('record', lexer^.start, lexer^.current.iterator) then if compare_keyword("record", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindRecord token^.kind := lexerKindRecord
end; end;
if compare_keyword('union', lexer^.start, lexer^.current.iterator) then if compare_keyword("union", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindUnion token^.kind := lexerKindUnion
end; end;
if compare_keyword('NIL', lexer^.start, lexer^.current.iterator) then if compare_keyword("NIL", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindNull token^.kind := lexerKindNull
end; end;
if compare_keyword('or', lexer^.start, lexer^.current.iterator) then if compare_keyword("or", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindOr token^.kind := lexerKindOr
end; end;
if compare_keyword('return', lexer^.start, lexer^.current.iterator) then if compare_keyword("return", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindReturn token^.kind := lexerKindReturn
end; end;
if compare_keyword('defer', lexer^.start, lexer^.current.iterator) then if compare_keyword("defer", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindDefer token^.kind := lexerKindDefer
end; end;
if compare_keyword('TO', lexer^.start, lexer^.current.iterator) then if compare_keyword("TO", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindTo token^.kind := lexerKindTo
end; end;
if compare_keyword('CASE', lexer^.start, lexer^.current.iterator) then if compare_keyword("CASE", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindCase token^.kind := lexerKindCase
end; end;
if compare_keyword('OF', lexer^.start, lexer^.current.iterator) then if compare_keyword("OF", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindOf token^.kind := lexerKindOf
end; end;
if compare_keyword('FROM', lexer^.start, lexer^.current.iterator) then if compare_keyword("FROM", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindFrom token^.kind := lexerKindFrom
end; end;
if compare_keyword('module', lexer^.start, lexer^.current.iterator) then if compare_keyword("module", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindModule token^.kind := lexerKindModule
end; end;
if compare_keyword('xor', lexer^.start, lexer^.current.iterator) then if compare_keyword("xor", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindXor token^.kind := lexerKindXor
end; end;
if compare_keyword('POINTER', lexer^.start, lexer^.current.iterator) then if compare_keyword("POINTER", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindPointer token^.kind := lexerKindPointer
end; end;
if compare_keyword('ARRAY', lexer^.start, lexer^.current.iterator) then if compare_keyword("ARRAY", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindArray token^.kind := lexerKindArray
end; end;
if compare_keyword('TRUE', lexer^.start, lexer^.current.iterator) then if compare_keyword("TRUE", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindBoolean; token^.kind := lexerKindBoolean;
token^.booleanKind := true token^.booleanKind := true
end; end;
if compare_keyword('FALSE', lexer^.start, lexer^.current.iterator) then if compare_keyword("FALSE", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindBoolean; token^.kind := lexerKindBoolean;
token^.booleanKind := false token^.booleanKind := false
end end
@ -451,7 +532,7 @@ end;
(* Action for tokens containing only one character. The character cannot be (* Action for tokens containing only one character. The character cannot be
* followed by other characters forming a composite token. *) * followed by other characters forming a composite token. *)
proc transition_action_single(lexer: PLexer, token: PLexerToken); proc transition_action_single(lexer: ^Lexer, token: ^LexerToken);
begin begin
if lexer^.current.iterator^ = '&' then if lexer^.current.iterator^ = '&' then
token^.kind := lexerKindAnd token^.kind := lexerKindAnd
@ -502,11 +583,11 @@ begin
end; end;
(* Handle an integer literal. *) (* Handle an integer literal. *)
proc transition_action_integer(lexer: PLexer, token: PLexerToken); proc transition_action_integer(lexer: ^Lexer, token: ^LexerToken);
var var
buffer: String; buffer: String;
integer_length: CARDINAL; integer_length: Word;
found: BOOLEAN; found: Bool;
begin begin
token^.kind := lexerKindInteger; token^.kind := lexerKindInteger;
@ -805,7 +886,7 @@ begin
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].next_state := transitionStateEnd transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].next_state := transitionStateEnd
end; end;
proc lexer_initialize(lexer: PLexer, input: File); proc lexer_make*(lexer: ^Lexer, input: ^FILE);
begin begin
lexer^.input := input; lexer^.input := input;
lexer^.length := 0; lexer^.length := 0;
@ -815,19 +896,20 @@ begin
lexer^.size := CHUNK_SIZE lexer^.size := CHUNK_SIZE
end; end;
proc lexer_current(lexer: PLexer) -> LexerToken; (* Returns the last read token. *)
proc lexer_current*(lexer: ^Lexer) -> LexerToken;
var var
current_class: TransitionClass; current_class: TransitionClass;
current_state: TransitionState; current_state: TransitionState;
current_transition: Transition; current_transition: Transition;
result: LexerToken; result: LexerToken;
index1: CARDINAL; index1: Word;
index2: CARDINAL; index2: Word;
begin begin
lexer^.current := lexer^.start; lexer^.current := lexer^.start;
current_state := transitionStateStart; current_state := transitionStateStart;
while current_state <> transitionStateEnd DO while current_state <> transitionStateEnd do
index1 := ORD(lexer^.current.iterator^); index1 := ORD(lexer^.current.iterator^);
INC(index1); INC(index1);
current_class := classification[index1]; current_class := classification[index1];
@ -849,7 +931,8 @@ begin
return result return result
end; end;
proc lexer_lex(lexer: PLexer) -> LexerToken; (* Read and return the next token. *)
proc lexer_lex*(lexer: ^Lexer) -> LexerToken;
var var
result: LexerToken; result: LexerToken;
begin begin
@ -865,12 +948,15 @@ begin
return result return result
end; end;
(* Releases the lexer's buffer: passes lexer^.buffer together with
 * lexer^.size to DEALLOCATE. No other field of the lexer is touched here.
 * NOTE(review): lexer^.input is not closed by this procedure; presumably the
 * caller owns it - confirm. *)
proc lexer_destroy(lexer: PLexer); proc lexer_destroy*(lexer: ^Lexer);
begin begin
DEALLOCATE(lexer^.buffer, lexer^.size) DEALLOCATE(lexer^.buffer, lexer^.size)
end; end;
(* Calls initialize_classification and then initialize_transitions.
 * NOTE(review): presumably these fill the classification and transitions
 * tables that lexer_current indexes - confirm against their definitions. *)
proc lexer_initialize();
begin begin
initialize_classification(); initialize_classification();
initialize_transitions() initialize_transitions()
end;
end. end.

View File

@ -3,7 +3,7 @@
obtain one at https://mozilla.org/MPL/2.0/. *) obtain one at https://mozilla.org/MPL/2.0/. *)
program; program;
import dummy; import Common, Lexer;
const const
SEEK_SET* := 0; SEEK_SET* := 0;
@ -80,13 +80,9 @@ type
_module, _module,
_import _import
); );
Position* = record
line: Word;
column: Word
end;
(* A pair of positions named first and last.
 * NOTE(review): presumably the start and end of a range in the source
 * text - confirm against the code that fills it. *)
Location* = record Location* = record
first: Position; first: TextLocation;
last: Position last: TextLocation
end; end;
SourceFile* = record SourceFile* = record
buffer: [1024]Char; buffer: [1024]Char;
@ -94,14 +90,13 @@ type
size: Word; size: Word;
index: Word index: Word
end; end;
FILE* = record end;
(* Record with an untyped data pointer and two Word counters.
 * NOTE(review): presumably size is the amount of data in use and capacity
 * the amount allocated - confirm against string_buffer_new and the
 * procedures that append to the buffer. *)
StringBuffer* = record StringBuffer* = record
data: Pointer; data: Pointer;
size: Word; size: Word;
capacity: Word capacity: Word
end; end;
SourceCode = record SourceCode = record
position: Position; position: TextLocation;
input: Pointer; input: Pointer;
empty: proc(Pointer) -> Bool; empty: proc(Pointer) -> Bool;
@ -123,7 +118,7 @@ type
lex: Bool; lex: Bool;
parse: Bool parse: Bool
end; end;
Lexer* = record Tokenizer* = record
length: Word; length: Word;
data: ^Token data: ^Token
end; end;
@ -592,7 +587,7 @@ begin
return current_token return current_token
end; end;
proc lexer_add_token(lexer: ^Lexer, token: Token); proc lexer_add_token(lexer: ^Tokenizer, token: Token);
var var
new_length: Word; new_length: Word;
begin begin
@ -778,13 +773,13 @@ begin
end; end;
(* Split the source text into tokens. *) (* Split the source text into tokens. *)
proc lexer_text(source_code: SourceCode) -> Lexer; proc lexer_text(source_code: SourceCode) -> Tokenizer;
var var
current_token: Token; current_token: Token;
token_buffer: StringBuffer; token_buffer: StringBuffer;
lexer: Lexer; lexer: Tokenizer;
begin begin
lexer := Lexer(0u, nil); lexer := Tokenizer(0u, nil);
token_buffer := string_buffer_new(); token_buffer := string_buffer_new();
lexer_spaces(@source_code); lexer_spaces(@source_code);
@ -1024,7 +1019,7 @@ end;
proc compile_in_stages(command_line: ^CommandLine, source_code: SourceCode) -> Int; proc compile_in_stages(command_line: ^CommandLine, source_code: SourceCode) -> Int;
var var
return_code: Int; return_code: Int;
lexer: Lexer; lexer: Tokenizer;
begin begin
return_code := 0; return_code := 0;
@ -1068,7 +1063,7 @@ begin
fclose(source_file^.handle) fclose(source_file^.handle)
end; end;
source_code.position := Position(1u, 1u); source_code.position := TextLocation(1u, 1u);
source_code.input := cast(source_file: Pointer); source_code.input := cast(source_file: Pointer);
source_code.empty := source_file_empty; source_code.empty := source_file_empty;
source_code.head := source_file_head; source_code.head := source_file_head;