Support one hardcoded import

This commit is contained in:
2025-07-10 00:43:17 +02:00
parent 181b19eefe
commit 34abb6b4f5
18 changed files with 396 additions and 312 deletions

View File

@ -17,46 +17,67 @@ along with GCC; see the file COPYING3. If not see
#include "elna/boot/dependency.h"
#include <fstream>
#include <sstream>
#include <string.h>
#include "elna/boot/driver.h"
#include "elna/boot/semantic.h"
#include "parser.hh"
namespace elna::boot
{
dependency_graph::dependency_graph()
dependency::dependency(const char *path)
: error_container(path)
{
}
dependency_graph::dependency_graph(error_list&& errors)
: m_errors(std::move(errors))
{
}
bool dependency_graph::has_errors() const
{
return !errors().empty();
}
const error_list& dependency_graph::errors() const
{
return m_errors;
}
dependency_graph read_sources(std::istream& entry_point, const char *entry_path)
dependency read_sources(std::istream& entry_point, const char *entry_path)
{
driver parse_driver{ entry_path };
lexer tokenizer(entry_point);
yy::parser parser(tokenizer, parse_driver);
dependency outcome{ entry_path };
if (parser())
{
return dependency_graph(std::move(parse_driver.errors()));
std::swap(outcome.errors(), parse_driver.errors());
return outcome;
}
else
{
dependency_graph outcome;
outcome.modules.emplace_back(std::move(parse_driver.tree));
std::swap(outcome.tree, parse_driver.tree);
}
declaration_visitor declaration_visitor(entry_path);
outcome.tree->accept(&declaration_visitor);
if (!declaration_visitor.errors().empty())
{
std::swap(outcome.errors(), parse_driver.errors());
}
outcome.unresolved = declaration_visitor.unresolved;
return outcome;
}
/**
 * Joins module name segments into a source file path.
 *
 * An empty segment list yields an empty path. Otherwise the segments are
 * appended one after another as path components and the extension of the
 * resulting path is replaced with ".elna".
 *
 * \param segments Components of the module name, outermost first.
 * \return The joined path, or an empty path if there are no segments.
 */
std::filesystem::path build_path(const std::vector<std::string>& segments)
{
    if (segments.empty())
    {
        return std::filesystem::path();
    }
    std::filesystem::path joined(segments.front());

    for (std::size_t index = 1; index < segments.size(); ++index)
    {
        joined /= segments[index];
    }
    joined.replace_extension(".elna");

    return joined;
}
}

View File

@ -182,7 +182,7 @@ program:
}
| "module" ";" import_part constant_part type_part variable_part procedure_part "end" "."
{
auto tree = new boot::program(boot::make_position(@1));
auto tree = new boot::unit(boot::make_position(@1));
std::swap(tree->imports, $3);
std::swap(tree->constants, $4);

View File

@ -43,4 +43,9 @@ namespace elna::boot
{
return m_errors;
}
/**
 * \return Whether at least one error is stored in this container.
 */
bool error_container::has_errors() const
{
    const bool nothing_collected = m_errors.empty();

    return !nothing_collected;
}
}

View File

@ -74,9 +74,8 @@ namespace elna::boot
return message;
}
name_analysis_visitor::name_analysis_visitor(const char *path, std::shared_ptr<symbol_table> symbols,
std::unordered_map<std::string, std::shared_ptr<alias_type>>&& unresolved)
: error_container(path), symbols(symbols), unresolved(std::move(unresolved))
name_analysis_visitor::name_analysis_visitor(const char *path, symbol_bag bag)
: error_container(path), bag(bag)
{
}
@ -120,20 +119,20 @@ namespace elna::boot
void name_analysis_visitor::visit(type_declaration *definition)
{
definition->body().accept(this);
auto unresolved_declaration = this->unresolved.at(definition->identifier.identifier);
auto unresolved_declaration = this->bag.unresolved.at(definition->identifier.identifier);
unresolved_declaration->reference = this->current_type;
}
void name_analysis_visitor::visit(named_type_expression *type_expression)
{
auto unresolved_alias = this->unresolved.find(type_expression->name);
auto unresolved_alias = this->bag.unresolved.find(type_expression->name);
if (unresolved_alias != this->unresolved.end())
if (unresolved_alias != this->bag.unresolved.end())
{
this->current_type = type(unresolved_alias->second);
}
else if (auto from_symbol_table = this->symbols->lookup(type_expression->name))
else if (auto from_symbol_table = this->bag.lookup(type_expression->name))
{
this->current_type = from_symbol_table->is_type()->symbol;
}
@ -214,28 +213,24 @@ namespace elna::boot
{
declaration->variable_type().accept(this);
this->symbols->enter(declaration->identifier.identifier,
std::make_shared<variable_info>(this->current_type));
this->bag.enter(declaration->identifier.identifier, std::make_shared<variable_info>(this->current_type));
}
void name_analysis_visitor::visit(constant_declaration *definition)
{
definition->body().accept(this);
this->symbols->enter(definition->identifier.identifier,
std::make_shared<constant_info>(this->current_literal));
this->bag.enter(definition->identifier.identifier, std::make_shared<constant_info>(this->current_literal));
}
void name_analysis_visitor::visit(procedure_declaration *definition)
{
std::shared_ptr<procedure_info> info;
auto heading = build_procedure(definition->heading());
if (definition->body.has_value())
{
info = std::make_shared<procedure_info>(build_procedure(definition->heading()),
definition->parameter_names, this->symbols);
this->symbols = info->symbols;
info = std::make_shared<procedure_info>(heading, definition->parameter_names, this->bag.enter());
for (constant_declaration *const constant : definition->body.value().constants())
{
@ -249,14 +244,13 @@ namespace elna::boot
{
statement->accept(this);
}
this->symbols = this->symbols->scope();
this->bag.leave();
}
else
{
info = std::make_shared<procedure_info>(build_procedure(definition->heading()),
definition->parameter_names);
info = std::make_shared<procedure_info>(heading, definition->parameter_names);
}
this->symbols->enter(definition->identifier.identifier, info);
this->bag.enter(definition->identifier.identifier, info);
}
void name_analysis_visitor::visit(assign_statement *statement)
@ -379,14 +373,14 @@ namespace elna::boot
{
type->accept(this);
}
for (auto& unresolved : this->unresolved)
for (auto& unresolved : this->bag.unresolved)
{
std::vector<std::string> path;
if (check_unresolved_symbol(unresolved.second, path))
{
auto info = std::make_shared<type_info>(type_info(type(unresolved.second)));
this->symbols->enter(std::move(unresolved.first), info);
this->bag.enter(unresolved.first, info);
}
else
{

View File

@ -313,13 +313,9 @@ namespace elna::boot
}
procedure_info::procedure_info(const procedure_type symbol, const std::vector<std::string> names,
std::shared_ptr<symbol_table> parent_table)
: symbol(symbol), names(names)
std::shared_ptr<symbol_table> scope)
: symbol(symbol), names(names), symbols(scope)
{
if (parent_table != nullptr)
{
this->symbols = std::make_shared<symbol_table>(parent_table);
}
}
std::shared_ptr<procedure_info> procedure_info::is_procedure()
@ -361,4 +357,57 @@ namespace elna::boot
return result;
}
/**
 * Creates a bag whose current scope is a newly allocated, default
 * constructed symbol table. No imports and no unresolved aliases are set.
 */
symbol_bag::symbol_bag()
    : symbols(std::make_shared<symbol_table>())
{
}
/**
 * Creates a bag on top of an existing symbol table.
 *
 * \param unresolved Forward declared type aliases; the bag takes the table
 *                   over, leaving the argument in a moved-from state.
 * \param symbols Table that becomes the current scope of the bag.
 */
symbol_bag::symbol_bag(forward_table&& unresolved, std::shared_ptr<symbol_table> symbols)
    // A named rvalue reference is an lvalue: without std::move the whole
    // table would be copied although the caller gave it up.
    : symbols(std::move(symbols)), unresolved(std::move(unresolved))
{
}
/**
 * Looks a name up in the imported tables and then in the current scope.
 *
 * The imports are consulted first, in list order; the first table that
 * yields a result wins. Only if no import knows the name is the lookup
 * delegated to the current symbol table.
 *
 * \param name Symbol name.
 * \return The result of the first successful import lookup, otherwise
 *         whatever the current symbol table returns for the name.
 */
std::shared_ptr<info> symbol_bag::lookup(const std::string& name)
{
    for (const auto& imported : this->imports)
    {
        auto found = imported->lookup(name);

        if (!found)
        {
            continue;
        }
        return found;
    }
    return this->symbols->lookup(name);
}
/**
 * Registers a symbol in the current scope by forwarding to the current
 * symbol table.
 *
 * \param name Symbol name.
 * \param entry Symbol information.
 * \return The result reported by the underlying table.
 */
bool symbol_bag::enter(const std::string& name, std::shared_ptr<info> entry)
{
    const bool outcome = this->symbols->enter(name, entry);

    return outcome;
}
/**
 * Opens a nested scope: a new symbol table is constructed from the current
 * one and becomes the current scope.
 *
 * \return The newly created table.
 */
std::shared_ptr<symbol_table> symbol_bag::enter()
{
    auto nested = std::make_shared<symbol_table>(this->symbols);

    this->symbols = nested;
    return nested;
}
void symbol_bag::enter(std::shared_ptr<symbol_table> child)
{
this->symbols = child;
}
/**
 * Leaves the current scope: the table returned by scope() of the current
 * table becomes the current one.
 */
void symbol_bag::leave()
{
    // NOTE(review): presumably scope() yields nothing for an outermost table,
    // which would leave the bag without a current table - confirm.
    auto enclosing = this->symbols->scope();

    this->symbols = enclosing;
}
/**
 * Adds a table to the imports searched by lookup().
 *
 * The table is put at the front of the list, so the import added last is
 * consulted first.
 *
 * \param table Table with the imported symbols.
 */
void symbol_bag::add_import(std::shared_ptr<symbol_table> table)
{
    this->imports.emplace_front(table);
}
void symbol_bag::add_import(const symbol_bag& bag)
{
add_import(bag.symbols);
}
}

View File

@ -29,16 +29,14 @@ along with GCC; see the file COPYING3. If not see
#include "stringpool.h"
#include "diagnostic.h"
#include "realmpfr.h"
#include "stor-layout.h"
#include "varasm.h"
#include "fold-const.h"
#include "langhooks.h"
namespace elna::gcc
{
generic_visitor::generic_visitor(std::shared_ptr<symbol_table> symbol_table,
std::shared_ptr<boot::symbol_table> info_table)
: symbols(symbol_table), info_table(info_table)
generic_visitor::generic_visitor(std::shared_ptr<symbol_table> symbol_table, elna::boot::symbol_bag bag)
: bag(bag), symbols(symbol_table)
{
}
@ -321,7 +319,7 @@ namespace elna::gcc
DECL_STRUCT_FUNCTION(fndecl)->language = ggc_cleared_alloc<language_function>();
enter_scope();
this->info_table = this->info_table->lookup(definition->identifier.identifier)->is_procedure()->symbols;
this->bag.enter(this->bag.lookup(definition->identifier.identifier)->is_procedure()->symbols);
tree argument_chain = DECL_ARGUMENTS(fndecl);
for (; argument_chain != NULL_TREE; argument_chain = TREE_CHAIN(argument_chain))
@ -339,7 +337,7 @@ namespace elna::gcc
visit_statements(definition->body.value().body());
tree mapping = leave_scope();
this->info_table = this->info_table->scope();
this->bag.leave();
BLOCK_SUPERCONTEXT(BIND_EXPR_BLOCK(mapping)) = fndecl;
DECL_INITIAL(fndecl) = BIND_EXPR_BLOCK(mapping);
@ -746,7 +744,7 @@ namespace elna::gcc
void generic_visitor::visit(boot::variable_declaration *declaration)
{
this->current_expression = get_inner_alias(
this->info_table->lookup(declaration->identifier.identifier)->is_variable()->symbol,
this->bag.lookup(declaration->identifier.identifier)->is_variable()->symbol,
this->symbols);
location_t declaration_location = get_location(&declaration->position());

View File

@ -62,63 +62,62 @@ static bool elna_langhook_init(void)
return true;
}
// Reads one source file, runs name analysis on it and stores the resulting
// symbol bag in state.cache under the file name. Files named by the import
// list are processed first through a recursive call, unless the cache
// already contains them. The parsed dependency is returned to the caller.
static void elna_parse_file(const char *filename)
using dependency_state = elna::boot::dependency_state<std::shared_ptr<elna::gcc::symbol_table>>;
static elna::boot::dependency elna_parse_file(dependency_state& state, const char *filename)
{
std::ifstream file{ filename, std::ios::in };
auto module_table = std::make_shared<elna::boot::symbol_table>(state.globals);
std::ifstream entry_point{ filename, std::ios::in };
if (!file)
if (!entry_point)
{
fatal_error(UNKNOWN_LOCATION, "cannot open filename %s: %m", filename);
fatal_error(UNKNOWN_LOCATION, "Cannot open filename %s: %m", filename);
}
elna::boot::dependency_graph outcome = elna::boot::read_sources(file, filename);
std::shared_ptr<elna::boot::symbol_table> info_table = elna::boot::builtin_symbol_table();
std::shared_ptr<elna::gcc::symbol_table> symbol_table = elna::gcc::builtin_symbol_table();
linemap_add(line_table, LC_ENTER, 0, filename, 1);
elna::boot::dependency outcome = elna::boot::read_sources(entry_point, filename);
if (outcome.has_errors())
{
elna::gcc::report_errors(outcome.errors());
}
else
{
for (const std::unique_ptr<elna::boot::program>& module_tree : outcome.modules)
{
elna::boot::declaration_visitor declaration_visitor(filename);
declaration_visitor.visit(module_tree.get());
elna::boot::symbol_bag outcome_bag = elna::boot::symbol_bag{ std::move(outcome.unresolved), module_table };
if (declaration_visitor.errors().empty())
// NOTE(review): read_sources returns without setting outcome.tree when
// parsing fails. Confirm this loop cannot be reached after has_errors()
// was true above, otherwise outcome.tree is dereferenced while null.
for (const auto& sub_tree : outcome.tree->imports)
{
elna::boot::name_analysis_visitor name_analysis_visitor(filename, info_table,
std::move(declaration_visitor.unresolved));
name_analysis_visitor.visit(module_tree.get());
// Imported units are looked for below the hardcoded "source" directory.
std::filesystem::path sub_path = "source" / elna::boot::build_path(sub_tree->segments);
if (name_analysis_visitor.errors().empty())
// NOTE(review): a unit is added to the cache only after all of its imports
// were processed (see the insert below), so two units importing each
// other look like they would recurse without end - confirm.
if (state.cache.find(sub_path) == state.cache.end())
{
elna::gcc::rewrite_symbol_table(info_table, symbol_table);
elna::gcc::generic_visitor generic_visitor{ symbol_table, info_table };
generic_visitor.visit(module_tree.get());
elna_parse_file(state, state.allocate_path(sub_path));
}
else
outcome_bag.add_import(state.cache.find(sub_path)->second);
}
elna::boot::name_analysis_visitor name_analysis_visitor(filename, outcome_bag);
outcome.tree->accept(&name_analysis_visitor);
if (name_analysis_visitor.has_errors())
{
elna::gcc::report_errors(name_analysis_visitor.errors());
}
}
else
{
elna::gcc::report_errors(declaration_visitor.errors());
}
}
}
// The bag is cached under the name this file was opened with; imports are
// looked up by the path built from their segments.
state.cache.insert({ filename, outcome_bag });
elna::gcc::rewrite_symbol_table(module_table, state.custom);
linemap_add(line_table, LC_LEAVE, 0, NULL, 0);
return outcome;
}
// Language hook: processes every input file with elna_parse_file and then
// runs the generic_visitor over the returned tree, using the symbol bag that
// elna_parse_file stored in the cache for that file.
static void elna_langhook_parse_file(void)
{
// One state object is shared by all input files, so the cache filled for one
// file is visible while processing the next one.
dependency_state state{ elna::gcc::builtin_symbol_table() };
for (unsigned int i = 0; i < num_in_fnames; i++)
{
elna_parse_file(in_fnames[i]);
elna::boot::dependency outcome = elna_parse_file(state, in_fnames[i]);
linemap_add(line_table, LC_ENTER, 0, in_fnames[i], 1);
elna::gcc::generic_visitor generic_visitor{ state.custom, state.cache.find(in_fnames[i])->second };
// NOTE(review): read_sources returns without a tree when parsing fails and
// nothing here checks outcome.has_errors(), so this looks like a null
// dereference for a file with syntax errors - confirm. The visitor also
// runs when name analysis reported errors.
outcome.tree->accept(&generic_visitor);
linemap_add(line_table, LC_LEAVE, 0, NULL, 0);
}
}

View File

@ -17,25 +17,48 @@ along with GCC; see the file COPYING3. If not see
#pragma once
#include <filesystem>
#include <fstream>
#include "elna/boot/result.h"
#include "elna/boot/ast.h"
namespace elna::boot
{
class dependency_graph
class dependency : public error_container
{
error_list m_errors;
public:
std::vector<std::unique_ptr<program>> modules;
std::unique_ptr<unit> tree;
std::unordered_map<std::string, std::shared_ptr<alias_type>> unresolved;
bool has_errors() const;
const error_list& errors() const;
dependency_graph();
explicit dependency_graph(error_list&& errors);
explicit dependency(const char *path);
};
dependency_graph read_sources(std::istream& entry_point, const char *entry_path);
dependency read_sources(std::istream& entry_point, const char *entry_path);
std::filesystem::path build_path(const std::vector<std::string>& segments);
/**
 * State shared while a unit and the units it imports are processed.
 *
 * \tparam T Type of the additional data the user of the state carries along.
 */
template<typename T>
struct dependency_state
{
    /// Table obtained from builtin_symbol_table() when the state is created.
    const std::shared_ptr<symbol_table> globals;
    /// User supplied data, stored as given to the constructor.
    T custom;
    /// Symbol bags of the units processed so far, keyed by their path.
    std::unordered_map<std::filesystem::path, elna::boot::symbol_bag> cache;

    /**
     * \param custom User supplied data to keep alongside the tables.
     */
    explicit dependency_state(T custom)
        : globals(elna::boot::builtin_symbol_table()), custom(custom)
    {
    }

    /**
     * Stores a copy of the path as a null-terminated string owned by the state.
     *
     * Each path is kept in its own heap allocated string that is never
     * modified afterwards. A single growing buffer would reallocate on a
     * later call and leave the pointers returned earlier dangling.
     *
     * \param path Path to keep.
     * \return Pointer to the stored copy; it stays valid for as long as the
     *         state object exists.
     */
    const char *allocate_path(const std::filesystem::path path)
    {
        this->allocated_paths.push_back(std::make_unique<std::string>(path.string()));

        return this->allocated_paths.back()->c_str();
    }

private:
    // The strings are owned through pointers so that growing the vector
    // moves only the pointers and never the character data.
    std::vector<std::unique_ptr<std::string>> allocated_paths;
};
}

View File

@ -39,7 +39,7 @@ namespace elna::boot
class driver : public error_container
{
public:
std::unique_ptr<program> tree;
std::unique_ptr<unit> tree;
driver(const char *input_file);
};

View File

@ -81,6 +81,8 @@ namespace elna::boot
auto new_error = std::make_unique<T>(arguments...);
m_errors.emplace_back(std::move(new_error));
}
bool has_errors() const;
};
/**

View File

@ -77,8 +77,7 @@ namespace elna::boot
type current_type;
constant_info::variant current_literal;
std::shared_ptr<symbol_table> symbols;
std::unordered_map<std::string, std::shared_ptr<alias_type>> unresolved;
symbol_bag bag;
procedure_type build_procedure(procedure_type_expression& type_expression);
std::vector<type_field> build_composite_type(const std::vector<field_declaration>& fields);
@ -87,8 +86,7 @@ namespace elna::boot
std::vector<std::string>& path);
public:
explicit name_analysis_visitor(const char *path, std::shared_ptr<symbol_table> symbols,
std::unordered_map<std::string, std::shared_ptr<alias_type>>&& unresolved);
name_analysis_visitor(const char *path, symbol_bag bag);
void visit(named_type_expression *type_expression) override;
void visit(array_type_expression *type_expression) override;

View File

@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see
#include <string>
#include <memory>
#include <vector>
#include <forward_list>
#include "elna/boot/result.h"
@ -281,6 +282,7 @@ namespace elna::boot
};
using symbol_table = symbol_map<std::shared_ptr<info>, std::nullptr_t, nullptr>;
using forward_table = std::unordered_map<std::string, std::shared_ptr<alias_type>>;
class type_info : public info
{
@ -299,7 +301,7 @@ namespace elna::boot
std::shared_ptr<symbol_table> symbols;
procedure_info(const procedure_type symbol, const std::vector<std::string> names,
std::shared_ptr<symbol_table> parent_table = nullptr);
std::shared_ptr<symbol_table> scope = nullptr);
std::shared_ptr<procedure_info> is_procedure() override;
};
@ -325,4 +327,25 @@ namespace elna::boot
};
std::shared_ptr<symbol_table> builtin_symbol_table();
/**
 * Bundles the symbol table of the scope currently being processed with the
 * tables of imported units and the forward declared type aliases.
 */
class symbol_bag
{
private:
    /// Table of the current scope.
    std::shared_ptr<symbol_table> symbols;
    /// Tables of the imports; lookup() consults them before the current scope.
    std::forward_list<std::shared_ptr<symbol_table>> imports;

public:
    /// Type aliases by name, as passed to the constructor.
    forward_table unresolved;

    /// Creates a bag with a new, default constructed symbol table.
    symbol_bag();

    /// Creates a bag from forward declarations and an existing table.
    symbol_bag(forward_table&& unresolved, std::shared_ptr<symbol_table> symbols);

    /// Looks the name up in the imports first, then in the current table.
    std::shared_ptr<info> lookup(const std::string& name);

    /// Adds a symbol to the current table.
    bool enter(const std::string& name, std::shared_ptr<info> entry);

    /// Creates a table on top of the current one, makes it current and returns it.
    std::shared_ptr<symbol_table> enter();

    /// Makes the given table the current one.
    void enter(std::shared_ptr<symbol_table> child);

    /// Replaces the current table with the one its scope() returns.
    void leave();

    /// Puts a table at the front of the imports.
    void add_import(std::shared_ptr<symbol_table> table);

    /// Puts the current table of another bag at the front of the imports.
    void add_import(const symbol_bag& bag);
};
}

View File

@ -36,8 +36,8 @@ namespace elna::gcc
class generic_visitor final : public boot::parser_visitor
{
tree current_expression{ NULL_TREE };
elna::boot::symbol_bag bag;
std::shared_ptr<symbol_table> symbols;
std::shared_ptr<boot::symbol_table> info_table;
void enter_scope();
tree leave_scope();
@ -63,7 +63,7 @@ namespace elna::gcc
bool assert_constant(location_t expression_location);
public:
generic_visitor(std::shared_ptr<symbol_table> symbol_table, std::shared_ptr<boot::symbol_table> info_table);
generic_visitor(std::shared_ptr<symbol_table> symbol_table, elna::boot::symbol_bag bag);
void visit(boot::program *program) override;
void visit(boot::procedure_declaration *definition) override;

View File

@ -1,12 +0,0 @@
DEFINITION MODULE Common;
TYPE
ShortString = ARRAY[1..256] OF CHAR;
Identifier = ARRAY[1..256] OF CHAR;
PIdentifier = POINTER TO Identifier;
TextLocation = RECORD
line: CARDINAL;
column: CARDINAL
END;
END Common.

View File

@ -1,3 +1,13 @@
(* Type declarations shared between modules: fixed size character buffers,
   a line/column pair and an empty FILE record. *)
module;
type
(* NOTE(review): the names below carry no "*" marker. Presumably "*" marks a
   declaration as visible to importers; if so, confirm that modules using
   ShortString or Identifier after "import Common" are meant to see them. *)
ShortString = [256]Char;
Identifier = [256]Char;
PIdentifier = ^Identifier;
TextLocation* = record
line: Word;
column: Word
end;
(* Record without fields. *)
FILE* = record end;
end.

View File

@ -1,107 +0,0 @@
DEFINITION MODULE Lexer;
FROM FIO IMPORT File;
FROM Common IMPORT Identifier, ShortString, TextLocation;
TYPE
PLexerBuffer = POINTER TO CHAR;
BufferPosition = RECORD
iterator: PLexerBuffer;
location: TextLocation
END;
PBufferPosition = POINTER TO BufferPosition;
Lexer = RECORD
input: File;
buffer: PLexerBuffer;
size: CARDINAL;
length: CARDINAL;
start: BufferPosition;
current: BufferPosition
END;
PLexer = POINTER TO Lexer;
LexerKind = (
lexerKindEof,
lexerKindIdentifier,
lexerKindIf,
lexerKindThen,
lexerKindElse,
lexerKindElsif,
lexerKindWhile,
lexerKindDo,
lexerKindProc,
lexerKindBegin,
lexerKindEnd,
lexerKindXor,
lexerKindConst,
lexerKindVar,
lexerKindCase,
lexerKindOf,
lexerKindType,
lexerKindRecord,
lexerKindUnion,
lexerKindPipe,
lexerKindTo,
lexerKindBoolean,
lexerKindNull,
lexerKindAnd,
lexerKindOr,
lexerKindTilde,
lexerKindReturn,
lexerKindDefer,
lexerKindRange,
lexerKindLeftParen,
lexerKindRightParen,
lexerKindLeftSquare,
lexerKindRightSquare,
lexerKindGreaterEqual,
lexerKindLessEqual,
lexerKindGreaterThan,
lexerKindLessThan,
lexerKindNotEqual,
lexerKindEqual,
lexerKindSemicolon,
lexerKindDot,
lexerKindComma,
lexerKindPlus,
lexerKindMinus,
lexerKindAsterisk,
lexerKindDivision,
lexerKindRemainder,
lexerKindAssignment,
lexerKindColon,
lexerKindHat,
lexerKindAt,
lexerKindComment,
lexerKindInteger,
lexerKindWord,
lexerKindCharacter,
lexerKindString,
lexerKindFrom,
lexerKindPointer,
lexerKindArray,
lexerKindArrow,
lexerKindProgram,
lexerKindModule,
lexerKindImport
);
LexerToken = RECORD
CASE kind: LexerKind OF
lexerKindBoolean: booleanKind: BOOLEAN |
lexerKindIdentifier: identifierKind: Identifier |
lexerKindInteger: integerKind: INTEGER |
lexerKindString: stringKind: ShortString
END;
start_location: TextLocation;
end_location: TextLocation
END;
PLexerToken = POINTER TO LexerToken;
PROCEDURE lexer_initialize(lexer: PLexer; input: File);
PROCEDURE lexer_destroy(lexer: PLexer);
(* Returns the last read token. *)
PROCEDURE lexer_current(lexer: PLexer): LexerToken;
(* Read and return the next token. *)
PROCEDURE lexer_lex(lexer: PLexer): LexerToken;
END Lexer.

View File

@ -1,17 +1,9 @@
module;
from FIO import ReadNBytes;
from SYSTEM import ADR, TSIZE;
from DynamicStrings import String, InitStringCharStar, KillString;
from StringConvert import StringToInteger;
from Storage import DEALLOCATE, ALLOCATE;
from Strings import Length;
from MemUtils import MemCopy, MemZero;
from StrCase import Lower;
import Common;
const
CHUNK_SIZE = 85536;
CHUNK_SIZE := 85536;
type
(*
@ -62,20 +54,109 @@ type
transitionStateDecimalSuffix,
transitionStateEnd
);
TransitionAction = proc(PLexer, PLexerToken);
LexerToken = record
kind: LexerKind;
value: union
booleanKind: Bool;
identifierKind: Identifier;
integerKind: Int;
stringKind: ShortString
end;
start_location: TextLocation;
end_location: TextLocation
end;
TransitionAction = proc(^Lexer, ^LexerToken);
Transition = record
action: TransitionAction;
next_state: TransitionState
end;
TransitionClasses = [22]Transition;
BufferPosition* = record
iterator: ^Char;
location: TextLocation
end;
Lexer* = record
input: ^FILE;
buffer: ^Char;
size: Word;
length: Word;
start: BufferPosition;
current: BufferPosition
end;
LexerKind* = (
lexerKindEof,
lexerKindIdentifier,
lexerKindIf,
lexerKindThen,
lexerKindElse,
lexerKindElsif,
lexerKindWhile,
lexerKindDo,
lexerKindProc,
lexerKindBegin,
lexerKindEnd,
lexerKindXor,
lexerKindConst,
lexerKindVar,
lexerKindCase,
lexerKindOf,
lexerKindType,
lexerKindRecord,
lexerKindUnion,
lexerKindPipe,
lexerKindTo,
lexerKindBoolean,
lexerKindNull,
lexerKindAnd,
lexerKindOr,
lexerKindTilde,
lexerKindReturn,
lexerKindDefer,
lexerKindRange,
lexerKindLeftParen,
lexerKindRightParen,
lexerKindLeftSquare,
lexerKindRightSquare,
lexerKindGreaterEqual,
lexerKindLessEqual,
lexerKindGreaterThan,
lexerKindLessThan,
lexerKindNotEqual,
lexerKindEqual,
lexerKindSemicolon,
lexerKindDot,
lexerKindComma,
lexerKindPlus,
lexerKindMinus,
lexerKindAsterisk,
lexerKindDivision,
lexerKindRemainder,
lexerKindAssignment,
lexerKindColon,
lexerKindHat,
lexerKindAt,
lexerKindComment,
lexerKindInteger,
lexerKindWord,
lexerKindCharacter,
lexerKindString,
lexerKindFrom,
lexerKindPointer,
lexerKindArray,
lexerKindArrow,
lexerKindProgram,
lexerKindModule,
lexerKindImport
);
var
classification: [128]TransitionClass;
transitions: [16]TransitionClasses;
proc initialize_classification();
var
i: CARDINAL;
i: Word;
begin
classification[1] := transitionClassEof; (* NUL *)
classification[2] := transitionClassInvalid; (* SOH *)
@ -213,12 +294,12 @@ begin
end
end;
proc compare_keyword(keyword: ARRAY OF CHAR, token_start: BufferPosition, token_end: PLexerBuffer) -> BOOLEAN;
proc compare_keyword(keyword: String, token_start: BufferPosition, token_end: ^Char) -> Bool;
var
result: BOOLEAN;
index: CARDINAL;
keyword_length: CARDINAL;
continue: BOOLEAN;
result: Bool;
index: Word;
keyword_length: Word;
continue: Bool;
begin
index := 0;
result := true;
@ -237,25 +318,25 @@ begin
end;
(* Reached the end of file. *)
proc transition_action_eof(lexer: PLexer, token: PLexerToken);
proc transition_action_eof(lexer: ^Lexer, token: ^LexerToken);
begin
token^.kind := lexerKindEof
end;
proc increment(position: PBufferPosition);
proc increment(position: ^BufferPosition);
begin
INC(position^.iterator)
end;
(* Add the character to the token currently read and advance to the next character. *)
proc transition_action_accumulate(lexer: PLexer, token: PLexerToken);
proc transition_action_accumulate(lexer: ^Lexer, token: ^LexerToken);
begin
increment(ADR(lexer^.current))
end;
(* The current character is not a part of the token. Finish the token already
* read. Don't advance to the next character. *)
proc transition_action_finalize(lexer: PLexer, token: PLexerToken);
proc transition_action_finalize(lexer: ^Lexer, token: ^LexerToken);
begin
if lexer^.start.iterator^ = ':' then
token^.kind := lexerKindColon
@ -278,7 +359,7 @@ begin
end;
(* An action for tokens containing multiple characters. *)
proc transition_action_composite(lexer: PLexer, token: PLexerToken);
proc transition_action_composite(lexer: ^Lexer, token: ^LexerToken);
begin
if lexer^.start.iterator^ = '<' then
if lexer^.current.iterator^ = '>' then
@ -304,7 +385,7 @@ begin
end;
(* Skip a space. *)
proc transition_action_skip(lexer: PLexer, token: PLexerToken);
proc transition_action_skip(lexer: ^Lexer, token: ^LexerToken);
begin
increment(ADR(lexer^.start));
@ -316,9 +397,9 @@ begin
end;
(* Delimited string action. *)
proc transition_action_delimited(lexer: PLexer, token: PLexerToken);
proc transition_action_delimited(lexer: ^Lexer, token: ^LexerToken);
var
text_length: CARDINAL;
text_length: Word;
begin
if lexer^.start.iterator^ = '(' then
token^.kind := lexerKindComment
@ -347,7 +428,7 @@ begin
end;
(* Finalize keyword or identifier. *)
proc transition_action_key_id(lexer: PLexer, token: PLexerToken);
proc transition_action_key_id(lexer: ^Lexer, token: ^LexerToken);
begin
token^.kind := lexerKindIdentifier;
@ -355,95 +436,95 @@ begin
DEC(token^.identifierKind[1], lexer^.start.iterator);
MemCopy(lexer^.start.iterator, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2]));
if compare_keyword('program', lexer^.start, lexer^.current.iterator) then
if compare_keyword("program", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindProgram
end;
if compare_keyword('import', lexer^.start, lexer^.current.iterator) then
if compare_keyword("import", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindImport
end;
if compare_keyword('const', lexer^.start, lexer^.current.iterator) then
if compare_keyword("const", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindConst
end;
if compare_keyword('var', lexer^.start, lexer^.current.iterator) then
if compare_keyword("var", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindVar
end;
if compare_keyword('if', lexer^.start, lexer^.current.iterator) then
if compare_keyword("if", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindIf
end;
if compare_keyword('then', lexer^.start, lexer^.current.iterator) then
if compare_keyword("then", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindThen
end;
if compare_keyword('elsif', lexer^.start, lexer^.current.iterator) then
if compare_keyword("elsif", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindElsif
end;
if compare_keyword('else', lexer^.start, lexer^.current.iterator) then
if compare_keyword("else", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindElse
end;
if compare_keyword('while', lexer^.start, lexer^.current.iterator) then
if compare_keyword("while", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindWhile
end;
if compare_keyword('do', lexer^.start, lexer^.current.iterator) then
if compare_keyword("do", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindDo
end;
if compare_keyword('proc', lexer^.start, lexer^.current.iterator) then
if compare_keyword("proc", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindProc
end;
if compare_keyword('begin', lexer^.start, lexer^.current.iterator) then
if compare_keyword("begin", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindBegin
end;
if compare_keyword('end', lexer^.start, lexer^.current.iterator) then
if compare_keyword("end", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindEnd
end;
if compare_keyword('type', lexer^.start, lexer^.current.iterator) then
if compare_keyword("type", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindType
end;
if compare_keyword('record', lexer^.start, lexer^.current.iterator) then
if compare_keyword("record", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindRecord
end;
if compare_keyword('union', lexer^.start, lexer^.current.iterator) then
if compare_keyword("union", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindUnion
end;
if compare_keyword('NIL', lexer^.start, lexer^.current.iterator) then
if compare_keyword("NIL", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindNull
end;
if compare_keyword('or', lexer^.start, lexer^.current.iterator) then
if compare_keyword("or", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindOr
end;
if compare_keyword('return', lexer^.start, lexer^.current.iterator) then
if compare_keyword("return", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindReturn
end;
if compare_keyword('defer', lexer^.start, lexer^.current.iterator) then
if compare_keyword("defer", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindDefer
end;
if compare_keyword('TO', lexer^.start, lexer^.current.iterator) then
if compare_keyword("TO", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindTo
end;
if compare_keyword('CASE', lexer^.start, lexer^.current.iterator) then
if compare_keyword("CASE", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindCase
end;
if compare_keyword('OF', lexer^.start, lexer^.current.iterator) then
if compare_keyword("OF", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindOf
end;
if compare_keyword('FROM', lexer^.start, lexer^.current.iterator) then
if compare_keyword("FROM", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindFrom
end;
if compare_keyword('module', lexer^.start, lexer^.current.iterator) then
if compare_keyword("module", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindModule
end;
if compare_keyword('xor', lexer^.start, lexer^.current.iterator) then
if compare_keyword("xor", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindXor
end;
if compare_keyword('POINTER', lexer^.start, lexer^.current.iterator) then
if compare_keyword("POINTER", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindPointer
end;
if compare_keyword('ARRAY', lexer^.start, lexer^.current.iterator) then
if compare_keyword("ARRAY", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindArray
end;
if compare_keyword('TRUE', lexer^.start, lexer^.current.iterator) then
if compare_keyword("TRUE", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindBoolean;
token^.booleanKind := true
end;
if compare_keyword('FALSE', lexer^.start, lexer^.current.iterator) then
if compare_keyword("FALSE", lexer^.start, lexer^.current.iterator) then
token^.kind := lexerKindBoolean;
token^.booleanKind := false
end
@ -451,7 +532,7 @@ end;
(* Action for tokens containing only one character. The character cannot be
* followed by other characters forming a composite token. *)
proc transition_action_single(lexer: PLexer, token: PLexerToken);
proc transition_action_single(lexer: ^Lexer, token: ^LexerToken);
begin
if lexer^.current.iterator^ = '&' then
token^.kind := lexerKindAnd
@ -502,11 +583,11 @@ begin
end;
(* Handle an integer literal. *)
proc transition_action_integer(lexer: PLexer, token: PLexerToken);
proc transition_action_integer(lexer: ^Lexer, token: ^LexerToken);
var
buffer: String;
integer_length: CARDINAL;
found: BOOLEAN;
integer_length: Word;
found: Bool;
begin
token^.kind := lexerKindInteger;
@ -805,7 +886,7 @@ begin
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].next_state := transitionStateEnd
end;
proc lexer_initialize(lexer: PLexer, input: File);
proc lexer_make*(lexer: ^Lexer, input: ^FILE);
begin
lexer^.input := input;
lexer^.length := 0;
@ -815,19 +896,20 @@ begin
lexer^.size := CHUNK_SIZE
end;
proc lexer_current(lexer: PLexer) -> LexerToken;
(* Returns the last read token. *)
proc lexer_current*(lexer: ^Lexer) -> LexerToken;
var
current_class: TransitionClass;
current_state: TransitionState;
current_transition: Transition;
result: LexerToken;
index1: CARDINAL;
index2: CARDINAL;
index1: Word;
index2: Word;
begin
lexer^.current := lexer^.start;
current_state := transitionStateStart;
while current_state <> transitionStateEnd DO
while current_state <> transitionStateEnd do
index1 := ORD(lexer^.current.iterator^);
INC(index1);
current_class := classification[index1];
@ -849,7 +931,8 @@ begin
return result
end;
proc lexer_lex(lexer: PLexer) -> LexerToken;
(* Read and return the next token. *)
proc lexer_lex*(lexer: ^Lexer) -> LexerToken;
var
result: LexerToken;
begin
@ -865,12 +948,15 @@ begin
return result
end;
proc lexer_destroy(lexer: PLexer);
proc lexer_destroy*(lexer: ^Lexer);
begin
DEALLOCATE(lexer^.buffer, lexer^.size)
end;
proc lexer_initialize();
begin
initialize_classification();
initialize_transitions()
end;
end.

View File

@ -3,7 +3,7 @@
obtain one at https://mozilla.org/MPL/2.0/. *)
program;
import dummy;
import Common, Lexer;
const
SEEK_SET* := 0;
@ -80,13 +80,9 @@ type
_module,
_import
);
Position* = record
line: Word;
column: Word
end;
Location* = record
first: Position;
last: Position
first: TextLocation;
last: TextLocation
end;
SourceFile* = record
buffer: [1024]Char;
@ -94,14 +90,13 @@ type
size: Word;
index: Word
end;
FILE* = record end;
StringBuffer* = record
data: Pointer;
size: Word;
capacity: Word
end;
SourceCode = record
position: Position;
position: TextLocation;
input: Pointer;
empty: proc(Pointer) -> Bool;
@ -123,7 +118,7 @@ type
lex: Bool;
parse: Bool
end;
Lexer* = record
Tokenizer* = record
length: Word;
data: ^Token
end;
@ -592,7 +587,7 @@ begin
return current_token
end;
proc lexer_add_token(lexer: ^Lexer, token: Token);
proc lexer_add_token(lexer: ^Tokenizer, token: Token);
var
new_length: Word;
begin
@ -778,13 +773,13 @@ begin
end;
(* Split the source text into tokens. *)
proc lexer_text(source_code: SourceCode) -> Lexer;
proc lexer_text(source_code: SourceCode) -> Tokenizer;
var
current_token: Token;
token_buffer: StringBuffer;
lexer: Lexer;
lexer: Tokenizer;
begin
lexer := Lexer(0u, nil);
lexer := Tokenizer(0u, nil);
token_buffer := string_buffer_new();
lexer_spaces(@source_code);
@ -1024,7 +1019,7 @@ end;
proc compile_in_stages(command_line: ^CommandLine, source_code: SourceCode) -> Int;
var
return_code: Int;
lexer: Lexer;
lexer: Tokenizer;
begin
return_code := 0;
@ -1068,7 +1063,7 @@ begin
fclose(source_file^.handle)
end;
source_code.position := Position(1u, 1u);
source_code.position := TextLocation(1u, 1u);
source_code.input := cast(source_file: Pointer);
source_code.empty := source_file_empty;
source_code.head := source_file_head;