From 2d31c77c1405ce07336b1a2c9059d0f0e6f792a0 Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Sun, 10 Mar 2024 08:50:55 +0100 Subject: [PATCH] Support surrounding begin and end --- TODO | 4 + source/lexer.cpp | 20 +++ source/parser.cpp | 220 ++++++++++++++++++++---- source/symboltable.cpp | 40 ++++- tests/declare_variable.eln | 4 + tests/expectations/declare_variable.txt | 1 + 6 files changed, 248 insertions(+), 41 deletions(-) create mode 100644 tests/declare_variable.eln create mode 100644 tests/expectations/declare_variable.txt diff --git a/TODO b/TODO index 4d0e299..e768a6e 100644 --- a/TODO +++ b/TODO @@ -8,6 +8,10 @@ - Allow defining variables. - Don't pass raw pointers to the visitor methods. - Make error abstract and derive unexpected_token in the lex module from it. +- Wrap the tokens in a struct with methods for incrementing and lookups. +- While loop. +- If condition. +- Grouping multiple statements with begin and end (compound_statement). # Shell - Persist the history. diff --git a/source/lexer.cpp b/source/lexer.cpp index 3e26c1f..1fc71e0 100644 --- a/source/lexer.cpp +++ b/source/lexer.cpp @@ -209,6 +209,16 @@ namespace elna::source return ss.str(); } + unexpected_token::unexpected_token(const token& token) + : error(token.position()), m_token(token) + { + } + + std::string unexpected_token::what() const + { + return "Unexpected token"; + } + source_result lex(const std::string& buffer) { std::vector tokens; @@ -272,6 +282,14 @@ namespace elna::source { tokens.emplace_back(token::type::var, iterator.position()); } + else if (word == "begin") + { + tokens.emplace_back(token::type::begin, iterator.position()); + } + else if (word == "end") + { + tokens.emplace_back(token::type::end, iterator.position()); + } else { tokens.emplace_back(token::type::identifier, word.c_str(), iterator.position()); @@ -297,6 +315,8 @@ namespace elna::source } ++iterator; } + tokens.push_back(token(token::type::eof, iterator.position())); + return source_result(std::move(tokens)); } } diff --git a/source/parser.cpp b/source/parser.cpp index eb27736..160f940 100644 --- a/source/parser.cpp +++ b/source/parser.cpp @@ -10,11 +10,26 @@ namespace elna::source { } - definition::definition(std::string&& identifier, std::unique_ptr&& body) + declaration::declaration(const std::string& identifier) + : m_identifier(identifier) + { + } + + std::string& declaration::identifier() noexcept + { + return m_identifier; + } + + definition::definition(const std::string& identifier, std::unique_ptr&& body) : m_identifier(std::move(identifier)), m_body(std::move(body)) { } + void declaration::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + void definition::accept(parser_visitor *visitor) { visitor->visit(this); @@ -30,8 +45,10 @@ namespace elna::source return *m_body; } - block::block(std::vector>&& definitions, std::unique_ptr&& body) - : m_definitions(std::move(definitions)), m_body(std::move(body)) + block::block(std::vector>&& definitions, + std::vector>&& declarations, + std::unique_ptr&& body) + : m_definitions(std::move(definitions)), m_declarations(std::move(declarations)), m_body(std::move(body)) { } @@ -50,6 +67,11 @@ namespace elna::source return m_definitions; } + std::vector>& block::declarations() noexcept + { + return m_declarations; + } + integer_literal::integer_literal(const std::int32_t value) : m_number(value) { @@ -138,6 +160,21 @@ namespace elna::source return *m_body; } + compound_statement::compound_statement(std::vector>&& statements) + : m_statements(std::move(statements)) + { + } + + void compound_statement::accept(parser_visitor *visitor) + { + visitor->visit(this); + } + + std::vector>& compound_statement::statements() + { + return m_statements; + } + parser::parser(const std::vector& tokens) : tokens(tokens.cbegin()), end(tokens.cend()) { @@ -215,14 +252,20 @@ namespace elna::source std::unique_ptr parser::parse_definition() { - std::string definition_identifier = tokens->identifier(); // Copy. + auto definition_identifier = advance(token::type::identifier); - ++tokens; - ++tokens; // Skip the equals sign. + if (!definition_identifier.has_value()) + { + return nullptr; + } + if (!skip(token::type::equals)) + { + return nullptr; + } if (tokens->of() == source::token::type::number) { - auto result = std::make_unique(std::move(definition_identifier), + auto result = std::make_unique(definition_identifier.value().get().identifier(), std::make_unique(tokens->number())); ++tokens; return result; @@ -230,61 +273,168 @@ namespace elna::source return nullptr; } - std::unique_ptr parser::parse_bang_statement() + std::unique_ptr parser::parse_declaration() + { + auto declaration_identifier = advance(token::type::identifier); + + if (!declaration_identifier.has_value()) + { + return nullptr; + } + return std::make_unique(declaration_identifier.value().get().identifier()); + } + + std::unique_ptr parser::parse_statement() { if (tokens->of() == source::token::type::bang) { - ++tokens; - auto bang_body = parse_expression(); - if (bang_body != nullptr) - { - return std::make_unique(std::move(bang_body)); - } + return parse_bang_statement(); } + else if (tokens->of() == source::token::type::begin) + { + return parse_compound_statement(); + } + errors.push_back(std::make_unique(unexpected_token{ *tokens })); return nullptr; } + std::unique_ptr parser::parse_bang_statement() + { + if (!advance(token::type::bang)) + { + return nullptr; + } + auto bang_body = parse_expression(); + + if (bang_body != nullptr) + { + return std::make_unique(std::move(bang_body)); + } + + return nullptr; + } + + std::unique_ptr parser::parse_compound_statement() + { + if (!advance(token::type::begin)) + { + return nullptr; + } + auto result = std::make_unique(); + std::unique_ptr next_statement; + + do + { + if ((next_statement = parse_statement()) == nullptr) + { + return nullptr; + } + result->statements().push_back(std::move(next_statement)); + } + while (tokens->of() != token::type::end); + ++tokens; + + return result; + } + std::vector> parser::parse_definitions() { - ++tokens; // Skip const. - std::vector> definitions; - while (tokens != end) + if (tokens->of() != token::type::let) + { + return definitions; + } + ++tokens; // Skip const. + + std::unique_ptr parsed_definition; + while ((parsed_definition = parse_definition()) != nullptr) { - auto parsed_definition = parse_definition(); - if (parsed_definition == nullptr) - { - return definitions; - } definitions.push_back(std::move(parsed_definition)); - if (tokens->of() == source::token::type::semicolon) - { - break; - } if (tokens->of() == source::token::type::comma) { ++tokens; } + else if (tokens->of() == source::token::type::semicolon) + { + ++tokens; + break; + } + else + { + errors.push_back(std::make_unique(*tokens)); + break; + } } - return definitions; } + std::vector> parser::parse_declarations() + { + std::vector> declarations; + + if (tokens->of() != token::type::var) + { + return declarations; + } + ++tokens; // Skip var. + + std::unique_ptr parsed_declaration; + while ((parsed_declaration = parse_declaration()) != nullptr) + { + declarations.push_back(std::move(parsed_declaration)); + + if (tokens->of() == token::type::comma) + { + ++tokens; + } + else if (tokens->of() == token::type::semicolon) + { + ++tokens; + break; + } + else + { + errors.push_back(std::make_unique(*tokens)); + break; + } + } + return declarations; + } + std::unique_ptr parser::parse_block() { - std::vector> definitions; - if (tokens->of() == source::token::type::let) - { - definitions = parse_definitions(); - ++tokens; - } - auto parsed_statement = parse_bang_statement(); + auto definitions = parse_definitions(); + auto declarations = parse_declarations(); + auto parsed_statement = parse_statement(); + if (parsed_statement == nullptr) { return nullptr; } - return std::make_unique(std::move(definitions), std::move(parsed_statement)); + return std::make_unique(std::move(definitions), + std::move(declarations), std::move(parsed_statement)); + } + + std::optional> parser::advance(const token::type token_type) + { + if (tokens->of() == token_type) + { + return std::make_optional<>(std::cref(*tokens++)); + } + errors.push_back(std::make_unique(*tokens)); + return std::optional>(); + } + + bool parser::skip(const token::type token_type) + { + if (tokens->of() == token_type) + { + ++tokens; + return true; + } + errors.push_back(std::make_unique(*tokens)); + return false; } } diff --git a/source/symboltable.cpp b/source/symboltable.cpp index 81e27ef..49bff30 100644 --- a/source/symboltable.cpp +++ b/source/symboltable.cpp @@ -2,6 +2,16 @@ namespace elna::source { + name_collision::name_collision(const std::string& name, const position current, const position previous) + : error(current), name(name), previous(previous) + { + } + + std::string name_collision::what() const + { + return "Name '" + name + "' was already defined"; + } + std::shared_ptr symbol_table::lookup(const std::string& name) { auto entry = entries.find(name); @@ -42,31 +52,49 @@ namespace elna::source return m_value; } + variable_info::variable_info(std::size_t offset) + : m_offset(offset) + { + } + variable_info::~variable_info() { } - void name_analysis_visitor::visit(source::definition *definition) + std::size_t variable_info::offset() const noexcept + { + return m_offset; + } + + void name_analysis_visitor::visit(declaration *declaration) { } - void name_analysis_visitor::visit(source::bang_statement *statement) + void name_analysis_visitor::visit(definition *definition) { } - void name_analysis_visitor::visit(source::block *block) + void name_analysis_visitor::visit(bang_statement *statement) { } - void name_analysis_visitor::visit(source::integer_literal *number) + void name_analysis_visitor::visit(compound_statement *statement) { } - void name_analysis_visitor::visit(source::variable_expression *variable) + void name_analysis_visitor::visit(block *block) { } - void name_analysis_visitor::visit(source::binary_expression *expression) + void name_analysis_visitor::visit(integer_literal *number) + { + } + + void name_analysis_visitor::visit(variable_expression *variable) + { + } + + void name_analysis_visitor::visit(binary_expression *expression) { } } diff --git a/tests/declare_variable.eln b/tests/declare_variable.eln new file mode 100644 index 0000000..9c17195 --- /dev/null +++ b/tests/declare_variable.eln @@ -0,0 +1,4 @@ +var x; +begin + ! 5 +end. diff --git a/tests/expectations/declare_variable.txt b/tests/expectations/declare_variable.txt new file mode 100644 index 0000000..7ed6ff8 --- /dev/null +++ b/tests/expectations/declare_variable.txt @@ -0,0 +1 @@ +5