From 9df075ca9de1a53679d754d2e6080cdc404336a7 Mon Sep 17 00:00:00 2001
From: Eugen Wissner
Date: Sun, 7 Jul 2024 18:50:15 +0200
Subject: [PATCH] Generate a bison parser

---
# Reviewer notes (commentary only; `git am` ignores text between the `---`
# line and the first `diff --git`).
#
# What the patch does:
#   * CMakeLists.txt: makes FLEX a required package, adds BISON as a required
#     package, declares FLEX_TARGET `lexer` and BISON_TARGET `parser`, and adds
#     an executable target named `test` built from parser/main.cpp, the listed
#     source/ and include/ files, and the generated lexer/parser outputs.
#     Linking ${FLEX_LIBRARIES} is left commented out.
#   * parser/lexer.ll: flex C++ scanner. YY_DECL makes the scanning function
#     `elna::syntax::FooLexer::lex()` returning `yy::parser::symbol_type`;
#     YY_USER_ACTION advances `location` by `yyleng` columns for every match.
#     `--` comments and space/tab/CR are skipped, newline runs advance the line
#     count, and any character no other rule matches throws
#     `yy::parser::syntax_error`.
#   * parser/main.cpp: parses the hard-coded text "const world = 5, hello = 7;"
#     and prints each resulting definition; returns the parser's result code.
#   * parser/parser.yy: Bison C++ grammar (requires 3.2) using variant semantic
#     values and token constructors. `FooLexer` is defined in `%code provides`
#     and `yylex` is mapped to `lexer.lex`. The start symbol `program` accepts
#     only `constant_definition_part`.
#   * tests/: adds two files with no content.
#
# NOTE(review): this copy of the patch arrived with line breaks and indentation
#   collapsed; the layout below is reconstructed from the +/- markers and hunk
#   sizes. Blank context lines in the first CMakeLists.txt hunk are inferred --
#   verify against the commit.
# NOTE(review): several `#include` targets, template argument lists, %token /
#   %type value types and the author's address are empty in this copy (anything
#   that would sit between angle brackets). Presumably lost in extraction --
#   confirm against the commit before applying.
# NOTE(review): main.cpp iterates `program->definitions()` without looking at
#   `result`; `program` is only assigned in the `program:` action, so a failed
#   parse would presumably leave it null. The `dynamic_cast` result is also
#   used unchecked.
# NOTE(review): `yy::parser::error` takes `loc` but prints only the message.
# NOTE(review): tokens TRUE and FALSE are declared, but the scanner returns
#   BOOLEAN for `True`/`False`; no rule in this patch produces TRUE/FALSE.
# NOTE(review): `variable_declaration(s)` and `type_expression` are not
#   reachable from `program` in this patch.
# NOTE(review): `constant_definitions` is right-recursive and inserts each
#   element at the front of the vector.
# NOTE(review): the executable target is named `test`; CMake reserves that name
#   when testing is enabled (policy CMP0037) -- TODO confirm this project never
#   calls enable_testing().

 CMakeLists.txt                |  20 ++++-
 parser/lexer.ll               | 131 +++++++++++++++++++++++++++++++
 parser/main.cpp               |  22 ++++++
 parser/parser.yy              | 141 ++++++++++++++++++++++++++++++++++
 tests/empty_file.eln          |   0
 tests/failures/empty_file.txt |   0
 6 files changed, 313 insertions(+), 1 deletion(-)
 create mode 100644 parser/lexer.ll
 create mode 100644 parser/main.cpp
 create mode 100644 parser/parser.yy
 create mode 100644 tests/empty_file.eln
 create mode 100644 tests/failures/empty_file.txt

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 215a0ab..1fa0eeb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,7 +6,8 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 set(CMAKE_CXX_STANDARD 17)
 
 find_package(Boost COMPONENTS program_options REQUIRED)
-find_package(FLEX)
+find_package(FLEX REQUIRED)
+find_package(BISON REQUIRED)
 include_directories(${Boost_INCLUDE_DIR})
 
 FLEX_TARGET(scanner source/scanner.l ${CMAKE_CURRENT_BINARY_DIR}/scanner.cpp)
@@ -26,3 +27,20 @@ add_executable(elna cli/main.cpp
 )
 target_include_directories(elna PRIVATE include)
 target_link_libraries(elna LINK_PUBLIC ${Boost_LIBRARIES})
+
+FLEX_TARGET(lexer parser/lexer.ll ${CMAKE_CURRENT_BINARY_DIR}/lexer.cpp)
+BISON_TARGET(parser parser/parser.yy ${CMAKE_CURRENT_BINARY_DIR}/parser.cpp)
+add_flex_bison_dependency(lexer parser)
+
+add_executable(test parser/main.cpp
+	source/lexer.cpp include/elna/source/lexer.hpp
+	source/parser.cpp include/elna/source/parser.hpp
+	source/types.cpp include/elna/source/types.hpp
+	source/symbol_table.cpp include/elna/source/symbol_table.hpp
+	source/result.cpp include/elna/source/result.hpp
+	source/semantic.cpp include/elna/source/semantic.hpp
+	source/optimizer.cpp include/elna/source/optimizer.hpp
+	${BISON_parser_OUTPUTS} ${FLEX_lexer_OUTPUTS}
+)
+target_include_directories(test PRIVATE ${CMAKE_CURRENT_BINARY_DIR} parser include)
+# target_link_libraries(test ${FLEX_LIBRARIES})
diff --git a/parser/lexer.ll b/parser/lexer.ll
new file mode 100644
index 0000000..9485abd
--- /dev/null
+++ b/parser/lexer.ll
@@ -0,0 +1,131 @@
+%{
+#define YY_NO_UNISTD_H
+#define YY_USER_ACTION this->location.columns(yyleng);
+
+#include
+#include "parser.hpp"
+
+#undef YY_DECL
+#define YY_DECL yy::parser::symbol_type elna::syntax::FooLexer::lex()
+#define yyterminate() return yy::parser::make_YYEOF(this->location)
+%}
+
+%option c++ noyywrap never-interactive
+%option yyclass="elna::syntax::FooLexer"
+
+%%
+%{
+    this->location.step();
+%}
+
+\-\-.* {
+        /* Skip the comment */
+    }
+[\ \t\r] ; /* Skip the whitespaces */
+\n+ {
+        this->location.lines(yyleng);
+        this->location.step();
+    }
+if {
+        return yy::parser::make_IF(this->location);
+    }
+then {
+        return yy::parser::make_THEN(this->location);
+    }
+while {
+        return yy::parser::make_WHILE(this->location);
+    }
+do {
+        return yy::parser::make_DO(this->location);
+    }
+proc {
+        return yy::parser::make_PROCEDURE(this->location);
+    }
+begin {
+        return yy::parser::make_BEGIN_BLOCK(this->location);
+    }
+end {
+        return yy::parser::make_END_BLOCK(this->location);
+    }
+const {
+        return yy::parser::make_CONST(this->location);
+    }
+var {
+        return yy::parser::make_VAR(this->location);
+    }
+True {
+        return yy::parser::make_BOOLEAN(true, this->location);
+    }
+False {
+        return yy::parser::make_BOOLEAN(false, this->location);
+    }
+[A-Za-z_][A-Za-z0-9_]* {
+        return yy::parser::make_IDENTIFIER(yytext, this->location);
+    }
+[0-9]+ {
+        return yy::parser::make_NUMBER(strtol(yytext, NULL, 10), this->location);
+    }
+\( {
+        return yy::parser::make_LEFT_PAREN(this->location);
+    }
+\) {
+        return yy::parser::make_RIGHT_PAREN(this->location);
+    }
+\>= {
+        return yy::parser::make_GREATER_EQUAL(this->location);
+    }
+\<= {
+        return yy::parser::make_LESS_EQUAL(this->location);
+    }
+\> {
+        return yy::parser::make_GREATER_THAN(this->location);
+    }
+\< {
+        return yy::parser::make_LESS_THAN(this->location);
+    }
+\/= {
+        return yy::parser::make_NOT_EQUAL(this->location);
+    }
+= {
+        return yy::parser::make_EQUALS(this->location);
+    }
+; {
+        return yy::parser::make_SEMICOLON(this->location);
+    }
+\. {
+        return yy::parser::make_DOT(this->location);
+    }
+, {
+        return yy::parser::make_COMMA(this->location);
+    }
+\+ {
+        return yy::parser::make_PLUS(this->location);
+    }
+\- {
+        return yy::parser::make_MINUS(this->location);
+    }
+\* {
+        return yy::parser::make_MULTIPLICATION(this->location);
+    }
+\/ {
+        return yy::parser::make_DIVISION(this->location);
+    }
+:= {
+        return yy::parser::make_ASSIGNMENT(this->location);
+    }
+: {
+        return yy::parser::make_COLON(this->location);
+    }
+\^ {
+        return yy::parser::make_HAT(this->location);
+    }
+@ {
+        return yy::parser::make_AT(this->location);
+    }
+. {
+        std::stringstream ss;
+
+        ss << "Illegal character 0x" << std::hex << static_cast(yytext[0]);
+        throw yy::parser::syntax_error(this->location, ss.str());
+    }
+%%
diff --git a/parser/main.cpp b/parser/main.cpp
new file mode 100644
index 0000000..0caf149
--- /dev/null
+++ b/parser/main.cpp
@@ -0,0 +1,22 @@
+#include "parser.hpp"
+#include
+
+int main()
+{
+    std::istringstream inp("const world = 5, hello = 7;");
+
+    std::unique_ptr program;
+
+    elna::syntax::FooLexer lexer(inp);
+    yy::parser parser(lexer, program);
+    auto result = parser();
+
+    for (auto& definition : program->definitions())
+    {
+        auto const_definition = dynamic_cast(definition.get());
+
+        std::cout << "const " << const_definition->identifier() << " = "
+            << const_definition->body().number() << std::endl;
+    }
+    return result;
+}
diff --git a/parser/parser.yy b/parser/parser.yy
new file mode 100644
index 0000000..1ba9d94
--- /dev/null
+++ b/parser/parser.yy
@@ -0,0 +1,141 @@
+%require "3.2"
+%language "c++"
+
+%code requires {
+    #include
+    #include
+    #include "elna/source/parser.hpp"
+
+
+    #if ! defined(yyFlexLexerOnce)
+    #include
+    #endif
+
+    namespace elna::syntax
+    {
+        class FooLexer;
+    }
+}
+
+%code provides {
+    namespace elna::syntax
+    {
+
+    class FooLexer : public yyFlexLexer
+    {
+    public:
+        yy::location location;
+
+        FooLexer(std::istream& arg_yyin)
+            : yyFlexLexer(&arg_yyin)
+        {
+        }
+
+        yy::parser::symbol_type lex();
+    };
+
+    }
+}
+
+%define api.token.raw
+%define api.token.constructor
+%define api.value.type variant
+%define parse.assert
+
+%parse-param {elna::syntax::FooLexer& lexer}
+%parse-param {std::unique_ptr& program}
+%locations
+
+%header
+
+%code {
+    #define yylex lexer.lex
+}
+%start program;
+
+%token IDENTIFIER "identifier"
+%token NUMBER "number"
+%token BOOLEAN
+%token IF THEN WHILE DO
+%token CONST VAR PROCEDURE
+%token BEGIN_BLOCK END_BLOCK
+%token TRUE FALSE
+%token LEFT_PAREN RIGHT_PAREN SEMICOLON DOT COMMA
+%token GREATER_EQUAL LESS_EQUAL LESS_THAN GREATER_THAN NOT_EQUAL EQUALS
+%token PLUS MINUS MULTIPLICATION DIVISION
+%token ASSIGNMENT COLON HAT AT
+
+%type > integer_literal;
+%type > constant_definition;
+%type >> constant_definition_part constant_definitions;
+%type > type_expression;
+%%
+program: constant_definition_part
+    {
+        elna::source::position position;
+        std::vector> declarations;
+        std::vector> definitions($1.size());
+        std::vector>::iterator definition = definitions.begin();
+
+        for (auto& constant : $1)
+        {
+            *definition++ = std::move(constant);
+        }
+        program = std::make_unique(position,
+            std::move(definitions), std::move(declarations),
+            std::make_unique(position));
+    }
+integer_literal: NUMBER
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position, $1);
+    };
+type_expression:
+    HAT IDENTIFIER
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position, $2, true);
+    }
+    | IDENTIFIER
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position, $1, false);
+    }
+variable_declaration: IDENTIFIER COLON type_expression
+variable_declarations:
+    variable_declaration COMMA variable_declarations
+    | variable_declaration
+constant_definition: IDENTIFIER EQUALS integer_literal
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position,
+            $1, std::move($3));
+    };
+constant_definitions:
+    constant_definition COMMA constant_definitions
+    {
+        std::swap($$, $3);
+        $$.emplace($$.cbegin(), std::move($1));
+    }
+    | constant_definition { $$.emplace_back(std::move($1)); }
+constant_definition_part:
+    /* no constant definitions */ {}
+    | CONST constant_definitions SEMICOLON { std::swap($$, $2); };
+%%
+
+void yy::parser::error(const location_type& loc, const std::string &message)
+{
+    std::cerr << "Error: " << message << std::endl;
+}
diff --git a/tests/empty_file.eln b/tests/empty_file.eln
new file mode 100644
index 0000000..e69de29
diff --git a/tests/failures/empty_file.txt b/tests/failures/empty_file.txt
new file mode 100644
index 0000000..e69de29