diff --git a/CMakeLists.txt b/CMakeLists.txt
index 215a0ab..1fa0eeb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,7 +6,8 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 set(CMAKE_CXX_STANDARD 17)
 
 find_package(Boost COMPONENTS program_options REQUIRED)
-find_package(FLEX)
+find_package(FLEX REQUIRED)
+find_package(BISON REQUIRED)
 include_directories(${Boost_INCLUDE_DIR})
 
 FLEX_TARGET(scanner source/scanner.l ${CMAKE_CURRENT_BINARY_DIR}/scanner.cpp)
@@ -26,3 +27,20 @@ add_executable(elna cli/main.cpp
 )
 target_include_directories(elna PRIVATE include)
 target_link_libraries(elna LINK_PUBLIC ${Boost_LIBRARIES})
+
+FLEX_TARGET(lexer parser/lexer.ll ${CMAKE_CURRENT_BINARY_DIR}/lexer.cpp)
+BISON_TARGET(parser parser/parser.yy ${CMAKE_CURRENT_BINARY_DIR}/parser.cpp)
+add_flex_bison_dependency(lexer parser)
+
+add_executable(test parser/main.cpp
+    source/lexer.cpp include/elna/source/lexer.hpp
+    source/parser.cpp include/elna/source/parser.hpp
+    source/types.cpp include/elna/source/types.hpp
+    source/symbol_table.cpp include/elna/source/symbol_table.hpp
+    source/result.cpp include/elna/source/result.hpp
+    source/semantic.cpp include/elna/source/semantic.hpp
+    source/optimizer.cpp include/elna/source/optimizer.hpp
+    ${BISON_parser_OUTPUTS} ${FLEX_lexer_OUTPUTS}
+)
+target_include_directories(test PRIVATE ${CMAKE_CURRENT_BINARY_DIR} parser include)
+# target_link_libraries(test ${FLEX_LIBRARIES})
diff --git a/parser/lexer.ll b/parser/lexer.ll
new file mode 100644
index 0000000..9485abd
--- /dev/null
+++ b/parser/lexer.ll
@@ -0,0 +1,139 @@
+%{
+/*
+ * Flex scanner feeding the Bison-generated yy::parser. Every rule returns a
+ * complete symbol (kind, optional value, location) built by yy::parser::make_*.
+ */
+#define YY_NO_UNISTD_H
+/* Flex runs this before each rule action: grow the location over the match. */
+#define YY_USER_ACTION this->location.columns(yyleng);
+
+#include
+#include "parser.hpp"
+
+#undef YY_DECL
+#define YY_DECL yy::parser::symbol_type elna::syntax::FooLexer::lex()
+#define yyterminate() return yy::parser::make_YYEOF(this->location)
+%}
+
+%option c++ noyywrap never-interactive
+%option yyclass="elna::syntax::FooLexer"
+
+%%
+%{
+    /* Executed on every call to lex(): begin the token where the last one ended. */
+    this->location.step();
+%}
+
+\-\-.* {
+    /* Skip the comment and keep it out of the next token's location. */
+    this->location.step();
+}
+[\ \t\r] { this->location.step(); /* Skip the whitespaces */ }
+\n+ {
+    this->location.lines(yyleng);
+    this->location.step();
+}
+if {
+    return yy::parser::make_IF(this->location);
+}
+then {
+    return yy::parser::make_THEN(this->location);
+}
+while {
+    return yy::parser::make_WHILE(this->location);
+}
+do {
+    return yy::parser::make_DO(this->location);
+}
+proc {
+    return yy::parser::make_PROCEDURE(this->location);
+}
+begin {
+    return yy::parser::make_BEGIN_BLOCK(this->location);
+}
+end {
+    return yy::parser::make_END_BLOCK(this->location);
+}
+const {
+    return yy::parser::make_CONST(this->location);
+}
+var {
+    return yy::parser::make_VAR(this->location);
+}
+True {
+    return yy::parser::make_BOOLEAN(true, this->location);
+}
+False {
+    return yy::parser::make_BOOLEAN(false, this->location);
+}
+[A-Za-z_][A-Za-z0-9_]* {
+    return yy::parser::make_IDENTIFIER(yytext, this->location);
+}
+[0-9]+ {
+    return yy::parser::make_NUMBER(strtol(yytext, nullptr, 10), this->location);
+}
+\( {
+    return yy::parser::make_LEFT_PAREN(this->location);
+}
+\) {
+    return yy::parser::make_RIGHT_PAREN(this->location);
+}
+\>= {
+    return yy::parser::make_GREATER_EQUAL(this->location);
+}
+\<= {
+    return yy::parser::make_LESS_EQUAL(this->location);
+}
+\> {
+    return yy::parser::make_GREATER_THAN(this->location);
+}
+\< {
+    return yy::parser::make_LESS_THAN(this->location);
+}
+\/= {
+    return yy::parser::make_NOT_EQUAL(this->location);
+}
+= {
+    return yy::parser::make_EQUALS(this->location);
+}
+; {
+    return yy::parser::make_SEMICOLON(this->location);
+}
+\. {
+    return yy::parser::make_DOT(this->location);
+}
+, {
+    return yy::parser::make_COMMA(this->location);
+}
+\+ {
+    return yy::parser::make_PLUS(this->location);
+}
+\- {
+    return yy::parser::make_MINUS(this->location);
+}
+\* {
+    return yy::parser::make_MULTIPLICATION(this->location);
+}
+\/ {
+    return yy::parser::make_DIVISION(this->location);
+}
+:= {
+    return yy::parser::make_ASSIGNMENT(this->location);
+}
+: {
+    return yy::parser::make_COLON(this->location);
+}
+\^ {
+    return yy::parser::make_HAT(this->location);
+}
+@ {
+    return yy::parser::make_AT(this->location);
+}
+. {
+    /* No other rule matched this character: report it as a syntax error. */
+    std::stringstream ss;
+
+    ss << "Illegal character 0x" << std::hex << static_cast(yytext[0]);
+    throw yy::parser::syntax_error(this->location, ss.str());
+}
+%%
diff --git a/parser/main.cpp b/parser/main.cpp
new file mode 100644
index 0000000..2c51302
--- /dev/null
+++ b/parser/main.cpp
@@ -0,0 +1,58 @@
+#include "parser.hpp"
+#include
+#include
+
+// Smoke test for the generated parser: parses a fixed program and prints the
+// constants and variables it declares. Returns the parser status (0 on success).
+int main()
+{
+    std::istringstream inp(
+        "const world = 5, hello = 7;\n"
+        "var x: Int, y: ^Int;\n"
+        "begin\n"
+        "end.\n"
+    );
+    std::unique_ptr program;
+    int result{ 1 };
+
+    elna::syntax::FooLexer lexer(inp);
+    yy::parser parser(lexer, program);
+    try
+    {
+        result = parser();
+    }
+    catch (const yy::parser::syntax_error& syntax_error)
+    {
+        std::cerr << syntax_error.location << ": " << syntax_error.what() << std::endl;
+        return result;
+    }
+    // A failed parse may leave the tree unset; report failure without printing.
+    if (result != 0 || !program)
+    {
+        return result != 0 ? result : 1;
+    }
+
+    for (auto& definition : program->definitions())
+    {
+        auto const_definition = dynamic_cast(definition.get());
+
+        // The cast yields null when the definition is of another kind; skip it.
+        if (const_definition == nullptr)
+        {
+            continue;
+        }
+        std::cout << "const " << const_definition->identifier() << " = "
+            << const_definition->body().number() << std::endl;
+    }
+    for (auto& declaration : program->declarations())
+    {
+        std::cout << "var " << declaration->identifier() << ": ";
+
+        if (declaration->type().is_pointer())
+        {
+            std::cout << '^';
+        }
+        std::cout << declaration->type().base() << std::endl;
+    }
+    return result;
+}
diff --git a/parser/parser.yy b/parser/parser.yy
new file mode 100644
index 0000000..bf692e5
---
/dev/null
+++ b/parser/parser.yy
@@ -0,0 +1,415 @@
+/* Bison C++ grammar: builds the syntax tree returned through the `program`
+ * parse parameter, pulling tokens from elna::syntax::FooLexer. */
+%require "3.2"
+%language "c++"
+
+%code requires {
+    #include
+    #include "elna/source/parser.hpp"
+
+
+    #if ! defined(yyFlexLexerOnce)
+    #include
+    #endif
+
+    namespace elna::syntax
+    {
+        class FooLexer;
+    }
+}
+
+%code provides {
+    namespace elna::syntax
+    {
+
+        // Scanner subclass carrying the location that lex() attaches to each token.
+        class FooLexer : public yyFlexLexer
+        {
+        public:
+            yy::location location;
+
+            FooLexer(std::istream& arg_yyin)
+                : yyFlexLexer(&arg_yyin)
+            {
+            }
+
+            yy::parser::symbol_type lex();
+        };
+
+    }
+}
+
+%define api.token.raw
+%define api.token.constructor
+%define api.value.type variant
+%define parse.assert
+
+%parse-param {elna::syntax::FooLexer& lexer}
+%parse-param {std::unique_ptr& program}
+%locations
+
+%header
+
+%code {
+    // Route the parser's yylex calls to the FooLexer passed as a parse parameter.
+    #define yylex lexer.lex
+}
+%start program;
+
+%token IDENTIFIER "identifier"
+%token NUMBER "number"
+%token BOOLEAN
+%token IF THEN WHILE DO
+%token CONST VAR PROCEDURE
+%token BEGIN_BLOCK END_BLOCK
+%token TRUE FALSE
+%token LEFT_PAREN RIGHT_PAREN SEMICOLON DOT COMMA
+%token GREATER_EQUAL LESS_EQUAL LESS_THAN GREATER_THAN NOT_EQUAL EQUALS
+%token PLUS MINUS MULTIPLICATION DIVISION
+%token ASSIGNMENT COLON HAT AT
+
+%type > integer_literal;
+%type > boolean_literal;
+%type > variable_expression;
+%type > constant_definition;
+%type > reference_expression dereference_expression;
+%type >> constant_definition_part constant_definitions;
+%type > type_expression;
+%type > variable_declaration;
+%type >> variable_declaration_part variable_declarations;
+%type > assign_statement;
+%type > expression factor term comparand;
+%type > statement;
+%type >> arguments;
+%type > call_statement;
+%type > compound_statement;
+%type >> statements;
+%type > if_statement;
+%type > while_statement;
+%type > procedure_definition;
+%type >> procedure_definition_part procedure_definitions;
+%%
+program: constant_definition_part variable_declaration_part procedure_definition_part statement DOT
+    {
+        /* NOTE(review): the third symbol (procedure_definition_part) is parsed
+           but not used when the program node is built below. */
+        elna::source::position position;
+        std::vector> definitions($1.size());
+        std::vector> declarations($2.size());
+        std::vector>::iterator definition = definitions.begin();
+        std::vector>::iterator declaration = declarations.begin();
+
+        for (auto& constant : $1)
+        {
+            *definition++ = std::move(constant);
+        }
+        for (auto& variable : $2)
+        {
+            *declaration++ = std::move(variable);
+        }
+        program = std::make_unique(position,
+            std::move(definitions), std::move(declarations), std::move($4));
+    }
+procedure_definition:
+    PROCEDURE IDENTIFIER SEMICOLON constant_definition_part variable_declaration_part statement SEMICOLON
+    {
+        elna::source::position position;
+        std::vector> definitions($4.size());
+        std::vector> declarations($5.size());
+        std::vector>::iterator definition = definitions.begin();
+        std::vector>::iterator declaration = declarations.begin();
+
+        for (auto& constant : $4)
+        {
+            *definition++ = std::move(constant);
+        }
+        for (auto& variable : $5)
+        {
+            *declaration++ = std::move(variable);
+        }
+        auto block = std::make_unique(position,
+            std::move(definitions), std::move(declarations), std::move($6));
+        $$ = std::make_unique(position,
+            $2, std::move(block));
+    }
+procedure_definition_part:
+    /* no procedure definitions. */ {}
+    | procedure_definitions { std::swap($1, $$); }
+procedure_definitions:
+    procedure_definition procedure_definitions
+    {
+        std::swap($$, $2);
+        $$.emplace($$.cbegin(), std::move($1));
+    }
+    | procedure_definition { $$.emplace_back(std::move($1)); }
+integer_literal: NUMBER
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position, $1);
+    }
+boolean_literal: BOOLEAN
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position, $1);
+    }
+variable_expression: IDENTIFIER
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position, $1);
+    }
+reference_expression: AT variable_expression
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position, std::move($2), '@');
+    }
+dereference_expression: factor HAT
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position, std::move($1), '^');
+    }
+expression:
+    comparand EQUALS comparand
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position,
+            std::move($1), std::move($3), '=');
+    }
+    | comparand NOT_EQUAL comparand
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position,
+            std::move($1), std::move($3), 'n');
+    }
+    | comparand GREATER_THAN comparand
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position,
+            std::move($1), std::move($3), '>');
+    }
+    | comparand LESS_THAN comparand
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position,
+            std::move($1), std::move($3), '<');
+    }
+    | comparand GREATER_EQUAL comparand
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position,
+            std::move($1), std::move($3), 'g');
+    }
+    | comparand LESS_EQUAL comparand
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position,
+            std::move($1), std::move($3), 'l');
+    }
+comparand:
+    term PLUS term
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position,
+            std::move($1), std::move($3), '+');
+    }
+    | term MINUS term
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position,
+            std::move($1), std::move($3), '-');
+    }
+    | term { $$ = std::move($1); }
+factor:
+    integer_literal { $$ = std::move($1); }
+    | boolean_literal { $$ = std::move($1); }
+    | variable_expression { $$ = std::move($1); }
+    | reference_expression { $$ = std::move($1); }
+    | dereference_expression { $$ = std::move($1); }
+    | LEFT_PAREN expression RIGHT_PAREN { $$ = std::move($2); }
+term:
+    factor MULTIPLICATION factor
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position,
+            std::move($1), std::move($3), '*');
+    }
+    | factor DIVISION factor
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position,
+            std::move($1), std::move($3), '/');
+    }
+    | factor { $$ = std::move($1); }
+type_expression:
+    HAT IDENTIFIER
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position, $2, true);
+    }
+    | IDENTIFIER
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position, $1, false);
+    }
+assign_statement: IDENTIFIER ASSIGNMENT expression
+    {
+        // Anchor at the first symbol for both line and column, as the other rules do.
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position, $1, std::move($3));
+    }
+call_statement: IDENTIFIER LEFT_PAREN arguments RIGHT_PAREN
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position, $1);
+        std::swap($3, $$->arguments());
+    }
+compound_statement: BEGIN_BLOCK statements END_BLOCK
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position);
+        std::swap($2, $$->statements());
+    }
+if_statement: IF expression THEN statement
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position,
+            std::move($2), std::move($4));
+    }
+while_statement: WHILE expression DO statement
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position,
+            std::move($2), std::move($4));
+    }
+statement:
+    assign_statement { $$ = std::move($1); }
+    | call_statement { $$ = std::move($1); }
+    | compound_statement { $$ = std::move($1); }
+    | if_statement { $$ = std::move($1); }
+    | while_statement { $$ = std::move($1); }
+variable_declaration: IDENTIFIER COLON type_expression
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position, $1, std::move($3));
+    }
+variable_declarations:
+    variable_declaration COMMA variable_declarations
+    {
+        std::swap($$, $3);
+        $$.emplace($$.cbegin(), std::move($1));
+    }
+    | variable_declaration { $$.emplace_back(std::move($1)); }
+constant_definition: IDENTIFIER EQUALS integer_literal
+    {
+        elna::source::position position{
+            static_cast(@1.begin.line),
+            static_cast(@1.begin.column)
+        };
+        $$ = std::make_unique(position,
+            $1, std::move($3));
+    };
+constant_definitions:
+    constant_definition COMMA constant_definitions
+    {
+        std::swap($$, $3);
+        $$.emplace($$.cbegin(), std::move($1));
+    }
+    | constant_definition { $$.emplace_back(std::move($1)); }
+constant_definition_part:
+    /* no constant definitions */ {}
+    | CONST constant_definitions SEMICOLON { std::swap($$, $2); };
+variable_declaration_part:
+    /* no variable declarations */ {}
+    | VAR variable_declarations SEMICOLON { std::swap($$, $2); };
+arguments:
+    /* no arguments */ {}
+    | expression COMMA arguments
+    {
+        std::swap($$, $3);
+        $$.emplace($$.cbegin(), std::move($1));
+    }
+    | expression { $$.emplace_back(std::move($1)); }
+statements:
+    /* no statements */ {}
+    | statement SEMICOLON statements
+    {
+        std::swap($$, $3);
+        $$.emplace($$.cbegin(), std::move($1));
+    }
+    | statement { $$.emplace_back(std::move($1)); }
+%%
+
+// Parse errors are reported by throwing, the same yy::parser::syntax_error
+// exception the scanner raises for illegal characters.
+void yy::parser::error(const location_type& loc, const std::string &message)
+{
+    throw yy::parser::syntax_error(loc, message);
+}
diff --git a/tests/empty_file.eln b/tests/empty_file.eln
new file mode 100644
index 0000000..e69de29
diff --git a/tests/failures/empty_file.txt b/tests/failures/empty_file.txt
new file mode 100644
index 0000000..e69de29