Generate a bison parser

This commit is contained in:
Eugen Wissner 2024-07-07 18:50:15 +02:00
parent 0dbbd3f403
commit 5ba7d7aef6
6 changed files with 602 additions and 1 deletions

View File

@ -6,7 +6,8 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD 17)
find_package(Boost COMPONENTS program_options REQUIRED) find_package(Boost COMPONENTS program_options REQUIRED)
find_package(FLEX) find_package(FLEX REQUIRED)
find_package(BISON REQUIRED)
include_directories(${Boost_INCLUDE_DIR}) include_directories(${Boost_INCLUDE_DIR})
FLEX_TARGET(scanner source/scanner.l ${CMAKE_CURRENT_BINARY_DIR}/scanner.cpp) FLEX_TARGET(scanner source/scanner.l ${CMAKE_CURRENT_BINARY_DIR}/scanner.cpp)
@ -26,3 +27,20 @@ add_executable(elna cli/main.cpp
) )
target_include_directories(elna PRIVATE include) target_include_directories(elna PRIVATE include)
target_link_libraries(elna LINK_PUBLIC ${Boost_LIBRARIES}) target_link_libraries(elna LINK_PUBLIC ${Boost_LIBRARIES})
# Generate a scanner from parser/lexer.ll and a parser from parser/parser.yy
# into the current binary directory.
FLEX_TARGET(lexer parser/lexer.ll ${CMAKE_CURRENT_BINARY_DIR}/lexer.cpp)
BISON_TARGET(parser parser/parser.yy ${CMAKE_CURRENT_BINARY_DIR}/parser.cpp)
# The scanner includes the header generated by Bison, so it has to be built
# after the parser has been generated.
add_flex_bison_dependency(lexer parser)
# Driver executable: parser/main.cpp, the sources under source/ and the
# generated scanner and parser.
# NOTE(review): "test" can be a reserved target name in CMake (policy CMP0037)
# once testing is enabled - confirm the name is safe for this project.
add_executable(test parser/main.cpp
source/lexer.cpp include/elna/source/lexer.hpp
source/parser.cpp include/elna/source/parser.hpp
source/types.cpp include/elna/source/types.hpp
source/symbol_table.cpp include/elna/source/symbol_table.hpp
source/result.cpp include/elna/source/result.hpp
source/semantic.cpp include/elna/source/semantic.hpp
source/optimizer.cpp include/elna/source/optimizer.hpp
${BISON_parser_OUTPUTS} ${FLEX_lexer_OUTPUTS}
)
# The binary directory is on the include path, presumably so that the generated
# "parser.hpp" can be found; "parser" and "include" hold the project sources.
target_include_directories(test PRIVATE ${CMAKE_CURRENT_BINARY_DIR} parser include)
# Linking against the Flex runtime is currently disabled.
# target_link_libraries(test ${FLEX_LIBRARIES})

131
parser/lexer.ll Normal file
View File

@ -0,0 +1,131 @@
%{
/* Prologue: configuration macros and includes for the generated scanner. */
/* Ask the generated scanner not to include <unistd.h>. */
#define YY_NO_UNISTD_H
/* Runs before every rule action: extend the current location by the length of the matched text. */
#define YY_USER_ACTION this->location.columns(yyleng);
#include <sstream>
#include "parser.hpp"
/*
 * Replace the default scanner entry point with a member function of
 * elna::syntax::FooLexer that returns a complete parser symbol.
 */
#undef YY_DECL
#define YY_DECL yy::parser::symbol_type elna::syntax::FooLexer::lex()
/* At the end of the input return the YYEOF symbol carrying the current location. */
#define yyterminate() return yy::parser::make_YYEOF(this->location)
%}
%option c++ noyywrap never-interactive
%option yyclass="elna::syntax::FooLexer"
%%
%{
    /* Runs on every call to lex(): the next token starts where the previous one ended. */
    this->location.step();
%}
\-\-.*      {
                /*
                 * Skip the comment. The location has to be stepped here as well,
                 * otherwise the skipped text becomes part of the following token
                 * and that token is reported at the wrong column.
                 */
                this->location.step();
            }
[\ \t\r]    {
                /* Skip the whitespace and move the token start past it. */
                this->location.step();
            }
\n+         {
                /* Advance the line counter by the number of newlines and restart the token. */
                this->location.lines(yyleng);
                this->location.step();
            }
if          { /* Reserved words; they are listed before the identifier rule so that they win on equal match length. */
              return yy::parser::make_IF(this->location); }
then        { return yy::parser::make_THEN(this->location); }
while       { return yy::parser::make_WHILE(this->location); }
do          { return yy::parser::make_DO(this->location); }
proc        { return yy::parser::make_PROCEDURE(this->location); }
begin       { return yy::parser::make_BEGIN_BLOCK(this->location); }
end         { return yy::parser::make_END_BLOCK(this->location); }
const       { return yy::parser::make_CONST(this->location); }
var         { return yy::parser::make_VAR(this->location); }
True        { /* Both boolean literals share one token kind and differ in the semantic value. */
              return yy::parser::make_BOOLEAN(true, this->location); }
False       { return yy::parser::make_BOOLEAN(false, this->location); }
[A-Za-z_][A-Za-z0-9_]*  { /* Any other name; the matched text becomes the semantic value. */
              return yy::parser::make_IDENTIFIER(yytext, this->location); }
[0-9]+      {
                /*
                 * The NUMBER token carries a 32-bit value. Parse with the widest
                 * standard integer type (it saturates on overflow) and reject
                 * literals that do not fit instead of silently truncating them.
                 */
                const long long value = std::strtoll(yytext, nullptr, 10);

                if (value > INT32_MAX)
                {
                    std::stringstream ss;

                    ss << "Integer literal " << yytext << " is out of range";
                    throw yy::parser::syntax_error(this->location, ss.str());
                }
                return yy::parser::make_NUMBER(static_cast<std::int32_t>(value), this->location);
            }
\(          { /* Punctuation and operators: each rule returns a value-less token with the current location. */
              return yy::parser::make_LEFT_PAREN(this->location); }
\)          { return yy::parser::make_RIGHT_PAREN(this->location); }
\>=         { return yy::parser::make_GREATER_EQUAL(this->location); }
\<=         { return yy::parser::make_LESS_EQUAL(this->location); }
\>          { return yy::parser::make_GREATER_THAN(this->location); }
\<          { return yy::parser::make_LESS_THAN(this->location); }
\/=         { return yy::parser::make_NOT_EQUAL(this->location); }
=           { return yy::parser::make_EQUALS(this->location); }
;           { return yy::parser::make_SEMICOLON(this->location); }
\.          { return yy::parser::make_DOT(this->location); }
,           { return yy::parser::make_COMMA(this->location); }
\+          { return yy::parser::make_PLUS(this->location); }
\-          { return yy::parser::make_MINUS(this->location); }
\*          { return yy::parser::make_MULTIPLICATION(this->location); }
\/          { return yy::parser::make_DIVISION(this->location); }
:=          { return yy::parser::make_ASSIGNMENT(this->location); }
:           { return yy::parser::make_COLON(this->location); }
\^          { return yy::parser::make_HAT(this->location); }
@           { return yy::parser::make_AT(this->location); }
.           {
                /*
                 * Anything not matched by the rules above is an error.
                 * Streaming a character type prints the character itself and
                 * ignores std::hex, so convert to unsigned int to get the code.
                 * The intermediate unsigned char avoids sign extension for
                 * bytes above 0x7f.
                 */
                std::stringstream ss;

                ss << "Illegal character 0x" << std::hex
                    << static_cast<unsigned int>(static_cast<unsigned char>(yytext[0]));
                throw yy::parser::syntax_error(this->location, ss.str());
            }
%%

46
parser/main.cpp Normal file
View File

@ -0,0 +1,46 @@
#include "parser.hpp"
#include <iostream>
#include <sstream>
int main()
{
std::istringstream inp(
"const world = 5, hello = 7;\n"
"var x: Int, y: ^Int;\n"
"begin\n"
"end.\n"
);
std::unique_ptr<elna::source::program> program;
int result{ 1 };
elna::syntax::FooLexer lexer(inp);
yy::parser parser(lexer, program);
try
{
result = parser();
}
catch (yy::parser::syntax_error& syntax_error)
{
std::cerr << syntax_error.location << ": " << syntax_error.what() << std::endl;
return result;
}
for (auto& definition : program->definitions())
{
auto const_definition = dynamic_cast<elna::source::constant_definition *>(definition.get());
std::cout << "const " << const_definition->identifier() << " = "
<< const_definition->body().number() << std::endl;
}
for (auto& declaration : program->declarations())
{
std::cout << "var " << declaration->identifier() << ": ";
if (declaration->type().is_pointer())
{
std::cout << '^';
}
std::cout << declaration->type().base() << std::endl;
}
return result;
}

406
parser/parser.yy Normal file
View File

@ -0,0 +1,406 @@
/*
 * This grammar uses the %header directive (available since Bison 3.8) and the
 * scanner uses the predefined YYEOF token (available since Bison 3.6), so
 * require a version that supports both.
 */
%require "3.8"
%language "c++"
/* Declarations the generated parser header depends on: AST types and the scanner base class. */
%code requires {
#include <cstdint>
#include "elna/source/parser.hpp"
/* Include the Flex C++ scanner header only if yyFlexLexerOnce is not defined yet. */
#if ! defined(yyFlexLexerOnce)
#include <FlexLexer.h>
#endif
namespace elna::syntax
{
/* Forward declaration; the class itself is defined in the "provides" block. */
class FooLexer;
}
}
%code provides {
    namespace elna::syntax
    {
        /**
         * Scanner based on the Flex C++ scanner class. Instead of the default
         * entry point it offers lex(), which returns complete parser symbols.
         */
        class FooLexer : public yyFlexLexer
        {
        public:
            /// Location of the token currently being scanned.
            yy::location location;

            /**
             * \param arg_yyin Stream the scanner reads from.
             */
            FooLexer(std::istream& arg_yyin) : yyFlexLexer(&arg_yyin) {}

            /**
             * Scans the next token.
             *
             * \return The next symbol for the parser.
             */
            yy::parser::symbol_type lex();
        };
    }
}
/* Parser configuration. */
%define api.token.raw
/* The scanner builds symbols with the generated make_* functions. */
%define api.token.constructor
/* Semantic values are C++ objects stored in a variant. */
%define api.value.type variant
%define parse.assert
/* The parser is constructed with the scanner and with the pointer that receives the parsed program. */
%parse-param {elna::syntax::FooLexer& lexer}
%parse-param {std::unique_ptr<elna::source::program>& program}
%locations
%header
/* Route the parser's yylex calls to the scanner object passed to the constructor. */
%code {
#define yylex lexer.lex
}
%start program;
/* Tokens that carry a semantic value. */
%token <std::string> IDENTIFIER "identifier"
%token <std::int32_t> NUMBER "number"
%token <bool> BOOLEAN
/* Reserved words. */
%token IF THEN WHILE DO
%token CONST VAR PROCEDURE
%token BEGIN_BLOCK END_BLOCK
/* NOTE(review): no grammar rule uses TRUE or FALSE, boolean literals arrive as BOOLEAN - confirm whether these two are still needed. */
%token TRUE FALSE
/* Punctuation and operators. */
%token LEFT_PAREN RIGHT_PAREN SEMICOLON DOT COMMA
%token GREATER_EQUAL LESS_EQUAL LESS_THAN GREATER_THAN NOT_EQUAL EQUALS
%token PLUS MINUS MULTIPLICATION DIVISION
%token ASSIGNMENT COLON HAT AT
/*
 * Semantic value types of the nonterminals: single AST nodes are owned by a
 * std::unique_ptr, lists of nodes by a std::vector of such pointers.
 */
%type <std::unique_ptr<elna::source::integer_literal>> integer_literal;
%type <std::unique_ptr<elna::source::boolean_literal>> boolean_literal;
%type <std::unique_ptr<elna::source::variable_expression>> variable_expression;
%type <std::unique_ptr<elna::source::constant_definition>> constant_definition;
%type <std::unique_ptr<elna::source::unary_expression>> reference_expression dereference_expression;
%type <std::vector<std::unique_ptr<elna::source::constant_definition>>> constant_definition_part constant_definitions;
%type <std::unique_ptr<elna::source::type_expression>> type_expression;
%type <std::unique_ptr<elna::source::declaration>> variable_declaration;
%type <std::vector<std::unique_ptr<elna::source::declaration>>> variable_declaration_part variable_declarations;
%type <std::unique_ptr<elna::source::assign_statement>> assign_statement;
/* All expression levels share the base expression type. */
%type <std::unique_ptr<elna::source::expression>> expression factor term comparand;
%type <std::unique_ptr<elna::source::statement>> statement;
%type <std::vector<std::unique_ptr<elna::source::expression>>> arguments;
%type <std::unique_ptr<elna::source::call_statement>> call_statement;
%type <std::unique_ptr<elna::source::compound_statement>> compound_statement;
%type <std::vector<std::unique_ptr<elna::source::statement>>> statements;
%type <std::unique_ptr<elna::source::if_statement>> if_statement;
%type <std::unique_ptr<elna::source::while_statement>> while_statement;
%type <std::unique_ptr<elna::source::procedure_definition>> procedure_definition;
%type <std::vector<std::unique_ptr<elna::source::procedure_definition>>> procedure_definition_part procedure_definitions;
%%
/*
 * Start symbol: optional constants, variables and procedures followed by the
 * main statement and a dot. The result is stored in the "program" parse
 * parameter.
 */
program: constant_definition_part variable_declaration_part procedure_definition_part statement DOT
    {
        // No source location is recorded for the program node.
        elna::source::position position;

        // Constants and procedures end up in one list of definitions.
        // Previously the parsed procedures were dropped here.
        // NOTE(review): relies on procedure_definition deriving from definition.
        std::vector<std::unique_ptr<elna::source::definition>> definitions;
        definitions.reserve($1.size() + $3.size());

        for (auto& constant : $1)
        {
            definitions.push_back(std::move(constant));
        }
        for (auto& procedure : $3)
        {
            definitions.push_back(std::move(procedure));
        }
        // The declaration list already has the element type the node expects.
        program = std::make_unique<elna::source::program>(position,
            std::move(definitions), std::move($2), std::move($4));
    }
/*
 * A procedure: the keyword, a name, a semicolon, optional constants and
 * variables, the body statement and a closing semicolon.
 */
procedure_definition:
    PROCEDURE IDENTIFIER SEMICOLON constant_definition_part variable_declaration_part statement SEMICOLON
        {
            // No source location is recorded for the procedure and its block.
            elna::source::position position;

            // Re-wrap the constants as pointers to the definition base type.
            std::vector<std::unique_ptr<elna::source::definition>> definitions;
            definitions.reserve($4.size());
            for (auto& constant : $4)
            {
                definitions.push_back(std::move(constant));
            }
            std::vector<std::unique_ptr<elna::source::declaration>> declarations;
            declarations.reserve($5.size());
            for (auto& variable : $5)
            {
                declarations.push_back(std::move(variable));
            }
            auto block = std::make_unique<elna::source::block>(position,
                std::move(definitions), std::move(declarations), std::move($6));

            $$ = std::make_unique<elna::source::procedure_definition>(position, $2, std::move(block));
        }
/* Possibly empty list of procedures. */
procedure_definition_part:
    /* no procedure definitions. */ {}
    | procedure_definitions
        {
            $$.swap($1);
        }
/* One or more procedures; the list is built from the right, so each new element goes to the front. */
procedure_definitions:
    procedure_definition procedure_definitions
        {
            $$.swap($2);
            $$.insert($$.begin(), std::move($1));
        }
    | procedure_definition
        {
            $$.push_back(std::move($1));
        }
/* Integer literal node positioned at the start of the token. */
integer_literal: NUMBER
    {
        const auto line = static_cast<std::size_t>(@1.begin.line);
        const auto column = static_cast<std::size_t>(@1.begin.column);
        elna::source::position position{ line, column };

        $$ = std::make_unique<elna::source::integer_literal>(position, $1);
    }
/* Boolean literal node positioned at the start of the token. */
boolean_literal: BOOLEAN
    {
        const auto line = static_cast<std::size_t>(@1.begin.line);
        const auto column = static_cast<std::size_t>(@1.begin.column);
        elna::source::position position{ line, column };

        $$ = std::make_unique<elna::source::boolean_literal>(position, $1);
    }
/* Use of a named variable or constant. */
variable_expression: IDENTIFIER
    {
        const auto line = static_cast<std::size_t>(@1.begin.line);
        const auto column = static_cast<std::size_t>(@1.begin.column);
        elna::source::position position{ line, column };

        $$ = std::make_unique<elna::source::variable_expression>(position, $1);
    }
/* Prefix reference operator applied to a variable. */
reference_expression: AT variable_expression
    {
        const auto line = static_cast<std::size_t>(@1.begin.line);
        const auto column = static_cast<std::size_t>(@1.begin.column);
        elna::source::position position{ line, column };

        $$ = std::make_unique<elna::source::unary_expression>(position, std::move($2), '@');
    }
/* Postfix dereference operator applied to a factor. */
dereference_expression: factor HAT
    {
        const auto line = static_cast<std::size_t>(@1.begin.line);
        const auto column = static_cast<std::size_t>(@1.begin.column);
        elna::source::position position{ line, column };

        $$ = std::make_unique<elna::source::unary_expression>(position, std::move($1), '^');
    }
/*
 * Top expression level: an optional comparison of two comparands. The operator
 * is passed to the node as a character: '=', 'n' (not equal), '>', '<',
 * 'g' (greater or equal) and 'l' (less or equal).
 *
 * The last alternative accepts a comparand on its own. Without it every
 * expression had to be a comparison, so an assignment of a plain value, a call
 * argument or a parenthesized sum could not be parsed.
 */
expression:
    comparand EQUALS comparand
        {
            elna::source::position position{
                static_cast<std::size_t>(@1.begin.line),
                static_cast<std::size_t>(@1.begin.column)
            };
            $$ = std::make_unique<elna::source::binary_expression>(position,
                std::move($1), std::move($3), '=');
        }
    | comparand NOT_EQUAL comparand
        {
            elna::source::position position{
                static_cast<std::size_t>(@1.begin.line),
                static_cast<std::size_t>(@1.begin.column)
            };
            $$ = std::make_unique<elna::source::binary_expression>(position,
                std::move($1), std::move($3), 'n');
        }
    | comparand GREATER_THAN comparand
        {
            elna::source::position position{
                static_cast<std::size_t>(@1.begin.line),
                static_cast<std::size_t>(@1.begin.column)
            };
            $$ = std::make_unique<elna::source::binary_expression>(position,
                std::move($1), std::move($3), '>');
        }
    | comparand LESS_THAN comparand
        {
            elna::source::position position{
                static_cast<std::size_t>(@1.begin.line),
                static_cast<std::size_t>(@1.begin.column)
            };
            $$ = std::make_unique<elna::source::binary_expression>(position,
                std::move($1), std::move($3), '<');
        }
    | comparand GREATER_EQUAL comparand
        {
            elna::source::position position{
                static_cast<std::size_t>(@1.begin.line),
                static_cast<std::size_t>(@1.begin.column)
            };
            $$ = std::make_unique<elna::source::binary_expression>(position,
                std::move($1), std::move($3), 'g');
        }
    | comparand LESS_EQUAL comparand
        {
            elna::source::position position{
                static_cast<std::size_t>(@1.begin.line),
                static_cast<std::size_t>(@1.begin.column)
            };
            $$ = std::make_unique<elna::source::binary_expression>(position,
                std::move($1), std::move($3), 'l');
        }
    | comparand { $$ = std::move($1); }
/* Additive level: the sum or difference of two terms, or a single term. */
comparand:
    term PLUS term
        {
            const auto line = static_cast<std::size_t>(@1.begin.line);
            const auto column = static_cast<std::size_t>(@1.begin.column);
            elna::source::position position{ line, column };

            $$ = std::make_unique<elna::source::binary_expression>(position, std::move($1), std::move($3), '+');
        }
    | term MINUS term
        {
            const auto line = static_cast<std::size_t>(@1.begin.line);
            const auto column = static_cast<std::size_t>(@1.begin.column);
            elna::source::position position{ line, column };

            $$ = std::make_unique<elna::source::binary_expression>(position, std::move($1), std::move($3), '-');
        }
    | term
        {
            $$ = std::move($1);
        }
/* Operands: literals, variables, reference and dereference expressions, parenthesized expressions. */
factor:
    integer_literal
        {
            $$ = std::move($1);
        }
    | boolean_literal
        {
            $$ = std::move($1);
        }
    | variable_expression
        {
            $$ = std::move($1);
        }
    | reference_expression
        {
            $$ = std::move($1);
        }
    | dereference_expression
        {
            $$ = std::move($1);
        }
    | LEFT_PAREN expression RIGHT_PAREN
        {
            // The parentheses leave no node of their own in the tree.
            $$ = std::move($2);
        }
/* Multiplicative level: the product or quotient of two factors, or a single factor. */
term:
    factor MULTIPLICATION factor
        {
            const auto line = static_cast<std::size_t>(@1.begin.line);
            const auto column = static_cast<std::size_t>(@1.begin.column);
            elna::source::position position{ line, column };

            $$ = std::make_unique<elna::source::binary_expression>(position, std::move($1), std::move($3), '*');
        }
    | factor DIVISION factor
        {
            const auto line = static_cast<std::size_t>(@1.begin.line);
            const auto column = static_cast<std::size_t>(@1.begin.column);
            elna::source::position position{ line, column };

            $$ = std::make_unique<elna::source::binary_expression>(position, std::move($1), std::move($3), '/');
        }
    | factor
        {
            $$ = std::move($1);
        }
/* A type name, optionally preceded by a hat that marks a pointer to that type. */
type_expression:
    HAT IDENTIFIER
        {
            const auto line = static_cast<std::size_t>(@1.begin.line);
            const auto column = static_cast<std::size_t>(@1.begin.column);
            elna::source::position position{ line, column };

            // The last argument is the pointer flag.
            $$ = std::make_unique<elna::source::type_expression>(position, $2, true);
        }
    | IDENTIFIER
        {
            const auto line = static_cast<std::size_t>(@1.begin.line);
            const auto column = static_cast<std::size_t>(@1.begin.column);
            elna::source::position position{ line, column };

            $$ = std::make_unique<elna::source::type_expression>(position, $1, false);
        }
/* Assignment of an expression to a named variable. */
assign_statement: IDENTIFIER ASSIGNMENT expression
    {
        // Line and column are both taken from the identifier, as in every
        // other rule. The column used to come from the assignment operator,
        // which produced a position that belongs to neither symbol.
        elna::source::position position{
            static_cast<std::size_t>(@1.begin.line),
            static_cast<std::size_t>(@1.begin.column)
        };
        $$ = std::make_unique<elna::source::assign_statement>(position, $1, std::move($3));
    }
/* Procedure call with a parenthesized argument list. */
call_statement: IDENTIFIER LEFT_PAREN arguments RIGHT_PAREN
    {
        const auto line = static_cast<std::size_t>(@1.begin.line);
        const auto column = static_cast<std::size_t>(@1.begin.column);
        elna::source::position position{ line, column };

        $$ = std::make_unique<elna::source::call_statement>(position, $1);
        // Hand the parsed arguments over to the new node.
        $$->arguments().swap($3);
    }
/* Statement list enclosed in the block keywords. */
compound_statement: BEGIN_BLOCK statements END_BLOCK
    {
        const auto line = static_cast<std::size_t>(@1.begin.line);
        const auto column = static_cast<std::size_t>(@1.begin.column);
        elna::source::position position{ line, column };

        $$ = std::make_unique<elna::source::compound_statement>(position);
        // Hand the parsed statements over to the new node.
        $$->statements().swap($2);
    }
/* Conditional without an alternative branch. */
if_statement: IF expression THEN statement
    {
        const auto line = static_cast<std::size_t>(@1.begin.line);
        const auto column = static_cast<std::size_t>(@1.begin.column);
        elna::source::position position{ line, column };

        $$ = std::make_unique<elna::source::if_statement>(position, std::move($2), std::move($4));
    }
/* Loop that has a condition and a body statement. */
while_statement: WHILE expression DO statement
    {
        const auto line = static_cast<std::size_t>(@1.begin.line);
        const auto column = static_cast<std::size_t>(@1.begin.column);
        elna::source::position position{ line, column };

        $$ = std::make_unique<elna::source::while_statement>(position, std::move($2), std::move($4));
    }
/* Any statement; the concrete node is converted to the statement base type. */
statement:
    assign_statement
        {
            $$ = std::move($1);
        }
    | call_statement
        {
            $$ = std::move($1);
        }
    | compound_statement
        {
            $$ = std::move($1);
        }
    | if_statement
        {
            $$ = std::move($1);
        }
    | while_statement
        {
            $$ = std::move($1);
        }
/* A variable name, a colon and the type of the variable. */
variable_declaration: IDENTIFIER COLON type_expression
    {
        const auto line = static_cast<std::size_t>(@1.begin.line);
        const auto column = static_cast<std::size_t>(@1.begin.column);
        elna::source::position position{ line, column };

        $$ = std::make_unique<elna::source::declaration>(position, $1, std::move($3));
    }
/* Comma separated, non-empty list; built from the right, so each new element goes to the front. */
variable_declarations:
    variable_declaration COMMA variable_declarations
        {
            $$.swap($3);
            $$.insert($$.begin(), std::move($1));
        }
    | variable_declaration
        {
            $$.push_back(std::move($1));
        }
/* A constant name, an equals sign and an integer literal. */
constant_definition: IDENTIFIER EQUALS integer_literal
    {
        const auto line = static_cast<std::size_t>(@1.begin.line);
        const auto column = static_cast<std::size_t>(@1.begin.column);
        elna::source::position position{ line, column };

        $$ = std::make_unique<elna::source::constant_definition>(position, $1, std::move($3));
    };
/* Comma separated, non-empty list; built from the right, so each new element goes to the front. */
constant_definitions:
    constant_definition COMMA constant_definitions
        {
            $$.swap($3);
            $$.insert($$.begin(), std::move($1));
        }
    | constant_definition
        {
            $$.push_back(std::move($1));
        }
/* Optional constant section: the keyword, the definitions and a semicolon. */
constant_definition_part:
    /* no constant definitions */ {}
    | CONST constant_definitions SEMICOLON
        {
            $$.swap($2);
        };
/* Optional variable section: the keyword, the declarations and a semicolon. */
variable_declaration_part:
    /* no variable declarations */ {}
    | VAR variable_declarations SEMICOLON
        {
            $$.swap($2);
        };
/* Possibly empty, comma separated list of call arguments. */
arguments:
    /* no arguments */ {}
    | expression COMMA arguments
        {
            $$.swap($3);
            $$.insert($$.begin(), std::move($1));
        }
    | expression
        {
            $$.push_back(std::move($1));
        }
/* Possibly empty, semicolon separated list of statements. */
statements:
    /* no statements */ {}
    | statement SEMICOLON statements
        {
            $$.swap($3);
            $$.insert($$.begin(), std::move($1));
        }
    | statement
        {
            $$.push_back(std::move($1));
        }
%%
void yy::parser::error(const location_type& loc, const std::string &message)
{
throw yy::parser::syntax_error(loc, message);
}

0
tests/empty_file.eln Normal file
View File

View File