elna/lib/Language/Elna/Parser.hs

220 lines
5.8 KiB
Haskell

module Language.Elna.Parser
( Parser
, programP
) where
import Control.Monad (void)
import Control.Monad.Combinators.Expr (Operator(..), makeExprParser)
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Void (Void)
import Language.Elna.AST
( VariableAccess(..)
, Condition(..)
, Declaration(..)
, Expression(..)
, Identifier(..)
, Literal(..)
, Parameter(..)
, Program(..)
, Statement(..)
, TypeExpression(..)
, VariableDeclaration(..)
)
import Text.Megaparsec
( Parsec
, MonadParsec(..)
, (<?>)
, optional
, between
, sepBy
, choice
)
import Text.Megaparsec.Char
( alphaNumChar
, char
, letterChar
, space1
, string
)
import qualified Text.Megaparsec.Char.Lexer as Lexer
import Control.Applicative (Alternative(..))
import Data.Maybe (isJust)
import Data.Functor (($>))
type Parser = Parsec Void Text
space :: Parser ()
space = Lexer.space space1 (Lexer.skipLineComment "//")
$ Lexer.skipBlockComment "/*" "*/"
lexeme :: forall a. Parser a -> Parser a
lexeme = Lexer.lexeme space
symbol :: Text -> Parser Text
symbol = Lexer.symbol space
blockP :: forall a. Parser a -> Parser a
blockP = between (symbol "{") (symbol "}")
procedureP :: Parser ()
procedureP = void $ symbol "proc"
parensP :: forall a. Parser a -> Parser a
parensP = between (symbol "(") (symbol ")")
bracketsP :: forall a. Parser a -> Parser a
bracketsP = between (symbol "[") (symbol "]")
colonP :: Parser ()
colonP = void $ symbol ":"
semicolonP :: Parser ()
semicolonP = void $ symbol ";"
identifierP :: Parser Identifier
identifierP =
let wordParser = (:) <$> letterChar <*> many alphaNumChar <?> "identifier"
in fmap Identifier $ lexeme $ Text.pack <$> wordParser
typeExpressionP :: Parser TypeExpression
typeExpressionP = arrayTypeExpression
<|> NamedType <$> identifierP
<?> "type expression"
where
arrayTypeExpression = ArrayType
<$> (symbol "array" *> bracketsP (lexeme Lexer.decimal))
<*> (symbol "of" *> typeExpressionP)
typeDefinitionP :: Parser Declaration
typeDefinitionP = TypeDefinition
<$> (symbol "type" *> identifierP)
<*> (symbol "=" *> typeExpressionP)
<* semicolonP
<?> "type definition"
variableDeclarationP :: Parser VariableDeclaration
variableDeclarationP = VariableDeclaration
<$> (symbol "var" *> identifierP)
<*> (colonP *> typeExpressionP)
<* semicolonP
<?> "variable declaration"
parameterP :: Parser Parameter
parameterP = paramCons
<$> optional (symbol "ref")
<*> identifierP
<*> (colonP *> typeExpressionP)
where
paramCons ref name typeName = Parameter name typeName (isJust ref)
commaP :: Parser ()
commaP = void $ symbol ","
literalP :: Parser Literal
literalP
= HexadecimalLiteral <$> (string "0x" *> lexeme Lexer.hexadecimal)
<|> IntegerLiteral <$> lexeme Lexer.decimal
<|> CharacterLiteral <$> lexeme charP
<|> BooleanLiteral <$> (symbol "true" $> True)
<|> BooleanLiteral <$> (symbol "false" $> False)
where
charP = fromIntegral . fromEnum
<$> between (char '\'') (char '\'') Lexer.charLiteral
termP :: Parser Expression
termP = choice
[ parensP expressionP
, LiteralExpression <$> literalP
, VariableExpression <$> variableAccessP
]
variableAccessP :: Parser VariableAccess
variableAccessP = do
identifier <- identifierP
indices <- many $ bracketsP expressionP
pure $ foldr (flip ArrayAccess) (VariableAccess identifier) indices
operatorTable :: [[Operator Parser Expression]]
operatorTable =
[ unaryOperator
, factorOperator
, termOperator
]
where
unaryOperator =
[ prefix "-" NegationExpression
, prefix "+" id
]
factorOperator =
[ binary "*" ProductExpression
, binary "/" DivisionExpression
]
termOperator =
[ binary "+" SumExpression
, binary "-" SubtractionExpression
]
prefix name f = Prefix (f <$ symbol name)
binary name f = InfixL (f <$ symbol name)
expressionP :: Parser Expression
expressionP = makeExprParser termP operatorTable
conditionP :: Parser Condition
conditionP = do
lhs <- expressionP
conditionCons <- choice comparisonOperator
conditionCons lhs <$> expressionP
where
comparisonOperator =
[ symbol "<" >> pure LessCondition
, symbol "<=" >> pure LessOrEqualCondition
, symbol ">" >> pure GreaterCondition
, symbol ">=" >> pure GreaterOrEqualCondition
, symbol "=" >> pure EqualCondition
, symbol "#" >> pure NonEqualCondition
]
statementP :: Parser Statement
statementP
= EmptyStatement <$ semicolonP
<|> CompoundStatement <$> blockP (many statementP)
<|> try assignmentP
<|> try ifElseP
<|> try whileP
<|> try callP
<?> "statement"
where
ifElseP = IfStatement
<$> (symbol "if" *> parensP conditionP)
<*> statementP
<*> optional (symbol "else" *> statementP)
whileP = WhileStatement
<$> (symbol "while" *> parensP conditionP)
<*> statementP
callP = CallStatement
<$> identifierP
<*> parensP (sepBy expressionP commaP)
<* semicolonP
assignmentP = AssignmentStatement
<$> variableAccessP
<* symbol ":="
<*> expressionP
<* semicolonP
procedureDefinitionP :: Parser Declaration
procedureDefinitionP = procedureCons
<$> (procedureP *> identifierP)
<*> parensP (sepBy parameterP commaP)
<*> blockP ((,) <$> many variableDeclarationP <*> many statementP)
<?> "procedure definition"
where
procedureCons procedureName parameters (variables, body) =
ProcedureDefinition procedureName parameters variables body
declarationP :: Parser Declaration
declarationP = typeDefinitionP <|> procedureDefinitionP
programP :: Parser Program
programP = Program <$> many declarationP