graphql/Language/GraphQL/Lexer.hs

{-# LANGUAGE ExplicitForAll #-}
{-# LANGUAGE OverloadedStrings #-}
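
-- | Lexer for GraphQL documents: parsers for punctuators, names, numbers,
-- strings and block strings.
--
-- A minimal usage sketch (illustrative only; 'Text.Megaparsec.parseMaybe'
-- is assumed to be imported by the caller):
--
-- > parseMaybe (spaceConsumer *> name) "  user  "  -- Just "user"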
module Language.GraphQL.Lexer
    ( Parser
    , amp
    , at
    , bang
    , blockString
    , braces
    , brackets
    , colon
    , comment
    , dollar
    , equals
    , float
    , integer
    , lexeme
    , name
    , parens
    , pipe
    , spaceConsumer
    , spread
    , string
    , symbol
    ) where

import Control.Applicative ( Alternative(..)
                           , liftA2
                           )
import Data.Char ( chr
                 , digitToInt
                 , isAsciiLower
                 , isAsciiUpper
                 , ord
                 )
import Data.Foldable (foldl')
import Data.List (dropWhileEnd)
import Data.Proxy (Proxy(..))
import Data.Void (Void)
import Text.Megaparsec ( Parsec
                        , MonadParsec
                        , Token
                        , between
                        , chunk
                        , chunkToTokens
                        , lookAhead
                        , notFollowedBy
                        , oneOf
                        , option
                        , satisfy
                        , sepBy
                        , skipSome
                        , takeP
                        , takeWhile1P
                        , try
                        )
import Text.Megaparsec.Char ( char
                            , digitChar
                            , space1
                            )
import qualified Text.Megaparsec.Char.Lexer as Lexer
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL

-- | Standard parser over strict 'T.Text'.
-- Its type argument is the type of the parsed value.
type Parser = Parsec Void T.Text
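
-- | Skips a run of white space or commas; both are insignificant in GraphQL.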
ignoredCharacters :: Parser ()
ignoredCharacters = space1 <|> skipSome (char ',')
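
-- | Consumer of ignored tokens: white space, commas and comments.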
spaceConsumer :: Parser ()
spaceConsumer = Lexer.space ignoredCharacters comment empty

-- | Parser for comments.
comment :: Parser ()
comment = Lexer.skipLineComment "#"
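
-- | Turns a parser into a lexeme: the given parser followed by ignored tokens.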
lexeme :: forall a. Parser a -> Parser a
lexeme = Lexer.lexeme spaceConsumer
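
-- | Parses the given text as a symbol and skips the ignored tokens after it.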
symbol :: T.Text -> Parser T.Text
symbol = Lexer.symbol spaceConsumer
-- | Parser for "!".
bang :: Parser Char
bang = char '!'
-- | Parser for "$".
dollar :: Parser Char
dollar = char '$'
-- | Parser for "@".
at :: Parser Char
at = char '@'
-- | Parser for "&".
amp :: Parser T.Text
amp = symbol "&"
-- | Parser for ":".
colon :: Parser T.Text
colon = symbol ":"
-- | Parser for "=".
equals :: Parser T.Text
equals = symbol "="
-- | Parser for the spread operator (...).
spread :: Parser T.Text
spread = symbol "..."
-- | Parser for "|".
pipe :: Parser T.Text
pipe = symbol "|"
-- | Parser for an expression between "(" and ")".
parens :: forall a. Parser a -> Parser a
parens = between (symbol "(") (symbol ")")
-- | Parser for an expression between "[" and "]".
brackets :: forall a. Parser a -> Parser a
brackets = between (symbol "[") (symbol "]")
-- | Parser for an expression between "{" and "}".
braces :: forall a. Parser a -> Parser a
braces = between (symbol "{") (symbol "}")

-- | Parser for strings.
string :: Parser T.Text
string = between "\"" "\"" stringValue
  where
    stringValue = T.pack <$> many stringCharacter
    stringCharacter = satisfy isStringCharacter1
        <|> escapeSequence
    isStringCharacter1 = liftA2 (&&) isSourceCharacter isChunkDelimiter

-- | Parser for block strings.
blockString :: Parser T.Text
blockString = between "\"\"\"" "\"\"\"" stringValue
  where
    stringValue = do
        byLine <- sepBy (many blockStringCharacter) lineTerminator
        -- Strip the shortest leading white-space run found on the lines
        -- after the first one, then drop leading and trailing lines that
        -- are empty or contain only white space.
        let indentSize = foldr countIndent 0 $ tail byLine
            withoutIndent = head byLine : (removeIndent indentSize <$> tail byLine)
            withoutEmptyLines = liftA2 (.) dropWhile dropWhileEnd removeEmptyLine withoutIndent
        return $ T.intercalate "\n" $ T.concat <$> withoutEmptyLines
    removeEmptyLine [] = True
    removeEmptyLine [x] = T.null x || isWhiteSpace (T.head x)
    removeEmptyLine _ = False
    blockStringCharacter
        = takeWhile1P Nothing isWhiteSpace
        <|> takeWhile1P Nothing isBlockStringCharacter1
        <|> escapeTripleQuote
        <|> try (chunk "\"" <* notFollowedBy (chunk "\"\""))
    -- An escaped triple quote (\""") yields """; a lone backslash is
    -- returned as is.
    escapeTripleQuote = chunk "\\" >>= flip option (chunk "\"\"\"")
    isBlockStringCharacter1 = liftA2 (&&) isSourceCharacter isChunkDelimiter
    countIndent [] acc = acc
    countIndent (x:_) acc
        | T.null x = acc
        | not (isWhiteSpace $ T.head x) = acc
        | acc == 0 = T.length x
        | otherwise = min acc $ T.length x
    removeIndent _ [] = []
    removeIndent n (x:chunks) = T.drop n x : chunks

-- | Parser for integers.
integer :: Integral a => Parser a
integer = Lexer.signed (pure ()) $ lexeme Lexer.decimal

-- | Parser for floating-point numbers.
float :: Parser Double
float = Lexer.signed (pure ()) $ lexeme Lexer.float

-- | Parser for names (/[_A-Za-z][_0-9A-Za-z]*/).
name :: Parser T.Text
name = do
    firstLetter <- nameFirstLetter
    rest <- many $ nameFirstLetter <|> digitChar
    _ <- spaceConsumer
    return $ TL.toStrict $ TL.cons firstLetter $ TL.pack rest
  where
    nameFirstLetter = satisfy isAsciiUpper <|> satisfy isAsciiLower <|> char '_'
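
-- | True for characters that do not delimit a string chunk: anything but
-- a quote, a backslash or a line terminator.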
isChunkDelimiter :: Char -> Bool
isChunkDelimiter = flip notElem ['"', '\\', '\n', '\r']
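
-- | True for GraphQL white space: space and horizontal tab.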
isWhiteSpace :: Char -> Bool
isWhiteSpace = liftA2 (||) (== ' ') (== '\t')
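
-- | Parser for a line terminator: CRLF, LF or CR.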
lineTerminator :: Parser T.Text
lineTerminator = chunk "\r\n" <|> chunk "\n" <|> chunk "\r"
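
-- | True for characters allowed in a GraphQL source document: tab, line
-- feed, carriage return and everything from U+0020 upwards.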
isSourceCharacter :: Char -> Bool
isSourceCharacter = isSourceCharacter' . ord
  where
    isSourceCharacter' code = code >= 0x0020
        || code == 0x0009
        || code == 0x000a
        || code == 0x000d
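
-- | Parser for a string escape sequence, including \uXXXX Unicode escapes.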
escapeSequence :: Parser Char
escapeSequence = do
    _ <- char '\\'
    escaped <- oneOf ['"', '\\', '/', 'b', 'f', 'n', 'r', 't', 'u']
    case escaped of
        'b' -> return '\b'
        'f' -> return '\f'
        'n' -> return '\n'
        'r' -> return '\r'
        't' -> return '\t'
        'u' -> chr . foldl' step 0
            . chunkToTokens (Proxy :: Proxy T.Text)
            <$> takeP Nothing 4
        _ -> return escaped
  where
    step accumulator = (accumulator * 16 +) . digitToInt