forked from OSS/graphql
219 lines
6.1 KiB
Haskell
219 lines
6.1 KiB
Haskell
{-# LANGUAGE ExplicitForAll #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
module Language.GraphQL.Lexer
|
|
( Parser
|
|
, amp
|
|
, at
|
|
, bang
|
|
, blockString
|
|
, braces
|
|
, brackets
|
|
, colon
|
|
, dollar
|
|
, comment
|
|
, equals
|
|
, integer
|
|
, float
|
|
, lexeme
|
|
, name
|
|
, parens
|
|
, pipe
|
|
, spaceConsumer
|
|
, spread
|
|
, string
|
|
, symbol
|
|
) where
|
|
|
|
import Control.Applicative ( Alternative(..)
|
|
, liftA2
|
|
)
|
|
import Data.Char ( chr
|
|
, digitToInt
|
|
, isAsciiLower
|
|
, isAsciiUpper
|
|
, ord
|
|
)
|
|
import Data.Foldable (foldl')
|
|
import Data.List (dropWhileEnd)
|
|
import Data.Proxy (Proxy(..))
|
|
import Data.Void (Void)
|
|
import Text.Megaparsec ( Parsec
|
|
, MonadParsec
|
|
, Token
|
|
, between
|
|
, chunk
|
|
, chunkToTokens
|
|
, lookAhead
|
|
, notFollowedBy
|
|
, oneOf
|
|
, option
|
|
, satisfy
|
|
, sepBy
|
|
, skipSome
|
|
, takeP
|
|
, takeWhile1P
|
|
, try
|
|
)
|
|
import Text.Megaparsec.Char ( char
|
|
, digitChar
|
|
, space1
|
|
)
|
|
import qualified Text.Megaparsec.Char.Lexer as Lexer
|
|
import qualified Data.Text as T
|
|
import qualified Data.Text.Lazy as TL
|
|
|
|
-- | Standard parser.
|
|
-- Accepts the type of the parsed token.
|
|
type Parser = Parsec Void T.Text
|
|
|
|
ignoredCharacters :: Parser ()
|
|
ignoredCharacters = space1 <|> skipSome (char ',')
|
|
|
|
spaceConsumer :: Parser ()
|
|
spaceConsumer = Lexer.space ignoredCharacters comment empty
|
|
|
|
-- | Parser for comments.
|
|
comment :: Parser ()
|
|
comment = Lexer.skipLineComment "#"
|
|
|
|
lexeme :: forall a. Parser a -> Parser a
|
|
lexeme = Lexer.lexeme spaceConsumer
|
|
|
|
symbol :: T.Text -> Parser T.Text
|
|
symbol = Lexer.symbol spaceConsumer
|
|
|
|
-- | Parser for "!".
|
|
bang :: Parser Char
|
|
bang = char '!'
|
|
|
|
-- | Parser for "$".
|
|
dollar :: Parser Char
|
|
dollar = char '$'
|
|
|
|
-- | Parser for "@".
|
|
at :: Parser Char
|
|
at = char '@'
|
|
|
|
-- | Parser for "&".
|
|
amp :: Parser T.Text
|
|
amp = symbol "&"
|
|
|
|
-- | Parser for ":".
|
|
colon :: Parser T.Text
|
|
colon = symbol ":"
|
|
|
|
-- | Parser for "=".
|
|
equals :: Parser T.Text
|
|
equals = symbol "="
|
|
|
|
-- | Parser for the spread operator (...).
|
|
spread :: Parser T.Text
|
|
spread = symbol "..."
|
|
|
|
-- | Parser for "|".
|
|
pipe :: Parser T.Text
|
|
pipe = symbol "|"
|
|
|
|
-- | Parser for an expression between "(" and ")".
|
|
parens :: forall a. Parser a -> Parser a
|
|
parens = between (symbol "(") (symbol ")")
|
|
|
|
-- | Parser for an expression between "[" and "]".
|
|
brackets :: forall a. Parser a -> Parser a
|
|
brackets = between (symbol "[") (symbol "]")
|
|
|
|
-- | Parser for an expression between "{" and "}".
|
|
braces :: forall a. Parser a -> Parser a
|
|
braces = between (symbol "{") (symbol "}")
|
|
|
|
-- | Parser for strings.
|
|
string :: Parser T.Text
|
|
string = between "\"" "\"" stringValue
|
|
where
|
|
stringValue = T.pack <$> many stringCharacter
|
|
stringCharacter = satisfy isStringCharacter1
|
|
<|> escapeSequence
|
|
isStringCharacter1 = liftA2 (&&) isSourceCharacter isChunkDelimiter
|
|
|
|
-- | Parser for block strings.
|
|
blockString :: Parser T.Text
|
|
blockString = between "\"\"\"" "\"\"\"" stringValue
|
|
where
|
|
stringValue = do
|
|
byLine <- sepBy (many blockStringCharacter) lineTerminator
|
|
let indentSize = foldr countIndent 0 $ tail byLine
|
|
withoutIndent = head byLine : (removeIndent indentSize <$> tail byLine)
|
|
withoutEmptyLines = liftA2 (.) dropWhile dropWhileEnd removeEmptyLine withoutIndent
|
|
|
|
return $ T.intercalate "\n" $ T.concat <$> withoutEmptyLines
|
|
removeEmptyLine [] = True
|
|
removeEmptyLine [x] = T.null x || isWhiteSpace (T.head x)
|
|
removeEmptyLine _ = False
|
|
blockStringCharacter
|
|
= takeWhile1P Nothing isWhiteSpace
|
|
<|> takeWhile1P Nothing isBlockStringCharacter1
|
|
<|> escapeTripleQuote
|
|
<|> try (chunk "\"" <* notFollowedBy (chunk "\"\""))
|
|
escapeTripleQuote = chunk "\\" >>= flip option (chunk "\"\"")
|
|
isBlockStringCharacter1 = liftA2 (&&) isSourceCharacter isChunkDelimiter
|
|
countIndent [] acc = acc
|
|
countIndent (x:_) acc
|
|
| T.null x = acc
|
|
| not (isWhiteSpace $ T.head x) = acc
|
|
| acc == 0 = T.length x
|
|
| otherwise = min acc $ T.length x
|
|
removeIndent n [] = []
|
|
removeIndent n (x:chunks) = T.drop n x : chunks
|
|
|
|
-- | Parser for integers.
|
|
integer :: Integral a => Parser a
|
|
integer = Lexer.signed (pure ()) $ lexeme Lexer.decimal
|
|
|
|
-- | Parser for floating-point numbers.
|
|
float :: Parser Double
|
|
float = Lexer.signed (pure ()) $ lexeme Lexer.float
|
|
|
|
-- | Parser for names (/[_A-Za-z][_0-9A-Za-z]*/).
|
|
name :: Parser T.Text
|
|
name = do
|
|
firstLetter <- nameFirstLetter
|
|
rest <- many $ nameFirstLetter <|> digitChar
|
|
_ <- spaceConsumer
|
|
return $ TL.toStrict $ TL.cons firstLetter $ TL.pack rest
|
|
where
|
|
nameFirstLetter = satisfy isAsciiUpper <|> satisfy isAsciiLower <|> char '_'
|
|
|
|
isChunkDelimiter :: Char -> Bool
|
|
isChunkDelimiter = flip notElem ['"', '\\', '\n', '\r']
|
|
|
|
isWhiteSpace :: Char -> Bool
|
|
isWhiteSpace = liftA2 (||) (== ' ') (== '\t')
|
|
|
|
lineTerminator :: Parser T.Text
|
|
lineTerminator = chunk "\r\n" <|> chunk "\n" <|> chunk "\r"
|
|
|
|
isSourceCharacter :: Char -> Bool
|
|
isSourceCharacter = isSourceCharacter' . ord
|
|
where
|
|
isSourceCharacter' code = code >= 0x0020
|
|
|| code == 0x0009
|
|
|| code == 0x000a
|
|
|| code == 0x000d
|
|
|
|
escapeSequence :: Parser Char
|
|
escapeSequence = do
|
|
_ <- char '\\'
|
|
escaped <- oneOf ['"', '\\', '/', 'b', 'f', 'n', 'r', 't', 'u']
|
|
case escaped of
|
|
'b' -> return '\b'
|
|
'f' -> return '\f'
|
|
'n' -> return '\n'
|
|
'r' -> return '\r'
|
|
't' -> return '\t'
|
|
'u' -> chr . foldl' step 0
|
|
. chunkToTokens (Proxy :: Proxy T.Text)
|
|
<$> takeP Nothing 4
|
|
_ -> return escaped
|
|
where
|
|
step accumulator = (accumulator * 16 +) . digitToInt
|