summaryrefslogtreecommitdiff
path: root/src/Language/GraphQL/Lexer.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Language/GraphQL/Lexer.hs')
-rw-r--r--src/Language/GraphQL/Lexer.hs228
1 files changed, 0 insertions, 228 deletions
diff --git a/src/Language/GraphQL/Lexer.hs b/src/Language/GraphQL/Lexer.hs
deleted file mode 100644
index dc000b5..0000000
--- a/src/Language/GraphQL/Lexer.hs
+++ /dev/null
@@ -1,228 +0,0 @@
-{-# LANGUAGE ExplicitForAll #-}
-{-# LANGUAGE OverloadedStrings #-}
-
--- | This module defines a bunch of small parsers used to parse individual
--- lexemes.
-module Language.GraphQL.Lexer
- ( Parser
- , amp
- , at
- , bang
- , blockString
- , braces
- , brackets
- , colon
- , dollar
- , comment
- , equals
- , integer
- , float
- , lexeme
- , name
- , parens
- , pipe
- , spaceConsumer
- , spread
- , string
- , symbol
- , unicodeBOM
- ) where
-
-import Control.Applicative ( Alternative(..)
- , liftA2
- )
-import Data.Char ( chr
- , digitToInt
- , isAsciiLower
- , isAsciiUpper
- , ord
- )
-import Data.Foldable (foldl')
-import Data.List (dropWhileEnd)
-import Data.Proxy (Proxy(..))
-import Data.Void (Void)
-import Text.Megaparsec ( Parsec
- , between
- , chunk
- , chunkToTokens
- , notFollowedBy
- , oneOf
- , option
- , optional
- , satisfy
- , sepBy
- , skipSome
- , takeP
- , takeWhile1P
- , try
- )
-import Text.Megaparsec.Char ( char
- , digitChar
- , space1
- )
-import qualified Text.Megaparsec.Char.Lexer as Lexer
-import qualified Data.Text as T
-import qualified Data.Text.Lazy as TL
-
--- | Standard parser.
--- Accepts the type of the parsed token.
-type Parser = Parsec Void T.Text
-
-ignoredCharacters :: Parser ()
-ignoredCharacters = space1 <|> skipSome (char ',')
-
--- | Parser that skips comments and meaningless characters, whitespaces and
--- commas.
-spaceConsumer :: Parser ()
-spaceConsumer = Lexer.space ignoredCharacters comment empty
-
--- | Parser for comments.
-comment :: Parser ()
-comment = Lexer.skipLineComment "#"
-
--- | Lexeme definition which ignores whitespaces and commas.
-lexeme :: forall a. Parser a -> Parser a
-lexeme = Lexer.lexeme spaceConsumer
-
--- | Symbol definition which ignores whitespaces and commas.
-symbol :: T.Text -> Parser T.Text
-symbol = Lexer.symbol spaceConsumer
-
--- | Parser for "!".
-bang :: Parser Char
-bang = char '!'
-
--- | Parser for "$".
-dollar :: Parser Char
-dollar = char '$'
-
--- | Parser for "@".
-at :: Parser Char
-at = char '@'
-
--- | Parser for "&".
-amp :: Parser T.Text
-amp = symbol "&"
-
--- | Parser for ":".
-colon :: Parser T.Text
-colon = symbol ":"
-
--- | Parser for "=".
-equals :: Parser T.Text
-equals = symbol "="
-
--- | Parser for the spread operator (...).
-spread :: Parser T.Text
-spread = symbol "..."
-
--- | Parser for "|".
-pipe :: Parser T.Text
-pipe = symbol "|"
-
--- | Parser for an expression between "(" and ")".
-parens :: forall a. Parser a -> Parser a
-parens = between (symbol "(") (symbol ")")
-
--- | Parser for an expression between "[" and "]".
-brackets :: forall a. Parser a -> Parser a
-brackets = between (symbol "[") (symbol "]")
-
--- | Parser for an expression between "{" and "}".
-braces :: forall a. Parser a -> Parser a
-braces = between (symbol "{") (symbol "}")
-
--- | Parser for strings.
-string :: Parser T.Text
-string = between "\"" "\"" stringValue
- where
- stringValue = T.pack <$> many stringCharacter
- stringCharacter = satisfy isStringCharacter1
- <|> escapeSequence
- isStringCharacter1 = liftA2 (&&) isSourceCharacter isChunkDelimiter
-
--- | Parser for block strings.
-blockString :: Parser T.Text
-blockString = between "\"\"\"" "\"\"\"" stringValue
- where
- stringValue = do
- byLine <- sepBy (many blockStringCharacter) lineTerminator
- let indentSize = foldr countIndent 0 $ tail byLine
- withoutIndent = head byLine : (removeIndent indentSize <$> tail byLine)
- withoutEmptyLines = liftA2 (.) dropWhile dropWhileEnd removeEmptyLine withoutIndent
-
- return $ T.intercalate "\n" $ T.concat <$> withoutEmptyLines
- removeEmptyLine [] = True
- removeEmptyLine [x] = T.null x || isWhiteSpace (T.head x)
- removeEmptyLine _ = False
- blockStringCharacter
- = takeWhile1P Nothing isWhiteSpace
- <|> takeWhile1P Nothing isBlockStringCharacter1
- <|> escapeTripleQuote
- <|> try (chunk "\"" <* notFollowedBy (chunk "\"\""))
- escapeTripleQuote = chunk "\\" >>= flip option (chunk "\"\"")
- isBlockStringCharacter1 = liftA2 (&&) isSourceCharacter isChunkDelimiter
- countIndent [] acc = acc
- countIndent (x:_) acc
- | T.null x = acc
- | not (isWhiteSpace $ T.head x) = acc
- | acc == 0 = T.length x
- | otherwise = min acc $ T.length x
- removeIndent _ [] = []
- removeIndent n (x:chunks) = T.drop n x : chunks
-
--- | Parser for integers.
-integer :: Integral a => Parser a
-integer = Lexer.signed (pure ()) $ lexeme Lexer.decimal
-
--- | Parser for floating-point numbers.
-float :: Parser Double
-float = Lexer.signed (pure ()) $ lexeme Lexer.float
-
--- | Parser for names (/[_A-Za-z][_0-9A-Za-z]*/).
-name :: Parser T.Text
-name = do
- firstLetter <- nameFirstLetter
- rest <- many $ nameFirstLetter <|> digitChar
- _ <- spaceConsumer
- return $ TL.toStrict $ TL.cons firstLetter $ TL.pack rest
- where
- nameFirstLetter = satisfy isAsciiUpper <|> satisfy isAsciiLower <|> char '_'
-
-isChunkDelimiter :: Char -> Bool
-isChunkDelimiter = flip notElem ['"', '\\', '\n', '\r']
-
-isWhiteSpace :: Char -> Bool
-isWhiteSpace = liftA2 (||) (== ' ') (== '\t')
-
-lineTerminator :: Parser T.Text
-lineTerminator = chunk "\r\n" <|> chunk "\n" <|> chunk "\r"
-
-isSourceCharacter :: Char -> Bool
-isSourceCharacter = isSourceCharacter' . ord
- where
- isSourceCharacter' code = code >= 0x0020
- || code == 0x0009
- || code == 0x000a
- || code == 0x000d
-
-escapeSequence :: Parser Char
-escapeSequence = do
- _ <- char '\\'
- escaped <- oneOf ['"', '\\', '/', 'b', 'f', 'n', 'r', 't', 'u']
- case escaped of
- 'b' -> return '\b'
- 'f' -> return '\f'
- 'n' -> return '\n'
- 'r' -> return '\r'
- 't' -> return '\t'
- 'u' -> chr . foldl' step 0
- . chunkToTokens (Proxy :: Proxy T.Text)
- <$> takeP Nothing 4
- _ -> return escaped
- where
- step accumulator = (accumulator * 16 +) . digitToInt
-
--- | Parser for the "Byte Order Mark".
-unicodeBOM :: Parser ()
-unicodeBOM = optional (char '\xfeff') >> pure ()