summaryrefslogtreecommitdiff
path: root/src/Language/GraphQL/Lexer.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Language/GraphQL/Lexer.hs')
-rw-r--r--src/Language/GraphQL/Lexer.hs215
1 files changed, 215 insertions, 0 deletions
diff --git a/src/Language/GraphQL/Lexer.hs b/src/Language/GraphQL/Lexer.hs
new file mode 100644
index 0000000..655be3d
--- /dev/null
+++ b/src/Language/GraphQL/Lexer.hs
@@ -0,0 +1,215 @@
+{-# LANGUAGE ExplicitForAll #-}
+{-# LANGUAGE OverloadedStrings #-}
+module Language.GraphQL.Lexer
+ ( Parser
+ , amp
+ , at
+ , bang
+ , blockString
+ , braces
+ , brackets
+ , colon
+ , dollar
+ , comment
+ , equals
+ , integer
+ , float
+ , lexeme
+ , name
+ , parens
+ , pipe
+ , spaceConsumer
+ , spread
+ , string
+ , symbol
+ ) where
+
+import Control.Applicative ( Alternative(..)
+ , liftA2
+ )
+import Data.Char ( chr
+ , digitToInt
+ , isAsciiLower
+ , isAsciiUpper
+ , ord
+ )
+import Data.Foldable (foldl')
+import Data.List (dropWhileEnd)
+import Data.Proxy (Proxy(..))
+import Data.Void (Void)
+import Text.Megaparsec ( Parsec
+ , between
+ , chunk
+ , chunkToTokens
+ , notFollowedBy
+ , oneOf
+ , option
+ , satisfy
+ , sepBy
+ , skipSome
+ , takeP
+ , takeWhile1P
+ , try
+ )
+import Text.Megaparsec.Char ( char
+ , digitChar
+ , space1
+ )
+import qualified Text.Megaparsec.Char.Lexer as Lexer
+import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
+
+-- | Standard parser.
+-- Accepts the type of the parsed token.
+type Parser = Parsec Void T.Text
+
+ignoredCharacters :: Parser ()
+ignoredCharacters = space1 <|> skipSome (char ',')
+
+spaceConsumer :: Parser ()
+spaceConsumer = Lexer.space ignoredCharacters comment empty
+
+-- | Parser for comments.
+comment :: Parser ()
+comment = Lexer.skipLineComment "#"
+
+lexeme :: forall a. Parser a -> Parser a
+lexeme = Lexer.lexeme spaceConsumer
+
+symbol :: T.Text -> Parser T.Text
+symbol = Lexer.symbol spaceConsumer
+
+-- | Parser for "!".
+bang :: Parser Char
+bang = char '!'
+
+-- | Parser for "$".
+dollar :: Parser Char
+dollar = char '$'
+
+-- | Parser for "@".
+at :: Parser Char
+at = char '@'
+
+-- | Parser for "&".
+amp :: Parser T.Text
+amp = symbol "&"
+
+-- | Parser for ":".
+colon :: Parser T.Text
+colon = symbol ":"
+
+-- | Parser for "=".
+equals :: Parser T.Text
+equals = symbol "="
+
+-- | Parser for the spread operator (...).
+spread :: Parser T.Text
+spread = symbol "..."
+
+-- | Parser for "|".
+pipe :: Parser T.Text
+pipe = symbol "|"
+
+-- | Parser for an expression between "(" and ")".
+parens :: forall a. Parser a -> Parser a
+parens = between (symbol "(") (symbol ")")
+
+-- | Parser for an expression between "[" and "]".
+brackets :: forall a. Parser a -> Parser a
+brackets = between (symbol "[") (symbol "]")
+
+-- | Parser for an expression between "{" and "}".
+braces :: forall a. Parser a -> Parser a
+braces = between (symbol "{") (symbol "}")
+
+-- | Parser for strings.
+string :: Parser T.Text
+string = between "\"" "\"" stringValue
+ where
+ stringValue = T.pack <$> many stringCharacter
+ stringCharacter = satisfy isStringCharacter1
+ <|> escapeSequence
+ isStringCharacter1 = liftA2 (&&) isSourceCharacter isChunkDelimiter
+
+-- | Parser for block strings.
+blockString :: Parser T.Text
+blockString = between "\"\"\"" "\"\"\"" stringValue
+ where
+ stringValue = do
+ byLine <- sepBy (many blockStringCharacter) lineTerminator
+ let indentSize = foldr countIndent 0 $ tail byLine
+ withoutIndent = head byLine : (removeIndent indentSize <$> tail byLine)
+ withoutEmptyLines = liftA2 (.) dropWhile dropWhileEnd removeEmptyLine withoutIndent
+
+ return $ T.intercalate "\n" $ T.concat <$> withoutEmptyLines
+ removeEmptyLine [] = True
+ removeEmptyLine [x] = T.null x || isWhiteSpace (T.head x)
+ removeEmptyLine _ = False
+ blockStringCharacter
+ = takeWhile1P Nothing isWhiteSpace
+ <|> takeWhile1P Nothing isBlockStringCharacter1
+ <|> escapeTripleQuote
+ <|> try (chunk "\"" <* notFollowedBy (chunk "\"\""))
+ escapeTripleQuote = chunk "\\" >>= flip option (chunk "\"\"")
+ isBlockStringCharacter1 = liftA2 (&&) isSourceCharacter isChunkDelimiter
+ countIndent [] acc = acc
+ countIndent (x:_) acc
+ | T.null x = acc
+ | not (isWhiteSpace $ T.head x) = acc
+ | acc == 0 = T.length x
+ | otherwise = min acc $ T.length x
+ removeIndent _ [] = []
+ removeIndent n (x:chunks) = T.drop n x : chunks
+
+-- | Parser for integers.
+integer :: Integral a => Parser a
+integer = Lexer.signed (pure ()) $ lexeme Lexer.decimal
+
+-- | Parser for floating-point numbers.
+float :: Parser Double
+float = Lexer.signed (pure ()) $ lexeme Lexer.float
+
+-- | Parser for names (/[_A-Za-z][_0-9A-Za-z]*/).
+name :: Parser T.Text
+name = do
+ firstLetter <- nameFirstLetter
+ rest <- many $ nameFirstLetter <|> digitChar
+ _ <- spaceConsumer
+ return $ TL.toStrict $ TL.cons firstLetter $ TL.pack rest
+ where
+ nameFirstLetter = satisfy isAsciiUpper <|> satisfy isAsciiLower <|> char '_'
+
+isChunkDelimiter :: Char -> Bool
+isChunkDelimiter = flip notElem ['"', '\\', '\n', '\r']
+
+isWhiteSpace :: Char -> Bool
+isWhiteSpace = liftA2 (||) (== ' ') (== '\t')
+
+lineTerminator :: Parser T.Text
+lineTerminator = chunk "\r\n" <|> chunk "\n" <|> chunk "\r"
+
+isSourceCharacter :: Char -> Bool
+isSourceCharacter = isSourceCharacter' . ord
+ where
+ isSourceCharacter' code = code >= 0x0020
+ || code == 0x0009
+ || code == 0x000a
+ || code == 0x000d
+
+escapeSequence :: Parser Char
+escapeSequence = do
+ _ <- char '\\'
+ escaped <- oneOf ['"', '\\', '/', 'b', 'f', 'n', 'r', 't', 'u']
+ case escaped of
+ 'b' -> return '\b'
+ 'f' -> return '\f'
+ 'n' -> return '\n'
+ 'r' -> return '\r'
+ 't' -> return '\t'
+ 'u' -> chr . foldl' step 0
+ . chunkToTokens (Proxy :: Proxy T.Text)
+ <$> takeP Nothing 4
+ _ -> return escaped
+ where
+ step accumulator = (accumulator * 16 +) . digitToInt