summaryrefslogtreecommitdiff
path: root/src/Language/GraphQL/AST/Lexer.hs
diff options
context:
space:
mode:
authorEugen Wissner <belka@caraus.de>2019-11-03 10:42:10 +0100
committerEugen Wissner <belka@caraus.de>2019-11-03 11:00:18 +0100
commit73fc334bf8d7bd6d8b83143995844ca0968ceeda (patch)
tree0f4e2e31b5e3dd031a2fbd5f078eb741e5b3e931 /src/Language/GraphQL/AST/Lexer.hs
parent417ff5da7d0db6c8e73a238c17368192a3515a93 (diff)
downloadgraphql-73fc334bf8d7bd6d8b83143995844ca0968ceeda.tar.gz
Move related modules to Language.GraphQL.AST
Fixes #18. - `Language.GraphQL.Encoder` moved to `Language.GraphQL.AST.Encoder`. - `Language.GraphQL.Parser` moved to `Language.GraphQL.AST.Parser`. - `Language.GraphQL.Lexer` moved to `Language.GraphQL.AST.Lexer`. - All `Language.GraphQL.AST.Value` data constructor prefixes were removed. The module should be imported qualified. - All `Language.GraphQL.AST.Core.Value` data constructor prefixes were removed. The module should be imported qualified. - `Language.GraphQL.AST.Transform` is now isn't exposed publically anymore.
Diffstat (limited to 'src/Language/GraphQL/AST/Lexer.hs')
-rw-r--r--src/Language/GraphQL/AST/Lexer.hs228
1 files changed, 228 insertions, 0 deletions
diff --git a/src/Language/GraphQL/AST/Lexer.hs b/src/Language/GraphQL/AST/Lexer.hs
new file mode 100644
index 0000000..97a334c
--- /dev/null
+++ b/src/Language/GraphQL/AST/Lexer.hs
@@ -0,0 +1,228 @@
+{-# LANGUAGE ExplicitForAll #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+-- | This module defines a bunch of small parsers used to parse individual
+-- lexemes.
+module Language.GraphQL.AST.Lexer
+ ( Parser
+ , amp
+ , at
+ , bang
+ , blockString
+ , braces
+ , brackets
+ , colon
+ , dollar
+ , comment
+ , equals
+ , integer
+ , float
+ , lexeme
+ , name
+ , parens
+ , pipe
+ , spaceConsumer
+ , spread
+ , string
+ , symbol
+ , unicodeBOM
+ ) where
+
+import Control.Applicative ( Alternative(..)
+ , liftA2
+ )
+import Data.Char ( chr
+ , digitToInt
+ , isAsciiLower
+ , isAsciiUpper
+ , ord
+ )
+import Data.Foldable (foldl')
+import Data.List (dropWhileEnd)
+import Data.Proxy (Proxy(..))
+import Data.Void (Void)
+import Text.Megaparsec ( Parsec
+ , between
+ , chunk
+ , chunkToTokens
+ , notFollowedBy
+ , oneOf
+ , option
+ , optional
+ , satisfy
+ , sepBy
+ , skipSome
+ , takeP
+ , takeWhile1P
+ , try
+ )
+import Text.Megaparsec.Char ( char
+ , digitChar
+ , space1
+ )
+import qualified Text.Megaparsec.Char.Lexer as Lexer
+import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
+
+-- | Standard parser.
+-- Accepts the type of the parsed token.
+type Parser = Parsec Void T.Text
+
+ignoredCharacters :: Parser ()
+ignoredCharacters = space1 <|> skipSome (char ',')
+
+-- | Parser that skips comments and meaningless characters, whitespaces and
+-- commas.
+spaceConsumer :: Parser ()
+spaceConsumer = Lexer.space ignoredCharacters comment empty
+
+-- | Parser for comments.
+comment :: Parser ()
+comment = Lexer.skipLineComment "#"
+
+-- | Lexeme definition which ignores whitespaces and commas.
+lexeme :: forall a. Parser a -> Parser a
+lexeme = Lexer.lexeme spaceConsumer
+
+-- | Symbol definition which ignores whitespaces and commas.
+symbol :: T.Text -> Parser T.Text
+symbol = Lexer.symbol spaceConsumer
+
+-- | Parser for "!".
+bang :: Parser Char
+bang = char '!'
+
+-- | Parser for "$".
+dollar :: Parser Char
+dollar = char '$'
+
+-- | Parser for "@".
+at :: Parser Char
+at = char '@'
+
+-- | Parser for "&".
+amp :: Parser T.Text
+amp = symbol "&"
+
+-- | Parser for ":".
+colon :: Parser T.Text
+colon = symbol ":"
+
+-- | Parser for "=".
+equals :: Parser T.Text
+equals = symbol "="
+
+-- | Parser for the spread operator (...).
+spread :: Parser T.Text
+spread = symbol "..."
+
+-- | Parser for "|".
+pipe :: Parser T.Text
+pipe = symbol "|"
+
+-- | Parser for an expression between "(" and ")".
+parens :: forall a. Parser a -> Parser a
+parens = between (symbol "(") (symbol ")")
+
+-- | Parser for an expression between "[" and "]".
+brackets :: forall a. Parser a -> Parser a
+brackets = between (symbol "[") (symbol "]")
+
+-- | Parser for an expression between "{" and "}".
+braces :: forall a. Parser a -> Parser a
+braces = between (symbol "{") (symbol "}")
+
+-- | Parser for strings.
+string :: Parser T.Text
+string = between "\"" "\"" stringValue
+ where
+ stringValue = T.pack <$> many stringCharacter
+ stringCharacter = satisfy isStringCharacter1
+ <|> escapeSequence
+ isStringCharacter1 = liftA2 (&&) isSourceCharacter isChunkDelimiter
+
+-- | Parser for block strings.
+blockString :: Parser T.Text
+blockString = between "\"\"\"" "\"\"\"" stringValue
+ where
+ stringValue = do
+ byLine <- sepBy (many blockStringCharacter) lineTerminator
+ let indentSize = foldr countIndent 0 $ tail byLine
+ withoutIndent = head byLine : (removeIndent indentSize <$> tail byLine)
+ withoutEmptyLines = liftA2 (.) dropWhile dropWhileEnd removeEmptyLine withoutIndent
+
+ return $ T.intercalate "\n" $ T.concat <$> withoutEmptyLines
+ removeEmptyLine [] = True
+ removeEmptyLine [x] = T.null x || isWhiteSpace (T.head x)
+ removeEmptyLine _ = False
+ blockStringCharacter
+ = takeWhile1P Nothing isWhiteSpace
+ <|> takeWhile1P Nothing isBlockStringCharacter1
+ <|> escapeTripleQuote
+ <|> try (chunk "\"" <* notFollowedBy (chunk "\"\""))
+ escapeTripleQuote = chunk "\\" >>= flip option (chunk "\"\"")
+ isBlockStringCharacter1 = liftA2 (&&) isSourceCharacter isChunkDelimiter
+ countIndent [] acc = acc
+ countIndent (x:_) acc
+ | T.null x = acc
+ | not (isWhiteSpace $ T.head x) = acc
+ | acc == 0 = T.length x
+ | otherwise = min acc $ T.length x
+ removeIndent _ [] = []
+ removeIndent n (x:chunks) = T.drop n x : chunks
+
+-- | Parser for integers.
+integer :: Integral a => Parser a
+integer = Lexer.signed (pure ()) $ lexeme Lexer.decimal
+
+-- | Parser for floating-point numbers.
+float :: Parser Double
+float = Lexer.signed (pure ()) $ lexeme Lexer.float
+
+-- | Parser for names (/[_A-Za-z][_0-9A-Za-z]*/).
+name :: Parser T.Text
+name = do
+ firstLetter <- nameFirstLetter
+ rest <- many $ nameFirstLetter <|> digitChar
+ _ <- spaceConsumer
+ return $ TL.toStrict $ TL.cons firstLetter $ TL.pack rest
+ where
+ nameFirstLetter = satisfy isAsciiUpper <|> satisfy isAsciiLower <|> char '_'
+
+isChunkDelimiter :: Char -> Bool
+isChunkDelimiter = flip notElem ['"', '\\', '\n', '\r']
+
+isWhiteSpace :: Char -> Bool
+isWhiteSpace = liftA2 (||) (== ' ') (== '\t')
+
+lineTerminator :: Parser T.Text
+lineTerminator = chunk "\r\n" <|> chunk "\n" <|> chunk "\r"
+
+isSourceCharacter :: Char -> Bool
+isSourceCharacter = isSourceCharacter' . ord
+ where
+ isSourceCharacter' code = code >= 0x0020
+ || code == 0x0009
+ || code == 0x000a
+ || code == 0x000d
+
+escapeSequence :: Parser Char
+escapeSequence = do
+ _ <- char '\\'
+ escaped <- oneOf ['"', '\\', '/', 'b', 'f', 'n', 'r', 't', 'u']
+ case escaped of
+ 'b' -> return '\b'
+ 'f' -> return '\f'
+ 'n' -> return '\n'
+ 'r' -> return '\r'
+ 't' -> return '\t'
+ 'u' -> chr . foldl' step 0
+ . chunkToTokens (Proxy :: Proxy T.Text)
+ <$> takeP Nothing 4
+ _ -> return escaped
+ where
+ step accumulator = (accumulator * 16 +) . digitToInt
+
+-- | Parser for the "Byte Order Mark".
+unicodeBOM :: Parser ()
+unicodeBOM = optional (char '\xfeff') >> pure ()