diff options
Diffstat (limited to 'src/Language/GraphQL/Lexer.hs')
| -rw-r--r-- | src/Language/GraphQL/Lexer.hs | 215 |
1 files changed, 215 insertions, 0 deletions
diff --git a/src/Language/GraphQL/Lexer.hs b/src/Language/GraphQL/Lexer.hs new file mode 100644 index 0000000..655be3d --- /dev/null +++ b/src/Language/GraphQL/Lexer.hs @@ -0,0 +1,215 @@ +{-# LANGUAGE ExplicitForAll #-} +{-# LANGUAGE OverloadedStrings #-} +module Language.GraphQL.Lexer + ( Parser + , amp + , at + , bang + , blockString + , braces + , brackets + , colon + , dollar + , comment + , equals + , integer + , float + , lexeme + , name + , parens + , pipe + , spaceConsumer + , spread + , string + , symbol + ) where + +import Control.Applicative ( Alternative(..) + , liftA2 + ) +import Data.Char ( chr + , digitToInt + , isAsciiLower + , isAsciiUpper + , ord + ) +import Data.Foldable (foldl') +import Data.List (dropWhileEnd) +import Data.Proxy (Proxy(..)) +import Data.Void (Void) +import Text.Megaparsec ( Parsec + , between + , chunk + , chunkToTokens + , notFollowedBy + , oneOf + , option + , satisfy + , sepBy + , skipSome + , takeP + , takeWhile1P + , try + ) +import Text.Megaparsec.Char ( char + , digitChar + , space1 + ) +import qualified Text.Megaparsec.Char.Lexer as Lexer +import qualified Data.Text as T +import qualified Data.Text.Lazy as TL + +-- | Standard parser. +-- Accepts the type of the parsed token. +type Parser = Parsec Void T.Text + +ignoredCharacters :: Parser () +ignoredCharacters = space1 <|> skipSome (char ',') + +spaceConsumer :: Parser () +spaceConsumer = Lexer.space ignoredCharacters comment empty + +-- | Parser for comments. +comment :: Parser () +comment = Lexer.skipLineComment "#" + +lexeme :: forall a. Parser a -> Parser a +lexeme = Lexer.lexeme spaceConsumer + +symbol :: T.Text -> Parser T.Text +symbol = Lexer.symbol spaceConsumer + +-- | Parser for "!". +bang :: Parser Char +bang = char '!' + +-- | Parser for "$". +dollar :: Parser Char +dollar = char '$' + +-- | Parser for "@". +at :: Parser Char +at = char '@' + +-- | Parser for "&". +amp :: Parser T.Text +amp = symbol "&" + +-- | Parser for ":". +colon :: Parser T.Text +colon = symbol ":" + +-- | Parser for "=". +equals :: Parser T.Text +equals = symbol "=" + +-- | Parser for the spread operator (...). +spread :: Parser T.Text +spread = symbol "..." + +-- | Parser for "|". +pipe :: Parser T.Text +pipe = symbol "|" + +-- | Parser for an expression between "(" and ")". +parens :: forall a. Parser a -> Parser a +parens = between (symbol "(") (symbol ")") + +-- | Parser for an expression between "[" and "]". +brackets :: forall a. Parser a -> Parser a +brackets = between (symbol "[") (symbol "]") + +-- | Parser for an expression between "{" and "}". +braces :: forall a. Parser a -> Parser a +braces = between (symbol "{") (symbol "}") + +-- | Parser for strings. +string :: Parser T.Text +string = between "\"" "\"" stringValue + where + stringValue = T.pack <$> many stringCharacter + stringCharacter = satisfy isStringCharacter1 + <|> escapeSequence + isStringCharacter1 = liftA2 (&&) isSourceCharacter isChunkDelimiter + +-- | Parser for block strings. +blockString :: Parser T.Text +blockString = between "\"\"\"" "\"\"\"" stringValue + where + stringValue = do + byLine <- sepBy (many blockStringCharacter) lineTerminator + let indentSize = foldr countIndent 0 $ tail byLine + withoutIndent = head byLine : (removeIndent indentSize <$> tail byLine) + withoutEmptyLines = liftA2 (.) dropWhile dropWhileEnd removeEmptyLine withoutIndent + + return $ T.intercalate "\n" $ T.concat <$> withoutEmptyLines + removeEmptyLine [] = True + removeEmptyLine [x] = T.null x || isWhiteSpace (T.head x) + removeEmptyLine _ = False + blockStringCharacter + = takeWhile1P Nothing isWhiteSpace + <|> takeWhile1P Nothing isBlockStringCharacter1 + <|> escapeTripleQuote + <|> try (chunk "\"" <* notFollowedBy (chunk "\"\"")) + escapeTripleQuote = chunk "\\" >>= flip option (chunk "\"\"") + isBlockStringCharacter1 = liftA2 (&&) isSourceCharacter isChunkDelimiter + countIndent [] acc = acc + countIndent (x:_) acc + | T.null x = acc + | not (isWhiteSpace $ T.head x) = acc + | acc == 0 = T.length x + | otherwise = min acc $ T.length x + removeIndent _ [] = [] + removeIndent n (x:chunks) = T.drop n x : chunks + +-- | Parser for integers. +integer :: Integral a => Parser a +integer = Lexer.signed (pure ()) $ lexeme Lexer.decimal + +-- | Parser for floating-point numbers. +float :: Parser Double +float = Lexer.signed (pure ()) $ lexeme Lexer.float + +-- | Parser for names (/[_A-Za-z][_0-9A-Za-z]*/). +name :: Parser T.Text +name = do + firstLetter <- nameFirstLetter + rest <- many $ nameFirstLetter <|> digitChar + _ <- spaceConsumer + return $ TL.toStrict $ TL.cons firstLetter $ TL.pack rest + where + nameFirstLetter = satisfy isAsciiUpper <|> satisfy isAsciiLower <|> char '_' + +isChunkDelimiter :: Char -> Bool +isChunkDelimiter = flip notElem ['"', '\\', '\n', '\r'] + +isWhiteSpace :: Char -> Bool +isWhiteSpace = liftA2 (||) (== ' ') (== '\t') + +lineTerminator :: Parser T.Text +lineTerminator = chunk "\r\n" <|> chunk "\n" <|> chunk "\r" + +isSourceCharacter :: Char -> Bool +isSourceCharacter = isSourceCharacter' . ord + where + isSourceCharacter' code = code >= 0x0020 + || code == 0x0009 + || code == 0x000a + || code == 0x000d + +escapeSequence :: Parser Char +escapeSequence = do + _ <- char '\\' + escaped <- oneOf ['"', '\\', '/', 'b', 'f', 'n', 'r', 't', 'u'] + case escaped of + 'b' -> return '\b' + 'f' -> return '\f' + 'n' -> return '\n' + 'r' -> return '\r' + 't' -> return '\t' + 'u' -> chr . foldl' step 0 + . chunkToTokens (Proxy :: Proxy T.Text) + <$> takeP Nothing 4 + _ -> return escaped + where + step accumulator = (accumulator * 16 +) . digitToInt |
