Consume tokens matching 0 characters at the end
Some checks failed
Build / audit (push) Failing after 1s
Build / test (push) Successful in 16m52s

This commit is contained in:
Eugen Wissner 2024-09-10 11:33:31 +02:00
parent 6ead225e88
commit 74da0eb391
Signed by: belka
GPG Key ID: A27FDC1E8EE902C0
2 changed files with 60 additions and 54 deletions

View File

@ -89,72 +89,73 @@ data MatchToken
-- (v)\\.
-- @
match :: Text -> Text -> Maybe Text
match fullPattern = go startState
  where
    -- Nothing is matched yet, nothing is ignored and the whole pattern is
    -- still to be processed.
    startState = MatchState
        { ignoring = False
        , matched = mempty
        , pattern' = parsePattern fullPattern
        }
    -- Walks over the pattern tokens and the input at the same time. The
    -- result is the accumulated text if tokens and input run out together,
    -- Nothing otherwise.
    go :: MatchState -> Text -> Maybe Text
    -- Both the tokens and the input are exhausted: the match succeeded.
    go MatchState{ pattern' = [], matched } "" = Just matched
    -- Parentheses consume no input, they only switch the ignoring flag, so
    -- the characters matched between them are left out of the result.
    go state@MatchState{ pattern' = OpenParenMatchToken : tokens } input' =
        go (state{ ignoring = True, pattern' = tokens }) input'
    go state@MatchState{ pattern' = CloseParenMatchToken : tokens } input' =
        go (state{ ignoring = False, pattern' = tokens }) input'
    -- The asterisk stays at the head of the token list as long as there is
    -- input, so it takes every remaining character. It is dropped only when
    -- the input is empty, which lets the trailing tokens be inspected.
    go state@MatchState{ pattern' = AsteriskMatchToken : tokens } input'
        | Just (nextCharacter, leftOver) <- Text.uncons input' =
            go (matchSymbolToken state nextCharacter) leftOver
        | otherwise = go state{ pattern' = tokens } ""
    -- A character class matches zero or more characters: the token is kept
    -- while the next character belongs to the class and is dropped without
    -- consuming anything otherwise (including at the end of the input).
    go state@MatchState{ pattern' = OneOfMatchToken chars : tokens } input'
        | Just (nextCharacter, leftOver) <- Text.uncons input'
        , nextCharacter `elem` chars =
            go (matchSymbolToken state nextCharacter) leftOver
        | otherwise =
            go (state{ pattern' = tokens }) input'
    -- A literal symbol has to be equal to the next input character; a
    -- mismatch or missing input fails the whole match.
    go state@MatchState{ pattern' = SymbolMatchToken patternCharacter : tokens } input'
        | Just (nextCharacter, leftOver) <- Text.uncons input'
        , patternCharacter == nextCharacter =
            go (matchSymbolToken state{ pattern' = tokens } nextCharacter) leftOver
        | otherwise = Nothing
    -- All tokens are processed, but there is still some input left.
    go MatchState{ pattern' = [] } _ = Nothing
    -- Appends the character to the matched text unless the state is
    -- currently ignoring input (inside parentheses).
    matchSymbolToken state nextCharacter
        | getField @"ignoring" state = state
        | otherwise = state
            { matched = Text.snoc (getField @"matched" state) nextCharacter
            }
-- Translates the textual pattern into the list of tokens used for matching.
-- An empty pattern yields no tokens.
parsePattern :: Text -> [MatchToken]
parsePattern = maybe [] (uncurry parseToken) . Text.uncons
  where
    decimalDigits = ['0' .. '9']
    -- Dispatches on the first character of a non-empty pattern.
    parseToken '\\' rest = parseEscape $ Text.uncons rest
    parseToken '[' rest
        -- Without a closing bracket this guard fails and the bracket is
        -- handled by the equations below as an ordinary symbol.
        | Just closingIndex <- Text.findIndex (== ']') rest =
            OneOfMatchToken (Text.unpack $ Text.take closingIndex rest)
                : parsePattern (Text.drop (succ closingIndex) rest)
    parseToken '*' rest = AsteriskMatchToken : parsePattern rest
    parseToken '(' rest = OpenParenMatchToken : parsePattern rest
    parseToken ')' rest = CloseParenMatchToken : parsePattern rest
    parseToken symbol rest = SymbolMatchToken symbol : parsePattern rest
    -- Handles whatever follows a backslash. A backslash at the very end of
    -- the pattern stops the parsing; an unknown escape is skipped together
    -- with its backslash.
    parseEscape Nothing = []
    parseEscape (Just ('d', rest)) =
        OneOfMatchToken decimalDigits : parsePattern rest
    parseEscape (Just ('.', rest)) =
        OneOfMatchToken ('.' : decimalDigits) : parsePattern rest
    parseEscape (Just ('\\', rest)) =
        SymbolMatchToken '\\' : parsePattern rest
    parseEscape (Just (_, rest)) = parsePattern rest
-- * Packagist -- * Packagist
newtype PackagistPackage = PackagistPackage newtype PackagistPackage = PackagistPackage

View File

@ -41,3 +41,8 @@ spec = do
let expected = Nothing let expected = Nothing
actual = match "2.6.0-rc1" "2.6.0" actual = match "2.6.0-rc1" "2.6.0"
in actual `shouldBe` expected in actual `shouldBe` expected
it "consumes the last token matching nothing" $
let expected = Just "abc"
actual = match "abc\\d\\d" "abc"
in actual `shouldBe` expected