Consume tokens matching 0 characters at the end
Some checks failed
Build / audit (push) Failing after 1s
Build / test (push) Successful in 16m52s

This commit is contained in:
Eugen Wissner 2024-09-10 11:33:31 +02:00
parent 6ead225e88
commit 74da0eb391
Signed by: belka
GPG Key ID: A27FDC1E8EE902C0
2 changed files with 60 additions and 54 deletions

View File

@ -89,72 +89,73 @@ data MatchToken
-- (v)\\.
-- @
match
    :: Text -- ^ Pattern.
    -> Text -- ^ Input the pattern is applied to.
    -> Maybe Text -- ^ Captured text, or 'Nothing' if the input doesn't match.
match fullPattern = go startState
  where
    -- Nothing is captured yet and the whole pattern is still to be processed.
    startState = MatchState
        { ignoring = False
        , matched = mempty
        , pattern' = parsePattern fullPattern
        }

    -- Walks over the pattern tokens and the input simultaneously. Recursing
    -- on the tokens (instead of folding over the input characters) makes it
    -- possible to consume tokens that match zero characters after the input
    -- is exhausted.
    go :: MatchState -> Text -> Maybe Text
    -- Neither tokens nor input are left: the match succeeded.
    go MatchState{ pattern' = [], matched } "" = Just matched
    -- All tokens are processed, but there is still some input left.
    go MatchState{ pattern' = [] } _ = Nothing
    -- Parentheses consume no input, they only toggle whether the consumed
    -- characters are appended to the result.
    go state@MatchState{ pattern' = OpenParenMatchToken : tokens } input' =
        go state{ ignoring = True, pattern' = tokens } input'
    go state@MatchState{ pattern' = CloseParenMatchToken : tokens } input' =
        go state{ ignoring = False, pattern' = tokens } input'
    -- The asterisk accepts any character and stays the current token until
    -- the input is exhausted.
    go state@MatchState{ pattern' = AsteriskMatchToken : tokens } input'
        | Just (nextCharacter, leftOver) <- Text.uncons input' =
            go (matchSymbolToken state nextCharacter) leftOver
        | otherwise = go state{ pattern' = tokens } ""
    -- A character set matches zero or more characters: it stays the current
    -- token while the input matches and is dropped, without consuming
    -- anything, on the first character outside the set or at the end of the
    -- input.
    go state@MatchState{ pattern' = OneOfMatchToken chars : tokens } input'
        | Just (nextCharacter, leftOver) <- Text.uncons input'
        , nextCharacter `elem` chars =
            go (matchSymbolToken state nextCharacter) leftOver
        | otherwise = go state{ pattern' = tokens } input'
    -- A literal symbol has to match exactly one, equal, character.
    go state@MatchState{ pattern' = SymbolMatchToken patternCharacter : tokens } input'
        | Just (nextCharacter, leftOver) <- Text.uncons input'
        , patternCharacter == nextCharacter =
            go (matchSymbolToken state{ pattern' = tokens } nextCharacter) leftOver
        | otherwise = Nothing

    -- Appends the consumed character to the result unless the state is
    -- currently ignoring input (between an opening and a closing
    -- parenthesis token).
    matchSymbolToken state nextCharacter
        | getField @"ignoring" state = state
        | otherwise = state
            { matched = Text.snoc (getField @"matched" state) nextCharacter
            }

    -- Converts the textual pattern into a list of tokens.
    parsePattern :: Text -> [MatchToken]
    parsePattern input' =
        case Text.uncons input' of
            Nothing -> []
            Just ('\\', escaped) -> parseEscape escaped
            -- A bracket expression requires a closing bracket, otherwise
            -- the opening bracket falls through to the alternative below
            -- and is handled as a literal symbol.
            Just ('[', remaining)
                | Just lastBracket <- Text.findIndex (== ']') remaining ->
                    OneOfMatchToken (Text.unpack $ Text.take lastBracket remaining)
                        : parsePattern (Text.drop (succ lastBracket) remaining)
            Just (firstChar, remaining) ->
                charToken firstChar : parsePattern remaining
      where
        digits = ['0' .. '9']
        -- Handles the character following a backslash.
        parseEscape escaped =
            case Text.uncons escaped of
                -- A trailing backslash ends the pattern.
                Nothing -> []
                Just ('d', remaining) ->
                    OneOfMatchToken digits : parsePattern remaining
                Just ('.', remaining) ->
                    OneOfMatchToken ('.' : digits) : parsePattern remaining
                Just ('\\', remaining) ->
                    SymbolMatchToken '\\' : parsePattern remaining
                -- Unknown escape sequences produce no token.
                Just (_, remaining) -> parsePattern remaining
        charToken '*' = AsteriskMatchToken
        charToken '(' = OpenParenMatchToken
        charToken ')' = CloseParenMatchToken
        charToken symbol = SymbolMatchToken symbol
-- * Packagist
newtype PackagistPackage = PackagistPackage

View File

@ -41,3 +41,8 @@ spec = do
let expected = Nothing
actual = match "2.6.0-rc1" "2.6.0"
in actual `shouldBe` expected
it "consumes the last token matching nothing" $
    -- Both trailing digit tokens have to be accepted even though the input
    -- ends right after "abc".
    match "abc\\d\\d" "abc" `shouldBe` Just "abc"