From 2dd3856389f9080e122f3a76ceda9c64146eb262 Mon Sep 17 00:00:00 2001
From: Eugen Wissner
Date: Sat, 22 Feb 2025 21:30:29 +0100
Subject: tea-cleaner: Configure word lists

---
 tea-cleaner/TeaCleaner/Filter.hs | 36 ++++++++++++++++++++++++++----------
 1 file changed, 26 insertions(+), 10 deletions(-)

(limited to 'tea-cleaner/TeaCleaner/Filter.hs')

diff --git a/tea-cleaner/TeaCleaner/Filter.hs b/tea-cleaner/TeaCleaner/Filter.hs
index 8448ff5..658606d 100644
--- a/tea-cleaner/TeaCleaner/Filter.hs
+++ b/tea-cleaner/TeaCleaner/Filter.hs
@@ -6,11 +6,11 @@ module TeaCleaner.Filter
     ) where
 
 import qualified Data.Text as Text
-import Data.Time (LocalTime(..), ZonedTime(..))
-import Data.Time.Calendar.OrdinalDate (fromOrdinalDate)
+import Data.Time (LocalTime(..), ZonedTime(..), UTCTime(..), addUTCTime)
 import qualified Data.Vector as Vector
 import TeaCleaner.Client (Activity(..), User(..), getActivities)
 import TeaCleaner.Configuration (Settings(..))
+import GHC.Records (HasField(..))
 
 data UserFilter
     = PassFilter
@@ -21,15 +21,31 @@ data UserFilter
 data FilterResult = FilterResult User UserFilter
     deriving (Show)
 
-filterByUserProperties :: User -> FilterResult
-filterByUserProperties user@User{ created, lastLogin, description, website }
-    | zonedDay created == zonedDay lastLogin
-    , zonedDay created > fromOrdinalDate 2024 1
-    , zonedDay created < fromOrdinalDate 2025 17
-    , not (Text.null description)
-    , not (Text.null website) = FilterResult user SuspiciousFilter
+filterByUserProperties :: Settings -> User -> FilterResult
+filterByUserProperties settings user@User{ created, lastLogin }
+    | noLoginSinceRegistration = FilterResult user FailedFilter
+    | containsSpamWords = FilterResult user FailedFilter
+    | percentEncodedWebsite = FilterResult user FailedFilter
+    | hasFullDescription = FilterResult user SuspiciousFilter
+    | unusualMailDomains = FilterResult user SuspiciousFilter
     | otherwise = FilterResult user PassFilter
   where
+    percentEncodedWebsite = Text.elem '%' $ getField @"website" user
+    unusualMailDomains =
+        let predicate = (`Text.isSuffixOf` getField @"email" user)
+         in any predicate (getField @"mailDomains" settings)
+    containsSpamWords =
+        let lowerCaseDescription = Text.toLower $ getField @"description" user
+            lowerCaseWebsite = Text.toLower $ getField @"website" user
+            predicate word = Text.isInfixOf word lowerCaseWebsite
+                || Text.isInfixOf word lowerCaseDescription
+         in any predicate (getField @"spamWords" settings)
+    hasFullDescription
+        = not (Text.null $ getField @"description" user)
+        && not (Text.null $ getField @"website" user)
+    noLoginSinceRegistration =
+        let monthAgo = utctDay $ addUTCTime (-2592000) $ getField @"now" settings
+         in zonedDay created < monthAgo && zonedDay created == zonedDay lastLogin
     zonedDay = localDay . zonedTimeToLocalTime
 
 filterByActivities :: Settings -> User -> IO FilterResult
@@ -39,5 +55,5 @@ filterByActivities settings user = getActivities settings user
     evalActivities activities
         | Just (Activity{ opType }, rest) <- Vector.uncons activities
         , Vector.null rest
-        , opType == "create_repo" = pure $ FilterResult user SuspiciousFilter
+        , opType == "create_repo" = pure $ FilterResult user FailedFilter
     evalActivities _ = pure $ FilterResult user PassFilter
-- 
cgit v1.2.3