tea-cleaner: Configure word lists

This commit is contained in:
2025-02-22 21:30:29 +01:00
parent a4c56fb432
commit 2dd3856389
4 changed files with 78 additions and 36 deletions

View File

@ -6,11 +6,11 @@ module TeaCleaner.Filter
) where
import qualified Data.Text as Text
import Data.Time (LocalTime(..), ZonedTime(..))
import Data.Time.Calendar.OrdinalDate (fromOrdinalDate)
import Data.Time (LocalTime(..), ZonedTime(..), UTCTime(..), addUTCTime)
import qualified Data.Vector as Vector
import TeaCleaner.Client (Activity(..), User(..), getActivities)
import TeaCleaner.Configuration (Settings(..))
import GHC.Records (HasField(..))
data UserFilter
= PassFilter
@ -21,15 +21,31 @@ data UserFilter
-- | Outcome of running a filter on a single user: the inspected 'User'
-- paired with the 'UserFilter' verdict that the filter assigned to it.
data FilterResult = FilterResult User UserFilter
deriving (Show)
-- | Flag a user as suspicious based on profile fields alone.
--
-- The single guard below holds when all of these are true: the account was
-- created and last logged in on the same calendar day, that day lies strictly
-- between ordinal day 1 of 2024 and ordinal day 17 of 2025, and both the
-- description and the website are non-empty.
--
-- NOTE(review): a second signature for the same name (taking 'Settings')
-- follows directly, so these lines look like the superseded side of the diff
-- hunk - confirm before treating them as live code.
filterByUserProperties :: User -> FilterResult
filterByUserProperties user@User{ created, lastLogin, description, website }
| zonedDay created == zonedDay lastLogin
, zonedDay created > fromOrdinalDate 2024 1
, zonedDay created < fromOrdinalDate 2025 17
, not (Text.null description)
, not (Text.null website) = FilterResult user SuspiciousFilter
-- | Classify a user from profile data alone, without looking at activities.
--
-- Guards are tried top to bottom and the first match wins:
--
-- * 'FailedFilter' when the registration day lies before the day 30 days
--   prior to the configured @now@ and the last login happened on the
--   registration day; when the description or the website contains one of
--   the configured spam words; or when the website contains a @%@ character.
-- * 'SuspiciousFilter' when both description and website are filled in, or
--   when the email ends with one of the configured mail domains.
-- * 'PassFilter' otherwise.
--
-- Spam words and mail domains are compared case-insensitively, and empty
-- configuration entries are ignored: an empty 'Text' is an infix and a suffix
-- of every string, so a stray empty entry would otherwise flag every user.
filterByUserProperties :: Settings -> User -> FilterResult
filterByUserProperties settings user@User{ created, lastLogin }
    | noLoginSinceRegistration = FilterResult user FailedFilter
    | containsSpamWords = FilterResult user FailedFilter
    | percentEncodedWebsite = FilterResult user FailedFilter
    | hasFullDescription = FilterResult user SuspiciousFilter
    | unusualMailDomains = FilterResult user SuspiciousFilter
    | otherwise = FilterResult user PassFilter
  where
    -- Presumably a sign of a percent-encoded URL; the check itself only
    -- looks for a literal '%' anywhere in the website field.
    percentEncodedWebsite = Text.elem '%' $ getField @"website" user
    unusualMailDomains =
        let lowerCaseEmail = Text.toLower $ getField @"email" user
            -- Domain names are case-insensitive, so normalize both sides.
            predicate domain
                = not (Text.null domain)
                && Text.toLower domain `Text.isSuffixOf` lowerCaseEmail
         in any predicate (getField @"mailDomains" settings)
    containsSpamWords =
        let lowerCaseDescription = Text.toLower $ getField @"description" user
            lowerCaseWebsite = Text.toLower $ getField @"website" user
            -- The haystacks are lower-cased, so the needle has to be
            -- lower-cased as well or mixed-case words could never match.
            predicate word =
                let needle = Text.toLower word
                 in not (Text.null needle)
                    && (Text.isInfixOf needle lowerCaseWebsite
                        || Text.isInfixOf needle lowerCaseDescription)
         in any predicate (getField @"spamWords" settings)
    hasFullDescription
        = not (Text.null $ getField @"description" user)
        && not (Text.null $ getField @"website" user)
    noLoginSinceRegistration =
        -- 2592000 seconds = 30 days.
        -- NOTE(review): monthAgo is a UTC day while zonedDay yields the
        -- local day of the timestamp - confirm the mix is intended.
        let monthAgo = utctDay $ addUTCTime (-2592000) $ getField @"now" settings
         in zonedDay created < monthAgo && zonedDay created == zonedDay lastLogin
    zonedDay = localDay . zonedTimeToLocalTime
filterByActivities :: Settings -> User -> IO FilterResult
@ -39,5 +55,5 @@ filterByActivities settings user = getActivities settings user
evalActivities activities
| Just (Activity{ opType }, rest) <- Vector.uncons activities
, Vector.null rest
, opType == "create_repo" = pure $ FilterResult user SuspiciousFilter
, opType == "create_repo" = pure $ FilterResult user FailedFilter
evalActivities _ = pure $ FilterResult user PassFilter