aboutsummaryrefslogtreecommitdiff
path: root/tea-cleaner/TeaCleaner
diff options
context:
space:
mode:
Diffstat (limited to 'tea-cleaner/TeaCleaner')
-rw-r--r--tea-cleaner/TeaCleaner/Configuration.hs27
-rw-r--r--tea-cleaner/TeaCleaner/Filter.hs36
2 files changed, 44 insertions, 19 deletions
diff --git a/tea-cleaner/TeaCleaner/Configuration.hs b/tea-cleaner/TeaCleaner/Configuration.hs
index a7342d3..1d2db32 100644
--- a/tea-cleaner/TeaCleaner/Configuration.hs
+++ b/tea-cleaner/TeaCleaner/Configuration.hs
@@ -26,35 +26,44 @@ import Options.Applicative
import Text.URI (URI)
import qualified Text.URI as URI
import Data.Time (UTCTime(..), getCurrentTime)
+import Data.IORef (IORef, newIORef)
data ConfigFile = ConfigFile
{ token :: StrictText
, server :: StrictText
+ , spamWords :: [StrictText]
+ , mailDomains :: [StrictText]
} deriving (Eq, Show)
configFileCodec :: Toml.TomlCodec ConfigFile
configFileCodec = ConfigFile
<$> Toml.text "token" .= getField @"token"
<*> Toml.text "server" .= getField @"server"
+ <*> Toml.arrayOf Toml._Text "spam_words" .= getField @"spamWords"
+ <*> Toml.arrayOf Toml._Text "mail_domains" .= getField @"mailDomains"
data Settings = Settings
{ token :: StrictText
, server :: URI
, now :: UTCTime
- } deriving (Eq, Show)
+ , spamWords :: [StrictText]
+ , mailDomains :: [StrictText]
+ , statistics :: IORef Int
+ } deriving Eq
decodeSettingsFile :: FilePath -> IO Settings
-decodeSettingsFile configPath = Toml.decodeFile configFileCodec configPath
- >>= withConfiguration
- where
- withConfiguration configFile@ConfigFile{ server } = URI.mkURI server
- >>= withServer configFile
- withServer configFile parsedServer = getCurrentTime
- >>= withTime configFile parsedServer
- withTime ConfigFile{..} parsedServer now = pure $ Settings
+decodeSettingsFile configPath = do
+ ConfigFile{..} <- Toml.decodeFile configFileCodec configPath
+ parsedServer <- URI.mkURI server
+ now <- getCurrentTime
+ ioRef <- newIORef 0
+ pure $ Settings
{ token = token
, server = parsedServer
, now = now
+ , spamWords = spamWords
+ , mailDomains = mailDomains
+ , statistics = ioRef
}
newtype ProgramOptions = ProgramOptions
diff --git a/tea-cleaner/TeaCleaner/Filter.hs b/tea-cleaner/TeaCleaner/Filter.hs
index 8448ff5..658606d 100644
--- a/tea-cleaner/TeaCleaner/Filter.hs
+++ b/tea-cleaner/TeaCleaner/Filter.hs
@@ -6,11 +6,11 @@ module TeaCleaner.Filter
) where
import qualified Data.Text as Text
-import Data.Time (LocalTime(..), ZonedTime(..))
-import Data.Time.Calendar.OrdinalDate (fromOrdinalDate)
+import Data.Time (LocalTime(..), ZonedTime(..), UTCTime(..), addUTCTime)
import qualified Data.Vector as Vector
import TeaCleaner.Client (Activity(..), User(..), getActivities)
import TeaCleaner.Configuration (Settings(..))
+import GHC.Records (HasField(..))
data UserFilter
= PassFilter
@@ -21,15 +21,31 @@ data UserFilter
data FilterResult = FilterResult User UserFilter
deriving (Show)
-filterByUserProperties :: User -> FilterResult
-filterByUserProperties user@User{ created, lastLogin, description, website }
- | zonedDay created == zonedDay lastLogin
- , zonedDay created > fromOrdinalDate 2024 1
- , zonedDay created < fromOrdinalDate 2025 17
- , not (Text.null description)
- , not (Text.null website) = FilterResult user SuspiciousFilter
+filterByUserProperties :: Settings -> User -> FilterResult
+filterByUserProperties settings user@User{ created, lastLogin }
+ | noLoginSinceRegistration = FilterResult user FailedFilter
+ | containsSpamWords = FilterResult user FailedFilter
+ | percentEncodedWebsite = FilterResult user FailedFilter
+ | hasFullDescription = FilterResult user SuspiciousFilter
+ | unusualMailDomains = FilterResult user SuspiciousFilter
| otherwise = FilterResult user PassFilter
where
+ percentEncodedWebsite = Text.elem '%' $ getField @"website" user
+ unusualMailDomains =
+ let predicate = (`Text.isSuffixOf` getField @"email" user)
+ in any predicate (getField @"mailDomains" settings)
+ containsSpamWords =
+ let lowerCaseDescription = Text.toLower $ getField @"description" user
+ lowerCaseWebsite = Text.toLower $ getField @"website" user
+ predicate word = Text.isInfixOf word lowerCaseWebsite
+ || Text.isInfixOf word lowerCaseDescription
+ in any predicate (getField @"spamWords" settings)
+ hasFullDescription
+ = not (Text.null $ getField @"description" user)
+ && not (Text.null $ getField @"website" user)
+ noLoginSinceRegistration =
+ let monthAgo = utctDay $ addUTCTime (-2592000) $ getField @"now" settings
+ in zonedDay created < monthAgo && zonedDay created == zonedDay lastLogin
zonedDay = localDay . zonedTimeToLocalTime
filterByActivities :: Settings -> User -> IO FilterResult
@@ -39,5 +55,5 @@ filterByActivities settings user = getActivities settings user
evalActivities activities
| Just (Activity{ opType }, rest) <- Vector.uncons activities
, Vector.null rest
- , opType == "create_repo" = pure $ FilterResult user SuspiciousFilter
+ , opType == "create_repo" = pure $ FilterResult user FailedFilter
evalActivities _ = pure $ FilterResult user PassFilter