tea-cleaner: Configure word lists
This commit is contained in:
parent
a4c56fb432
commit
2dd3856389
@ -2,3 +2,14 @@
|
||||
token = ""
|
||||
# The gitea instance URL beginning wth "https://".
|
||||
server = "https://"
|
||||
# List of words prohibited in the description and the website field.
|
||||
spam_words = [
|
||||
"casino",
|
||||
"gambling"
|
||||
]
|
||||
# Unusual email address exteions.
|
||||
mail_domains = [
|
||||
".online",
|
||||
".shop",
|
||||
".website"
|
||||
]
|
||||
|
@ -25,19 +25,32 @@ import TeaCleaner.Configuration
|
||||
, execParser
|
||||
)
|
||||
import Control.Monad (when)
|
||||
import Data.IORef (modifyIORef, readIORef)
|
||||
|
||||
handleResults :: Settings -> Bool -> Vector User -> IO ()
|
||||
handleResults settings liveRun users = printCount
|
||||
>> Vector.forM_ users (handleResult settings liveRun)
|
||||
printStatistics :: Settings -> IO ()
|
||||
printStatistics Settings{ statistics } =
|
||||
readIORef statistics >>= printCount
|
||||
where
|
||||
printCount =
|
||||
let count = Text.Builder.decimal $ Vector.length users
|
||||
printCount count =
|
||||
let count' = Text.Builder.decimal count
|
||||
in Text.Lazy.IO.putStrLn
|
||||
$ Text.Builder.toLazyText
|
||||
$ "Count: " <> count
|
||||
$ "Count: " <> count'
|
||||
|
||||
handleResult :: Settings -> Bool -> User -> IO ()
|
||||
handleResult settings liveRun user = Text.IO.putStrLn buildValue
|
||||
handleResults :: Settings -> Bool -> Vector FilterResult -> IO (Vector User)
|
||||
handleResults settings liveRun =
|
||||
Vector.foldM' handleResult Vector.empty
|
||||
where
|
||||
handleResult accumulator (FilterResult user FailedFilter)
|
||||
= handleFailedFilter settings liveRun user
|
||||
>> pure accumulator
|
||||
handleResult accumulator (FilterResult _ PassFilter) = pure accumulator
|
||||
handleResult accumulator (FilterResult user SuspiciousFilter) = pure
|
||||
$ Vector.snoc accumulator user
|
||||
|
||||
handleFailedFilter :: Settings -> Bool -> User -> IO ()
|
||||
handleFailedFilter settings liveRun user = Text.IO.putStrLn buildValue
|
||||
>> modifyIORef (getField @"statistics" settings) (+ 1)
|
||||
>> when liveRun (purgeUser settings user)
|
||||
where
|
||||
buildValue = getField @"username" user <> "\n"
|
||||
@ -58,14 +71,7 @@ main = execParser commandLineInfo >>= withArguments
|
||||
|
||||
withSettings :: Bool -> Settings -> IO ()
|
||||
withSettings liveRun settings = getUsers settings
|
||||
>>= Vector.foldM' foldFilterResult Vector.empty . fmap filterByUserProperties
|
||||
>>= handleResults settings liveRun . fmap (filterByUserProperties settings)
|
||||
>>= traverse (filterByActivities settings)
|
||||
>>= Vector.foldM' foldFilterResult Vector.empty
|
||||
>>= handleResults settings liveRun
|
||||
where
|
||||
foldFilterResult accumulator (FilterResult user SuspiciousFilter)
|
||||
= pure $ Vector.snoc accumulator user
|
||||
foldFilterResult accumulator (FilterResult _ PassFilter) = pure accumulator
|
||||
foldFilterResult accumulator (FilterResult user FailedFilter)
|
||||
= handleResult settings liveRun user
|
||||
>> pure accumulator
|
||||
>> printStatistics settings
|
||||
|
@ -26,35 +26,44 @@ import Options.Applicative
|
||||
import Text.URI (URI)
|
||||
import qualified Text.URI as URI
|
||||
import Data.Time (UTCTime(..), getCurrentTime)
|
||||
import Data.IORef (IORef, newIORef)
|
||||
|
||||
data ConfigFile = ConfigFile
|
||||
{ token :: StrictText
|
||||
, server :: StrictText
|
||||
, spamWords :: [StrictText]
|
||||
, mailDomains :: [StrictText]
|
||||
} deriving (Eq, Show)
|
||||
|
||||
configFileCodec :: Toml.TomlCodec ConfigFile
|
||||
configFileCodec = ConfigFile
|
||||
<$> Toml.text "token" .= getField @"token"
|
||||
<*> Toml.text "server" .= getField @"server"
|
||||
<*> Toml.arrayOf Toml._Text "spam_words" .= getField @"spamWords"
|
||||
<*> Toml.arrayOf Toml._Text "mail_domains" .= getField @"mailDomains"
|
||||
|
||||
data Settings = Settings
|
||||
{ token :: StrictText
|
||||
, server :: URI
|
||||
, now :: UTCTime
|
||||
} deriving (Eq, Show)
|
||||
, spamWords :: [StrictText]
|
||||
, mailDomains :: [StrictText]
|
||||
, statistics :: IORef Int
|
||||
} deriving Eq
|
||||
|
||||
decodeSettingsFile :: FilePath -> IO Settings
|
||||
decodeSettingsFile configPath = Toml.decodeFile configFileCodec configPath
|
||||
>>= withConfiguration
|
||||
where
|
||||
withConfiguration configFile@ConfigFile{ server } = URI.mkURI server
|
||||
>>= withServer configFile
|
||||
withServer configFile parsedServer = getCurrentTime
|
||||
>>= withTime configFile parsedServer
|
||||
withTime ConfigFile{..} parsedServer now = pure $ Settings
|
||||
decodeSettingsFile configPath = do
|
||||
ConfigFile{..} <- Toml.decodeFile configFileCodec configPath
|
||||
parsedServer <- URI.mkURI server
|
||||
now <- getCurrentTime
|
||||
ioRef <- newIORef 0
|
||||
pure $ Settings
|
||||
{ token = token
|
||||
, server = parsedServer
|
||||
, now = now
|
||||
, spamWords = spamWords
|
||||
, mailDomains = mailDomains
|
||||
, statistics = ioRef
|
||||
}
|
||||
|
||||
newtype ProgramOptions = ProgramOptions
|
||||
|
@ -6,11 +6,11 @@ module TeaCleaner.Filter
|
||||
) where
|
||||
|
||||
import qualified Data.Text as Text
|
||||
import Data.Time (LocalTime(..), ZonedTime(..))
|
||||
import Data.Time.Calendar.OrdinalDate (fromOrdinalDate)
|
||||
import Data.Time (LocalTime(..), ZonedTime(..), UTCTime(..), addUTCTime)
|
||||
import qualified Data.Vector as Vector
|
||||
import TeaCleaner.Client (Activity(..), User(..), getActivities)
|
||||
import TeaCleaner.Configuration (Settings(..))
|
||||
import GHC.Records (HasField(..))
|
||||
|
||||
data UserFilter
|
||||
= PassFilter
|
||||
@ -21,15 +21,31 @@ data UserFilter
|
||||
data FilterResult = FilterResult User UserFilter
|
||||
deriving (Show)
|
||||
|
||||
filterByUserProperties :: User -> FilterResult
|
||||
filterByUserProperties user@User{ created, lastLogin, description, website }
|
||||
| zonedDay created == zonedDay lastLogin
|
||||
, zonedDay created > fromOrdinalDate 2024 1
|
||||
, zonedDay created < fromOrdinalDate 2025 17
|
||||
, not (Text.null description)
|
||||
, not (Text.null website) = FilterResult user SuspiciousFilter
|
||||
filterByUserProperties :: Settings -> User -> FilterResult
|
||||
filterByUserProperties settings user@User{ created, lastLogin }
|
||||
| noLoginSinceRegistration = FilterResult user FailedFilter
|
||||
| containsSpamWords = FilterResult user FailedFilter
|
||||
| percentEncodedWebsite = FilterResult user FailedFilter
|
||||
| hasFullDescription = FilterResult user SuspiciousFilter
|
||||
| unusualMailDomains = FilterResult user SuspiciousFilter
|
||||
| otherwise = FilterResult user PassFilter
|
||||
where
|
||||
percentEncodedWebsite = Text.elem '%' $ getField @"website" user
|
||||
unusualMailDomains =
|
||||
let predicate = (`Text.isSuffixOf` getField @"email" user)
|
||||
in any predicate (getField @"mailDomains" settings)
|
||||
containsSpamWords =
|
||||
let lowerCaseDescription = Text.toLower $ getField @"description" user
|
||||
lowerCaseWebsite = Text.toLower $ getField @"website" user
|
||||
predicate word = Text.isInfixOf word lowerCaseWebsite
|
||||
|| Text.isInfixOf word lowerCaseDescription
|
||||
in any predicate (getField @"spamWords" settings)
|
||||
hasFullDescription
|
||||
= not (Text.null $ getField @"description" user)
|
||||
&& not (Text.null $ getField @"website" user)
|
||||
noLoginSinceRegistration =
|
||||
let monthAgo = utctDay $ addUTCTime (-2592000) $ getField @"now" settings
|
||||
in zonedDay created < monthAgo && zonedDay created == zonedDay lastLogin
|
||||
zonedDay = localDay . zonedTimeToLocalTime
|
||||
|
||||
filterByActivities :: Settings -> User -> IO FilterResult
|
||||
@ -39,5 +55,5 @@ filterByActivities settings user = getActivities settings user
|
||||
evalActivities activities
|
||||
| Just (Activity{ opType }, rest) <- Vector.uncons activities
|
||||
, Vector.null rest
|
||||
, opType == "create_repo" = pure $ FilterResult user SuspiciousFilter
|
||||
, opType == "create_repo" = pure $ FilterResult user FailedFilter
|
||||
evalActivities _ = pure $ FilterResult user PassFilter
|
||||
|
Loading…
x
Reference in New Issue
Block a user