tea-cleaner: Configure word lists
This commit is contained in:
parent
a4c56fb432
commit
2dd3856389
@ -2,3 +2,14 @@
|
|||||||
token = ""
|
token = ""
|
||||||
# The gitea instance URL beginning wth "https://".
|
# The gitea instance URL beginning wth "https://".
|
||||||
server = "https://"
|
server = "https://"
|
||||||
|
# List of words prohibited in the description and the website field.
|
||||||
|
spam_words = [
|
||||||
|
"casino",
|
||||||
|
"gambling"
|
||||||
|
]
|
||||||
|
# Unusual email address exteions.
|
||||||
|
mail_domains = [
|
||||||
|
".online",
|
||||||
|
".shop",
|
||||||
|
".website"
|
||||||
|
]
|
||||||
|
@ -25,19 +25,32 @@ import TeaCleaner.Configuration
|
|||||||
, execParser
|
, execParser
|
||||||
)
|
)
|
||||||
import Control.Monad (when)
|
import Control.Monad (when)
|
||||||
|
import Data.IORef (modifyIORef, readIORef)
|
||||||
|
|
||||||
handleResults :: Settings -> Bool -> Vector User -> IO ()
|
printStatistics :: Settings -> IO ()
|
||||||
handleResults settings liveRun users = printCount
|
printStatistics Settings{ statistics } =
|
||||||
>> Vector.forM_ users (handleResult settings liveRun)
|
readIORef statistics >>= printCount
|
||||||
where
|
where
|
||||||
printCount =
|
printCount count =
|
||||||
let count = Text.Builder.decimal $ Vector.length users
|
let count' = Text.Builder.decimal count
|
||||||
in Text.Lazy.IO.putStrLn
|
in Text.Lazy.IO.putStrLn
|
||||||
$ Text.Builder.toLazyText
|
$ Text.Builder.toLazyText
|
||||||
$ "Count: " <> count
|
$ "Count: " <> count'
|
||||||
|
|
||||||
handleResult :: Settings -> Bool -> User -> IO ()
|
handleResults :: Settings -> Bool -> Vector FilterResult -> IO (Vector User)
|
||||||
handleResult settings liveRun user = Text.IO.putStrLn buildValue
|
handleResults settings liveRun =
|
||||||
|
Vector.foldM' handleResult Vector.empty
|
||||||
|
where
|
||||||
|
handleResult accumulator (FilterResult user FailedFilter)
|
||||||
|
= handleFailedFilter settings liveRun user
|
||||||
|
>> pure accumulator
|
||||||
|
handleResult accumulator (FilterResult _ PassFilter) = pure accumulator
|
||||||
|
handleResult accumulator (FilterResult user SuspiciousFilter) = pure
|
||||||
|
$ Vector.snoc accumulator user
|
||||||
|
|
||||||
|
handleFailedFilter :: Settings -> Bool -> User -> IO ()
|
||||||
|
handleFailedFilter settings liveRun user = Text.IO.putStrLn buildValue
|
||||||
|
>> modifyIORef (getField @"statistics" settings) (+ 1)
|
||||||
>> when liveRun (purgeUser settings user)
|
>> when liveRun (purgeUser settings user)
|
||||||
where
|
where
|
||||||
buildValue = getField @"username" user <> "\n"
|
buildValue = getField @"username" user <> "\n"
|
||||||
@ -58,14 +71,7 @@ main = execParser commandLineInfo >>= withArguments
|
|||||||
|
|
||||||
withSettings :: Bool -> Settings -> IO ()
|
withSettings :: Bool -> Settings -> IO ()
|
||||||
withSettings liveRun settings = getUsers settings
|
withSettings liveRun settings = getUsers settings
|
||||||
>>= Vector.foldM' foldFilterResult Vector.empty . fmap filterByUserProperties
|
>>= handleResults settings liveRun . fmap (filterByUserProperties settings)
|
||||||
>>= traverse (filterByActivities settings)
|
>>= traverse (filterByActivities settings)
|
||||||
>>= Vector.foldM' foldFilterResult Vector.empty
|
|
||||||
>>= handleResults settings liveRun
|
>>= handleResults settings liveRun
|
||||||
where
|
>> printStatistics settings
|
||||||
foldFilterResult accumulator (FilterResult user SuspiciousFilter)
|
|
||||||
= pure $ Vector.snoc accumulator user
|
|
||||||
foldFilterResult accumulator (FilterResult _ PassFilter) = pure accumulator
|
|
||||||
foldFilterResult accumulator (FilterResult user FailedFilter)
|
|
||||||
= handleResult settings liveRun user
|
|
||||||
>> pure accumulator
|
|
||||||
|
@ -26,35 +26,44 @@ import Options.Applicative
|
|||||||
import Text.URI (URI)
|
import Text.URI (URI)
|
||||||
import qualified Text.URI as URI
|
import qualified Text.URI as URI
|
||||||
import Data.Time (UTCTime(..), getCurrentTime)
|
import Data.Time (UTCTime(..), getCurrentTime)
|
||||||
|
import Data.IORef (IORef, newIORef)
|
||||||
|
|
||||||
data ConfigFile = ConfigFile
|
data ConfigFile = ConfigFile
|
||||||
{ token :: StrictText
|
{ token :: StrictText
|
||||||
, server :: StrictText
|
, server :: StrictText
|
||||||
|
, spamWords :: [StrictText]
|
||||||
|
, mailDomains :: [StrictText]
|
||||||
} deriving (Eq, Show)
|
} deriving (Eq, Show)
|
||||||
|
|
||||||
configFileCodec :: Toml.TomlCodec ConfigFile
|
configFileCodec :: Toml.TomlCodec ConfigFile
|
||||||
configFileCodec = ConfigFile
|
configFileCodec = ConfigFile
|
||||||
<$> Toml.text "token" .= getField @"token"
|
<$> Toml.text "token" .= getField @"token"
|
||||||
<*> Toml.text "server" .= getField @"server"
|
<*> Toml.text "server" .= getField @"server"
|
||||||
|
<*> Toml.arrayOf Toml._Text "spam_words" .= getField @"spamWords"
|
||||||
|
<*> Toml.arrayOf Toml._Text "mail_domains" .= getField @"mailDomains"
|
||||||
|
|
||||||
data Settings = Settings
|
data Settings = Settings
|
||||||
{ token :: StrictText
|
{ token :: StrictText
|
||||||
, server :: URI
|
, server :: URI
|
||||||
, now :: UTCTime
|
, now :: UTCTime
|
||||||
} deriving (Eq, Show)
|
, spamWords :: [StrictText]
|
||||||
|
, mailDomains :: [StrictText]
|
||||||
|
, statistics :: IORef Int
|
||||||
|
} deriving Eq
|
||||||
|
|
||||||
decodeSettingsFile :: FilePath -> IO Settings
|
decodeSettingsFile :: FilePath -> IO Settings
|
||||||
decodeSettingsFile configPath = Toml.decodeFile configFileCodec configPath
|
decodeSettingsFile configPath = do
|
||||||
>>= withConfiguration
|
ConfigFile{..} <- Toml.decodeFile configFileCodec configPath
|
||||||
where
|
parsedServer <- URI.mkURI server
|
||||||
withConfiguration configFile@ConfigFile{ server } = URI.mkURI server
|
now <- getCurrentTime
|
||||||
>>= withServer configFile
|
ioRef <- newIORef 0
|
||||||
withServer configFile parsedServer = getCurrentTime
|
pure $ Settings
|
||||||
>>= withTime configFile parsedServer
|
|
||||||
withTime ConfigFile{..} parsedServer now = pure $ Settings
|
|
||||||
{ token = token
|
{ token = token
|
||||||
, server = parsedServer
|
, server = parsedServer
|
||||||
, now = now
|
, now = now
|
||||||
|
, spamWords = spamWords
|
||||||
|
, mailDomains = mailDomains
|
||||||
|
, statistics = ioRef
|
||||||
}
|
}
|
||||||
|
|
||||||
newtype ProgramOptions = ProgramOptions
|
newtype ProgramOptions = ProgramOptions
|
||||||
|
@ -6,11 +6,11 @@ module TeaCleaner.Filter
|
|||||||
) where
|
) where
|
||||||
|
|
||||||
import qualified Data.Text as Text
|
import qualified Data.Text as Text
|
||||||
import Data.Time (LocalTime(..), ZonedTime(..))
|
import Data.Time (LocalTime(..), ZonedTime(..), UTCTime(..), addUTCTime)
|
||||||
import Data.Time.Calendar.OrdinalDate (fromOrdinalDate)
|
|
||||||
import qualified Data.Vector as Vector
|
import qualified Data.Vector as Vector
|
||||||
import TeaCleaner.Client (Activity(..), User(..), getActivities)
|
import TeaCleaner.Client (Activity(..), User(..), getActivities)
|
||||||
import TeaCleaner.Configuration (Settings(..))
|
import TeaCleaner.Configuration (Settings(..))
|
||||||
|
import GHC.Records (HasField(..))
|
||||||
|
|
||||||
data UserFilter
|
data UserFilter
|
||||||
= PassFilter
|
= PassFilter
|
||||||
@ -21,15 +21,31 @@ data UserFilter
|
|||||||
data FilterResult = FilterResult User UserFilter
|
data FilterResult = FilterResult User UserFilter
|
||||||
deriving (Show)
|
deriving (Show)
|
||||||
|
|
||||||
filterByUserProperties :: User -> FilterResult
|
filterByUserProperties :: Settings -> User -> FilterResult
|
||||||
filterByUserProperties user@User{ created, lastLogin, description, website }
|
filterByUserProperties settings user@User{ created, lastLogin }
|
||||||
| zonedDay created == zonedDay lastLogin
|
| noLoginSinceRegistration = FilterResult user FailedFilter
|
||||||
, zonedDay created > fromOrdinalDate 2024 1
|
| containsSpamWords = FilterResult user FailedFilter
|
||||||
, zonedDay created < fromOrdinalDate 2025 17
|
| percentEncodedWebsite = FilterResult user FailedFilter
|
||||||
, not (Text.null description)
|
| hasFullDescription = FilterResult user SuspiciousFilter
|
||||||
, not (Text.null website) = FilterResult user SuspiciousFilter
|
| unusualMailDomains = FilterResult user SuspiciousFilter
|
||||||
| otherwise = FilterResult user PassFilter
|
| otherwise = FilterResult user PassFilter
|
||||||
where
|
where
|
||||||
|
percentEncodedWebsite = Text.elem '%' $ getField @"website" user
|
||||||
|
unusualMailDomains =
|
||||||
|
let predicate = (`Text.isSuffixOf` getField @"email" user)
|
||||||
|
in any predicate (getField @"mailDomains" settings)
|
||||||
|
containsSpamWords =
|
||||||
|
let lowerCaseDescription = Text.toLower $ getField @"description" user
|
||||||
|
lowerCaseWebsite = Text.toLower $ getField @"website" user
|
||||||
|
predicate word = Text.isInfixOf word lowerCaseWebsite
|
||||||
|
|| Text.isInfixOf word lowerCaseDescription
|
||||||
|
in any predicate (getField @"spamWords" settings)
|
||||||
|
hasFullDescription
|
||||||
|
= not (Text.null $ getField @"description" user)
|
||||||
|
&& not (Text.null $ getField @"website" user)
|
||||||
|
noLoginSinceRegistration =
|
||||||
|
let monthAgo = utctDay $ addUTCTime (-2592000) $ getField @"now" settings
|
||||||
|
in zonedDay created < monthAgo && zonedDay created == zonedDay lastLogin
|
||||||
zonedDay = localDay . zonedTimeToLocalTime
|
zonedDay = localDay . zonedTimeToLocalTime
|
||||||
|
|
||||||
filterByActivities :: Settings -> User -> IO FilterResult
|
filterByActivities :: Settings -> User -> IO FilterResult
|
||||||
@ -39,5 +55,5 @@ filterByActivities settings user = getActivities settings user
|
|||||||
evalActivities activities
|
evalActivities activities
|
||||||
| Just (Activity{ opType }, rest) <- Vector.uncons activities
|
| Just (Activity{ opType }, rest) <- Vector.uncons activities
|
||||||
, Vector.null rest
|
, Vector.null rest
|
||||||
, opType == "create_repo" = pure $ FilterResult user SuspiciousFilter
|
, opType == "create_repo" = pure $ FilterResult user FailedFilter
|
||||||
evalActivities _ = pure $ FilterResult user PassFilter
|
evalActivities _ = pure $ FilterResult user PassFilter
|
||||||
|
Loading…
x
Reference in New Issue
Block a user