aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/umabot/bot.py12
-rw-r--r--src/umabot/rules/spam_detector.py50
2 files changed, 35 insertions, 27 deletions
diff --git a/src/umabot/bot.py b/src/umabot/bot.py
index 2519962..89b876a 100644
--- a/src/umabot/bot.py
+++ b/src/umabot/bot.py
@@ -2,8 +2,8 @@
import time
import threading
+from collections import OrderedDict
import praw
-from typing import List
from http.server import HTTPServer, BaseHTTPRequestHandler
from socketserver import ThreadingMixIn
from loguru import logger
@@ -93,7 +93,7 @@ class UmaBot:
]
# Track processed submissions to avoid processing old posts
- self.processed_submissions = set()
+ self.processed_submissions = OrderedDict()
self.initialized = False
self.logger.info(f"Bot initialized for r/{config.subreddit_name}")
@@ -140,7 +140,7 @@ class UmaBot:
if not self.initialized:
self.logger.info("Initializing bot - marking existing posts as processed")
for submission in new_submissions:
- self.processed_submissions.add(submission.id)
+ self.processed_submissions[submission.id] = None
self.initialized = True
self.logger.info(f"Bot initialized with {len(self.processed_submissions)} existing posts marked as processed")
return
@@ -150,7 +150,7 @@ class UmaBot:
for submission in new_submissions:
if submission.id not in self.processed_submissions:
truly_new_submissions.append(submission)
- self.processed_submissions.add(submission.id)
+ self.processed_submissions[submission.id] = None
if not truly_new_submissions:
self.logger.debug("No truly new submissions found")
@@ -224,6 +224,6 @@ class UmaBot:
# Keep only the last 1000 processed submissions
if len(self.processed_submissions) > 1000:
# Convert to list, keep last 1000, convert back to set
- submissions_list = list(self.processed_submissions)
- self.processed_submissions = set(submissions_list[-1000:])
+ while len(self.processed_submissions) > 1000:
+ self.processed_submissions.popitem(last=False)
self.logger.debug(f"Cleaned up processed submissions, keeping {len(self.processed_submissions)} most recent")
diff --git a/src/umabot/rules/spam_detector.py b/src/umabot/rules/spam_detector.py
index 2de48f2..861616c 100644
--- a/src/umabot/rules/spam_detector.py
+++ b/src/umabot/rules/spam_detector.py
@@ -1,8 +1,7 @@
"""Spam detection rule for limiting posts per user per day."""
-import time
from datetime import datetime, timedelta, timezone
-from typing import Dict, List
+from typing import Dict, Set
import praw.models
from .base import Rule
@@ -13,7 +12,7 @@ class SpamDetector(Rule):
def __init__(self, config):
"""Initialize the spam detector."""
super().__init__(config)
- self.user_posts: Dict[str, List[tuple[float, str]]] = {} # (timestamp, post_id)
+ self.user_posts: Dict[str, Dict[str, float]] = {}
self.max_posts = config.max_posts_per_day
def should_remove(self, submission: praw.models.Submission) -> bool:
@@ -23,22 +22,23 @@ class SpamDetector(Rule):
username = submission.author.name
current_utc = datetime.now(timezone.utc)
+ submission_utc = self._get_submission_utc(submission)
- # Clean old posts from tracking (remove posts from previous days)
self._clean_old_posts(username, current_utc)
+
+ if submission_utc.date() != current_utc.date():
+ return False
- # Count current active posts in today's UTC day
if username not in self.user_posts:
- self.user_posts[username] = []
+ self.user_posts[username] = {}
- # Filter out removed posts and count active ones
active_posts = self._get_active_posts(username, current_utc)
- post_count = len(active_posts)
-
- # Add current post to tracking
- self.user_posts[username].append((current_utc.timestamp(), submission.id))
+ if submission.id in active_posts:
+ return False
- # Check if this post exceeds the limit
+ post_count = len(active_posts)
+ self.user_posts[username][submission.id] = submission_utc.timestamp()
+
if post_count >= self.max_posts:
self.logger.info(
f"User {username} has posted {post_count + 1} active times today (UTC) "
@@ -81,28 +81,36 @@ class SpamDetector(Rule):
today_timestamp = today_start.timestamp()
# Keep only posts from today
- self.user_posts[username] = [
- (post_time, post_id) for post_time, post_id in self.user_posts[username]
+ self.user_posts[username] = {
+ post_id: post_time
+ for post_id, post_time in self.user_posts[username].items()
if post_time >= today_timestamp
- ]
+ }
- def _get_active_posts(self, username: str, current_utc: datetime) -> List[tuple[float, str]]:
+ def _get_active_posts(self, username: str, current_utc: datetime) -> Set[str]:
"""Get active (non-removed) posts for a user."""
if username not in self.user_posts:
- return []
+ return set()
# Get start of current UTC day
today_start = current_utc.replace(hour=0, minute=0, second=0, microsecond=0)
today_timestamp = today_start.timestamp()
- active_posts = []
- for post_time, post_id in self.user_posts[username]:
+ active_posts = set()
+ for post_id, post_time in self.user_posts[username].items():
if post_time >= today_timestamp:
- # Check if the post is still active (not removed)
if self._is_post_active(post_id):
- active_posts.append((post_time, post_id))
+ active_posts.add(post_id)
return active_posts
+
+ def _get_submission_utc(self, submission: praw.models.Submission) -> datetime:
+ """Get the submission timestamp in UTC."""
+ created_utc = getattr(submission, "created_utc", None)
+ if created_utc is None:
+ return datetime.now(timezone.utc)
+
+ return datetime.fromtimestamp(created_utc, timezone.utc)
def _is_post_active(self, post_id: str) -> bool:
"""Check if a post is still active (not removed) by checking its status."""