about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/umabot/bot.py 12
-rw-r--r-- src/umabot/rules/spam_detector.py 50
-rw-r--r-- src/umabot/rules/umaddit_removal.py 56
3 files changed, 72 insertions, 46 deletions
diff --git a/src/umabot/bot.py b/src/umabot/bot.py
index 2519962..89b876a 100644
--- a/src/umabot/bot.py
+++ b/src/umabot/bot.py
@@ -2,8 +2,8 @@
import time
import threading
+from collections import OrderedDict
import praw
-from typing import List
from http.server import HTTPServer, BaseHTTPRequestHandler
from socketserver import ThreadingMixIn
from loguru import logger
@@ -93,7 +93,7 @@ class UmaBot:
]
# Track processed submissions to avoid processing old posts
- self.processed_submissions = set()
+ self.processed_submissions = OrderedDict()
self.initialized = False
self.logger.info(f"Bot initialized for r/{config.subreddit_name}")
@@ -140,7 +140,7 @@ class UmaBot:
if not self.initialized:
self.logger.info("Initializing bot - marking existing posts as processed")
for submission in new_submissions:
- self.processed_submissions.add(submission.id)
+ self.processed_submissions[submission.id] = None
self.initialized = True
self.logger.info(f"Bot initialized with {len(self.processed_submissions)} existing posts marked as processed")
return
@@ -150,7 +150,7 @@ class UmaBot:
for submission in new_submissions:
if submission.id not in self.processed_submissions:
truly_new_submissions.append(submission)
- self.processed_submissions.add(submission.id)
+ self.processed_submissions[submission.id] = None
if not truly_new_submissions:
self.logger.debug("No truly new submissions found")
@@ -224,6 +224,6 @@ class UmaBot:
# Keep only the last 1000 processed submissions
if len(self.processed_submissions) > 1000:
# Convert to list, keep last 1000, convert back to set
- submissions_list = list(self.processed_submissions)
- self.processed_submissions = set(submissions_list[-1000:])
+ while len(self.processed_submissions) > 1000:
+ self.processed_submissions.popitem(last=False)
self.logger.debug(f"Cleaned up processed submissions, keeping {len(self.processed_submissions)} most recent")
diff --git a/src/umabot/rules/spam_detector.py b/src/umabot/rules/spam_detector.py
index 2de48f2..861616c 100644
--- a/src/umabot/rules/spam_detector.py
+++ b/src/umabot/rules/spam_detector.py
@@ -1,8 +1,7 @@
"""Spam detection rule for limiting posts per user per day."""
-import time
from datetime import datetime, timedelta, timezone
-from typing import Dict, List
+from typing import Dict, Set
import praw.models
from .base import Rule
@@ -13,7 +12,7 @@ class SpamDetector(Rule):
def __init__(self, config):
"""Initialize the spam detector."""
super().__init__(config)
- self.user_posts: Dict[str, List[tuple[float, str]]] = {} # (timestamp, post_id)
+ self.user_posts: Dict[str, Dict[str, float]] = {}
self.max_posts = config.max_posts_per_day
def should_remove(self, submission: praw.models.Submission) -> bool:
@@ -23,22 +22,23 @@ class SpamDetector(Rule):
username = submission.author.name
current_utc = datetime.now(timezone.utc)
+ submission_utc = self._get_submission_utc(submission)
- # Clean old posts from tracking (remove posts from previous days)
self._clean_old_posts(username, current_utc)
+
+ if submission_utc.date() != current_utc.date():
+ return False
- # Count current active posts in today's UTC day
if username not in self.user_posts:
- self.user_posts[username] = []
+ self.user_posts[username] = {}
- # Filter out removed posts and count active ones
active_posts = self._get_active_posts(username, current_utc)
- post_count = len(active_posts)
-
- # Add current post to tracking
- self.user_posts[username].append((current_utc.timestamp(), submission.id))
+ if submission.id in active_posts:
+ return False
- # Check if this post exceeds the limit
+ post_count = len(active_posts)
+ self.user_posts[username][submission.id] = submission_utc.timestamp()
+
if post_count >= self.max_posts:
self.logger.info(
f"User {username} has posted {post_count + 1} active times today (UTC) "
@@ -81,28 +81,36 @@ class SpamDetector(Rule):
today_timestamp = today_start.timestamp()
# Keep only posts from today
- self.user_posts[username] = [
- (post_time, post_id) for post_time, post_id in self.user_posts[username]
+ self.user_posts[username] = {
+ post_id: post_time
+ for post_id, post_time in self.user_posts[username].items()
if post_time >= today_timestamp
- ]
+ }
- def _get_active_posts(self, username: str, current_utc: datetime) -> List[tuple[float, str]]:
+ def _get_active_posts(self, username: str, current_utc: datetime) -> Set[str]:
"""Get active (non-removed) posts for a user."""
if username not in self.user_posts:
- return []
+ return set()
# Get start of current UTC day
today_start = current_utc.replace(hour=0, minute=0, second=0, microsecond=0)
today_timestamp = today_start.timestamp()
- active_posts = []
- for post_time, post_id in self.user_posts[username]:
+ active_posts = set()
+ for post_id, post_time in self.user_posts[username].items():
if post_time >= today_timestamp:
- # Check if the post is still active (not removed)
if self._is_post_active(post_id):
- active_posts.append((post_time, post_id))
+ active_posts.add(post_id)
return active_posts
+
+ def _get_submission_utc(self, submission: praw.models.Submission) -> datetime:
+ """Get the submission timestamp in UTC."""
+ created_utc = getattr(submission, "created_utc", None)
+ if created_utc is None:
+ return datetime.now(timezone.utc)
+
+ return datetime.fromtimestamp(created_utc, timezone.utc)
def _is_post_active(self, post_id: str) -> bool:
"""Check if a post is still active (not removed) by checking its status."""
diff --git a/src/umabot/rules/umaddit_removal.py b/src/umabot/rules/umaddit_removal.py
index 759c784..91562d4 100644
--- a/src/umabot/rules/umaddit_removal.py
+++ b/src/umabot/rules/umaddit_removal.py
@@ -1,4 +1,4 @@
-"""Rule to remove posts containing umaddit references."""
+"""Rule to remove posts containing umaddit and umagusher references."""
import re
import praw.models
@@ -6,25 +6,31 @@ from .base import Rule
class UmadditRemovalRule(Rule):
- """Removes posts containing umaddit subreddit references (r/umaddit, /umaddit)."""
+ """Removes posts containing umaddit or umagusher subreddit references (r/umaddit, /umaddit, r/umagusher, /umagusher)."""
def __init__(self, config):
- """Initialize the umaddit removal rule."""
+ """Initialize the umaddit/umagusher removal rule."""
super().__init__(config)
- # Regex pattern to match subreddit references to umaddit
- # Matches: r/umaddit, /umaddit (case-insensitive)
- # Word boundary after umaddit ensures we don't match "umaddit123" or similar
+ # Regex patterns to match subreddit references
+ # Umaddit: only matches with subreddit prefix (r/umaddit, /umaddit)
+ # Umagusher: matches with or without prefix, and handles whitespace variations (uma gusher, uma-gusher, etc.)
self.umaddit_pattern = re.compile(r'(?:r/|/)umaddit\b', re.IGNORECASE)
+ # Match umagusher with prefix, without prefix, or with whitespace variations
+ # Pattern allows: r/umagusher, /umagusher, umagusher, uma gusher, uma-gusher, uma_gusher, umagusher123
+ # Uses negative lookbehind to prevent false matches like "forumagusher"
+ # No word boundary at end to catch variations like "umagusher123"
+ self.umagusher_pattern = re.compile(r'(?<![a-z])(?:r/|/)?uma[\s\-_]?gusher', re.IGNORECASE)
def should_remove(self, submission: praw.models.Submission) -> bool:
- """Check if a post contains umaddit subreddit references."""
+ """Check if a post contains umaddit or umagusher subreddit references."""
if not submission.author:
return False
- # Check if the post contains umaddit subreddit references
- if self._contains_umaddit_reference(submission):
+ # Check if the post contains umaddit or umagusher subreddit references
+ reference_type = self._contains_forbidden_reference(submission)
+ if reference_type:
self.logger.info(
- f"Post by {submission.author.name} silently removed for containing umaddit subreddit reference "
+ f"Post by {submission.author.name} silently removed for containing {reference_type} subreddit reference "
f"(post ID: {submission.id})"
)
return True
@@ -32,11 +38,15 @@ class UmadditRemovalRule(Rule):
return False
def get_removal_message(self, submission: praw.models.Submission) -> str:
- """Get the umaddit removal message - silent removal."""
+ """Get the umaddit/umagusher removal message - silent removal."""
return "" # Silent removal - no message sent
- def _contains_umaddit_reference(self, submission: praw.models.Submission) -> bool:
- """Check if a submission contains any umaddit subreddit references."""
+ def _contains_forbidden_reference(self, submission: praw.models.Submission) -> str:
+ """Check if a submission contains any umaddit or umagusher subreddit references.
+
+ Returns:
+ str: The type of reference found ('umaddit' or 'umagusher'), or empty string if none found.
+ """
try:
# Get all text content to check
content_to_check = []
@@ -53,19 +63,27 @@ class UmadditRemovalRule(Rule):
if submission.url:
content_to_check.append(submission.url)
- # Check each piece of content for umaddit subreddit references
+ # Check each piece of content for forbidden subreddit references
for content in content_to_check:
if content:
- # Use regex search
+ # Check for umaddit references
match = self.umaddit_pattern.search(content)
if match:
self.logger.info(
f"Found umaddit subreddit reference '{match.group()}' in post {submission.id}"
)
- return True
+ return "umaddit"
+
+ # Check for umagusher references
+ match = self.umagusher_pattern.search(content)
+ if match:
+ self.logger.info(
+ f"Found umagusher subreddit reference '{match.group()}' in post {submission.id}"
+ )
+ return "umagusher"
- return False
+ return ""
except Exception as e:
- self.logger.error(f"Error checking umaddit references for submission {submission.id}: {e}")
- return False
\ No newline at end of file
+ self.logger.error(f"Error checking forbidden references for submission {submission.id}: {e}")
+ return ""
\ No newline at end of file