From 4eb16c473424f6844b1bb2cdc5d3cb5aff916d12 Mon Sep 17 00:00:00 2001 From: Fuwn Date: Wed, 17 Sep 2025 19:28:59 -0700 Subject: feat(rules): Add intelligent roleplay moderator rule --- .gitignore | 2 + README.md | 31 +- TEST_MODERATOR_README.md | 153 +++++ env.example | 4 + pyproject.toml | 1 + requirements-dev.lock | 37 ++ requirements.lock | 37 ++ requirements.txt | 1 + src/umabot/bot.py | 9 +- src/umabot/config.py | 6 +- src/umabot/rules/__init__.py | 3 +- src/umabot/rules/intelligent_roleplay_moderator.py | 351 +++++++++++ test_moderator.py | 693 +++++++++++++++++++++ 13 files changed, 1300 insertions(+), 28 deletions(-) create mode 100644 TEST_MODERATOR_README.md create mode 100644 src/umabot/rules/intelligent_roleplay_moderator.py create mode 100755 test_moderator.py diff --git a/.gitignore b/.gitignore index 339d6aa..7068203 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,5 @@ wheels/ # Development *.log *.env +/real_test_posts +/test_posts diff --git a/README.md b/README.md index f6dd88a..0780b2e 100644 --- a/README.md +++ b/README.md @@ -5,34 +5,20 @@ A modular Reddit bot for automated post moderation built with Python and PRAW. 
## Features - **Spam Detection**: Automatically removes posts from users who post more than 3 times in 24 hours -- **Surge-Based Roleplay Limiter**: Dynamically adjusts roleplay post limits based on subreddit activity +- **Intelligent Roleplay Moderator**: Uses GPT-5-nano to evaluate roleplay posts and make smart moderation decisions - **Modular Design**: Easy to add new moderation rules - **Configurable Messages**: Customizable removal messages - **Dry Run Mode**: Test the bot without actually removing posts - **Comprehensive Logging**: Detailed logs for monitoring and debugging -### Roleplay Limiting Options +### Intelligent Roleplay Moderation -The bot supports two roleplay limiting modes: +The bot features an advanced AI-powered roleplay moderator that: -#### Surge-Based Limiting (Default) - -Intelligent surge detection that adjusts limits based on subreddit activity: - -- **Normal Activity** (< 20 roleplay posts): Users can post up to 5 roleplay posts per time window -- **Moderate Surge** (20+ roleplay posts): Users limited to 3 roleplay posts per time window -- **High Surge** (40+ roleplay posts): Users limited to 1 roleplay post per time window -- **Extreme Surge** (60+ roleplay posts): All roleplay posts temporarily blocked - -#### Static Limiting (Optional) - -Fixed limit that doesn't change based on activity: - -- **Fixed Limit**: Users can post a configurable number of roleplay posts per time window (default: 1) -- **Simple**: No surge detection, just enforces the same limit for all users -- **Predictable**: Consistent behavior regardless of subreddit activity - -To switch to static limiting, uncomment the `StaticRoleplayLimiter` line in `src/umabot/bot.py` and comment out the `RoleplayLimiter` line. 
+- **Auto-Flairing**: Automatically changes roleplay posts to "Art" flair when they're primarily showcasing artwork +- **Quality Control**: Removes low-effort roleplay posts while preserving high-quality content +- **Smart Evaluation**: Uses GPT-5-nano to analyze post content, creativity, effort, and engagement potential +- **User Communication**: Sends detailed mod mail explaining decisions and providing guidance ## Quick Start @@ -68,7 +54,8 @@ REDDIT_CLIENT_ID=your_client_id_here REDDIT_CLIENT_SECRET=your_client_secret_here REDDIT_USERNAME=your_reddit_username REDDIT_PASSWORD=your_reddit_password -REDDIT_USER_AGENT=UmaBot/0.1.0 +# OpenAI API Credentials +OPENAI_API_KEY=your_openai_api_key_here # Subreddit Configuration SUBREDDIT_NAME=your_subreddit_name diff --git a/TEST_MODERATOR_README.md b/TEST_MODERATOR_README.md new file mode 100644 index 0000000..c2eb94c --- /dev/null +++ b/TEST_MODERATOR_README.md @@ -0,0 +1,153 @@ +# Test Moderator CLI + +A command-line tool for testing the intelligent roleplay moderator against text files before deploying it to production. + +## Features + +- **File Testing**: Test individual text files or entire directories +- **Mock Submissions**: Creates realistic mock Reddit submissions for testing +- **AI Evaluation**: Uses the same GPT-5-nano evaluation logic as the production bot +- **Detailed Results**: Shows AI reasoning, confidence scores, and proposed actions +- **Mod Mail Preview**: Shows what mod mail messages would be sent to users + +## Usage + +### Prerequisites + +1. **OpenAI API Key**: You need a valid OpenAI API key with access to GPT-5-nano +2. 
**Python Dependencies**: Install the required packages (the script imports `openai`, `dotenv`, and `praw`): + ```bash + pip install openai python-dotenv praw + ``` + +### Basic Usage + +```bash +# Test a single file +python test_moderator.py --file sample_post.txt + +# Test all .txt files in a directory +python test_moderator.py --directory test_posts/ + +# Test with custom author name +python test_moderator.py --file post.txt --author myuser + +# Use API key from command line +python test_moderator.py --file post.txt --api-key sk-your-key-here +``` + +### Environment Variables + +The tool automatically loads configuration from a `.env` file in the project root. Create a `.env` file with: + +```bash +# OpenAI API Credentials +OPENAI_API_KEY=sk-your-key-here +``` + +Alternatively, you can set environment variables directly: + +```bash +export OPENAI_API_KEY="sk-your-key-here" +``` + +### Command Line Options + +- `--file, -f`: Test a single text file +- `--directory, -d`: Test all .txt files in a directory +- `--author, -a`: Author name for mock submissions (default: testuser) +- `--api-key, -k`: OpenAI API key (or set OPENAI_API_KEY env var) +- `--verbose, -v`: Show detailed output + +## Sample Test Files + +The `test_posts/` directory is git-ignored, so create it locally and add sample files that demonstrate different types of content, for example: + +- **`artwork_showcase.txt`**: Art-focused post that should be re-flaired +- **`high_quality_roleplay.txt`**: Well-written roleplay that should be allowed +- **`low_effort_post.txt`**: Minimal content that should be removed +- **`basic_roleplay.txt`**: Simple roleplay that might be borderline +- **`poetic_roleplay.txt`**: Creative, high-quality roleplay content + +## Understanding Results + +### AI Evaluation Output + +The tool shows: + +- **Should be Art**: Whether the post should be re-flaired as Art +- **Is Low Effort**: Whether the post should be removed for low effort +- **Confidence**: AI confidence score (0.0 to 1.0) +- **Reasoning**: Detailed explanation of the AI's decision + +### Actions + +- **🎨 Change flair to Art**: Post 
will be re-flaired and user notified +- **🗑️ Remove post (low effort)**: Post will be removed and user notified +- **✅ Allow post**: Post passes all checks and will be allowed + +### Mod Mail Preview + +The tool shows exactly what mod mail messages would be sent to users, including: + +- Subject lines +- Message content +- Reasoning explanations + +## Example Output + +``` +1. test_posts/artwork_showcase.txt +------------------------------------------------------------ +Title: Artwork Showcase +Word Count: 45 +Has Media: True +Media Type: text +Content Preview: Check out this amazing artwork I drew of Tokai Teio! She's so beautiful and I spent hours getting every detail perfect... + +🤖 AI Evaluation: + Should be Art: True + Is Low Effort: False + Confidence: 0.95 + Reasoning: This post is primarily showcasing artwork with detailed description of the art process, making it more suitable for the Art flair than Roleplay. + +📋 Actions: + 🎨 Change flair to Art + +📧 Mod Mail (Art Flair Change): + Subject: Your post flair has been changed to Art + Message: Your roleplay post has been automatically re-flaired as 'Art' because it appears to be primarily showcasing artwork or visual content rather than roleplay. + Reasoning: This post is primarily showcasing artwork with detailed description of the art process, making it more suitable for the Art flair than Roleplay. +``` + +## Setup + +1. **Install Dependencies**: `pip install openai python-dotenv praw` +2. **Create .env File**: Add `OPENAI_API_KEY=sk-your-key-here` to your `.env` file +3. **Run Tests**: `python test_moderator.py --directory test_posts/` + +## Tips for Testing + +1. **Test Edge Cases**: Include borderline content to see how the AI handles ambiguous cases +2. **Check Confidence Scores**: Low confidence scores indicate the AI is uncertain +3. **Review Reasoning**: The AI's explanations help understand its decision-making process +4. 
**Test Different Lengths**: Try very short posts, very long posts, and everything in between +5. **Include Media References**: Test posts that mention images, videos, or other media + +## Troubleshooting + +### Common Issues + +- **API Key Error**: Make sure your OpenAI API key is valid and has access to GPT-5-nano +- **File Not Found**: Check that file paths are correct and files exist +- **JSON Parse Error**: The AI sometimes returns malformed JSON; the tool handles this gracefully +- **Rate Limiting**: If you hit OpenAI rate limits, wait a moment and try again + +### Getting Help + +If you encounter issues: + +1. Check that all dependencies are installed +2. Verify your OpenAI API key is working +3. Ensure test files are valid text files +4. Check the error messages for specific guidance diff --git a/env.example b/env.example index 9591db2..d7b14f7 100644 --- a/env.example +++ b/env.example @@ -6,6 +6,10 @@ REDDIT_USERNAME=your_reddit_username REDDIT_PASSWORD=your_reddit_password REDDIT_USER_AGENT=UmaBot/0.1.0 +# OpenAI API Credentials +# Get this from https://platform.openai.com/api-keys +OPENAI_API_KEY=your_openai_api_key_here + # Subreddit Configuration # The subreddit where the bot will operate (without the r/ prefix) SUBREDDIT_NAME=your_subreddit_name diff --git a/pyproject.toml b/pyproject.toml index 6577e62..a4c42cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,6 +10,7 @@ dependencies = [ "python-dotenv>=1.0.0", "schedule>=1.2.0", "loguru>=0.7.0", + "openai>=1.108.0", ] readme = "README.md" requires-python = ">= 3.8" diff --git a/requirements-dev.lock b/requirements-dev.lock index 4e9eaa5..9f68ac8 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -10,24 +10,45 @@ # universal: false -e file:. 
+annotated-types==0.7.0 + # via pydantic +anyio==4.10.0 + # via httpx + # via openai black==25.1.0 certifi==2025.8.3 + # via httpcore + # via httpx # via requests charset-normalizer==3.4.3 # via requests click==8.2.1 # via black +distro==1.9.0 + # via openai flake8==7.3.0 +h11==0.16.0 + # via httpcore +httpcore==1.0.9 + # via httpx +httpx==0.28.1 + # via openai idna==3.10 + # via anyio + # via httpx # via requests iniconfig==2.1.0 # via pytest +jiter==0.11.0 + # via openai loguru==0.7.3 # via umabot mccabe==0.7.0 # via flake8 mypy-extensions==1.1.0 # via black +openai==1.108.0 + # via umabot packaging==25.0 # via black # via pytest @@ -43,6 +64,10 @@ prawcore==2.4.0 # via praw pycodestyle==2.14.0 # via flake8 +pydantic==2.11.9 + # via openai +pydantic-core==2.33.2 + # via pydantic pyflakes==3.4.0 # via flake8 pygments==2.19.2 @@ -55,6 +80,18 @@ requests==2.32.5 # via update-checker schedule==1.2.2 # via umabot +sniffio==1.3.1 + # via anyio + # via openai +tqdm==4.67.1 + # via openai +typing-extensions==4.15.0 + # via openai + # via pydantic + # via pydantic-core + # via typing-inspection +typing-inspection==0.4.1 + # via pydantic update-checker==0.18.0 # via praw urllib3==2.5.0 diff --git a/requirements.lock b/requirements.lock index aa3738c..eb0a127 100644 --- a/requirements.lock +++ b/requirements.lock @@ -10,18 +10,43 @@ # universal: false -e file:. 
+annotated-types==0.7.0 + # via pydantic +anyio==4.10.0 + # via httpx + # via openai certifi==2025.8.3 + # via httpcore + # via httpx # via requests charset-normalizer==3.4.3 # via requests +distro==1.9.0 + # via openai +h11==0.16.0 + # via httpcore +httpcore==1.0.9 + # via httpx +httpx==0.28.1 + # via openai idna==3.10 + # via anyio + # via httpx # via requests +jiter==0.11.0 + # via openai loguru==0.7.3 # via umabot +openai==1.108.0 + # via umabot praw==7.8.1 # via umabot prawcore==2.4.0 # via praw +pydantic==2.11.9 + # via openai +pydantic-core==2.33.2 + # via pydantic python-dotenv==1.1.1 # via umabot requests==2.32.5 @@ -29,6 +54,18 @@ requests==2.32.5 # via update-checker schedule==1.2.2 # via umabot +sniffio==1.3.1 + # via anyio + # via openai +tqdm==4.67.1 + # via openai +typing-extensions==4.15.0 + # via openai + # via pydantic + # via pydantic-core + # via typing-inspection +typing-inspection==0.4.1 + # via pydantic update-checker==0.18.0 # via praw urllib3==2.5.0 diff --git a/requirements.txt b/requirements.txt index 2ea9c0c..9eec680 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ praw>=7.7.0 python-dotenv>=1.0.0 schedule>=1.2.0 loguru>=0.7.0 +openai>=1.0.0 diff --git a/src/umabot/bot.py b/src/umabot/bot.py index 87068a5..510ca7a 100644 --- a/src/umabot/bot.py +++ b/src/umabot/bot.py @@ -9,7 +9,7 @@ from socketserver import ThreadingMixIn from loguru import logger from .config import Config -from .rules import SpamDetector, RoleplayLimiter, RoleplayMediaRequiredRule, RoleplayWordCountRule +from .rules import SpamDetector, IntelligentRoleplayModerator # from .rules import StaticRoleplayLimiter # Disabled by default - uncomment to use static limiting @@ -76,9 +76,10 @@ class UmaBot: # Initialize rules self.rules = [ SpamDetector(config), - RoleplayMediaRequiredRule(config), # Requires media for roleplay posts - RoleplayWordCountRule(config), # Sends short roleplay posts to mod queue - RoleplayLimiter(config, self.subreddit) # 
Surge-based roleplay limiter (default) + IntelligentRoleplayModerator(config, self.subreddit), # Intelligent roleplay moderation + # RoleplayMediaRequiredRule(config), # Disabled - using intelligent moderator + # RoleplayWordCountRule(config), # Disabled - using intelligent moderator + # RoleplayLimiter(config, self.subreddit) # Disabled - using intelligent moderator # StaticRoleplayLimiter(config) # Uncomment to use static roleplay limiting instead ] diff --git a/src/umabot/config.py b/src/umabot/config.py index f49f936..ab03a25 100644 --- a/src/umabot/config.py +++ b/src/umabot/config.py @@ -19,6 +19,9 @@ class Config: password: str user_agent: str + # OpenAI API credentials + openai_api_key: str + # Subreddit configuration subreddit_name: str @@ -42,6 +45,7 @@ class Config: username=os.getenv("REDDIT_USERNAME", ""), password=os.getenv("REDDIT_PASSWORD", ""), user_agent=os.getenv("REDDIT_USER_AGENT", "UmaBot/0.1.0"), + openai_api_key=os.getenv("OPENAI_API_KEY", ""), subreddit_name=os.getenv("SUBREDDIT_NAME", ""), roleplay_message=os.getenv( "ROLEPLAY_MESSAGE", @@ -59,7 +63,7 @@ class Config: """Validate that all required configuration is present.""" required_fields = [ "client_id", "client_secret", "username", - "password", "subreddit_name" + "password", "subreddit_name", "openai_api_key" ] missing_fields = [] diff --git a/src/umabot/rules/__init__.py b/src/umabot/rules/__init__.py index 73f84ec..01827ce 100644 --- a/src/umabot/rules/__init__.py +++ b/src/umabot/rules/__init__.py @@ -5,5 +5,6 @@ from .spam_detector import SpamDetector from .roleplay_limiter import RoleplayLimiter, StaticRoleplayLimiter from .roleplay_media_required import RoleplayMediaRequiredRule from .roleplay_word_count import RoleplayWordCountRule +from .intelligent_roleplay_moderator import IntelligentRoleplayModerator -__all__ = ["Rule", "SpamDetector", "RoleplayLimiter", "StaticRoleplayLimiter", "RoleplayMediaRequiredRule", "RoleplayWordCountRule"] +__all__ = ["Rule", "SpamDetector", 
"""Intelligent roleplay moderator using GPT-5-nano."""

import json
import re
import time
from urllib.parse import urlparse

import praw.models
from openai import OpenAI

from .base import Rule

# File extensions treated as directly linked images / videos.
_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.gif', '.webp')
_VIDEO_EXTENSIONS = ('.mp4', '.webm', '.gifv')

# Hosts whose links count as "media". Sub-domains (i.imgur.com, i.redd.it,
# www.youtube.com, ...) are matched by suffix, so only base domains are listed.
_VIDEO_DOMAINS = ('youtube.com', 'youtu.be', 'vimeo.com')
_LINK_MEDIA_DOMAINS = (
    'imgur.com', 'redd.it', 'youtube.com', 'youtu.be', 'vimeo.com',
    'gfycat.com', 'streamable.com', 'twitter.com', 'x.com', 'tiktok.com',
    'instagram.com', 'facebook.com',
)
# Self-text is additionally scanned for Discord links.
_SELFTEXT_MEDIA_DOMAINS = _LINK_MEDIA_DOMAINS + ('discord.com', 'discordapp.com')

# Matches one of the media hosts as a *whole* host name inside free text.
# A plain substring test would report "dropbox.com" as a hit for "x.com".
_SELFTEXT_MEDIA_RE = re.compile(
    r"(?<![\w.-])(?:[\w-]+\.)*(?:"
    + "|".join(map(re.escape, _SELFTEXT_MEDIA_DOMAINS))
    + r")(?![\w-]|\.[\w-])",
    re.IGNORECASE,
)


class IntelligentRoleplayModerator(Rule):
    """Rule that asks GPT-5-nano to judge posts carrying the roleplay flair.

    For a roleplay-flaired post the model verdict leads to one of three
    outcomes: re-flair the post as "Art" (kept), report it as low effort
    (``should_remove`` returns True), or leave it untouched. Any failure
    while evaluating results in the post being kept.
    """

    # Number of model calls attempted before falling back to the neutral
    # verdict, and the pause between two attempts.
    MAX_EVALUATION_ATTEMPTS = 3
    RETRY_DELAY_SECONDS = 1

    def __init__(self, config, subreddit):
        """Initialize the intelligent roleplay moderator.

        Args:
            config: Bot configuration; ``config.openai_api_key`` is read here.
            subreddit: Subreddit object the bot operates on (stored only).
        """
        super().__init__(config)
        self.subreddit = subreddit
        self.roleplay_flair_template_id = "311f0024-8302-11f0-9b41-46c005ad843c"
        self.art_flair_template_id = "dd4589ba-7c43-11f0-8ef0-22eefb7f5b84"

        # Initialize OpenAI client
        self.openai_client = OpenAI(api_key=config.openai_api_key)

        # Evaluation prompt. Doubled braces are literal braces for str.format.
        self.evaluation_prompt = """
You are an expert moderator for a roleplay subreddit. Your job is to evaluate roleplay posts and determine:

1. Whether this post would be better flaired as "Art" instead of "Roleplay"
2. Whether this is low-effort content that should be removed

For each post, respond with a JSON object containing:
{{
    "should_be_art": boolean,
    "is_low_effort": boolean,
    "confidence": float (0.0 to 1.0),
    "reasoning": "Brief explanation of your decision"
}}

Guidelines:
- A post should be flaired as "Art" if it's primarily showcasing artwork, images, or visual content with minimal roleplay text
- A post is "low effort" if it lacks substance, creativity, or meaningful roleplay content
- Consider factors like: word count, creativity, effort, engagement potential, originality
- Be strict but fair - err on the side of allowing content unless it's clearly low quality
- High confidence (0.8+) for clear cases, lower confidence for borderline cases

Post to evaluate:
Title: {title}
Content: {content}
Has Media: {has_media}
Media Type: {media_type}
Word Count: {word_count}
"""

    def should_remove(self, submission: praw.models.Submission) -> bool:
        """Evaluate a roleplay post and take appropriate action.

        Returns True only when the model flags the post as low effort (and
        not as art). Posts without an author, posts without the roleplay
        flair, and posts whose evaluation fails are never removed.

        NOTE(review): the model's ``confidence`` value is logged but does not
        gate either action; confirm whether a threshold is wanted.
        """
        if not submission.author:
            return False

        # Check if this is a roleplay post
        if not self._is_roleplay_post(submission):
            return False

        try:
            evaluation = self._evaluate_post(submission)

            if evaluation["should_be_art"]:
                # Only tell the user the flair changed if it really did.
                if self._change_flair_to_art(submission):
                    self._notify_art_flair_change(submission, evaluation)
                return False  # Don't remove, just change flair

            if evaluation["is_low_effort"]:
                self._notify_low_effort_removal(submission, evaluation)
                return True  # Remove the post

            # Post is good quality roleplay - allow it
            return False

        except Exception as e:
            # Deliberate catch-all at the rule boundary: never remove on error.
            self.logger.error(f"Error evaluating roleplay post {submission.id}: {e}")
            return False

    def get_removal_message(self, submission: praw.models.Submission) -> str:
        """Return an empty removal comment; the user is messaged directly instead."""
        return ""

    @staticmethod
    def _default_evaluation(reasoning: str) -> dict:
        """Return the neutral verdict (keep post, keep flair) with *reasoning*."""
        return {
            "should_be_art": False,
            "is_low_effort": False,
            "confidence": 0.0,
            "reasoning": reasoning,
        }

    def _evaluate_post(self, submission: praw.models.Submission) -> dict:
        """Evaluate the post with GPT-5-nano, retrying failed attempts.

        Makes up to ``MAX_EVALUATION_ATTEMPTS`` calls, pausing
        ``RETRY_DELAY_SECONDS`` between them. If every attempt raises, the
        neutral verdict is returned so the post is left alone.
        """
        max_retries = self.MAX_EVALUATION_ATTEMPTS
        last_error = None

        for attempt in range(1, max_retries + 1):
            try:
                self.logger.info(f"Attempt {attempt}/{max_retries} for submission {submission.id}")
                return self._evaluate_post_attempt(submission)
            except Exception as e:
                last_error = e
                self.logger.warning(f"Attempt {attempt} failed for {submission.id}: {e}")
                if attempt < max_retries:
                    self.logger.info("Retrying in 1 second...")
                    time.sleep(self.RETRY_DELAY_SECONDS)

        self.logger.error(
            f"All {max_retries} attempts failed for {submission.id}, returning default evaluation"
        )
        return self._default_evaluation(f"Failed after {max_retries} attempts: {last_error}")

    def _evaluate_post_attempt(self, submission: praw.models.Submission) -> dict:
        """Make a single GPT-5-nano call and return the normalized verdict.

        Raises:
            ValueError: If the model returns no text or no parseable JSON
                object (``json.JSONDecodeError`` is a ``ValueError``).
            Exception: Whatever the OpenAI client raises for a failed request.

        Errors propagate on purpose so that ``_evaluate_post`` can retry.
        """
        title = submission.title or ""
        content = submission.selftext or ""

        prompt = self.evaluation_prompt.format(
            title=title,
            content=content,
            has_media=self._has_media(submission),
            media_type=self._get_media_type(submission),
            word_count=len(content.split()),
        )

        response = self.openai_client.chat.completions.create(
            model="gpt-5-nano",
            messages=[
                {"role": "system", "content": "You are an expert roleplay moderator. Always respond with valid JSON only. Do not include any text before or after the JSON object."},
                {"role": "user", "content": prompt}
            ],
            max_completion_tokens=2000
        )

        # The message content can be None, so guard before using it.
        response_text = response.choices[0].message.content
        if not response_text or not response_text.strip():
            raise ValueError("model returned an empty response")

        try:
            evaluation = self._parse_evaluation(response_text)
        except ValueError as e:
            self.logger.error(f"JSON parsing error for {submission.id}: {e}")
            self.logger.error(f"Raw response: {response_text}")
            raise

        self.logger.info(f"GPT-5-nano evaluation for {submission.id}: {evaluation}")
        return evaluation

    @staticmethod
    def _parse_evaluation(response_text: str) -> dict:
        """Extract and normalize the JSON verdict from raw model output.

        Everything outside the outermost ``{...}`` (code fences, stray prose)
        is discarded. The result always has the four expected keys.

        Raises:
            ValueError: If no JSON object is present or it cannot be decoded.
        """
        start = response_text.find('{')
        end = response_text.rfind('}')
        if start == -1 or end < start:
            raise ValueError("no JSON object found in model response")

        # strict=False tolerates raw newlines inside string values.
        data = json.loads(response_text[start:end + 1], strict=False)
        if not isinstance(data, dict):
            raise ValueError("model response is not a JSON object")

        try:
            confidence = float(data.get("confidence", 0.0))
        except (TypeError, ValueError):
            confidence = 0.0

        return {
            # `is True` so that strings such as "false" do not trigger an action.
            "should_be_art": data.get("should_be_art") is True,
            "is_low_effort": data.get("is_low_effort") is True,
            "confidence": min(max(confidence, 0.0), 1.0),
            "reasoning": str(data.get("reasoning") or ""),
        }

    def _change_flair_to_art(self, submission: praw.models.Submission) -> bool:
        """Change the post flair to Art.

        Returns:
            True if the flair call succeeded, False if it raised (the error
            is logged, not propagated).
        """
        try:
            submission.mod.flair(
                text="Art",
                flair_template_id=self.art_flair_template_id
            )
        except Exception as e:
            self.logger.error(f"Error changing flair for {submission.id}: {e}")
            return False

        self.logger.info(f"Changed flair to Art for post {submission.id}")
        return True

    def _notify_art_flair_change(self, submission: praw.models.Submission, evaluation: dict) -> None:
        """Message the author that their post was re-flaired as Art.

        NOTE(review): sent with ``Redditor.message`` and no ``from_subreddit``,
        so this is presumably a private message from the bot account rather
        than subreddit mod mail - confirm against the PRAW documentation.
        Failures are logged and swallowed (best effort).
        """
        try:
            username = submission.author.name
            subject = "Your post flair has been changed to Art"

            message = f"""Hello u/{username},

Your roleplay post has been automatically re-flaired as "Art" because it appears to be primarily showcasing artwork or visual content rather than roleplay.

Reasoning: {evaluation['reasoning']}

Post link: https://reddit.com{submission.permalink}

If you believe this was done in error, please contact the moderators via Mod Mail.

Thank you for understanding!"""

            # Keyword arguments: positional use is deprecated in PRAW 7.
            submission.author.message(subject=subject, message=message)
            self.logger.info(f"Sent art flair change notification to {username}")

        except Exception as e:
            self.logger.error(f"Error sending art flair notification for {submission.id}: {e}")

    def _notify_low_effort_removal(self, submission: praw.models.Submission, evaluation: dict) -> None:
        """Message the author that their post was removed as low effort.

        NOTE(review): same delivery caveat as ``_notify_art_flair_change``.
        Failures are logged and swallowed (best effort).
        """
        try:
            username = submission.author.name
            subject = "Your roleplay post has been removed for low effort"

            message = f"""Hello u/{username},

Your roleplay post has been removed because it was determined to be low-effort content.

Reasoning: {evaluation['reasoning']}

Post link: https://reddit.com{submission.permalink}

To improve your roleplay posts, consider:
- Adding more detailed descriptions
- Creating engaging scenarios
- Including meaningful character interactions
- Ensuring your content adds value to the community

If you believe this was done in error, please contact the moderators via Mod Mail.

Thank you for understanding!"""

            # Keyword arguments: positional use is deprecated in PRAW 7.
            submission.author.message(subject=subject, message=message)
            self.logger.info(f"Sent low effort removal notification to {username}")

        except Exception as e:
            self.logger.error(f"Error sending low effort notification for {submission.id}: {e}")

    def _is_roleplay_post(self, submission: praw.models.Submission) -> bool:
        """Check if a submission has the roleplay flair.

        The flair template ID decides when present; the flair text
        ("roleplay", case-insensitive) is consulted only when no template ID
        is set.
        """
        try:
            template_id = getattr(submission, 'link_flair_template_id', None)
            if template_id:
                return template_id == self.roleplay_flair_template_id

            flair_text = getattr(submission, 'link_flair_text', None)
            if flair_text:
                return flair_text.lower() == "roleplay"

            return False

        except Exception as e:
            self.logger.error(f"Error checking flair for submission {submission.id}: {e}")
            return False

    @staticmethod
    def _host_matches(url: str, domains) -> bool:
        """Return True if *url*'s host is one of *domains* or a sub-domain of one."""
        host = (urlparse(url).hostname or "").lower()
        return any(host == domain or host.endswith("." + domain) for domain in domains)

    @staticmethod
    def _url_path(url: str) -> str:
        """Return the lower-cased path of *url*, without query string or fragment."""
        return urlparse(url).path.lower()

    def _has_media(self, submission: praw.models.Submission) -> bool:
        """Check if a submission has media attached.

        True for native videos, galleries, self posts whose text mentions a
        known media host, and link posts pointing at a media file or host.
        Returns False if inspection raises.
        """
        try:
            if submission.is_video:
                return True

            if getattr(submission, 'is_gallery', False):
                return True

            if submission.is_self:
                # Self posts: look for links to known media hosts in the body.
                return bool(_SELFTEXT_MEDIA_RE.search(submission.selftext or ""))

            url = getattr(submission, 'url', None)
            if not url:
                return False

            # Test the path only, so "?width=..." does not hide the extension.
            if self._url_path(url).endswith(_IMAGE_EXTENSIONS + _VIDEO_EXTENSIONS):
                return True

            return self._host_matches(url, _LINK_MEDIA_DOMAINS)

        except Exception as e:
            self.logger.error(f"Error checking media for submission {submission.id}: {e}")
            return False

    def _get_media_type(self, submission: praw.models.Submission) -> str:
        """Get the type of media in the submission.

        Returns one of "video", "gallery", "text", "image", "link", or
        "unknown" when inspection raises.
        """
        try:
            if submission.is_video:
                return "video"
            if getattr(submission, 'is_gallery', False):
                return "gallery"
            # Checked before the URL: a self post's url is its own permalink
            # (per PRAW docs), which would otherwise be reported as "link".
            if submission.is_self:
                return "text"

            url = getattr(submission, 'url', None)
            if not url:
                return "text"

            path = self._url_path(url)
            if path.endswith(_IMAGE_EXTENSIONS):
                return "image"
            if path.endswith(_VIDEO_EXTENSIONS):
                return "video"
            if self._host_matches(url, _VIDEO_DOMAINS):
                return "video"
            return "link"

        except Exception as e:
            self.logger.error(f"Error getting media type for submission {submission.id}: {e}")
            return "unknown"
str + title: str + selftext: str + url: str + is_video: bool + is_self: bool + is_gallery: bool + link_flair_text: str + author: 'MockAuthor' + + @property + def permalink(self) -> str: + return f"/r/test/comments/{self.id}/" + + +@dataclass +class MockAuthor: + """Mock Reddit author for testing.""" + name: str + + def message(self, subject: str, message: str) -> None: + print(f"\nšŸ“§ MOD MAIL TO u/{self.name}") + print(f"Subject: {subject}") + print(f"Message:\n{message}") + + +class RedditDownloader: + """Downloads posts from Reddit for testing.""" + + def __init__(self, config): + """Initialize Reddit client.""" + self.reddit = praw.Reddit( + client_id=config.client_id, + client_secret=config.client_secret, + username=config.username, + password=config.password, + user_agent=config.user_agent + ) + self.subreddit_name = config.subreddit_name + + def download_roleplay_posts(self, limit: int = 50, output_dir: Path = None) -> List[Path]: + """Download roleplay posts from the subreddit.""" + if output_dir is None: + output_dir = Path("real_test_posts") + + output_dir.mkdir(exist_ok=True) + + subreddit = self.reddit.subreddit(self.subreddit_name) + downloaded_files = [] + + print(f"Downloading roleplay posts from r/{self.subreddit_name}...") + + count = 0 + for submission in subreddit.new(limit=limit * 3): # Get more to filter + if count >= limit: + break + + # Skip removed posts + if submission.removed_by_category or submission.selftext == "[removed]" or submission.selftext == "[deleted]": + continue + + # Check if it's a roleplay post by flair template ID + is_roleplay = False + if hasattr(submission, 'link_flair_template_id'): + is_roleplay = submission.link_flair_template_id == "311f0024-8302-11f0-9b41-46c005ad843c" + elif hasattr(submission, 'link_flair_text'): + is_roleplay = submission.link_flair_text and submission.link_flair_text.lower() == "roleplay" + + if is_roleplay: + # Create filename + safe_title = "".join(c for c in submission.title if c.isalnum() 
or c in (' ', '-', '_')).rstrip() + safe_title = safe_title[:50] # Limit length + filename = f"{submission.id}_{safe_title}.txt" + filepath = output_dir / filename + + # Write post content + content = f"Title: {submission.title}\n\n" + if submission.selftext: + content += submission.selftext + else: + content += f"[Link Post: {submission.url}]" + + # Add flair info for debugging + if hasattr(submission, 'link_flair_text') and submission.link_flair_text: + content += f"\n\nFlair: {submission.link_flair_text}" + if hasattr(submission, 'link_flair_template_id') and submission.link_flair_template_id: + content += f"\nFlair Template ID: {submission.link_flair_template_id}" + + with open(filepath, 'w', encoding='utf-8') as f: + f.write(content) + + downloaded_files.append(filepath) + count += 1 + print(f"Downloaded: {filename}") + + print(f"Downloaded {len(downloaded_files)} roleplay posts") + return downloaded_files + + def download_random_posts(self, limit: int = 20, output_dir: Path = None) -> List[Path]: + """Download random posts from the subreddit.""" + if output_dir is None: + output_dir = Path("real_test_posts") + + output_dir.mkdir(exist_ok=True) + + subreddit = self.reddit.subreddit(self.subreddit_name) + downloaded_files = [] + + print(f"Downloading random posts from r/{self.subreddit_name}...") + + count = 0 + for submission in subreddit.hot(limit=limit * 2): # Get more to filter + if count >= limit: + break + + # Skip stickied posts + if submission.stickied: + continue + + # Skip removed posts + if submission.removed_by_category or submission.selftext == "[removed]" or submission.selftext == "[deleted]": + continue + + # Create filename + safe_title = "".join(c for c in submission.title if c.isalnum() or c in (' ', '-', '_')).rstrip() + safe_title = safe_title[:50] # Limit length + filename = f"{submission.id}_{safe_title}.txt" + filepath = output_dir / filename + + # Write post content + content = f"Title: {submission.title}\n\n" + if submission.selftext: + 
class TestIntelligentModerator:
    """Offline harness that runs the roleplay-moderation prompt against text files.

    Each file is wrapped in a ``MockSubmission``, sent to the chat model with
    ``evaluation_prompt`` and turned into a list of action names
    (``CHANGE_FLAIR_TO_ART``, ``REMOVE_POST`` or ``ALLOW_POST``).

    Class attributes ``model``, ``max_retries`` and ``retry_delay`` may be
    overridden per instance (for example ``retry_delay = 0`` to skip sleeping).
    """

    # The class name starts with "Test" and lives in test_moderator.py, so
    # pytest would otherwise try to collect it as a test class.
    __test__ = False

    model = "gpt-5-nano"
    max_retries = 3
    retry_delay = 1.0  # seconds slept between failed attempts

    roleplay_flair = "Roleplay"
    art_flair_template_id = "dd4589ba-7c43-11f0-8ef0-22eefb7f5b84"

    # Domains treated as media hosts for link posts.
    _MEDIA_DOMAINS = (
        'imgur.com', 'i.imgur.com', 'redd.it', 'i.redd.it',
        'youtube.com', 'youtu.be', 'vimeo.com', 'gfycat.com',
        'streamable.com', 'twitter.com', 'x.com', 'tiktok.com',
        'instagram.com', 'facebook.com',
    )
    # Substrings searched for inside the body of a self post.
    _TEXT_MEDIA_INDICATORS = _MEDIA_DOMAINS + ('discord.com', 'discordapp.com')
    _IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.gif', '.webp')
    _VIDEO_EXTENSIONS = ('.mp4', '.webm', '.gifv')
    _VIDEO_DOMAINS = ('youtube.com', 'youtu.be', 'vimeo.com')

    # Evaluation prompt; doubled braces are literal braces for str.format().
    evaluation_prompt = """
You are an expert moderator for a roleplay subreddit. Your job is to evaluate roleplay posts and determine:

1. Whether this post would be better flaired as "Art" instead of "Roleplay"
2. Whether this is low-effort content that should be removed

For each post, respond with a JSON object containing:
{{
    "should_be_art": boolean,
    "is_low_effort": boolean,
    "confidence": float (0.0 to 1.0),
    "reasoning": "Brief explanation of your decision"
}}

Guidelines:
- A post should be flaired as "Art" if it's primarily showcasing artwork, images, or visual content with minimal roleplay text
- A post is "low effort" if it lacks substance, creativity, or meaningful roleplay content
- Consider factors like: word count, creativity, effort, engagement potential, originality
- Be strict but fair - err on the side of allowing content unless it's clearly low quality
- High confidence (0.8+) for clear cases, lower confidence for borderline cases

Post to evaluate:
Title: {title}
Content: {content}
Has Media: {has_media}
Media Type: {media_type}
Word Count: {word_count}
"""

    def __init__(self, openai_api_key: str):
        """Create the OpenAI client used for every evaluation."""
        self.openai_client = OpenAI(api_key=openai_api_key)

    # ------------------------------------------------------------------
    # Public entry points
    # ------------------------------------------------------------------

    def test_file(self, file_path: Path, author_name: str = "testuser") -> Dict[str, Any]:
        """Evaluate one text file and report the actions the moderator would take.

        Files written by the downloader in this script start with a
        ``Title: ...`` line and may end with ``Flair: ...`` lines; those are
        parsed out so they do not count as roleplay text. Any other file is
        used verbatim, with the title derived from the file name.

        Returns a dict with ``success=True`` plus the evaluation details, or
        ``success=False`` and an ``error`` string if anything went wrong.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                raw_text = f.read().strip()

            fallback_title = file_path.stem.replace('_', ' ').title()
            post = self._parse_post_text(raw_text, fallback_title)
            content = post["selftext"]

            submission = MockSubmission(
                id=f"test_{file_path.stem}",
                title=post["title"],
                selftext=content,
                url=post["url"],
                is_video=False,
                is_self=not post["url"],
                is_gallery=False,
                link_flair_text=self.roleplay_flair,
                author=MockAuthor(author_name)
            )

            evaluation = self._evaluate_post(submission)

            return {
                "file": str(file_path),
                "title": submission.title,
                "content_preview": content[:200] + "..." if len(content) > 200 else content,
                "word_count": len(content.split()),
                "has_media": self._has_media(submission),
                "media_type": self._get_media_type(submission),
                "evaluation": evaluation,
                "actions": self._decide_actions(evaluation),
                "success": True
            }

        except Exception as e:
            # Per-file boundary: one unreadable file must not abort a batch run.
            return {
                "file": str(file_path),
                "error": str(e),
                "success": False
            }

    def test_directory(self, directory_path: Path, author_name: str = "testuser",
                       random_count: "int | None" = None) -> List[Dict[str, Any]]:
        """Evaluate every ``*.txt`` file in a directory.

        When ``random_count`` is a positive number smaller than the number of
        available files, that many files are sampled at random; otherwise all
        files are tested in sorted order.
        """
        # Sorted so that full runs are reproducible across platforms.
        text_files = sorted(directory_path.glob("*.txt"))

        if not text_files:
            print(f"No .txt files found in {directory_path}")
            return []

        available = len(text_files)
        if random_count and 0 < random_count < available:
            text_files = random.sample(text_files, random_count)
            print(f"Randomly selected {random_count} files from {available} available files")

        print(f"Testing {len(text_files)} text files...")

        results = []
        for file_path in text_files:
            print(f"Testing {file_path.name}...")
            results.append(self.test_file(file_path, author_name))

        return results

    # ------------------------------------------------------------------
    # Evaluation
    # ------------------------------------------------------------------

    def _evaluate_post(self, submission: "MockSubmission") -> Dict[str, Any]:
        """Evaluate a post, retrying failed attempts up to ``max_retries`` times.

        Never raises: if every attempt fails, a permissive default evaluation
        (not art, not low effort, confidence 0.0) is returned with the last
        error in ``reasoning``.
        """
        import time

        attempts = max(1, int(self.max_retries))
        last_error = None

        for attempt in range(1, attempts + 1):
            try:
                return self._evaluate_post_attempt(submission)
            except Exception as e:
                last_error = e
                if attempt < attempts and self.retry_delay > 0:
                    time.sleep(self.retry_delay)

        return {
            "should_be_art": False,
            "is_low_effort": False,
            "confidence": 0.0,
            "reasoning": f"Failed after {attempts} attempts: {last_error}"
        }

    def _evaluate_post_attempt(self, submission: "MockSubmission") -> Dict[str, Any]:
        """Make a single model call and return the parsed evaluation.

        Raises:
            Exception: any API error, or ``ValueError`` /
                ``json.JSONDecodeError`` when the reply cannot be parsed.
                Errors propagate on purpose so ``_evaluate_post`` can retry.
        """
        title = submission.title or ""
        content = submission.selftext or ""

        prompt = self.evaluation_prompt.format(
            title=title,
            content=content,
            has_media=self._has_media(submission),
            media_type=self._get_media_type(submission),
            word_count=len(content.split())
        )

        response = self.openai_client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": "You are an expert roleplay moderator. Always respond with valid JSON only. Do not include any text before or after the JSON object."},
                {"role": "user", "content": prompt}
            ],
            max_completion_tokens=2000
        )

        # message.content can be None, which the parser reports as an empty reply.
        return self._parse_evaluation_response(response.choices[0].message.content)

    @classmethod
    def _parse_evaluation_response(cls, response_text) -> Dict[str, Any]:
        """Extract and validate the JSON evaluation from a raw model reply.

        Handles replies wrapped in Markdown code fences, surrounded by extra
        text, or truncated before the closing quote/brace.

        Raises:
            ValueError: the reply is empty, is not a JSON object, or lacks
                the decision keys.
            json.JSONDecodeError: the reply is not valid JSON and could not
                be repaired.
        """
        text = (response_text or "").strip()
        if not text:
            raise ValueError("Model returned an empty response")

        # Newlines become spaces so raw line breaks inside string values do
        # not invalidate the JSON and adjacent words are not glued together.
        text = text.replace('\r', ' ').replace('\n', ' ')

        # Prefer the contents of a fenced code block when one is present.
        for fence in ('```json', '```'):
            start = text.find(fence)
            if start != -1:
                start += len(fence)
                end = text.find('```', start)
                if end != -1:
                    text = text[start:end].strip()
                break

        # Trim anything outside the outermost JSON object.
        first = text.find('{')
        last = text.rfind('}')
        if first != -1 and last > first:
            text = text[first:last + 1]
        elif first != -1:
            text = text[first:]

        try:
            parsed = json.loads(text)
        except json.JSONDecodeError as decode_error:
            # A reply cut off mid-"reasoning" lacks the closing quote and
            # brace; a reply cut off just after it lacks only the brace.
            for suffix in ('"}', '}'):
                try:
                    parsed = json.loads(text + suffix)
                    break
                except json.JSONDecodeError:
                    continue
            else:
                raise decode_error

        if not isinstance(parsed, dict):
            raise ValueError("Model response is not a JSON object")

        return cls._normalize_evaluation(parsed)

    @classmethod
    def _normalize_evaluation(cls, raw: Dict[str, Any]) -> Dict[str, Any]:
        """Coerce the model's fields to the types the rest of the script expects."""
        missing = [key for key in ("should_be_art", "is_low_effort") if key not in raw]
        if missing:
            raise ValueError(f"Model response is missing required keys: {', '.join(missing)}")

        try:
            confidence = float(raw.get("confidence", 0.0))
        except (TypeError, ValueError):
            confidence = 0.0
        if confidence != confidence:  # NaN is the only value unequal to itself
            confidence = 0.0
        confidence = min(1.0, max(0.0, confidence))

        normalized = dict(raw)  # keep any extra keys the model supplied
        normalized.update({
            "should_be_art": cls._as_bool(raw["should_be_art"]),
            "is_low_effort": cls._as_bool(raw["is_low_effort"]),
            "confidence": confidence,
            "reasoning": str(raw.get("reasoning") or ""),
        })
        return normalized

    @staticmethod
    def _as_bool(value) -> bool:
        """Interpret JSON booleans and their common string spellings."""
        if isinstance(value, str):
            # bool("false") would be True, so strings are compared by content.
            return value.strip().lower() in ("true", "yes", "1")
        return bool(value)

    @staticmethod
    def _decide_actions(evaluation: Dict[str, Any]) -> List[str]:
        """Map an evaluation to action names; re-flairing takes priority over removal."""
        if evaluation["should_be_art"]:
            return ["CHANGE_FLAIR_TO_ART"]
        if evaluation["is_low_effort"]:
            return ["REMOVE_POST"]
        return ["ALLOW_POST"]

    # ------------------------------------------------------------------
    # Input parsing
    # ------------------------------------------------------------------

    @staticmethod
    def _parse_post_text(raw_text: str, fallback_title: str) -> Dict[str, str]:
        """Split a post file into title, body and link URL.

        Recognises the layout written by the downloader in this script:
        a leading ``Title: ...`` line, the body (or ``[Link Post: <url>]``),
        and optional trailing ``Flair: ...`` / ``Flair Template ID: ...``
        lines. Text without the ``Title:`` header is returned unchanged as
        the body with ``fallback_title``.
        """
        text = raw_text.strip()
        lines = text.splitlines()

        if not lines or not lines[0].startswith("Title:"):
            return {"title": fallback_title, "selftext": text, "url": ""}

        title = lines[0][len("Title:"):].strip() or fallback_title
        lines = lines[1:]

        # Drop the flair footer (and blank lines around it) appended for debugging.
        while lines and (
            not lines[-1].strip()
            or lines[-1].startswith(("Flair: ", "Flair Template ID: "))
        ):
            lines.pop()

        body = "\n".join(lines).strip()
        url = ""
        link_prefix = "[Link Post: "
        if body.startswith(link_prefix) and body.endswith("]") and "\n" not in body:
            url = body[len(link_prefix):-1].strip()
            body = ""

        return {"title": title, "selftext": body, "url": url}

    # ------------------------------------------------------------------
    # Media detection
    # ------------------------------------------------------------------

    @staticmethod
    def _split_url(url: str) -> tuple:
        """Return ``(hostname, path)`` of ``url``, both lower-cased."""
        from urllib.parse import urlparse

        candidate = url.strip().lower()
        if "://" not in candidate:
            # Without a scheme urlparse() would put the host into the path.
            candidate = "//" + candidate
        try:
            parsed = urlparse(candidate)
            return parsed.hostname or "", parsed.path
        except ValueError:
            return "", candidate

    @staticmethod
    def _host_matches(host: str, domains) -> bool:
        """Return True if ``host`` is one of ``domains`` or a subdomain of one."""
        return any(host == domain or host.endswith("." + domain) for domain in domains)

    def _has_media(self, submission: "MockSubmission") -> bool:
        """Check whether a submission has media attached or linked.

        Video and gallery posts always count. Self posts count when their
        text mentions a known media host; link posts count when the URL path
        ends in a media extension or the host is a known media domain.
        """
        try:
            if submission.is_video:
                return True

            if getattr(submission, 'is_gallery', False):
                return True

            if submission.is_self:
                text_lower = (submission.selftext or "").lower()
                return any(indicator in text_lower for indicator in self._TEXT_MEDIA_INDICATORS)

            url = getattr(submission, 'url', None)
            if not url:
                return False

            host, path = self._split_url(url)
            if path.endswith(self._IMAGE_EXTENSIONS + self._VIDEO_EXTENSIONS):
                return True
            return self._host_matches(host, self._MEDIA_DOMAINS)

        except Exception as e:
            print(f"Error checking media: {e}")
            return False

    def _get_media_type(self, submission: "MockSubmission") -> str:
        """Classify a submission as video, gallery, image, link or text.

        Returns ``"unknown"`` if the submission cannot be inspected.
        """
        try:
            if submission.is_video:
                return "video"

            if getattr(submission, 'is_gallery', False):
                return "gallery"

            url = getattr(submission, 'url', None)
            if not url:
                return "text"

            host, path = self._split_url(url)
            # Suffix match on the path: ".gifv" is a video and must not be
            # mistaken for ".gif", and query strings are ignored.
            if path.endswith(self._VIDEO_EXTENSIONS):
                return "video"
            if path.endswith(self._IMAGE_EXTENSIONS):
                return "image"
            if self._host_matches(host, self._VIDEO_DOMAINS):
                return "video"
            return "link"

        except Exception as e:
            print(f"Error getting media type: {e}")
            return "unknown"
def print_results(results: List[Dict[str, Any]]) -> None:
    """Print per-file results, including a preview of the mod mail each action would send.

    Each item is a dict as returned by ``TestIntelligentModerator.test_file``:
    failed items carry ``error``; successful ones carry ``title``,
    ``word_count``, ``has_media``, ``media_type``, ``content_preview``,
    ``evaluation`` and ``actions``.
    """
    action_labels = {
        "CHANGE_FLAIR_TO_ART": "   🎨 Change flair to Art",
        "REMOVE_POST": "   🗑️ Remove post (low effort)",
        "ALLOW_POST": "   ✅ Allow post",
    }

    print("\n" + "=" * 80)
    print("TEST RESULTS")
    print("=" * 80)

    for i, result in enumerate(results, 1):
        print(f"\n{i}. {result['file']}")
        print("-" * 60)

        if not result["success"]:
            print(f"❌ ERROR: {result['error']}")
            continue

        print(f"Title: {result['title']}")
        print(f"Word Count: {result['word_count']}")
        print(f"Has Media: {result['has_media']}")
        print(f"Media Type: {result['media_type']}")
        print(f"Content Preview: {result['content_preview']}")

        evaluation = result["evaluation"]
        # The model may return the confidence as a string; never crash on it.
        confidence = evaluation.get("confidence", 0.0)
        try:
            confidence_text = f"{float(confidence):.2f}"
        except (TypeError, ValueError):
            confidence_text = str(confidence)
        reasoning = evaluation.get("reasoning", "")

        print("\n🤖 AI Evaluation:")
        print(f"   Should be Art: {evaluation.get('should_be_art')}")
        print(f"   Is Low Effort: {evaluation.get('is_low_effort')}")
        print(f"   Confidence: {confidence_text}")
        print(f"   Reasoning: {reasoning}")

        actions = result["actions"]
        print("\n📋 Actions:")
        for action in actions:
            if action in action_labels:
                print(action_labels[action])

        # Show what mod mail would be sent
        if "CHANGE_FLAIR_TO_ART" in actions:
            print("\n📧 Mod Mail (Art Flair Change):")
            print("   Subject: Your post flair has been changed to Art")
            print("   Message: Your roleplay post has been automatically re-flaired as 'Art' because it appears to be primarily showcasing artwork or visual content rather than roleplay.")
            print(f"   Reasoning: {reasoning}")

        if "REMOVE_POST" in actions:
            print("\n📧 Mod Mail (Low Effort Removal):")
            print("   Subject: Your roleplay post has been removed for low effort")
            print("   Message: Your roleplay post has been removed because it was determined to be low effort content.")
            print(f"   Reasoning: {reasoning}")


def main():
    """Command-line entry point: download sample posts or test files against the moderator.

    Download commands (``--download-roleplay`` / ``--download-random``) need
    only the Reddit configuration; the OpenAI API key is required only when
    files are actually evaluated. Exits with status 1 on invalid input or
    configuration.
    """
    # Load environment variables from .env file
    load_dotenv()

    parser = argparse.ArgumentParser(
        description="Test the intelligent roleplay moderator against text files",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Test a single file
  python test_moderator.py --file sample_post.txt

  # Test all files in a directory
  python test_moderator.py --directory test_posts/

  # Test 5 random files from a directory
  python test_moderator.py --directory real_test_posts/ --random 5

  # Download 20 roleplay posts from Reddit
  python test_moderator.py --download-roleplay 20

  # Download 10 random posts from Reddit
  python test_moderator.py --download-random 10

  # Download posts then test them
  python test_moderator.py --download-roleplay 15
  python test_moderator.py --directory real_test_posts/ --random 5
        """
    )

    parser.add_argument(
        "--file", "-f",
        type=Path,
        help="Test a single text file"
    )

    parser.add_argument(
        "--directory", "-d",
        type=Path,
        help="Test all .txt files in a directory"
    )

    parser.add_argument(
        "--author", "-a",
        default="testuser",
        help="Author name for mock submissions (default: testuser)"
    )

    parser.add_argument(
        "--api-key", "-k",
        help="OpenAI API key (or set OPENAI_API_KEY environment variable)"
    )

    # NOTE(review): accepted for compatibility but not consulted anywhere in this function.
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Show detailed output"
    )

    parser.add_argument(
        "--random", "-r",
        type=int,
        help="Randomly select N files from directory for testing"
    )

    parser.add_argument(
        "--download-roleplay",
        type=int,
        metavar="COUNT",
        help="Download N roleplay posts from Reddit for testing"
    )

    parser.add_argument(
        "--download-random",
        type=int,
        metavar="COUNT",
        help="Download N random posts from Reddit for testing"
    )

    args = parser.parse_args()

    # Handle download commands first; they never call OpenAI, so no API key
    # is demanded on this path.
    if args.download_roleplay or args.download_random:
        try:
            from src.umabot.config import Config
            config = Config.from_env()
            config.validate()
        except Exception as e:
            print(f"❌ Error loading Reddit config: {e}")
            print("Make sure your .env file has all required Reddit credentials")
            sys.exit(1)

        downloader = RedditDownloader(config)

        if args.download_roleplay:
            downloader.download_roleplay_posts(args.download_roleplay)

        if args.download_random:
            downloader.download_random_posts(args.download_random)

        print("✅ Download complete!")
        return

    # Validate inputs for testing
    if not args.file and not args.directory:
        print("❌ Error: Must specify either --file or --directory")
        parser.print_help()
        sys.exit(1)

    if args.file and not args.file.exists():
        print(f"❌ Error: File {args.file} does not exist")
        sys.exit(1)

    if args.directory and not args.directory.exists():
        print(f"❌ Error: Directory {args.directory} does not exist")
        sys.exit(1)

    # Get API key (from .env file, environment variable, or command line)
    api_key = args.api_key or os.getenv("OPENAI_API_KEY")
    if not api_key:
        print("❌ Error: OpenAI API key required")
        print("Set OPENAI_API_KEY in .env file, environment variable, or use --api-key")
        sys.exit(1)

    # Initialize moderator
    try:
        moderator = TestIntelligentModerator(api_key)
    except Exception as e:
        print(f"❌ Error initializing moderator: {e}")
        sys.exit(1)

    # Run tests
    results = []

    if args.file:
        print(f"Testing single file: {args.file}")
        results.append(moderator.test_file(args.file, args.author))

    if args.directory:
        print(f"Testing directory: {args.directory}")
        results.extend(moderator.test_directory(args.directory, args.author, args.random))

    # Print results
    print_results(results)

    # Summary
    total = len(results)
    successful = sum(1 for r in results if r["success"])
    errors = total - successful

    print("\n" + "=" * 80)
    print(f"SUMMARY: {successful}/{total} tests completed successfully")
    if errors > 0:
        print(f"Errors: {errors}")
    print("=" * 80)


if __name__ == "__main__":
    main()