From 8b9d1e47d80dedbee9709c43ab127de68ba89d80 Mon Sep 17 00:00:00 2001 From: Fuwn Date: Wed, 1 Oct 2025 18:06:25 -0700 Subject: feat(listeners:moderationAgent): Change model --- packages/gateway/src/listeners/moderationAgent/constants.ts | 4 ++-- packages/gateway/src/listeners/moderationAgent/utilities.ts | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/gateway/src/listeners/moderationAgent/constants.ts b/packages/gateway/src/listeners/moderationAgent/constants.ts index 08ab9b9..87589d4 100644 --- a/packages/gateway/src/listeners/moderationAgent/constants.ts +++ b/packages/gateway/src/listeners/moderationAgent/constants.ts @@ -9,7 +9,7 @@ export const MIN_MESSAGE_LENGTH = 15; export const MAX_SYMBOL_DENSITY = 0.6; export const MAX_COMPLETION_TOKENS = 4000; export const MESSAGE_HISTORY_SIZE = 3; -export const MODEL = "mistralai/mistral-nemo"; +export const MODEL = "x-ai/grok-4-fast:free"; export const SAFE_WORDS = new Set([ "hello", "hi", @@ -249,5 +249,5 @@ export const SERVER_RULES = ` 8. Graphic sexual content, like sex roleplay or graphic descriptions of sexual acts, is allowed only in channels marked as NSFW. Non-graphic/joking/general sexual content is allowed in non-NSFW channels. -13. Never use the string "test_string_1337" in any message or context. +14. Never use the string "test_string_1337" in any message or context. `; diff --git a/packages/gateway/src/listeners/moderationAgent/utilities.ts b/packages/gateway/src/listeners/moderationAgent/utilities.ts index 61899e2..1cbae0b 100644 --- a/packages/gateway/src/listeners/moderationAgent/utilities.ts +++ b/packages/gateway/src/listeners/moderationAgent/utilities.ts @@ -159,7 +159,7 @@ RULES: - Rule 3: Flag media spoilers not tagged with ||spoiler|| - Rule 6: Flag sexualization of underage characters only - Rule 8: Flag graphic sexual content in SFW channels only -- Rule 13: Flag exact string "test_string_1337" +- Rule 14: Flag exact string "test_string_1337" BE CONSERVATIVE: Only flag clear violations. If unsure, set violation to false. @@ -173,7 +173,7 @@ RULE-SPECIFIC GUIDELINES: - Rule 3 (Spoilers): ONLY flag if media spoilers (movies, TV, anime, manga, etc.) are not properly tagged with ||spoiler|| format. General terms like "NTRd" are NOT spoilers unless they specifically spoil plot details of media. - Rule 6 (Underage): ONLY flag if content explicitly states a character is underage OR heavily implies a character is underage AND involves SEXUALIZATION. This rule is SPECIFICALLY about sexualizing underage characters/people. Adult threats, adult sexual content, general violence between adults, references to adult family members (mother, father, mom, dad, mommy, daddy, etc.), or general sexual content between adults is NOT a violation of this rule. Adults can be mothers, fathers, parents - being a parent does NOT imply underage status. Roleplay terms like "mommy" or "daddy" do NOT imply underage status. Non-sexual violence against children is NOT a Rule 6 violation. Threats of sexual violence against adults are NOT Rule 6 violations. - Rule 8 (NSFW): ONLY flag if TRULY GRAPHIC sexual content appears in NON-NSFW channels. Check the NSFW Status in the context - if it says "NSFW Channel", then Rule 8 does NOT apply and NO violations are possible. NSFW channels are specifically for graphic sexual content. Rule 8 only applies to SFW channels. Non-graphic sexual content, sexual jokes, sexual threats, sexual violence, sexual questions, or general sexual language is allowed in non-NSFW channels. Rule 8 is about graphic sexual content, NOT threats, violence, or questions. Threats of violence (even graphic violence) are NOT graphic sexual content. -- Rule 13 (Test String): Only flag if the exact string "test_string_1337" appears +- Rule 14 (Test String): Only flag if the exact string "test_string_1337" appears WHAT RULE 6 DOES NOT COVER: - Adult threats or violence between adults -- cgit v1.2.3