summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Fuwn <[email protected]> 2025-10-01 18:06:25 -0700
committer Fuwn <[email protected]> 2025-10-01 18:06:25 -0700
commit 8b9d1e47d80dedbee9709c43ab127de68ba89d80 (patch)
tree 55dee88ff6736b2892ff313b324c69f80d324f7c
parent feat(listeners:moderationAgent): Skip actions (diff)
download umabotdiscord-8b9d1e47d80dedbee9709c43ab127de68ba89d80.tar.xz
download umabotdiscord-8b9d1e47d80dedbee9709c43ab127de68ba89d80.zip
feat(listeners:moderationAgent): Change model
-rw-r--r-- packages/gateway/src/listeners/moderationAgent/constants.ts 4
-rw-r--r-- packages/gateway/src/listeners/moderationAgent/utilities.ts 4
2 files changed, 4 insertions, 4 deletions
diff --git a/packages/gateway/src/listeners/moderationAgent/constants.ts b/packages/gateway/src/listeners/moderationAgent/constants.ts
index 08ab9b9..87589d4 100644
--- a/packages/gateway/src/listeners/moderationAgent/constants.ts
+++ b/packages/gateway/src/listeners/moderationAgent/constants.ts
@@ -9,7 +9,7 @@ export const MIN_MESSAGE_LENGTH = 15;
export const MAX_SYMBOL_DENSITY = 0.6;
export const MAX_COMPLETION_TOKENS = 4000;
export const MESSAGE_HISTORY_SIZE = 3;
-export const MODEL = "mistralai/mistral-nemo";
+export const MODEL = "x-ai/grok-4-fast:free";
export const SAFE_WORDS = new Set([
"hello",
"hi",
@@ -249,5 +249,5 @@ export const SERVER_RULES = `
8. Graphic sexual content, like sex roleplay or graphic descriptions of sexual acts, is allowed only in channels marked as NSFW. Non-graphic/joking/general sexual content is allowed in non-NSFW channels.
-13. Never use the string "test_string_1337" in any message or context.
+14. Never use the string "test_string_1337" in any message or context.
`;
diff --git a/packages/gateway/src/listeners/moderationAgent/utilities.ts b/packages/gateway/src/listeners/moderationAgent/utilities.ts
index 61899e2..1cbae0b 100644
--- a/packages/gateway/src/listeners/moderationAgent/utilities.ts
+++ b/packages/gateway/src/listeners/moderationAgent/utilities.ts
@@ -159,7 +159,7 @@ RULES:
- Rule 3: Flag media spoilers not tagged with ||spoiler||
- Rule 6: Flag sexualization of underage characters only
- Rule 8: Flag graphic sexual content in SFW channels only
-- Rule 13: Flag exact string "test_string_1337"
+- Rule 14: Flag exact string "test_string_1337"
BE CONSERVATIVE: Only flag clear violations. If unsure, set violation to false.
@@ -173,7 +173,7 @@ RULE-SPECIFIC GUIDELINES:
- Rule 3 (Spoilers): ONLY flag if media spoilers (movies, TV, anime, manga, etc.) are not properly tagged with ||spoiler|| format. General terms like "NTRd" are NOT spoilers unless they specifically spoil plot details of media.
- Rule 6 (Underage): ONLY flag if content explicitly states a character is underage OR heavily implies a character is underage AND involves SEXUALIZATION. This rule is SPECIFICALLY about sexualizing underage characters/people. Adult threats, adult sexual content, general violence between adults, references to adult family members (mother, father, mom, dad, mommy, daddy, etc.), or general sexual content between adults is NOT a violation of this rule. Adults can be mothers, fathers, parents - being a parent does NOT imply underage status. Roleplay terms like "mommy" or "daddy" do NOT imply underage status. Non-sexual violence against children is NOT a Rule 6 violation. Threats of sexual violence against adults are NOT Rule 6 violations.
- Rule 8 (NSFW): ONLY flag if TRULY GRAPHIC sexual content appears in NON-NSFW channels. Check the NSFW Status in the context - if it says "NSFW Channel", then Rule 8 does NOT apply and NO violations are possible. NSFW channels are specifically for graphic sexual content. Rule 8 only applies to SFW channels. Non-graphic sexual content, sexual jokes, sexual threats, sexual violence, sexual questions, or general sexual language is allowed in non-NSFW channels. Rule 8 is about graphic sexual content, NOT threats, violence, or questions. Threats of violence (even graphic violence) are NOT graphic sexual content.
-- Rule 13 (Test String): Only flag if the exact string "test_string_1337" appears
+- Rule 14 (Test String): Only flag if the exact string "test_string_1337" appears
WHAT RULE 6 DOES NOT COVER:
- Adult threats or violence between adults