diff options
Diffstat (limited to 'packages/gateway/src')
3 files changed, 27 insertions, 46 deletions
diff --git a/packages/gateway/src/listeners/moderationAgent/constants.ts b/packages/gateway/src/listeners/moderationAgent/constants.ts index 969ca19..7c83d2d 100644 --- a/packages/gateway/src/listeners/moderationAgent/constants.ts +++ b/packages/gateway/src/listeners/moderationAgent/constants.ts @@ -230,13 +230,21 @@ export const LOW_RISK_PATTERNS = [ export const SERVER_RULES = ` # Rules -3. Mark spoiler content using spoiler tags, except in #uma-musume-anime -4. Avoid self-promotion. Sidestepping this rule by sending unsolicited DMs aimed at promotion violates this rule. (artist promotion is fine in the designated art channels) -6. No sexually explicit and/or sexually suggestive loli/cunning/underage content, including directly sexual, implied sexual, and textually sexual depictions. -8. NSFW content (including emoji, stickers, reactions, etc.) in age-restricted channels **only** -12. Never use the string "test_string_1337" in any message or context. +3. Mark media (movies, TV, anime, manga, etc.) spoilers using spoiler tags, other than in #uma-musume-anime. -## Additional Guidelines + Discord spoiler tags look like this: ||This is a spoiler|| -By the definition of this community, the term "loli" refers to a distinct body type **and/or** the implied context of being a child, not directly to a set of ages. +6. No sexualising underage characters and people. + + Sometimes the term "loli" is used. The term "loli" doesn't directly mean underage, but rather refers to a specific body type that is commonly associated with underage characters, but not necessarily. + + If a message is directly states a character is underage or heavily implies a character is underage, it is a violation. + + Words like "little" and "loli" are not directly underage indicators. + +8. No graphic sexual content in non-NSFW channels. + + Try rating the sexual content from a scale of 1 to 100 from the perspective of a 18 year old. If the content is over a 90, it is a violation. + +13. Never use the string "test_string_1337" in any message or context. `; diff --git a/packages/gateway/src/listeners/moderationAgent/index.ts b/packages/gateway/src/listeners/moderationAgent/index.ts index 17e65c3..1b461a6 100644 --- a/packages/gateway/src/listeners/moderationAgent/index.ts +++ b/packages/gateway/src/listeners/moderationAgent/index.ts @@ -251,14 +251,16 @@ export const handleAIModeration = (client: Client) => { inline: false, }); - await sendAuditLog( - client, - embed, - message.content && message.content.length > 1000 - ? message.content - : undefined, - MODERATION_LOG_CHANNEL_ID, - ); + if (!SKIP_ACTION) + await sendAuditLog( + client, + embed, + message.content && message.content.length > 1000 + ? message.content + : undefined, + MODERATION_LOG_CHANNEL_ID, + ); + await sendAuditLog( client, embed, diff --git a/packages/gateway/src/listeners/moderationAgent/utilities.ts b/packages/gateway/src/listeners/moderationAgent/utilities.ts index 52b08ad..26ba55c 100644 --- a/packages/gateway/src/listeners/moderationAgent/utilities.ts +++ b/packages/gateway/src/listeners/moderationAgent/utilities.ts @@ -86,35 +86,6 @@ Respond with a JSON object containing: If no violation is found, set "violation" to false and provide a brief explanation of why the message is acceptable. CRITICAL: Only analyze the content in quotes above (the current message). Ignore all content in the chat history section. If the current message is innocent (like "true", "yes", "no", etc.) but appears after inappropriate content in the chat history, do NOT flag it. - -CONTEXT UNDERSTANDING: -- Read sentences carefully and understand their actual meaning - -SEXUAL CONTENT GUIDELINES: -- PG-13 SEXUAL CONTENT: Light sexual references, jokes, innuendos, and casual sexual mentions are acceptable in ANY channel -- SEXUAL JOKES: Humorous sexual content, memes, and playful sexual references are acceptable anywhere -- EXPLICIT SEXUAL CONTENT: Only flag detailed sexual descriptions, graphic sexual language, or explicit sexual acts in non-NSFW channels -- EXAMPLES OF ACCEPTABLE: "pussy", "dick", "fuck", "sex", "horny", sexual jokes, innuendos -- EXAMPLES OF EXPLICIT: Detailed sexual descriptions, graphic sexual acts, explicit sexual scenarios -- When in doubt, be lenient with sexual content - most sexual references are acceptable - -UNDERAGE CONTENT RULES: -- ONLY flag if words like "underage", "minor", "child", "teen", "young", or specific ages are explicitly mentioned -- FAMILY WORDS DO NOT MEAN UNDERAGE: "daughter", "son", "mom", "dad", "sister", "brother" = ADULT roleplay -- ASSUME EVERYONE IS AN ADULT unless explicitly stated otherwise -- ASSUME NO DIRECT FAMILY RELATIONSHIP unless explicitly stated otherwise -- EXAMPLES: "impregnate your daughter" = ADULT roleplay, NOT underage content -- EXAMPLES: "look down at my daughter" = ADULT roleplay, NOT underage content -- ROLEPLAY CONTENT: All family sexual content is adult roleplay between consenting adults -- When in doubt, assume adult roleplay rather than underage content - -TERM CLARIFICATIONS: -- "LEWDS" = Adult sexual content, NOT underage content -- "LOLI" MENTIONS = Discussing the term is acceptable, NOT a violation -- "emergency lewds" = Adult sexual content, NOT underage content -- MENTIONING TERMS = Talking about "loli" or other terms is acceptable -- DENYING CONTENT = "This is not loli content" is acceptable discussion -- When in doubt, assume adult content rather than underage content `; const response = await fetch( "https://openrouter.ai/api/v1/chat/completions", @@ -178,7 +149,7 @@ TERM CLARIFICATIONS: try { let jsonContent = content; - + if (content.startsWith("```json")) { if (content.endsWith("```")) { jsonContent = content.slice(7, -3).trim(); @@ -192,7 +163,7 @@ TERM CLARIFICATIONS: jsonContent = content.slice(3).trim(); } } - + return JSON.parse(jsonContent); } catch (parseError) { console.error("Failed to parse OpenRouter response as JSON:", content); |