-rw-r--r--  apps/docs/integrations/pipecat.mdx    |  40
-rw-r--r--  apps/docs/voice-realtime/pipecat.mdx  | 237
2 files changed, 38 insertions(+), 239 deletions(-)
diff --git a/apps/docs/integrations/pipecat.mdx b/apps/docs/integrations/pipecat.mdx
index c96f27e1..5ba772b2 100644
--- a/apps/docs/integrations/pipecat.mdx
+++ b/apps/docs/integrations/pipecat.mdx
@@ -28,7 +28,8 @@ You can obtain an API key from [console.supermemory.ai](https://console.supermem
Supermemory integration is provided through the `SupermemoryPipecatService` class in Pipecat:
```python
-from supermemory_pipecat import SupermemoryPipecatService, InputParams
+from supermemory_pipecat import SupermemoryPipecatService
+from supermemory_pipecat.service import InputParams
memory = SupermemoryPipecatService(
api_key=os.getenv("SUPERMEMORY_API_KEY"),
@@ -96,6 +97,7 @@ InputParams(
search_limit=10, # Max memories to retrieve (default: 10)
search_threshold=0.1, # Similarity threshold 0.0-1.0 (default: 0.1)
system_prompt="Based on previous conversations:\n\n",
+ inject_mode="auto", # "auto" | "system" | "user"
)
```
@@ -105,6 +107,31 @@ InputParams(
| `search_threshold` | float | 0.1 | Minimum similarity threshold for memory retrieval |
| `mode` | str | "full" | Memory retrieval mode: `"profile"`, `"query"`, or `"full"` |
| `system_prompt` | str | "Based on previous conversations:\n\n" | Prefix text for memory context |
+| `inject_mode` | str | "auto" | How memories are injected: `"auto"`, `"system"`, or `"user"` |
+
+## Injection Modes
+
+The `inject_mode` parameter controls how memories are added to the LLM context:
+
+| Mode | Behavior |
+|------|----------|
+| `"auto"` | **Auto-detects** based on frame types. If audio frames are detected → injects into the system prompt (speech-to-speech). If only text frames are seen → injects as a user message (STT/TTS). |
+| `"system"` | Always injects memories into the system prompt |
+| `"user"` | Always injects memories as a user message |
+
+## Speech-to-Speech Models (Gemini Live, etc.)
+
+For speech-to-speech models like Gemini Live, the SDK **automatically detects** audio frames and injects memories into the system prompt. No configuration needed:
+
+```python
+from supermemory_pipecat import SupermemoryPipecatService
+
+# Auto-detection works out of the box
+memory = SupermemoryPipecatService(
+ api_key=os.getenv("SUPERMEMORY_API_KEY"),
+ user_id="unique_user_id",
+)
+```
## Example: Voice Agent with Memory
@@ -130,7 +157,8 @@ from pipecat.transports.websocket.fastapi import (
FastAPIWebsocketTransport,
)
-from supermemory_pipecat import SupermemoryPipecatService, InputParams
+from supermemory_pipecat import SupermemoryPipecatService
+from supermemory_pipecat.service import InputParams
app = FastAPI()
@@ -201,3 +229,11 @@ if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8000)
```
+
+## Example: Gemini Live with Memory
+
+For a complete example using Gemini Live speech-to-speech with Supermemory, check out the reference implementation:
+
+<Card title="Pipecat Memory Example" icon="github" href="https://github.com/supermemoryai/pipecat-memory">
+ Full working example with Gemini Live, including frontend and backend code.
+</Card>
diff --git a/apps/docs/voice-realtime/pipecat.mdx b/apps/docs/voice-realtime/pipecat.mdx
deleted file mode 100644
index 61508593..00000000
--- a/apps/docs/voice-realtime/pipecat.mdx
+++ /dev/null
@@ -1,237 +0,0 @@
----
-title: "Pipecat"
-description: "Integrate Supermemory with Pipecat for conversational memory in voice AI agents"
----
-
-Supermemory integrates with [Pipecat](https://github.com/pipecat-ai/pipecat), providing long-term memory capabilities for voice AI agents. Your Pipecat applications will remember past conversations and provide personalized responses based on user history.
-
-## Installation
-
-To use Supermemory with Pipecat, install the required dependencies:
-
-```bash
-pip install supermemory-pipecat
-```
-
-Set up your API key as an environment variable:
-
-```bash
-export SUPERMEMORY_API_KEY=your_supermemory_api_key
-```
-
-You can obtain an API key from [console.supermemory.ai](https://console.supermemory.ai).
-
-## Configuration
-
-Supermemory integration is provided through the `SupermemoryPipecatService` class in Pipecat:
-
-```python
-from supermemory_pipecat import SupermemoryPipecatService
-from supermemory_pipecat.service import InputParams
-
-memory = SupermemoryPipecatService(
- api_key=os.getenv("SUPERMEMORY_API_KEY"),
- user_id="unique_user_id",
- session_id="session_123",
- params=InputParams(
- mode="full", # "profile" | "query" | "full"
- search_limit=10, # Max memories to retrieve
- search_threshold=0.1, # Relevance threshold (0.0-1.0)
- system_prompt="Based on previous conversations:\n\n",
- ),
-)
-```
-
-## Pipeline Integration
-
-The `SupermemoryPipecatService` should be positioned between your context aggregator and LLM service in the Pipecat pipeline:
-
-```python
-pipeline = Pipeline([
- transport.input(),
- stt, # Speech-to-text
- context_aggregator.user(),
- memory, # <- Supermemory memory service
- llm,
- tts, # Text-to-speech
- transport.output(),
- context_aggregator.assistant(),
-])
-```
-
-## How It Works
-
-When integrated with Pipecat, Supermemory provides two key functionalities:
-
-### 1. Memory Retrieval
-
-When a user message is detected, Supermemory retrieves relevant memories:
-
-- **Static Profile**: Persistent facts about the user
-- **Dynamic Profile**: Recent context and preferences
-- **Search Results**: Semantically relevant past memories
-
-### 2. Context Enhancement
-
-Retrieved memories are formatted and injected into the LLM context before generation, giving the model awareness of past conversations.
-
-## Memory Modes
-
-| Mode | Static Profile | Dynamic Profile | Search Results | Use Case |
-|------|----------------|-----------------|----------------|----------|
-| `"profile"` | Yes | Yes | No | Personalization without search |
-| `"query"` | No | No | Yes | Finding relevant past context |
-| `"full"` | Yes | Yes | Yes | Complete memory (default) |
-
-## Configuration Options
-
-You can customize how memories are retrieved and used:
-
-### InputParams
-
-```python
-InputParams(
- mode="full", # Memory mode (default: "full")
- search_limit=10, # Max memories to retrieve (default: 10)
- search_threshold=0.1, # Similarity threshold 0.0-1.0 (default: 0.1)
- system_prompt="Based on previous conversations:\n\n",
- inject_mode="auto", # "auto" | "system" | "user"
-)
-```
-
-| Parameter | Type | Default | Description |
-|-----------|------|---------|-------------|
-| `search_limit` | int | 10 | Maximum number of memories to retrieve per query |
-| `search_threshold` | float | 0.1 | Minimum similarity threshold for memory retrieval |
-| `mode` | str | "full" | Memory retrieval mode: `"profile"`, `"query"`, or `"full"` |
-| `system_prompt` | str | "Based on previous conversations:\n\n" | Prefix text for memory context |
-| `inject_mode` | str | "auto" | How memories are injected: `"auto"`, `"system"`, or `"user"` |
-
-## Injection Modes
-
-The `inject_mode` parameter controls how memories are added to the LLM context:
-
-| Mode | Behavior |
-|------|----------|
-| `"auto"` | **Auto-detects** based on frame types. If audio frames detected → injects to system prompt (speech-to-speech). If only text frames → injects as user message (STT/TTS). |
-| `"system"` | Always injects memories into the system prompt |
-| `"user"` | Always injects memories as a user message |
-
-## Speech-to-Speech Models (Gemini Live, etc.)
-
-For speech-to-speech models like Gemini Live, the SDK **automatically detects** audio frames and injects memories into the system prompt. No configuration needed:
-
-```python
-from supermemory_pipecat import SupermemoryPipecatService
-
-# Auto-detection works out of the box
-memory = SupermemoryPipecatService(
- api_key=os.getenv("SUPERMEMORY_API_KEY"),
- user_id="unique_user_id",
-)
-```
-
-## Example: Voice Agent with Memory
-
-Here's a complete example of a Pipecat voice agent with Supermemory integration:
-
-```python
-import os
-from fastapi import FastAPI, WebSocket
-from fastapi.middleware.cors import CORSMiddleware
-
-from pipecat.audio.vad.silero import SileroVADAnalyzer
-from pipecat.frames.frames import LLMMessagesFrame
-from pipecat.pipeline.pipeline import Pipeline
-from pipecat.pipeline.runner import PipelineRunner
-from pipecat.pipeline.task import PipelineParams, PipelineTask
-from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
-from pipecat.serializers.protobuf import ProtobufFrameSerializer
-from pipecat.services.openai.llm import OpenAILLMService
-from pipecat.services.openai.tts import OpenAITTSService
-from pipecat.services.openai.stt import OpenAISTTService
-from pipecat.transports.websocket.fastapi import (
- FastAPIWebsocketParams,
- FastAPIWebsocketTransport,
-)
-
-from supermemory_pipecat import SupermemoryPipecatService
-from supermemory_pipecat.service import InputParams
-
-app = FastAPI()
-
-SYSTEM_PROMPT = """You are a helpful voice assistant with memory capabilities.
-You remember information from past conversations and use it to provide personalized responses.
-Keep responses brief and conversational."""
-
-
-async def run_bot(websocket_client, user_id: str, session_id: str):
- transport = FastAPIWebsocketTransport(
- websocket=websocket_client,
- params=FastAPIWebsocketParams(
- audio_in_enabled=True,
- audio_out_enabled=True,
- vad_enabled=True,
- vad_analyzer=SileroVADAnalyzer(),
- vad_audio_passthrough=True,
- serializer=ProtobufFrameSerializer(),
- ),
- )
-
- stt = OpenAISTTService(api_key=os.getenv("OPENAI_API_KEY"))
- llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-5-mini")
- tts = OpenAITTSService(api_key=os.getenv("OPENAI_API_KEY"), voice="alloy")
-
- # Supermemory memory service
- memory = SupermemoryPipecatService(
- user_id=user_id,
- session_id=session_id,
- params=InputParams(
- mode="full",
- search_limit=10,
- search_threshold=0.1,
- ),
- )
-
- context = OpenAILLMContext([{"role": "system", "content": SYSTEM_PROMPT}])
- context_aggregator = llm.create_context_aggregator(context)
-
- pipeline = Pipeline([
- transport.input(),
- stt,
- context_aggregator.user(),
- memory,
- llm,
- tts,
- transport.output(),
- context_aggregator.assistant(),
- ])
-
- task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
-
- @transport.event_handler("on_client_disconnected")
- async def on_client_disconnected(transport, client):
- await task.cancel()
-
- runner = PipelineRunner(handle_sigint=False)
- await runner.run(task)
-
-
-async def websocket_endpoint(websocket: WebSocket):
- await websocket.accept()
- await run_bot(websocket, user_id="alice", session_id="session-123")
-
-
-if __name__ == "__main__":
- import uvicorn
- uvicorn.run(app, host="0.0.0.0", port=8000)
-```
-
-## Example: Gemini Live with Memory
-
-For a complete example using Gemini Live speech-to-speech with Supermemory, check out the reference implementation:
-
-<Card title="Pipecat Memory Example" icon="github" href="https://github.com/supermemoryai/pipecat-memory">
- Full working example with Gemini Live, including frontend and backend code.
-</Card>
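
The `inject_mode` resolution described in the new "Injection Modes" table can be sketched as a small dispatch function. This is a hedged illustration of the documented behavior only, not the service's actual internals (which this diff does not show); the function and parameter names are hypothetical:

```python
def resolve_inject_mode(inject_mode: str, saw_audio_frames: bool) -> str:
    """Mirror the documented inject_mode behavior (illustrative only).

    "auto" picks the injection target from observed frame types:
    audio frames -> system prompt (speech-to-speech, e.g. Gemini Live),
    text-only    -> user message (STT/TTS pipelines).
    Explicit "system"/"user" always override auto-detection.
    """
    if inject_mode in ("system", "user"):
        return inject_mode
    if inject_mode != "auto":
        raise ValueError(f"unknown inject_mode: {inject_mode!r}")
    return "system" if saw_audio_frames else "user"


# Speech-to-speech pipeline: audio frames observed -> system prompt
print(resolve_inject_mode("auto", saw_audio_frames=True))   # system
# STT/TTS pipeline: only text frames -> user message
print(resolve_inject_mode("auto", saw_audio_frames=False))  # user
```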