about summary refs log tree commit diff
diff options
context:
space:
mode:
author: nexxeln <[email protected]> 2025-12-29 18:51:43 +0000
committer: nexxeln <[email protected]> 2025-12-29 18:51:43 +0000
commit: 10d8f924393f912ab408d6da50eb32884cef5509 (patch)
tree: 9d5cea0915e0dd961907b8a07f4c1e4140272e99
parent: chore: bump package version (diff)
download: supermemory-12-23-fix_deduplication_in_python_sdk.tar.xz
          supermemory-12-23-fix_deduplication_in_python_sdk.zip
fix deduplication in python sdk (#626) [branch: 12-23-fix_deduplication_in_python_sdk]
done in a similar way to the ai sdk
-rw-r--r-- packages/openai-sdk-python/src/supermemory_openai/__init__.py   |  4
-rw-r--r-- packages/openai-sdk-python/src/supermemory_openai/middleware.py | 54
-rw-r--r-- packages/openai-sdk-python/src/supermemory_openai/utils.py      | 66
3 files changed, 99 insertions, 25 deletions
diff --git a/packages/openai-sdk-python/src/supermemory_openai/__init__.py b/packages/openai-sdk-python/src/supermemory_openai/__init__.py
index 8dee08fc..15adf20c 100644
--- a/packages/openai-sdk-python/src/supermemory_openai/__init__.py
+++ b/packages/openai-sdk-python/src/supermemory_openai/__init__.py
@@ -28,6 +28,8 @@ from .utils import (
get_last_user_message,
get_conversation_content,
convert_profile_to_markdown,
+ deduplicate_memories,
+ DeduplicatedMemories,
)
from .exceptions import (
@@ -64,6 +66,8 @@ __all__ = [
"get_last_user_message",
"get_conversation_content",
"convert_profile_to_markdown",
+ "deduplicate_memories",
+ "DeduplicatedMemories",
# Exceptions
"SupermemoryError",
"SupermemoryConfigurationError",
diff --git a/packages/openai-sdk-python/src/supermemory_openai/middleware.py b/packages/openai-sdk-python/src/supermemory_openai/middleware.py
index a21492ae..e2399bb6 100644
--- a/packages/openai-sdk-python/src/supermemory_openai/middleware.py
+++ b/packages/openai-sdk-python/src/supermemory_openai/middleware.py
@@ -18,6 +18,7 @@ from .utils import (
get_last_user_message,
get_conversation_content,
convert_profile_to_markdown,
+ deduplicate_memories,
)
from .exceptions import (
SupermemoryConfigurationError,
@@ -119,8 +120,11 @@ async def add_system_prompt(
container_tag, query_text, api_key
)
- memory_count_static = len(memories_response.profile.get("static", []))
- memory_count_dynamic = len(memories_response.profile.get("dynamic", []))
+ profile = memories_response.profile or {}
+ search_results_data = memories_response.search_results or {}
+ memory_count_static = len(profile.get("static", []))
+ memory_count_dynamic = len(profile.get("dynamic", []))
+ memory_count_search = len(search_results_data.get("results", []))
logger.info(
"Memory search completed",
@@ -133,39 +137,39 @@ async def add_system_prompt(
},
)
+ deduplicated = deduplicate_memories(
+ static=profile.get("static", []),
+ dynamic=profile.get("dynamic", []),
+ search_results=search_results_data.get("results", []),
+ )
+
+ logger.debug(
+ "Memory deduplication completed",
+ {
+ "static": {"original": memory_count_static, "deduplicated": len(deduplicated.static)},
+ "dynamic": {"original": memory_count_dynamic, "deduplicated": len(deduplicated.dynamic)},
+ "search_results": {"original": memory_count_search, "deduplicated": len(deduplicated.search_results)},
+ },
+ )
+
profile_data = ""
if mode != "query":
profile_data = convert_profile_to_markdown(
{
"profile": {
- "static": [
- item.get("memory", "") if isinstance(item, dict) else str(item)
- for item in memories_response.profile.get("static", [])
- ],
- "dynamic": [
- item.get("memory", "") if isinstance(item, dict) else str(item)
- for item in memories_response.profile.get("dynamic", [])
- ],
- },
- "searchResults": {
- "results": [
- {"memory": item.get("memory", "") if isinstance(item, dict) else str(item)}
- for item in memories_response.search_results.get("results", [])
- ],
+ "static": deduplicated.static,
+ "dynamic": deduplicated.dynamic,
},
+ "searchResults": {"results": []},
}
)
search_results_memories = ""
- if mode != "profile":
- search_results = memories_response.search_results.get("results", [])
- if search_results:
- search_results_memories = (
- f"Search results for user's recent message: \n"
- + "\n".join(
- f"- {result.get('memory', '') if isinstance(result, dict) else str(result)}" for result in search_results
- )
- )
+ if mode != "profile" and deduplicated.search_results:
+ search_results_memories = (
+ "Search results for user's recent message: \n"
+ + "\n".join(f"- {memory}" for memory in deduplicated.search_results)
+ )
memories = f"{profile_data}\n{search_results_memories}".strip()
diff --git a/packages/openai-sdk-python/src/supermemory_openai/utils.py b/packages/openai-sdk-python/src/supermemory_openai/utils.py
index d9ea6845..6858e09e 100644
--- a/packages/openai-sdk-python/src/supermemory_openai/utils.py
+++ b/packages/openai-sdk-python/src/supermemory_openai/utils.py
@@ -187,6 +187,72 @@ def get_conversation_content(
return "\n\n".join(conversation_parts)
+class DeduplicatedMemories:
+ """Deduplicated memory strings organized by source."""
+
+ def __init__(self, static: list[str], dynamic: list[str], search_results: list[str]):
+ self.static = static
+ self.dynamic = dynamic
+ self.search_results = search_results
+
+
+def deduplicate_memories(
+ static: Optional[list[Any]] = None,
+ dynamic: Optional[list[Any]] = None,
+ search_results: Optional[list[Any]] = None,
+) -> DeduplicatedMemories:
+ """
+ Deduplicates memory items across sources. Priority: Static > Dynamic > Search Results.
+ Same memory appearing in multiple sources is kept only in the highest-priority source.
+ """
+ static_items = static or []
+ dynamic_items = dynamic or []
+ search_items = search_results or []
+
+ def extract_memory_text(item: Any) -> Optional[str]:
+ if item is None:
+ return None
+ if isinstance(item, dict):
+ memory = item.get("memory")
+ if isinstance(memory, str):
+ trimmed = memory.strip()
+ return trimmed if trimmed else None
+ return None
+ if isinstance(item, str):
+ trimmed = item.strip()
+ return trimmed if trimmed else None
+ return None
+
+ static_memories: list[str] = []
+ seen_memories: set[str] = set()
+
+ for item in static_items:
+ memory = extract_memory_text(item)
+ if memory is not None:
+ static_memories.append(memory)
+ seen_memories.add(memory)
+
+ dynamic_memories: list[str] = []
+ for item in dynamic_items:
+ memory = extract_memory_text(item)
+ if memory is not None and memory not in seen_memories:
+ dynamic_memories.append(memory)
+ seen_memories.add(memory)
+
+ search_memories: list[str] = []
+ for item in search_items:
+ memory = extract_memory_text(item)
+ if memory is not None and memory not in seen_memories:
+ search_memories.append(memory)
+ seen_memories.add(memory)
+
+ return DeduplicatedMemories(
+ static=static_memories,
+ dynamic=dynamic_memories,
+ search_results=search_memories,
+ )
+
+
def convert_profile_to_markdown(data: dict[str, Any]) -> str:
"""
Convert profile data to markdown based on profile.static and profile.dynamic properties.