aboutsummaryrefslogtreecommitdiff
path: root/apps/backend/src
diff options
context:
space:
mode:
authorDhravya Shah <[email protected]>2025-01-26 12:39:31 -0700
committerDhravya Shah <[email protected]>2025-01-26 12:39:31 -0700
commit119280aeb6e1fcb9a3ecce112f95904306daf93c (patch)
tree7372f496c531b7161aa9be6baaa98434cbffd445 /apps/backend/src
parentimport tools: CSV and markdown (obsidian) (diff)
downloadsupermemory-119280aeb6e1fcb9a3ecce112f95904306daf93c.tar.xz
supermemory-119280aeb6e1fcb9a3ecce112f95904306daf93c.zip
change embedding model
Diffstat (limited to 'apps/backend/src')
-rw-r--r--apps/backend/src/routes/actions.ts30
-rw-r--r--apps/backend/src/types.ts1
-rw-r--r--apps/backend/src/workflow/index.ts32
3 files changed, 20 insertions, 43 deletions
diff --git a/apps/backend/src/routes/actions.ts b/apps/backend/src/routes/actions.ts
index 4de1d339..b9e6a4c2 100644
--- a/apps/backend/src/routes/actions.ts
+++ b/apps/backend/src/routes/actions.ts
@@ -139,9 +139,7 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>()
}
const openAi = openai(c.env);
- const model = openAi.embedding("text-embedding-3-large", {
- dimensions: 1536,
- });
+
let lastUserMessage = coreMessages
.reverse()
@@ -155,10 +153,13 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>()
console.log("querytext", queryText);
+ if (!queryText || queryText.length === 0) {
+ return c.json({ error: "Failed to generate embedding for query" }, 500);
+ }
+
const embedStart = performance.now();
- const { embedding } = await embed({
- model,
- value: queryText,
+ const { data: embedding } = await c.env.AI.run("@cf/baai/bge-base-en-v1.5", {
+ text: queryText,
});
const embedEnd = performance.now();
console.log(`Embedding generation took ${embedEnd - embedStart}ms`);
@@ -174,7 +175,7 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>()
const similarity = sql<number>`1 - (${cosineDistance(
chunk.embeddings,
- embedding
+ embedding[0]
)})`;
// Find similar chunks using cosine similarity
@@ -605,15 +606,14 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>()
return c.json({ error: "Unauthorized" }, 401);
}
- const openAi = openai(c.env, c.env.OPEN_AI_API_KEY);
-
try {
// Generate embedding for the search query
- const model = openAi.embedding("text-embedding-3-small");
- const embeddings = await embed({ model, value: query });
+ const embeddings = await c.env.AI.run("@cf/baai/bge-base-en-v1.5", {
+ text: query,
+ });
- if (!embeddings.embedding) {
+ if (!embeddings.data) {
return c.json(
{ error: "Failed to generate embedding for query" },
500
@@ -629,7 +629,7 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>()
createdAt: documents.createdAt,
chunkContent: chunk.textContent,
similarity: sql<number>`1 - (embeddings <=> ${JSON.stringify(
- embeddings.embedding
+ embeddings.data[0]
)}::vector)`,
})
.from(chunk)
@@ -637,11 +637,11 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>()
.where(
and(
eq(documents.userId, user.id),
- sql`1 - (embeddings <=> ${JSON.stringify(embeddings.embedding)}::vector) >= ${threshold}`
+ sql`1 - (embeddings <=> ${JSON.stringify(embeddings.data[0])}::vector) >= ${threshold}`
)
)
.orderBy(
- sql`1 - (embeddings <=> ${JSON.stringify(embeddings.embedding)}::vector) desc`
+ sql`1 - (embeddings <=> ${JSON.stringify(embeddings.data[0])}::vector) desc`
) // TODO: figure out a better way to do the ORDER BY — you can't run the vector search twice
.limit(limit);
diff --git a/apps/backend/src/types.ts b/apps/backend/src/types.ts
index 31fd7969..0323a9e6 100644
--- a/apps/backend/src/types.ts
+++ b/apps/backend/src/types.ts
@@ -46,6 +46,7 @@ export type Env = {
};
ENCRYPTED_TOKENS: KVNamespace;
RATE_LIMITER: DurableObjectNamespace<DurableObjectRateLimiter>;
+ AI: Ai;
};
export type JobData = {
diff --git a/apps/backend/src/workflow/index.ts b/apps/backend/src/workflow/index.ts
index 41c73015..24a1ff3e 100644
--- a/apps/backend/src/workflow/index.ts
+++ b/apps/backend/src/workflow/index.ts
@@ -142,36 +142,12 @@ export class ContentWorkflow extends WorkflowEntrypoint<Env, WorkflowParams> {
);
}
- const model = openai(this.env, this.env.OPEN_AI_API_KEY).embedding(
- "text-embedding-3-large",
- {
- dimensions: 1536,
- }
- );
-
- // Step 3: Create chunks from the content.
- const embeddings = await step.do(
- "create embeddings",
- {
- retries: {
- backoff: "constant",
- delay: "10 seconds",
- limit: 7,
- },
- timeout: "2 minutes",
- },
- async () => {
- const { embeddings }: { embeddings: Array<number>[] } = await embedMany(
- {
- model,
- values: chunked,
- }
- );
- return embeddings;
- }
- );
+ const { data: embeddings } = await this.env.AI.run("@cf/baai/bge-base-en-v1.5", {
+ text: chunked,
+ });
+
// Step 4: Prepare chunk data
const chunkInsertData: ChunkInsert[] = await step.do(
"prepare chunk data",