diff options
| field | value | date |
|---|---|---|
| author | Dhravya Shah <[email protected]> | 2025-01-26 12:39:31 -0700 |
| committer | Dhravya Shah <[email protected]> | 2025-01-26 12:39:31 -0700 |
| commit | 119280aeb6e1fcb9a3ecce112f95904306daf93c (patch) | |
| tree | 7372f496c531b7161aa9be6baaa98434cbffd445 /apps | |
| parent | import tools: CSV and markdown (obsidian) (diff) | |
| download | supermemory-119280aeb6e1fcb9a3ecce112f95904306daf93c.tar.xz supermemory-119280aeb6e1fcb9a3ecce112f95904306daf93c.zip | |
change embedding model
Diffstat (limited to 'apps')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | apps/backend/package.json | 2 |
| -rw-r--r-- | apps/backend/src/routes/actions.ts | 30 |
| -rw-r--r-- | apps/backend/src/types.ts | 1 |
| -rw-r--r-- | apps/backend/src/workflow/index.ts | 32 |
| -rw-r--r-- | apps/backend/wrangler.toml | 3 |
5 files changed, 24 insertions, 44 deletions
diff --git a/apps/backend/package.json b/apps/backend/package.json index 7b961b8d..5d5b1092 100644 --- a/apps/backend/package.json +++ b/apps/backend/package.json @@ -24,7 +24,7 @@ "zod": "^3.23.8" }, "devDependencies": { - "@cloudflare/workers-types": "^4.20240925.0" + "@cloudflare/workers-types": "^4.20250124.3" }, "overrides": { "iron-webcrypto": "^1.2.1" diff --git a/apps/backend/src/routes/actions.ts b/apps/backend/src/routes/actions.ts index 4de1d339..b9e6a4c2 100644 --- a/apps/backend/src/routes/actions.ts +++ b/apps/backend/src/routes/actions.ts @@ -139,9 +139,7 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>() } const openAi = openai(c.env); - const model = openAi.embedding("text-embedding-3-large", { - dimensions: 1536, - }); + let lastUserMessage = coreMessages .reverse() @@ -155,10 +153,13 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>() console.log("querytext", queryText); + if (!queryText ||queryText.length === 0) { + return c.json({ error: "Failed to generate embedding for query" }, 500); + } + const embedStart = performance.now(); - const { embedding } = await embed({ - model, - value: queryText, + const { data: embedding } = await c.env.AI.run("@cf/baai/bge-base-en-v1.5", { + text: queryText, }); const embedEnd = performance.now(); console.log(`Embedding generation took ${embedEnd - embedStart}ms`); @@ -174,7 +175,7 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>() const similarity = sql<number>`1 - (${cosineDistance( chunk.embeddings, - embedding + embedding[0] )})`; // Find similar chunks using cosine similarity @@ -605,15 +606,14 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>() return c.json({ error: "Unauthorized" }, 401); } - const openAi = openai(c.env, c.env.OPEN_AI_API_KEY); - try { // Generate embedding for the search query - const model = openAi.embedding("text-embedding-3-small"); - const embeddings = await embed({ model, value: query }); + const 
embeddings = await c.env.AI.run("@cf/baai/bge-base-en-v1.5", { + text: query, + }); - if (!embeddings.embedding) { + if (!embeddings.data) { return c.json( { error: "Failed to generate embedding for query" }, 500 @@ -629,7 +629,7 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>() createdAt: documents.createdAt, chunkContent: chunk.textContent, similarity: sql<number>`1 - (embeddings <=> ${JSON.stringify( - embeddings.embedding + embeddings.data[0] )}::vector)`, }) .from(chunk) @@ -637,11 +637,11 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>() .where( and( eq(documents.userId, user.id), - sql`1 - (embeddings <=> ${JSON.stringify(embeddings.embedding)}::vector) >= ${threshold}` + sql`1 - (embeddings <=> ${JSON.stringify(embeddings.data[0])}::vector) >= ${threshold}` ) ) .orderBy( - sql`1 - (embeddings <=> ${JSON.stringify(embeddings.embedding)}::vector) desc` + sql`1 - (embeddings <=> ${JSON.stringify(embeddings.data[0])}::vector) desc` ) //figure out a better way to do order by my brain isn't working at this time. but youcan't do vector search twice .limit(limit); diff --git a/apps/backend/src/types.ts b/apps/backend/src/types.ts index 31fd7969..0323a9e6 100644 --- a/apps/backend/src/types.ts +++ b/apps/backend/src/types.ts @@ -46,6 +46,7 @@ export type Env = { }; ENCRYPTED_TOKENS: KVNamespace; RATE_LIMITER: DurableObjectNamespace<DurableObjectRateLimiter>; + AI: Ai }; export type JobData = { diff --git a/apps/backend/src/workflow/index.ts b/apps/backend/src/workflow/index.ts index 41c73015..24a1ff3e 100644 --- a/apps/backend/src/workflow/index.ts +++ b/apps/backend/src/workflow/index.ts @@ -142,36 +142,12 @@ export class ContentWorkflow extends WorkflowEntrypoint<Env, WorkflowParams> { ); } - const model = openai(this.env, this.env.OPEN_AI_API_KEY).embedding( - "text-embedding-3-large", - { - dimensions: 1536, - } - ); - - // Step 3: Create chunks from the content. 
- const embeddings = await step.do( - "create embeddings", - { - retries: { - backoff: "constant", - delay: "10 seconds", - limit: 7, - }, - timeout: "2 minutes", - }, - async () => { - const { embeddings }: { embeddings: Array<number>[] } = await embedMany( - { - model, - values: chunked, - } - ); - return embeddings; - } - ); + const {data: embeddings} = await this.env.AI.run("@cf/baai/bge-base-en-v1.5", { + text: chunked, + }); + // Step 4: Prepare chunk data const chunkInsertData: ChunkInsert[] = await step.do( "prepare chunk data", diff --git a/apps/backend/wrangler.toml b/apps/backend/wrangler.toml index 6b7d053d..0aa36a1f 100644 --- a/apps/backend/wrangler.toml +++ b/apps/backend/wrangler.toml @@ -14,6 +14,9 @@ enabled = true [placement] mode = "smart" +[ai] +binding = "AI" + [[workflows]] name = "content-workflow-supermemory" binding = "CONTENT_WORKFLOW" |