diff options
| author | Dhravya Shah <[email protected]> | 2025-03-17 15:18:48 -0700 |
|---|---|---|
| committer | Dhravya Shah <[email protected]> | 2025-03-17 15:18:48 -0700 |
| commit | 8931c43295d593ffbec8431f5a2e4c97f02f5480 (patch) | |
| tree | 39cb64a792cf1b24fe0b9487990dcf52451d6698 /apps/backend/src | |
| parent | fix: tweets view (diff) | |
| parent | Merge pull request #339 from supermemoryai/hybrid-rag (diff) | |
| download | supermemory-8931c43295d593ffbec8431f5a2e4c97f02f5480.tar.xz supermemory-8931c43295d593ffbec8431f5a2e4c97f02f5480.zip | |
Merge branch 'main' of github.com:supermemoryai/supermemory
Diffstat (limited to 'apps/backend/src')
| -rw-r--r-- | apps/backend/src/components/landing.tsx | 2 |
| -rw-r--r-- | apps/backend/src/routes/actions.ts | 109 |
2 files changed, 40 insertions, 71 deletions
diff --git a/apps/backend/src/components/landing.tsx b/apps/backend/src/components/landing.tsx index ae00a450..87bbdb69 100644 --- a/apps/backend/src/components/landing.tsx +++ b/apps/backend/src/components/landing.tsx @@ -194,7 +194,7 @@ export function LandingPage() { </li> <li> <a - href="https://discord.gg/supermemory" + href="https://discord.gg/b3BgKWpbtR" target="_blank" className="hover:text-white" rel="noreferrer" diff --git a/apps/backend/src/routes/actions.ts b/apps/backend/src/routes/actions.ts index dd9311ed..706b87ba 100644 --- a/apps/backend/src/routes/actions.ts +++ b/apps/backend/src/routes/actions.ts @@ -131,10 +131,6 @@ const actions = fromHono(new Hono<{ Variables: Variables; Bindings: Env }>()) // Pre-compute the vector similarity expression const vectorSimilarity = sql<number>`1 - (embeddings <=> ${JSON.stringify(embedding[0])}::vector)`; - const textSearchRank = sql<number>`ts_rank_cd( - to_tsvector('english', coalesce(${chunk.textContent}, '')), - plainto_tsquery('english', ${queryText}) - )`; // Get matching chunks with document info const matchingChunks = await db @@ -145,7 +141,6 @@ const actions = fromHono(new Hono<{ Variables: Variables; Bindings: Env }>()) orderInDocument: chunk.orderInDocument, metadata: chunk.metadata, similarity: vectorSimilarity, - textRank: textSearchRank, // Document fields docId: documents.id, docUuid: documents.uuid, @@ -159,16 +154,10 @@ const actions = fromHono(new Hono<{ Variables: Variables; Bindings: Env }>()) .from(chunk) .innerJoin(documents, eq(chunk.documentId, documents.id)) .where( - and(eq(documents.userId, user.id), sql`${vectorSimilarity} > 0.5`) + and(eq(documents.userId, user.id), sql`${vectorSimilarity} > 0.3`) ) - .orderBy( - desc(sql<number>`( - 0.6 * ${vectorSimilarity} + - 0.25 * ${textSearchRank} + - 0.15 * (1.0 / (1.0 + extract(epoch from age(${documents.updatedAt})) / (90 * 24 * 60 * 60))) - )::float`) - ) - .limit(15); + .orderBy(desc(vectorSimilarity)) + .limit(25); // Get unique 
document IDs from matching chunks const uniqueDocIds = [ @@ -201,9 +190,9 @@ const actions = fromHono(new Hono<{ Variables: Variables; Bindings: Env }>()) const docChunks = chunksByDocument.get(match.documentId) || []; const matchIndex = docChunks.findIndex((c) => c.id === match.chunkId); - // Get surrounding chunks (1 before and 1 after) - const start = Math.max(0, matchIndex - 1); - const end = Math.min(docChunks.length, matchIndex + 2); + // Get surrounding chunks (2 before and 2 after for more context) + const start = Math.max(0, matchIndex - 2); + const end = Math.min(docChunks.length, matchIndex + 3); const relevantChunks = docChunks.slice(start, end); return { @@ -224,34 +213,23 @@ const actions = fromHono(new Hono<{ Variables: Variables; Bindings: Env }>()) }; }); - // Remove duplicates based on document ID - const uniqueResults = contextualResults.reduce( - (acc, current) => { - const existingDoc = acc.find((doc) => doc.id === current.id); - if (!existingDoc) { - acc.push(current); - } else if (current.similarity > existingDoc.similarity) { - // Replace if current match is better - const index = acc.findIndex((doc) => doc.id === current.id); - acc[index] = current; - } - return acc; - }, - [] as typeof contextualResults - ); + // Sort by similarity and take top results + const topResults = contextualResults + .sort((a, b) => b.similarity - a.similarity) + .slice(0, 10); - data.appendMessageAnnotation(uniqueResults); + data.appendMessageAnnotation(topResults); if (lastUserMessage) { lastUserMessage.content = typeof lastUserMessage.content === "string" ? 
lastUserMessage.content + - `<context>${JSON.stringify(uniqueResults)}</context>` + `<context>${JSON.stringify(topResults)}</context>` : [ ...lastUserMessage.content, { type: "text", - text: `<context>${JSON.stringify(uniqueResults)}</context>`, + text: `<context>${JSON.stringify(topResults)}</context>`, }, ]; coreMessages[coreMessages.length - 1] = lastUserMessage; @@ -310,7 +288,7 @@ const actions = fromHono(new Hono<{ Variables: Variables; Bindings: Env }>()) role: "assistant", content: completion.text + - `<context>[${JSON.stringify(uniqueResults)}]</context>`, + `<context>[${JSON.stringify(topResults)}]</context>`, }, ]; @@ -602,14 +580,10 @@ const actions = fromHono(new Hono<{ Variables: Variables; Bindings: Env }>()) ); } - // Pre-compute the vector similarity expression to avoid multiple calculations + // Pre-compute the vector similarity expression const vectorSimilarity = sql<number>`1 - (embeddings <=> ${JSON.stringify(embeddings.data[0])}::vector)`; - const textSearchRank = sql<number>`ts_rank_cd( - to_tsvector('english', coalesce(${chunk.textContent}, '')), - plainto_tsquery('english', ${query}) - )`; - // First get the top matching chunks + // Get matching chunks const results = await db .select({ chunkId: chunk.id, @@ -618,7 +592,6 @@ const actions = fromHono(new Hono<{ Variables: Variables; Bindings: Env }>()) orderInDocument: chunk.orderInDocument, metadata: chunk.metadata, similarity: vectorSimilarity, - textRank: textSearchRank, // Document fields docUuid: documents.uuid, docContent: documents.content, @@ -657,13 +630,7 @@ const actions = fromHono(new Hono<{ Variables: Variables; Bindings: Env }>()) : []) ) ) - .orderBy( - desc(sql<number>`( - 0.6 * ${vectorSimilarity} + - 0.25 * ${textSearchRank} + - 0.15 * (1.0 / (1.0 + extract(epoch from age(${documents.updatedAt})) / (90 * 24 * 60 * 60))) - )::float`) - ) + .orderBy(desc(vectorSimilarity)) .limit(limit); // Group results by document and take the best matching chunk @@ -679,26 +646,28 @@ const 
actions = fromHono(new Hono<{ Variables: Variables; Bindings: Env }>()) } // Convert back to array and format response - const finalResults = Array.from(documentResults.values()).map((r) => ({ - id: r.documentId, - uuid: r.docUuid, - content: r.docContent, - type: r.docType, - url: r.docUrl, - title: r.docTitle, - createdAt: r.docCreatedAt, - updatedAt: r.docUpdatedAt, - userId: r.docUserId, - description: r.docDescription, - ogImage: r.docOgImage, - similarity: Number(r.similarity.toFixed(4)), - matchingChunk: { - id: r.chunkId, - content: r.textContent, - orderInDocument: r.orderInDocument, - metadata: r.metadata, - }, - })); + const finalResults = Array.from(documentResults.values()) + .sort((a, b) => b.similarity - a.similarity) + .map((r) => ({ + id: r.documentId, + uuid: r.docUuid, + content: r.docContent, + type: r.docType, + url: r.docUrl, + title: r.docTitle, + createdAt: r.docCreatedAt, + updatedAt: r.docUpdatedAt, + userId: r.docUserId, + description: r.docDescription, + ogImage: r.docOgImage, + similarity: Number(r.similarity.toFixed(4)), + matchingChunk: { + id: r.chunkId, + content: r.textContent, + orderInDocument: r.orderInDocument, + metadata: r.metadata, + }, + })); return c.json({ results: finalResults }); } catch (error) { |