diff options
| author | Dhravya Shah <[email protected]> | 2025-02-18 21:20:52 -0700 |
|---|---|---|
| committer | GitHub <[email protected]> | 2025-02-18 21:20:52 -0700 |
| commit | 0c6db45d32661921445c8e7e9f070da36cc8b2b7 (patch) | |
| tree | 4e00c1a9aef015f80e00542537a7c66f98740812 /apps | |
| parent | better space selector (diff) | |
| parent | implemented proper hybrid search with date relevancy into consideration (diff) | |
| download | supermemory-0c6db45d32661921445c8e7e9f070da36cc8b2b7.tar.xz supermemory-0c6db45d32661921445c8e7e9f070da36cc8b2b7.zip | |
Merge pull request #334 from supermemoryai/hybrid-rag
Hybrid rag
Diffstat (limited to 'apps')
| -rw-r--r-- | apps/backend/drizzle/0016_good_deathbird.sql | 7 | ||||
| -rw-r--r-- | apps/backend/drizzle/meta/0016_snapshot.json | 1222 | ||||
| -rw-r--r-- | apps/backend/drizzle/meta/_journal.json | 7 | ||||
| -rw-r--r-- | apps/backend/src/routes/actions.ts | 67 | ||||
| -rw-r--r-- | apps/backend/src/workflow/index.ts | 22 |
5 files changed, 1301 insertions, 24 deletions
diff --git a/apps/backend/drizzle/0016_good_deathbird.sql b/apps/backend/drizzle/0016_good_deathbird.sql new file mode 100644 index 00000000..7c6505de --- /dev/null +++ b/apps/backend/drizzle/0016_good_deathbird.sql @@ -0,0 +1,7 @@ +ALTER TABLE "chunks" ALTER COLUMN "embeddings" SET DATA TYPE vector(768);--> statement-breakpoint +CREATE INDEX IF NOT EXISTS "documents_search_idx" ON "documents" USING gin (( + setweight(to_tsvector('english', coalesce("content", '')),'A') || + setweight(to_tsvector('english', coalesce("title", '')),'B') || + setweight(to_tsvector('english', coalesce("description", '')),'C') || + setweight(to_tsvector('english', coalesce("url", '')),'D') + ));
\ No newline at end of file diff --git a/apps/backend/drizzle/meta/0016_snapshot.json b/apps/backend/drizzle/meta/0016_snapshot.json new file mode 100644 index 00000000..2020c691 --- /dev/null +++ b/apps/backend/drizzle/meta/0016_snapshot.json @@ -0,0 +1,1222 @@ +{ + "id": "23a39e70-a9c2-44cd-a3fb-22b19efef79e", + "prevId": "8529db1b-2d33-49e0-a413-f517eae7e4e4", + "version": "7", + "dialect": "postgresql", + "tables": { + "public.chat_threads": { + "name": "chat_threads", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "bigserial", + "primaryKey": true, + "notNull": true + }, + "uuid": { + "name": "uuid", + "type": "varchar(36)", + "primaryKey": false, + "notNull": true + }, + "firstMessage": { + "name": "firstMessage", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "messages": { + "name": "messages", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "chat_threads_user_idx": { + "name": "chat_threads_user_idx", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "chat_threads_user_id_users_id_fk": { + "name": "chat_threads_user_id_users_id_fk", + "tableFrom": "chat_threads", + "tableTo": "users", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "chat_threads_uuid_unique": { + "name": "chat_threads_uuid_unique", + "nullsNotDistinct": false, + "columns": [ + "uuid" + ] + } + } + }, + "public.chunks": { + "name": "chunks", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "document_id": { + "name": "document_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "text_content": { + "name": "text_content", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "order_in_document": { + "name": "order_in_document", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "embeddings": { + "name": "embeddings", + "type": "vector(768)", + "primaryKey": false, + "notNull": false + }, + "metadata": { + "name": "metadata", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "chunk_id_idx": { + "name": "chunk_id_idx", + "columns": [ + { + "expression": "id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "chunk_document_id_idx": { + "name": "chunk_document_id_idx", + "columns": [ + { + "expression": "document_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "embeddingIndex": { + "name": "embeddingIndex", + "columns": [ + { + "expression": "embeddings", + "isExpression": false, + "asc": true, + "nulls": "last", + "opclass": "vector_cosine_ops" + } + ], + "isUnique": false, + "concurrently": false, + "method": "hnsw", + "with": {} + } + }, + "foreignKeys": { + "chunks_document_id_documents_id_fk": { + "name": "chunks_document_id_documents_id_fk", + "tableFrom": "chunks", + "tableTo": "documents", + "columnsFrom": [ + "document_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "public.content_to_space": { + "name": "content_to_space", + "schema": "", + "columns": { + "content_id": { + "name": "content_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "space_id": { + "name": "space_id", + "type": "integer", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "content_id_space_id_unique": { + "name": "content_id_space_id_unique", + "columns": [ + { + "expression": "content_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "space_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "content_to_space_content_id_documents_id_fk": { + "name": "content_to_space_content_id_documents_id_fk", + "tableFrom": "content_to_space", + "tableTo": "documents", + "columnsFrom": [ + "content_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "content_to_space_space_id_spaces_id_fk": { + "name": "content_to_space_space_id_spaces_id_fk", + "tableFrom": "content_to_space", + "tableTo": "spaces", + "columnsFrom": [ + "space_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "public.document_type": { + "name": "document_type", + "schema": "", + "columns": { + "type": { + "name": "type", + "type": "text", + "primaryKey": true, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "public.documents": { + "name": "documents", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "bigserial", + "primaryKey": true, + "notNull": true + }, + "uuid": { + "name": "uuid", + "type": "varchar(36)", + "primaryKey": false, + "notNull": true + }, + "url": { + "name": "url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "type": { + "name": "type", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "og_image": { + "name": "og_image", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "raw": { + "name": "raw", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "user_id": { + "name": "user_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "is_successfully_processed": { + "name": "is_successfully_processed", + "type": "boolean", + "primaryKey": false, + "notNull": false, + "default": false + }, + "error_message": { + "name": "error_message", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "content_hash": { + "name": "content_hash", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "document_id_idx": { + "name": "document_id_idx", + "columns": [ + { + "expression": "id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "document_uuid_idx": { + "name": "document_uuid_idx", + "columns": [ + { + "expression": "uuid", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "document_type_idx": { + "name": "document_type_idx", + "columns": [ + { + "expression": "type", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "document_raw_user_idx": { + "name": "document_raw_user_idx", + "columns": [ + { + "expression": "raw", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "documents_search_idx": { + "name": "documents_search_idx", + "columns": [ + { + "expression": "(\n setweight(to_tsvector('english', coalesce(\"content\", '')),'A') ||\n setweight(to_tsvector('english', coalesce(\"title\", '')),'B') ||\n setweight(to_tsvector('english', coalesce(\"description\", '')),'C') ||\n setweight(to_tsvector('english', coalesce(\"url\", '')),'D')\n )", + "asc": true, + "isExpression": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "gin", + "with": {} + } + }, + "foreignKeys": { + "documents_type_document_type_type_fk": { + "name": "documents_type_document_type_type_fk", + "tableFrom": "documents", + "tableTo": "document_type", + "columnsFrom": [ + "type" + ], + "columnsTo": [ + "type" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "documents_user_id_users_id_fk": { + "name": "documents_user_id_users_id_fk", + "tableFrom": "documents", + "tableTo": "users", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "documents_uuid_unique": { + "name": "documents_uuid_unique", + "nullsNotDistinct": false, + "columns": [ + "uuid" + ] + } + } + }, + "public.job": { + "name": "job", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "url": { + "name": "url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "status": { + "name": "status", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "attempts": { + "name": "attempts", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "lastAttemptAt": { + "name": "lastAttemptAt", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "error": { + "name": "error", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "user_id_url_idx": { + "name": "user_id_url_idx", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "url", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "job_user_id_users_id_fk": { + "name": "job_user_id_users_id_fk", + "tableFrom": "job", + "tableTo": "users", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "public.saved_spaces": { + "name": "saved_spaces", + "schema": "", + "columns": { + "user_id": { + "name": "user_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "space_id": { + "name": "space_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "saved_at": { + "name": "saved_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "saved_spaces_user_space_idx": { + "name": "saved_spaces_user_space_idx", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "space_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "saved_spaces_user_id_users_id_fk": { + "name": "saved_spaces_user_id_users_id_fk", + "tableFrom": "saved_spaces", + "tableTo": "users", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "saved_spaces_space_id_spaces_id_fk": { + "name": "saved_spaces_space_id_spaces_id_fk", + "tableFrom": "saved_spaces", + "tableTo": "spaces", + "columnsFrom": [ + "space_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "public.space_access": { + "name": "space_access", + "schema": "", + "columns": { + "space_id": { + "name": "space_id", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "user_email": { + "name": "user_email", + "type": "varchar(512)", + "primaryKey": false, + "notNull": false + }, + "status": { + "name": "status", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "access_type": { + "name": "access_type", + "type": "text", + "primaryKey": false, + "notNull": true, + "default": "'read'" + } + }, + "indexes": { + "space_id_user_email_idx": { + "name": "space_id_user_email_idx", + "columns": [ + { + "expression": "space_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "user_email", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "space_access_space_id_spaces_id_fk": { + "name": "space_access_space_id_spaces_id_fk", + "tableFrom": "space_access", + "tableTo": "spaces", + "columnsFrom": [ + "space_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "space_access_status_space_access_status_status_fk": { + "name": "space_access_status_space_access_status_status_fk", + "tableFrom": "space_access", + "tableTo": "space_access_status", + "columnsFrom": [ + "status" + ], + "columnsTo": [ + "status" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "public.space_access_status": { + "name": "space_access_status", + "schema": "", + "columns": { + "status": { + "name": "status", + "type": "text", + "primaryKey": true, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "public.space_members": { + "name": "space_members", + "schema": "", + "columns": { + "spaceId": { + "name": "spaceId", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "integer", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "space_members_space_user_idx": { + "name": "space_members_space_user_idx", + "columns": [ + { + "expression": "spaceId", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "space_members_spaceId_users_id_fk": { + "name": "space_members_spaceId_users_id_fk", + "tableFrom": "space_members", + "tableTo": "users", + "columnsFrom": [ + "spaceId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "restrict", + "onUpdate": "no action" + }, + "space_members_user_id_users_id_fk": { + "name": "space_members_user_id_users_id_fk", + "tableFrom": "space_members", + "tableTo": "users", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "restrict", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "public.spaces": { + "name": "spaces", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "bigserial", + "primaryKey": true, + "notNull": true + }, + "uuid": { + "name": "uuid", + "type": "varchar(36)", + "primaryKey": false, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "ownerId": { + "name": "ownerId", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "is_public": { + "name": "is_public", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + } + }, + "indexes": { + "spaces_id_idx": { + "name": "spaces_id_idx", + "columns": [ + { + "expression": "id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "spaces_owner_id_idx": { + "name": "spaces_owner_id_idx", + "columns": [ + { + "expression": "ownerId", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "spaces_name_idx": { + "name": "spaces_name_idx", + "columns": [ + { + "expression": "name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "spaces_uuid_unique": { + "name": "spaces_uuid_unique", + "nullsNotDistinct": false, + "columns": [ + "uuid" + ] + } + } + }, + "public.users": { + "name": "users", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "uuid": { + "name": "uuid", + "type": "varchar(36)", + "primaryKey": false, + "notNull": true + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "first_name": { + "name": "first_name", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "last_name": { + "name": "last_name", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "email_verified": { + "name": "email_verified", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "profile_picture_url": { + "name": "profile_picture_url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "telegram_id": { + "name": "telegram_id", + "type": "varchar(255)", + "primaryKey": false, + "notNull": false + }, + "has_onboarded": { + "name": "has_onboarded", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "last_api_key_generated_at": { + "name": "last_api_key_generated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false, + "default": "now()" + }, + "stripe_customer_id": { + "name": "stripe_customer_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "tier": { + "name": "tier", + "type": "text", + "primaryKey": false, + "notNull": true, + "default": "'free'" + } + }, + "indexes": { + "users_id_idx": { + "name": "users_id_idx", + "columns": [ + { + "expression": "id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "users_uuid_idx": { + "name": "users_uuid_idx", + "columns": [ + { + "expression": "uuid", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "users_email_idx": { + "name": "users_email_idx", + "columns": [ + { + "expression": "email", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "users_name_idx": { + "name": "users_name_idx", + "columns": [ + { + "expression": "first_name", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "last_name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "users_created_at_idx": { + "name": "users_created_at_idx", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "users_telegram_id_idx": { + "name": "users_telegram_id_idx", + "columns": [ + { + "expression": "telegram_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "users_uuid_unique": { + "name": "users_uuid_unique", + "nullsNotDistinct": false, + "columns": [ + "uuid" + ] + }, + "users_email_unique": { + "name": "users_email_unique", + "nullsNotDistinct": false, + "columns": [ + "email" + ] + } + } + }, + "public.waitlist": { + "name": "waitlist", + "schema": "", + "columns": { + "email": { + "name": "email", + "type": "varchar(512)", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + } + }, + "enums": {}, + "schemas": {}, + "sequences": {}, + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + } +}
\ No newline at end of file diff --git a/apps/backend/drizzle/meta/_journal.json b/apps/backend/drizzle/meta/_journal.json index c8cea61a..7f777651 100644 --- a/apps/backend/drizzle/meta/_journal.json +++ b/apps/backend/drizzle/meta/_journal.json @@ -113,6 +113,13 @@ "when": 1737920848112, "tag": "0015_perpetual_mauler", "breakpoints": true + }, + { + "idx": 16, + "version": "7", + "when": 1739937938319, + "tag": "0016_good_deathbird", + "breakpoints": true } ] }
\ No newline at end of file diff --git a/apps/backend/src/routes/actions.ts b/apps/backend/src/routes/actions.ts index c0801ada..0bc26052 100644 --- a/apps/backend/src/routes/actions.ts +++ b/apps/backend/src/routes/actions.ts @@ -88,7 +88,8 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>() apiKey: c.env.BRAINTRUST_API_KEY, }); - const googleClient = wrapAISDKModel(openai(c.env).chat("gpt-4o-mini-2024-07-18")); + const googleClient = wrapAISDKModel( + openai(c.env).chat("gpt-4o-mini-2024-07-18")); // Get last user message and generate embedding in parallel with thread creation let lastUserMessage = coreMessages.findLast((i) => i.role === "user"); @@ -123,9 +124,15 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>() return c.json({ error: "Failed to generate embedding" }, 500); } - // Perform semantic search - const similarity = sql<number>`1 - (${cosineDistance(chunk.embeddings, embedding[0])})`; - + // Pre-compute the vector similarity expression to avoid multiple calculations + const vectorSimilarity = sql<number>`1 - (embeddings <=> ${JSON.stringify(embedding[0])}::vector)`; + const textSearchRank = sql<number>`ts_rank_cd(( + setweight(to_tsvector('english', coalesce(${documents.content}, '')),'A') || + setweight(to_tsvector('english', coalesce(${documents.title}, '')),'B') || + setweight(to_tsvector('english', coalesce(${documents.description}, '')),'C') || + setweight(to_tsvector('english', coalesce(${documents.url}, '')),'D') + ), plainto_tsquery('english', ${queryText}))`; + const finalResults = await db .select({ id: documents.id, @@ -138,12 +145,25 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>() userId: documents.userId, description: documents.description, ogImage: documents.ogImage, + similarity: vectorSimilarity, + textRank: textSearchRank, }) .from(chunk) .innerJoin(documents, eq(chunk.documentId, documents.id)) - .where(and(eq(documents.userId, user.id), sql`${similarity} > 0.4`)) - .orderBy(desc(similarity)) - .limit(5); + .where( + and( + eq(documents.userId, user.id), + sql`${vectorSimilarity} > 0.5` + ) + ) + .orderBy( + desc(sql<number>`( + 0.6 * ${vectorSimilarity} + + 0.25 * ${textSearchRank} + + 0.15 * (1.0 / (1.0 + extract(epoch from age(${documents.updatedAt})) / (90 * 24 * 60 * 60))) + )::float`) + ) + .limit(15); const cleanDocumentsForContext = finalResults.map((d) => ({ title: d.title, @@ -531,24 +551,37 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>() ); } - // Perform semantic search using cosine similarity - const results = await database(c.env.HYPERDRIVE.connectionString) + // Pre-compute the vector similarity expression to avoid multiple calculations + const vectorSimilarity = sql<number>`1 - (embeddings <=> ${JSON.stringify(embeddings.data[0])}::vector)`; + const textSearchRank = sql<number>`ts_rank_cd(( + setweight(to_tsvector('english', coalesce(${documents.content}, '')),'A') || + setweight(to_tsvector('english', coalesce(${documents.title}, '')),'B') || + setweight(to_tsvector('english', coalesce(${documents.description}, '')),'C') || + setweight(to_tsvector('english', coalesce(${documents.url}, '')),'D') + ), plainto_tsquery('english', ${query}))`; + + const results = await db .select({ id: documents.id, uuid: documents.uuid, content: documents.content, + type: documents.type, + url: documents.url, + title: documents.title, createdAt: documents.createdAt, - chunkContent: chunk.textContent, - similarity: sql<number>`1 - (embeddings <=> ${JSON.stringify( - embeddings.data[0] - )}::vector)`, + updatedAt: documents.updatedAt, + userId: documents.userId, + description: documents.description, + ogImage: documents.ogImage, + similarity: vectorSimilarity, + textRank: textSearchRank, }) .from(chunk) .innerJoin(documents, eq(chunk.documentId, documents.id)) .where( and( eq(documents.userId, user.id), - sql`1 - (embeddings <=> ${JSON.stringify(embeddings.data[0])}::vector) >= ${threshold}`, + sql`${vectorSimilarity} > ${threshold}`, ...(spaces && spaces.length > 0 ? [ exists( @@ -570,7 +603,11 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>() ) ) .orderBy( - sql`1 - (embeddings <=> ${JSON.stringify(embeddings.data[0])}::vector) desc` + desc(sql<number>`( + 0.6 * ${vectorSimilarity} + + 0.25 * ${textSearchRank} + + 0.15 * (1.0 / (1.0 + extract(epoch from age(${documents.updatedAt})) / (90 * 24 * 60 * 60))) + )::float`) ) .limit(limit); diff --git a/apps/backend/src/workflow/index.ts b/apps/backend/src/workflow/index.ts index 24a1ff3e..8efcfacc 100644 --- a/apps/backend/src/workflow/index.ts +++ b/apps/backend/src/workflow/index.ts @@ -24,7 +24,9 @@ export class ContentWorkflow extends WorkflowEntrypoint<Env, WorkflowParams> { async run(event: WorkflowEvent<WorkflowParams>, step: WorkflowStep) { // Step 0: Check if user has reached memory limit await step.do("check memory limit", async () => { - const existingMemories = await database(this.env.HYPERDRIVE.connectionString) + const existingMemories = await database( + this.env.HYPERDRIVE.connectionString + ) .select() .from(documents) .where(eq(documents.userId, event.payload.userId)); @@ -33,7 +35,9 @@ export class ContentWorkflow extends WorkflowEntrypoint<Env, WorkflowParams> { await database(this.env.HYPERDRIVE.connectionString) .delete(documents) .where(eq(documents.uuid, event.payload.uuid)); - throw new NonRetryableError("You have reached the maximum limit of 2000 memories"); + throw new NonRetryableError( + "You have reached the maximum limit of 2000 memories" + ); } }); @@ -142,12 +146,14 @@ export class ContentWorkflow extends WorkflowEntrypoint<Env, WorkflowParams> { ); } + // Step 3: Generate embeddings + const { data: embeddings } = await this.env.AI.run( + "@cf/baai/bge-base-en-v1.5", + { + text: chunked, + } + ); - const {data: embeddings} = await this.env.AI.run("@cf/baai/bge-base-en-v1.5", { - text: chunked, - }); - - // Step 4: Prepare chunk data const chunkInsertData: ChunkInsert[] = await step.do( "prepare chunk data", @@ -160,8 +166,6 @@ export class ContentWorkflow extends WorkflowEntrypoint<Env, WorkflowParams> { })) ); - console.log(chunkInsertData); - // Step 5: Insert chunks if (chunkInsertData.length > 0) { await step.do("insert chunks", async () => |