about | summary | refs | log | tree | commit | diff
path: root/apps
diff options
context:
space:
mode:
author Dhravya Shah <[email protected]> 2025-02-18 21:20:52 -0700
committer GitHub <[email protected]> 2025-02-18 21:20:52 -0700
commit 0c6db45d32661921445c8e7e9f070da36cc8b2b7 (patch)
tree 4e00c1a9aef015f80e00542537a7c66f98740812 /apps
parent better space selector (diff)
parent implemented proper hybrid search with date relevancy into consideration (diff)
download supermemory-0c6db45d32661921445c8e7e9f070da36cc8b2b7.tar.xz
supermemory-0c6db45d32661921445c8e7e9f070da36cc8b2b7.zip
Merge pull request #334 from supermemoryai/hybrid-rag
Hybrid rag
Diffstat (limited to 'apps')
-rw-r--r-- apps/backend/drizzle/0016_good_deathbird.sql 7
-rw-r--r-- apps/backend/drizzle/meta/0016_snapshot.json 1222
-rw-r--r-- apps/backend/drizzle/meta/_journal.json 7
-rw-r--r-- apps/backend/src/routes/actions.ts 67
-rw-r--r-- apps/backend/src/workflow/index.ts 22
5 files changed, 1301 insertions, 24 deletions
diff --git a/apps/backend/drizzle/0016_good_deathbird.sql b/apps/backend/drizzle/0016_good_deathbird.sql
new file mode 100644
index 00000000..7c6505de
--- /dev/null
+++ b/apps/backend/drizzle/0016_good_deathbird.sql
@@ -0,0 +1,7 @@
+ALTER TABLE "chunks" ALTER COLUMN "embeddings" SET DATA TYPE vector(768);--> statement-breakpoint
+CREATE INDEX IF NOT EXISTS "documents_search_idx" ON "documents" USING gin ((
+ setweight(to_tsvector('english', coalesce("content", '')),'A') ||
+ setweight(to_tsvector('english', coalesce("title", '')),'B') ||
+ setweight(to_tsvector('english', coalesce("description", '')),'C') ||
+ setweight(to_tsvector('english', coalesce("url", '')),'D')
+ ));
\ No newline at end of file
diff --git a/apps/backend/drizzle/meta/0016_snapshot.json b/apps/backend/drizzle/meta/0016_snapshot.json
new file mode 100644
index 00000000..2020c691
--- /dev/null
+++ b/apps/backend/drizzle/meta/0016_snapshot.json
@@ -0,0 +1,1222 @@
+{
+ "id": "23a39e70-a9c2-44cd-a3fb-22b19efef79e",
+ "prevId": "8529db1b-2d33-49e0-a413-f517eae7e4e4",
+ "version": "7",
+ "dialect": "postgresql",
+ "tables": {
+ "public.chat_threads": {
+ "name": "chat_threads",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "bigserial",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "uuid": {
+ "name": "uuid",
+ "type": "varchar(36)",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "firstMessage": {
+ "name": "firstMessage",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "user_id": {
+ "name": "user_id",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "messages": {
+ "name": "messages",
+ "type": "jsonb",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {
+ "chat_threads_user_idx": {
+ "name": "chat_threads_user_idx",
+ "columns": [
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "chat_threads_user_id_users_id_fk": {
+ "name": "chat_threads_user_id_users_id_fk",
+ "tableFrom": "chat_threads",
+ "tableTo": "users",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {
+ "chat_threads_uuid_unique": {
+ "name": "chat_threads_uuid_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "uuid"
+ ]
+ }
+ }
+ },
+ "public.chunks": {
+ "name": "chunks",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "serial",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "document_id": {
+ "name": "document_id",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "text_content": {
+ "name": "text_content",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "order_in_document": {
+ "name": "order_in_document",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "embeddings": {
+ "name": "embeddings",
+ "type": "vector(768)",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "metadata": {
+ "name": "metadata",
+ "type": "jsonb",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "updated_at": {
+ "name": "updated_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {
+ "chunk_id_idx": {
+ "name": "chunk_id_idx",
+ "columns": [
+ {
+ "expression": "id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "chunk_document_id_idx": {
+ "name": "chunk_document_id_idx",
+ "columns": [
+ {
+ "expression": "document_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "embeddingIndex": {
+ "name": "embeddingIndex",
+ "columns": [
+ {
+ "expression": "embeddings",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last",
+ "opclass": "vector_cosine_ops"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "hnsw",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "chunks_document_id_documents_id_fk": {
+ "name": "chunks_document_id_documents_id_fk",
+ "tableFrom": "chunks",
+ "tableTo": "documents",
+ "columnsFrom": [
+ "document_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "public.content_to_space": {
+ "name": "content_to_space",
+ "schema": "",
+ "columns": {
+ "content_id": {
+ "name": "content_id",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "space_id": {
+ "name": "space_id",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ }
+ },
+ "indexes": {
+ "content_id_space_id_unique": {
+ "name": "content_id_space_id_unique",
+ "columns": [
+ {
+ "expression": "content_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "space_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "content_to_space_content_id_documents_id_fk": {
+ "name": "content_to_space_content_id_documents_id_fk",
+ "tableFrom": "content_to_space",
+ "tableTo": "documents",
+ "columnsFrom": [
+ "content_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "content_to_space_space_id_spaces_id_fk": {
+ "name": "content_to_space_space_id_spaces_id_fk",
+ "tableFrom": "content_to_space",
+ "tableTo": "spaces",
+ "columnsFrom": [
+ "space_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "public.document_type": {
+ "name": "document_type",
+ "schema": "",
+ "columns": {
+ "type": {
+ "name": "type",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {},
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "public.documents": {
+ "name": "documents",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "bigserial",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "uuid": {
+ "name": "uuid",
+ "type": "varchar(36)",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "url": {
+ "name": "url",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "updated_at": {
+ "name": "updated_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "type": {
+ "name": "type",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "title": {
+ "name": "title",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "description": {
+ "name": "description",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "og_image": {
+ "name": "og_image",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "raw": {
+ "name": "raw",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "user_id": {
+ "name": "user_id",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "content": {
+ "name": "content",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "is_successfully_processed": {
+ "name": "is_successfully_processed",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": false,
+ "default": false
+ },
+ "error_message": {
+ "name": "error_message",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "content_hash": {
+ "name": "content_hash",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ }
+ },
+ "indexes": {
+ "document_id_idx": {
+ "name": "document_id_idx",
+ "columns": [
+ {
+ "expression": "id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "document_uuid_idx": {
+ "name": "document_uuid_idx",
+ "columns": [
+ {
+ "expression": "uuid",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "document_type_idx": {
+ "name": "document_type_idx",
+ "columns": [
+ {
+ "expression": "type",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "document_raw_user_idx": {
+ "name": "document_raw_user_idx",
+ "columns": [
+ {
+ "expression": "raw",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "documents_search_idx": {
+ "name": "documents_search_idx",
+ "columns": [
+ {
+ "expression": "(\n setweight(to_tsvector('english', coalesce(\"content\", '')),'A') ||\n setweight(to_tsvector('english', coalesce(\"title\", '')),'B') ||\n setweight(to_tsvector('english', coalesce(\"description\", '')),'C') ||\n setweight(to_tsvector('english', coalesce(\"url\", '')),'D')\n )",
+ "asc": true,
+ "isExpression": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "gin",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "documents_type_document_type_type_fk": {
+ "name": "documents_type_document_type_type_fk",
+ "tableFrom": "documents",
+ "tableTo": "document_type",
+ "columnsFrom": [
+ "type"
+ ],
+ "columnsTo": [
+ "type"
+ ],
+ "onDelete": "no action",
+ "onUpdate": "no action"
+ },
+ "documents_user_id_users_id_fk": {
+ "name": "documents_user_id_users_id_fk",
+ "tableFrom": "documents",
+ "tableTo": "users",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {
+ "documents_uuid_unique": {
+ "name": "documents_uuid_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "uuid"
+ ]
+ }
+ }
+ },
+ "public.job": {
+ "name": "job",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "serial",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "user_id": {
+ "name": "user_id",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "url": {
+ "name": "url",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "status": {
+ "name": "status",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "attempts": {
+ "name": "attempts",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "default": 0
+ },
+ "lastAttemptAt": {
+ "name": "lastAttemptAt",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "error": {
+ "name": "error",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "updated_at": {
+ "name": "updated_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ }
+ },
+ "indexes": {
+ "user_id_url_idx": {
+ "name": "user_id_url_idx",
+ "columns": [
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "url",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "job_user_id_users_id_fk": {
+ "name": "job_user_id_users_id_fk",
+ "tableFrom": "job",
+ "tableTo": "users",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "public.saved_spaces": {
+ "name": "saved_spaces",
+ "schema": "",
+ "columns": {
+ "user_id": {
+ "name": "user_id",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "space_id": {
+ "name": "space_id",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "saved_at": {
+ "name": "saved_at",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {
+ "saved_spaces_user_space_idx": {
+ "name": "saved_spaces_user_space_idx",
+ "columns": [
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "space_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "saved_spaces_user_id_users_id_fk": {
+ "name": "saved_spaces_user_id_users_id_fk",
+ "tableFrom": "saved_spaces",
+ "tableTo": "users",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "saved_spaces_space_id_spaces_id_fk": {
+ "name": "saved_spaces_space_id_spaces_id_fk",
+ "tableFrom": "saved_spaces",
+ "tableTo": "spaces",
+ "columnsFrom": [
+ "space_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "public.space_access": {
+ "name": "space_access",
+ "schema": "",
+ "columns": {
+ "space_id": {
+ "name": "space_id",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "user_email": {
+ "name": "user_email",
+ "type": "varchar(512)",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "status": {
+ "name": "status",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "access_type": {
+ "name": "access_type",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "'read'"
+ }
+ },
+ "indexes": {
+ "space_id_user_email_idx": {
+ "name": "space_id_user_email_idx",
+ "columns": [
+ {
+ "expression": "space_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "user_email",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "space_access_space_id_spaces_id_fk": {
+ "name": "space_access_space_id_spaces_id_fk",
+ "tableFrom": "space_access",
+ "tableTo": "spaces",
+ "columnsFrom": [
+ "space_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "space_access_status_space_access_status_status_fk": {
+ "name": "space_access_status_space_access_status_status_fk",
+ "tableFrom": "space_access",
+ "tableTo": "space_access_status",
+ "columnsFrom": [
+ "status"
+ ],
+ "columnsTo": [
+ "status"
+ ],
+ "onDelete": "no action",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "public.space_access_status": {
+ "name": "space_access_status",
+ "schema": "",
+ "columns": {
+ "status": {
+ "name": "status",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {},
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "public.space_members": {
+ "name": "space_members",
+ "schema": "",
+ "columns": {
+ "spaceId": {
+ "name": "spaceId",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "user_id": {
+ "name": "user_id",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ }
+ },
+ "indexes": {
+ "space_members_space_user_idx": {
+ "name": "space_members_space_user_idx",
+ "columns": [
+ {
+ "expression": "spaceId",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "space_members_spaceId_users_id_fk": {
+ "name": "space_members_spaceId_users_id_fk",
+ "tableFrom": "space_members",
+ "tableTo": "users",
+ "columnsFrom": [
+ "spaceId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "restrict",
+ "onUpdate": "no action"
+ },
+ "space_members_user_id_users_id_fk": {
+ "name": "space_members_user_id_users_id_fk",
+ "tableFrom": "space_members",
+ "tableTo": "users",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "restrict",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "public.spaces": {
+ "name": "spaces",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "bigserial",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "uuid": {
+ "name": "uuid",
+ "type": "varchar(36)",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "name": {
+ "name": "name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "updated_at": {
+ "name": "updated_at",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "ownerId": {
+ "name": "ownerId",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "is_public": {
+ "name": "is_public",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": false
+ }
+ },
+ "indexes": {
+ "spaces_id_idx": {
+ "name": "spaces_id_idx",
+ "columns": [
+ {
+ "expression": "id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "spaces_owner_id_idx": {
+ "name": "spaces_owner_id_idx",
+ "columns": [
+ {
+ "expression": "ownerId",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "spaces_name_idx": {
+ "name": "spaces_name_idx",
+ "columns": [
+ {
+ "expression": "name",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {},
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {
+ "spaces_uuid_unique": {
+ "name": "spaces_uuid_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "uuid"
+ ]
+ }
+ }
+ },
+ "public.users": {
+ "name": "users",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "serial",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "uuid": {
+ "name": "uuid",
+ "type": "varchar(36)",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "email": {
+ "name": "email",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "first_name": {
+ "name": "first_name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "last_name": {
+ "name": "last_name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "email_verified": {
+ "name": "email_verified",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": false
+ },
+ "profile_picture_url": {
+ "name": "profile_picture_url",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "telegram_id": {
+ "name": "telegram_id",
+ "type": "varchar(255)",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "has_onboarded": {
+ "name": "has_onboarded",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "default": 0
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "updated_at": {
+ "name": "updated_at",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "last_api_key_generated_at": {
+ "name": "last_api_key_generated_at",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": false,
+ "default": "now()"
+ },
+ "stripe_customer_id": {
+ "name": "stripe_customer_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "tier": {
+ "name": "tier",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "'free'"
+ }
+ },
+ "indexes": {
+ "users_id_idx": {
+ "name": "users_id_idx",
+ "columns": [
+ {
+ "expression": "id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "users_uuid_idx": {
+ "name": "users_uuid_idx",
+ "columns": [
+ {
+ "expression": "uuid",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "users_email_idx": {
+ "name": "users_email_idx",
+ "columns": [
+ {
+ "expression": "email",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "users_name_idx": {
+ "name": "users_name_idx",
+ "columns": [
+ {
+ "expression": "first_name",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "last_name",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "users_created_at_idx": {
+ "name": "users_created_at_idx",
+ "columns": [
+ {
+ "expression": "created_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "users_telegram_id_idx": {
+ "name": "users_telegram_id_idx",
+ "columns": [
+ {
+ "expression": "telegram_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {},
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {
+ "users_uuid_unique": {
+ "name": "users_uuid_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "uuid"
+ ]
+ },
+ "users_email_unique": {
+ "name": "users_email_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "email"
+ ]
+ }
+ }
+ },
+ "public.waitlist": {
+ "name": "waitlist",
+ "schema": "",
+ "columns": {
+ "email": {
+ "name": "email",
+ "type": "varchar(512)",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {},
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ }
+ },
+ "enums": {},
+ "schemas": {},
+ "sequences": {},
+ "_meta": {
+ "columns": {},
+ "schemas": {},
+ "tables": {}
+ }
+}
\ No newline at end of file
diff --git a/apps/backend/drizzle/meta/_journal.json b/apps/backend/drizzle/meta/_journal.json
index c8cea61a..7f777651 100644
--- a/apps/backend/drizzle/meta/_journal.json
+++ b/apps/backend/drizzle/meta/_journal.json
@@ -113,6 +113,13 @@
"when": 1737920848112,
"tag": "0015_perpetual_mauler",
"breakpoints": true
+ },
+ {
+ "idx": 16,
+ "version": "7",
+ "when": 1739937938319,
+ "tag": "0016_good_deathbird",
+ "breakpoints": true
}
]
}
\ No newline at end of file
diff --git a/apps/backend/src/routes/actions.ts b/apps/backend/src/routes/actions.ts
index c0801ada..0bc26052 100644
--- a/apps/backend/src/routes/actions.ts
+++ b/apps/backend/src/routes/actions.ts
@@ -88,7 +88,8 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>()
apiKey: c.env.BRAINTRUST_API_KEY,
});
- const googleClient = wrapAISDKModel(openai(c.env).chat("gpt-4o-mini-2024-07-18"));
+ const googleClient = wrapAISDKModel(
+ openai(c.env).chat("gpt-4o-mini-2024-07-18"));
// Get last user message and generate embedding in parallel with thread creation
let lastUserMessage = coreMessages.findLast((i) => i.role === "user");
@@ -123,9 +124,15 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>()
return c.json({ error: "Failed to generate embedding" }, 500);
}
- // Perform semantic search
- const similarity = sql<number>`1 - (${cosineDistance(chunk.embeddings, embedding[0])})`;
-
+ // Pre-compute the vector similarity expression to avoid multiple calculations
+ const vectorSimilarity = sql<number>`1 - (embeddings <=> ${JSON.stringify(embedding[0])}::vector)`;
+ const textSearchRank = sql<number>`ts_rank_cd((
+ setweight(to_tsvector('english', coalesce(${documents.content}, '')),'A') ||
+ setweight(to_tsvector('english', coalesce(${documents.title}, '')),'B') ||
+ setweight(to_tsvector('english', coalesce(${documents.description}, '')),'C') ||
+ setweight(to_tsvector('english', coalesce(${documents.url}, '')),'D')
+ ), plainto_tsquery('english', ${queryText}))`;
+
const finalResults = await db
.select({
id: documents.id,
@@ -138,12 +145,25 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>()
userId: documents.userId,
description: documents.description,
ogImage: documents.ogImage,
+ similarity: vectorSimilarity,
+ textRank: textSearchRank,
})
.from(chunk)
.innerJoin(documents, eq(chunk.documentId, documents.id))
- .where(and(eq(documents.userId, user.id), sql`${similarity} > 0.4`))
- .orderBy(desc(similarity))
- .limit(5);
+ .where(
+ and(
+ eq(documents.userId, user.id),
+ sql`${vectorSimilarity} > 0.5`
+ )
+ )
+ .orderBy(
+ desc(sql<number>`(
+ 0.6 * ${vectorSimilarity} +
+ 0.25 * ${textSearchRank} +
+ 0.15 * (1.0 / (1.0 + extract(epoch from age(${documents.updatedAt})) / (90 * 24 * 60 * 60)))
+ )::float`)
+ )
+ .limit(15);
const cleanDocumentsForContext = finalResults.map((d) => ({
title: d.title,
@@ -531,24 +551,37 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>()
);
}
- // Perform semantic search using cosine similarity
- const results = await database(c.env.HYPERDRIVE.connectionString)
+ // Pre-compute the vector similarity expression to avoid multiple calculations
+ const vectorSimilarity = sql<number>`1 - (embeddings <=> ${JSON.stringify(embeddings.data[0])}::vector)`;
+ const textSearchRank = sql<number>`ts_rank_cd((
+ setweight(to_tsvector('english', coalesce(${documents.content}, '')),'A') ||
+ setweight(to_tsvector('english', coalesce(${documents.title}, '')),'B') ||
+ setweight(to_tsvector('english', coalesce(${documents.description}, '')),'C') ||
+ setweight(to_tsvector('english', coalesce(${documents.url}, '')),'D')
+ ), plainto_tsquery('english', ${query}))`;
+
+ const results = await db
.select({
id: documents.id,
uuid: documents.uuid,
content: documents.content,
+ type: documents.type,
+ url: documents.url,
+ title: documents.title,
createdAt: documents.createdAt,
- chunkContent: chunk.textContent,
- similarity: sql<number>`1 - (embeddings <=> ${JSON.stringify(
- embeddings.data[0]
- )}::vector)`,
+ updatedAt: documents.updatedAt,
+ userId: documents.userId,
+ description: documents.description,
+ ogImage: documents.ogImage,
+ similarity: vectorSimilarity,
+ textRank: textSearchRank,
})
.from(chunk)
.innerJoin(documents, eq(chunk.documentId, documents.id))
.where(
and(
eq(documents.userId, user.id),
- sql`1 - (embeddings <=> ${JSON.stringify(embeddings.data[0])}::vector) >= ${threshold}`,
+ sql`${vectorSimilarity} > ${threshold}`,
...(spaces && spaces.length > 0
? [
exists(
@@ -570,7 +603,11 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>()
)
)
.orderBy(
- sql`1 - (embeddings <=> ${JSON.stringify(embeddings.data[0])}::vector) desc`
+ desc(sql<number>`(
+ 0.6 * ${vectorSimilarity} +
+ 0.25 * ${textSearchRank} +
+ 0.15 * (1.0 / (1.0 + extract(epoch from age(${documents.updatedAt})) / (90 * 24 * 60 * 60)))
+ )::float`)
)
.limit(limit);
diff --git a/apps/backend/src/workflow/index.ts b/apps/backend/src/workflow/index.ts
index 24a1ff3e..8efcfacc 100644
--- a/apps/backend/src/workflow/index.ts
+++ b/apps/backend/src/workflow/index.ts
@@ -24,7 +24,9 @@ export class ContentWorkflow extends WorkflowEntrypoint<Env, WorkflowParams> {
async run(event: WorkflowEvent<WorkflowParams>, step: WorkflowStep) {
// Step 0: Check if user has reached memory limit
await step.do("check memory limit", async () => {
- const existingMemories = await database(this.env.HYPERDRIVE.connectionString)
+ const existingMemories = await database(
+ this.env.HYPERDRIVE.connectionString
+ )
.select()
.from(documents)
.where(eq(documents.userId, event.payload.userId));
@@ -33,7 +35,9 @@ export class ContentWorkflow extends WorkflowEntrypoint<Env, WorkflowParams> {
await database(this.env.HYPERDRIVE.connectionString)
.delete(documents)
.where(eq(documents.uuid, event.payload.uuid));
- throw new NonRetryableError("You have reached the maximum limit of 2000 memories");
+ throw new NonRetryableError(
+ "You have reached the maximum limit of 2000 memories"
+ );
}
});
@@ -142,12 +146,14 @@ export class ContentWorkflow extends WorkflowEntrypoint<Env, WorkflowParams> {
);
}
+ // Step 3: Generate embeddings
+ const { data: embeddings } = await this.env.AI.run(
+ "@cf/baai/bge-base-en-v1.5",
+ {
+ text: chunked,
+ }
+ );
- const {data: embeddings} = await this.env.AI.run("@cf/baai/bge-base-en-v1.5", {
- text: chunked,
- });
-
-
// Step 4: Prepare chunk data
const chunkInsertData: ChunkInsert[] = await step.do(
"prepare chunk data",
@@ -160,8 +166,6 @@ export class ContentWorkflow extends WorkflowEntrypoint<Env, WorkflowParams> {
}))
);
- console.log(chunkInsertData);
-
// Step 5: Insert chunks
if (chunkInsertData.length > 0) {
await step.do("insert chunks", async () =>