diff options
| -rw-r--r-- | apps/backend/drizzle.config.prod.ts | 10 | ||||
| -rw-r--r-- | apps/backend/drizzle.config.ts | 4 | ||||
| -rw-r--r-- | apps/backend/drizzle/0015_messy_karma.sql | 16 | ||||
| -rw-r--r-- | apps/backend/drizzle/meta/0015_snapshot.json | 1322 | ||||
| -rw-r--r-- | apps/backend/drizzle/meta/_journal.json | 7 | ||||
| -rw-r--r-- | apps/backend/package.json | 12 | ||||
| -rw-r--r-- | apps/backend/src/auth.ts | 2 | ||||
| -rw-r--r-- | apps/backend/src/index.tsx | 4 | ||||
| -rw-r--r-- | apps/backend/src/routes/actions.ts | 169 | ||||
| -rw-r--r-- | apps/backend/src/utils/extractor.ts | 4 | ||||
| -rw-r--r-- | apps/backend/src/workflow/index.ts | 334 | ||||
| -rw-r--r-- | apps/backend/test/routes/memories.test.ts | 247 | ||||
| -rw-r--r-- | apps/backend/test/setup.ts | 32 | ||||
| -rw-r--r-- | apps/backend/tsconfig.json | 3 | ||||
| -rw-r--r-- | apps/backend/vitest.config.ts | 11 | ||||
| -rw-r--r-- | apps/backend/wrangler.toml | 1 | ||||
| -rw-r--r-- | apps/web/app/components/memories/Integrations.tsx | 102 | ||||
| -rw-r--r-- | apps/web/tsconfig.json | 2 | ||||
| -rw-r--r-- | package.json | 1 | ||||
| -rw-r--r-- | packages/db/schema.ts | 39 |
20 files changed, 2093 insertions, 229 deletions
diff --git a/apps/backend/drizzle.config.prod.ts b/apps/backend/drizzle.config.prod.ts deleted file mode 100644 index 4707bf25..00000000 --- a/apps/backend/drizzle.config.prod.ts +++ /dev/null @@ -1,10 +0,0 @@ -import { defineConfig } from "drizzle-kit"; - -export default defineConfig({ - dialect: "postgresql", - schema: "../../packages/db", - out: "./drizzle", - dbCredentials: { - url: process.env.PROD_DATABASE_URL!, - }, -}); diff --git a/apps/backend/drizzle.config.ts b/apps/backend/drizzle.config.ts index 3f198a9f..0636e53f 100644 --- a/apps/backend/drizzle.config.ts +++ b/apps/backend/drizzle.config.ts @@ -6,6 +6,8 @@ config(); if (process.env.NODE_ENV !== "production" && !process.env.DATABASE_URL) { throw new Error("DATABASE_URL is not set"); +} else if (process.env.NODE_ENV === "production" && !process.env.PROD_DATABASE_URL) { + throw new Error("PROD_DATABASE_URL is not set"); } export default defineConfig({ @@ -13,6 +15,6 @@ export default defineConfig({ schema: "../../packages/db", out: "./drizzle", dbCredentials: { - url: process.env.DATABASE_URL!, + url: process.env.NODE_ENV === "production" ? process.env.PROD_DATABASE_URL! : process.env.DATABASE_URL!, }, }); diff --git a/apps/backend/drizzle/0015_messy_karma.sql b/apps/backend/drizzle/0015_messy_karma.sql new file mode 100644 index 00000000..23c763f5 --- /dev/null +++ b/apps/backend/drizzle/0015_messy_karma.sql @@ -0,0 +1,16 @@ +CREATE TABLE IF NOT EXISTS "processed_content" ( + "id" serial PRIMARY KEY NOT NULL, + "content_hash" text NOT NULL, + "content" text NOT NULL, + "is_successfully_processed" boolean DEFAULT false, + "error_message" text, + "created_at" timestamp with time zone DEFAULT now() NOT NULL, + "updated_at" timestamp with time zone DEFAULT now() NOT NULL, + CONSTRAINT "processed_content_content_hash_unique" UNIQUE("content_hash") +); +--> statement-breakpoint +ALTER TABLE "chunks" ALTER COLUMN "text_content" SET NOT NULL;--> statement-breakpoint +ALTER TABLE "chunks" ADD COLUMN "content_hash" text;--> statement-breakpoint +CREATE UNIQUE INDEX IF NOT EXISTS "processed_content_hash_idx" ON "processed_content" USING btree ("content_hash");--> statement-breakpoint +CREATE INDEX IF NOT EXISTS "chunk_content_hash_idx" ON "chunks" USING btree ("content_hash");--> statement-breakpoint +CREATE INDEX IF NOT EXISTS "document_content_hash_idx" ON "documents" USING btree ("content_hash");
\ No newline at end of file diff --git a/apps/backend/drizzle/meta/0015_snapshot.json b/apps/backend/drizzle/meta/0015_snapshot.json new file mode 100644 index 00000000..ab108e86 --- /dev/null +++ b/apps/backend/drizzle/meta/0015_snapshot.json @@ -0,0 +1,1322 @@ +{ + "id": "eee9f91b-d7a4-4dab-8ce9-30dde703e6f7", + "prevId": "f5cef50a-50ec-4b49-99c2-4da3a0f6a098", + "version": "7", + "dialect": "postgresql", + "tables": { + "public.chat_threads": { + "name": "chat_threads", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "bigserial", + "primaryKey": true, + "notNull": true + }, + "uuid": { + "name": "uuid", + "type": "varchar(36)", + "primaryKey": false, + "notNull": true + }, + "firstMessage": { + "name": "firstMessage", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "messages": { + "name": "messages", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "chat_threads_user_idx": { + "name": "chat_threads_user_idx", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "chat_threads_user_id_users_id_fk": { + "name": "chat_threads_user_id_users_id_fk", + "tableFrom": "chat_threads", + "tableTo": "users", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "chat_threads_uuid_unique": { + "name": "chat_threads_uuid_unique", + "nullsNotDistinct": false, + "columns": [ + "uuid" + ] + } + } + }, + "public.chunks": { + "name": "chunks", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "document_id": { + "name": "document_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "text_content": { + "name": "text_content", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "content_hash": { + "name": "content_hash", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "order_in_document": { + "name": "order_in_document", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "embeddings": { + "name": "embeddings", + "type": "vector(1536)", + "primaryKey": false, + "notNull": false + }, + "metadata": { + "name": "metadata", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "chunk_id_idx": { + "name": "chunk_id_idx", + "columns": [ + { + "expression": "id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "chunk_document_id_idx": { + "name": "chunk_document_id_idx", + "columns": [ + { + "expression": "document_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "chunk_content_hash_idx": { + "name": "chunk_content_hash_idx", + "columns": [ + { + "expression": "content_hash", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "embeddingIndex": { + "name": "embeddingIndex", + "columns": [ + { + "expression": "embeddings", + "isExpression": false, + "asc": true, + "nulls": "last", + "opclass": "vector_cosine_ops" + } + ], + "isUnique": false, + "concurrently": false, + "method": "hnsw", + "with": {} + } + }, + "foreignKeys": { + "chunks_document_id_documents_id_fk": { + "name": "chunks_document_id_documents_id_fk", + "tableFrom": "chunks", + "tableTo": "documents", + "columnsFrom": [ + "document_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "public.content_to_space": { + "name": "content_to_space", + "schema": "", + "columns": { + "content_id": { + "name": "content_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "space_id": { + "name": "space_id", + "type": "integer", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "content_id_space_id_unique": { + "name": "content_id_space_id_unique", + "columns": [ + { + "expression": "content_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "space_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "content_to_space_content_id_documents_id_fk": { + "name": "content_to_space_content_id_documents_id_fk", + "tableFrom": "content_to_space", + "tableTo": "documents", + "columnsFrom": [ + "content_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "content_to_space_space_id_spaces_id_fk": { + "name": "content_to_space_space_id_spaces_id_fk", + "tableFrom": "content_to_space", + "tableTo": "spaces", + "columnsFrom": [ + "space_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "public.document_type": { + "name": "document_type", + "schema": "", + "columns": { + "type": { + "name": "type", + "type": "text", + "primaryKey": true, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "public.documents": { + "name": "documents", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "bigserial", + "primaryKey": true, + "notNull": true + }, + "uuid": { + "name": "uuid", + "type": "varchar(36)", + "primaryKey": false, + "notNull": true + }, + "url": { + "name": "url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "type": { + "name": "type", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "og_image": { + "name": "og_image", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "raw": { + "name": "raw", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "user_id": { + "name": "user_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "content_hash": { + "name": "content_hash", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "is_successfully_processed": { + "name": "is_successfully_processed", + "type": "boolean", + "primaryKey": false, + "notNull": false, + "default": false + }, + "error_message": { + "name": "error_message", + "type": "text", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "document_id_idx": { + "name": "document_id_idx", + "columns": [ + { + "expression": "id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "document_uuid_idx": { + "name": "document_uuid_idx", + "columns": [ + { + "expression": "uuid", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "document_type_idx": { + "name": "document_type_idx", + "columns": [ + { + "expression": "type", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "document_raw_user_idx": { + "name": "document_raw_user_idx", + "columns": [ + { + "expression": "raw", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "document_content_hash_idx": { + "name": "document_content_hash_idx", + "columns": [ + { + "expression": "content_hash", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "documents_type_document_type_type_fk": { + "name": "documents_type_document_type_type_fk", + "tableFrom": "documents", + "tableTo": "document_type", + "columnsFrom": [ + "type" + ], + "columnsTo": [ + "type" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "documents_user_id_users_id_fk": { + "name": "documents_user_id_users_id_fk", + "tableFrom": "documents", + "tableTo": "users", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "documents_uuid_unique": { + "name": "documents_uuid_unique", + "nullsNotDistinct": false, + "columns": [ + "uuid" + ] + } + } + }, + "public.job": { + "name": "job", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "url": { + "name": "url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "status": { + "name": "status", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "attempts": { + "name": "attempts", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "lastAttemptAt": { + "name": "lastAttemptAt", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "error": { + "name": "error", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "user_id_url_idx": { + "name": "user_id_url_idx", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "url", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "job_user_id_users_id_fk": { + "name": "job_user_id_users_id_fk", + "tableFrom": "job", + "tableTo": "users", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "public.processed_content": { + "name": "processed_content", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "content_hash": { + "name": "content_hash", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "is_successfully_processed": { + "name": "is_successfully_processed", + "type": "boolean", + "primaryKey": false, + "notNull": false, + "default": false + }, + "error_message": { + "name": "error_message", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "processed_content_hash_idx": { + "name": "processed_content_hash_idx", + "columns": [ + { + "expression": "content_hash", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "processed_content_content_hash_unique": { + "name": "processed_content_content_hash_unique", + "nullsNotDistinct": false, + "columns": [ + "content_hash" + ] + } + } + }, + "public.saved_spaces": { + "name": "saved_spaces", + "schema": "", + "columns": { + "user_id": { + "name": "user_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "space_id": { + "name": "space_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "saved_at": { + "name": "saved_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "saved_spaces_user_space_idx": { + "name": "saved_spaces_user_space_idx", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "space_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "saved_spaces_user_id_users_id_fk": { + "name": "saved_spaces_user_id_users_id_fk", + "tableFrom": "saved_spaces", + "tableTo": "users", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "saved_spaces_space_id_spaces_id_fk": { + "name": "saved_spaces_space_id_spaces_id_fk", + "tableFrom": "saved_spaces", + "tableTo": "spaces", + "columnsFrom": [ + "space_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "public.space_access": { + "name": "space_access", + "schema": "", + "columns": { + "space_id": { + "name": "space_id", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "user_email": { + "name": "user_email", + "type": "varchar(512)", + "primaryKey": false, + "notNull": false + }, + "status": { + "name": "status", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "access_type": { + "name": "access_type", + "type": "text", + "primaryKey": false, + "notNull": true, + "default": "'read'" + } + }, + "indexes": { + "space_id_user_email_idx": { + "name": "space_id_user_email_idx", + "columns": [ + { + "expression": "space_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "user_email", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "space_access_space_id_spaces_id_fk": { + "name": "space_access_space_id_spaces_id_fk", + "tableFrom": "space_access", + "tableTo": "spaces", + "columnsFrom": [ + "space_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "space_access_status_space_access_status_status_fk": { + "name": "space_access_status_space_access_status_status_fk", + "tableFrom": "space_access", + "tableTo": "space_access_status", + "columnsFrom": [ + "status" + ], + "columnsTo": [ + "status" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "public.space_access_status": { + "name": "space_access_status", + "schema": "", + "columns": { + "status": { + "name": "status", + "type": "text", + "primaryKey": true, + "notNull": true + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "public.space_members": { + "name": "space_members", + "schema": "", + "columns": { + "spaceId": { + "name": "spaceId", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "integer", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "space_members_space_user_idx": { + "name": "space_members_space_user_idx", + "columns": [ + { + "expression": "spaceId", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "space_members_spaceId_users_id_fk": { + "name": "space_members_spaceId_users_id_fk", + "tableFrom": "space_members", + "tableTo": "users", + "columnsFrom": [ + "spaceId" + ], + "columnsTo": [ + "id" + ], + "onDelete": "restrict", + "onUpdate": "no action" + }, + "space_members_user_id_users_id_fk": { + "name": "space_members_user_id_users_id_fk", + "tableFrom": "space_members", + "tableTo": "users", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "restrict", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, + "public.spaces": { + "name": "spaces", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "bigserial", + "primaryKey": true, + "notNull": true + }, + "uuid": { + "name": "uuid", + "type": "varchar(36)", + "primaryKey": false, + "notNull": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "ownerId": { + "name": "ownerId", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "is_public": { + "name": "is_public", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + } + }, + "indexes": { + "spaces_id_idx": { + "name": "spaces_id_idx", + "columns": [ + { + "expression": "id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "spaces_owner_id_idx": { + "name": "spaces_owner_id_idx", + "columns": [ + { + "expression": "ownerId", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "spaces_name_idx": { + "name": "spaces_name_idx", + "columns": [ + { + "expression": "name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "spaces_uuid_unique": { + "name": "spaces_uuid_unique", + "nullsNotDistinct": false, + "columns": [ + "uuid" + ] + } + } + }, + "public.users": { + "name": "users", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "uuid": { + "name": "uuid", + "type": "varchar(36)", + "primaryKey": false, + "notNull": true + }, + "email": { + "name": "email", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "first_name": { + "name": "first_name", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "last_name": { + "name": "last_name", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "email_verified": { + "name": "email_verified", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "profile_picture_url": { + "name": "profile_picture_url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "telegram_id": { + "name": "telegram_id", + "type": "varchar(255)", + "primaryKey": false, + "notNull": false + }, + "has_onboarded": { + "name": "has_onboarded", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "last_api_key_generated_at": { + "name": "last_api_key_generated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false, + "default": "now()" + }, + "stripe_customer_id": { + "name": "stripe_customer_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "tier": { + "name": "tier", + "type": "text", + "primaryKey": false, + "notNull": true, + "default": "'free'" + } + }, + "indexes": { + "users_id_idx": { + "name": "users_id_idx", + "columns": [ + { + "expression": "id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "users_uuid_idx": { + "name": "users_uuid_idx", + "columns": [ + { + "expression": "uuid", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "users_email_idx": { + "name": "users_email_idx", + "columns": [ + { + "expression": "email", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "users_name_idx": { + "name": "users_name_idx", + "columns": [ + { + "expression": "first_name", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "last_name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "users_created_at_idx": { + "name": "users_created_at_idx", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "users_telegram_id_idx": { + "name": "users_telegram_id_idx", + "columns": [ + { + "expression": "telegram_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "users_uuid_unique": { + "name": "users_uuid_unique", + "nullsNotDistinct": false, + "columns": [ + "uuid" + ] + }, + "users_email_unique": { + "name": "users_email_unique", + "nullsNotDistinct": false, + "columns": [ + "email" + ] + } + } + }, + "public.waitlist": { + "name": "waitlist", + "schema": "", + "columns": { + "email": { + "name": "email", + "type": "varchar(512)", + "primaryKey": true, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + } + }, + "enums": {}, + "schemas": {}, + "sequences": {}, + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + } +}
\ No newline at end of file diff --git a/apps/backend/drizzle/meta/_journal.json b/apps/backend/drizzle/meta/_journal.json index 2ef7aaa9..a7927929 100644 --- a/apps/backend/drizzle/meta/_journal.json +++ b/apps/backend/drizzle/meta/_journal.json @@ -106,6 +106,13 @@ "when": 1736852938881, "tag": "0014_mighty_the_captain", "breakpoints": true + }, + { + "idx": 15, + "version": "7", + "when": 1737670732529, + "tag": "0015_messy_karma", + "breakpoints": true } ] }
\ No newline at end of file diff --git a/apps/backend/package.json b/apps/backend/package.json index 7b961b8d..81ba67c5 100644 --- a/apps/backend/package.json +++ b/apps/backend/package.json @@ -1,15 +1,19 @@ { - "name": "supermemory-backend", + "name": "@supermemory/backend", + "type": "module", "scripts": { "dev": "bunx wrangler -v && wrangler dev", "deploy": "bunx wrangler deploy --minify", "generate-migration": "dotenv -- npx drizzle-kit generate", "migrate:local": "bun run ./scripts/migrate.ts", - "tail": "bunx wrangler tail" + "tail": "bunx wrangler tail", + "test": "vitest", + "test:watch": "vitest watch" }, "dependencies": { "@ai-sdk/google": "^0.0.51", "@ai-sdk/openai": "^0.0.70", + "@cloudflare/vitest-pool-workers": "^0.6.6", "@hono/zod-validator": "^0.4.1", "@supermemory/db": "workspace:*", "ai": "4.0.16", @@ -21,10 +25,12 @@ "openai": "^4.68.4", "postgres": "^3.4.4", "uuid": "^11.0.1", + "vitest": "2.1.8", "zod": "^3.23.8" }, "devDependencies": { - "@cloudflare/workers-types": "^4.20240925.0" + "@cloudflare/workers-types": "^4.20240925.0", + "vitest-environment-miniflare": "^2.14.4" }, "overrides": { "iron-webcrypto": "^1.2.1" diff --git a/apps/backend/src/auth.ts b/apps/backend/src/auth.ts index b66bba39..206624d3 100644 --- a/apps/backend/src/auth.ts +++ b/apps/backend/src/auth.ts @@ -93,7 +93,6 @@ export const auth = async ( }; const session = await getSessionFromRequest(c.req.raw, context); - console.log("Session", session); c.set("session", session); if (session?.user?.id) { @@ -129,7 +128,6 @@ export const auth = async ( user = Array.isArray(user) ? user[0] : user; c.set("user", user); - console.log("User", user); } } } diff --git a/apps/backend/src/index.tsx b/apps/backend/src/index.tsx index cb63e53e..dea96448 100644 --- a/apps/backend/src/index.tsx +++ b/apps/backend/src/index.tsx @@ -48,6 +48,10 @@ export const app = new Hono<{ Variables: Variables; Bindings: Env }>() .use("/api/*", (c, next) => { const user = c.get("user"); + if (c.env.NODE_ENV === "development") { + return next(); + } + // RATELIMITS const rateLimitConfig = { // Endpoints that bypass rate limiting diff --git a/apps/backend/src/routes/actions.ts b/apps/backend/src/routes/actions.ts index 4de1d339..28124347 100644 --- a/apps/backend/src/routes/actions.ts +++ b/apps/backend/src/routes/actions.ts @@ -717,40 +717,6 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>() const db = database(c.env.HYPERDRIVE.connectionString); - // Calculate document hash early to enable faster duplicate detection - const content = body.prefetched?.contentToVectorize || body.content; - const encoder = new TextEncoder(); - const data = encoder.encode(content); - const hashBuffer = await crypto.subtle.digest("SHA-256", data); - const hashArray = Array.from(new Uint8Array(hashBuffer)); - const documentHash = hashArray - .map((b) => b.toString(16).padStart(2, "0")) - .join(""); - - // Check for duplicates using hash - const existingDocs = await db - .select() - .from(documents) - .where( - and( - eq(documents.userId, user.id), - or( - eq(documents.contentHash, documentHash), - and( - eq(documents.type, type.value), - or(eq(documents.url, body.content), eq(documents.raw, content)) - ) - ) - ) - ); - - if (existingDocs.length > 0) { - return c.json( - { error: `That ${type.value} already exists in your memories` }, - 409 - ); - } - // Check space permissions if spaces are specified if (body.spaces && body.spaces.length > 0) { const spacePermissions = await Promise.all( @@ -828,38 +794,90 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>() ? body.content : `https://supermemory.ai/content/${contentId}`; - // Insert into documents table with hash + // Insert minimal document record try { - await db.insert(documents).values({ - uuid: contentId, - userId: user.id, - type: type.value, + // Check if document with same URL exists + console.log( + "[Add] Checking for existing document with URL:", + indexedUrl + ); + const existingDoc = await db + .select() + .from(documents) + .where( + and( + eq(documents.userId, user.id), + eq(documents.url, indexedUrl), + sql`${documents.url} IS NOT NULL` + ) + ) + .limit(1); + + let documentId = contentId; + + if (existingDoc.length > 0) { + console.log("[Add] Found existing document:", { + id: existingDoc[0].id, + uuid: existingDoc[0].uuid, + url: existingDoc[0].url, + }); + documentId = existingDoc[0].uuid; + // Update the raw content of existing document + console.log("[Add] Updating existing document content"); + await db + .update(documents) + .set({ + raw: + (body.prefetched ?? body.content) + + "\n\n" + + body.spaces?.join(" "), + updatedAt: new Date(), + }) + .where(eq(documents.id, existingDoc[0].id)); + console.log("[Add] Document updated successfully"); + } else { + console.log("[Add] No existing document found, creating new one"); + // Insert new document + await db.insert(documents).values({ + uuid: contentId, + userId: user.id, + type: type.value, + url: indexedUrl, + raw: + (body.prefetched ?? body.content) + + "\n\n" + + body.spaces?.join(" "), + }); + console.log("[Add] New document created successfully"); + } + + console.log("[Add] Starting workflow with params:", { + documentId, url: indexedUrl, - title: body.prefetched?.title, - description: body.prefetched?.description, - ogImage: body.prefetched?.ogImage, - contentHash: documentHash, - raw: - (body.prefetched ?? body.content) + "\n\n" + body.spaces?.join(" "), + isUpdate: existingDoc.length > 0, }); - + // Start the workflow which will handle everything else await c.env.CONTENT_WORKFLOW.create({ params: { userId: user.id, content: body.content, spaces: body.spaces, type: type.value, - uuid: contentId, + uuid: documentId, url: indexedUrl, prefetched: body.prefetched, }, - id: contentId, + id: documentId, }); return c.json({ - message: "Content added successfully", - id: contentId, + message: + existingDoc.length > 0 + ? "Content update started" + : "Content processing started", + id: documentId, type: type.value, + updated: existingDoc.length > 0, }); } catch (error) { console.error("[Add Content Error]", error); @@ -1054,6 +1072,57 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>() ? content : `https://supermemory.ai/content/${contentId}`; + // Check for existing document with same URL + const existingDoc = await db + .select() + .from(documents) + .where( + and( + eq(documents.userId, user.id), + eq(documents.url, url), + sql`${documents.url} IS NOT NULL` + ) + ) + .limit(1); + + if (existingDoc.length > 0) { + // Update existing document + await db + .update(documents) + .set({ + title, + contentHash: documentHash, + raw: content + "\n\n" + spaces?.join(" "), + updatedAt: new Date(), + }) + .where(eq(documents.id, existingDoc[0].id)); + + // Create workflow for updating + await c.env.CONTENT_WORKFLOW.create({ + params: { + userId: user.id, + content, + spaces, + type: type.value, + uuid: existingDoc[0].uuid, + url, + }, + id: existingDoc[0].uuid, + }); + + succeeded++; + sendMessage({ + progress: Math.round((processed / total) * 100), + status: "updated", + title: typeof item === "string" ? item : item.title, + processed, + total, + succeeded, + failed, + }); + continue; + } + // Insert into documents table await db.insert(documents).values({ uuid: contentId, diff --git a/apps/backend/src/utils/extractor.ts b/apps/backend/src/utils/extractor.ts index 9bf76181..8201cd28 100644 --- a/apps/backend/src/utils/extractor.ts +++ b/apps/backend/src/utils/extractor.ts @@ -2,7 +2,7 @@ import { Env } from "../types"; export const extractPageContent = async (content: string, env: Env) => { console.log("content", content); - const resp = await fetch(`https://md.dhr.wtf?url=${content}`); + const resp = await fetch(`https://md.dhr.wtf?url=${content}?nocache`); if (!resp.ok) { throw new Error( @@ -10,7 +10,7 @@ export const extractPageContent = async (content: string, env: Env) => { ); } - const metadataResp = await fetch(`https://md.dhr.wtf/metadata?url=${content}`); + const metadataResp = await fetch(`https://md.dhr.wtf/metadata?url=${content}?nocache`); if (!metadataResp.ok) { throw new Error( diff --git a/apps/backend/src/workflow/index.ts b/apps/backend/src/workflow/index.ts index 41c73015..fe1f4143 100644 --- a/apps/backend/src/workflow/index.ts +++ b/apps/backend/src/workflow/index.ts @@ -6,17 +6,106 @@ import { import { Env, WorkflowParams } from "../types"; import { fetchContent } from "../utils/fetchers"; import chunkText from "../utils/chunkers"; -import { database, eq, inArray } from "@supermemory/db"; +import { database, eq, inArray, and, or, sql } from "@supermemory/db"; import { ChunkInsert, contentToSpace, documents, spaces, + chunk, + Document, } from "@supermemory/db/schema"; import { embedMany } from "ai"; import { openai } from "../providers"; -import { chunk } from "@supermemory/db/schema"; import { NonRetryableError } from "cloudflare:workflows"; +import { createHash } from "crypto"; + +// Helper function to generate content hash +const generateHash = (content: string) => { + return createHash("sha256").update(content).digest("hex"); +}; + +interface ChunkUpdate { + oldChunk?: typeof chunk.$inferSelect; + newContent?: string; + orderInDocument: number; + needsUpdate: boolean; +} + +// Helper function to determine which chunks need updates +const analyzeContentChanges = async ( + oldContent: string, + newContent: string, + existingChunks: (typeof chunk.$inferSelect)[], + chunkSize: number = 768 +): Promise<ChunkUpdate[]> => { + // First, chunk the new content with size limits + const newChunks = chunkText(newContent, chunkSize); + const updates: ChunkUpdate[] = []; + + // Map existing chunks for quick lookup + const existingChunksMap = new Map( + existingChunks.map((c) => [c.orderInDocument, c]) + ); + + // Track which old chunks have been processed + const processedOldChunks = new Set<number>(); + + // Process new chunks and match with old ones + let currentOrder = 0; + for (const newChunkText of newChunks) { + const oldChunk = existingChunksMap.get(currentOrder); + const newChunkHash = generateHash(newChunkText); + + if (oldChunk) { + processedOldChunks.add(currentOrder); + } + + // If the new chunk is too large, we need to split it + if (newChunkText.length > chunkSize) { + // Re-chunk this specific piece to ensure it fits + const subChunks = chunkText(newChunkText, chunkSize); + + // Add each sub-chunk as a separate update + for (let i = 0; i < subChunks.length; i++) { + const subChunk = subChunks[i]; + const subChunkHash = generateHash(subChunk); + + updates.push({ + oldChunk: i === 0 ? oldChunk : undefined, // Only use the old chunk for the first sub-chunk + newContent: subChunk, + orderInDocument: currentOrder + i, + needsUpdate: true, // Always need to update since we split the chunk + }); + } + + currentOrder += subChunks.length; + } else { + // Normal case - chunk fits within size limit + updates.push({ + oldChunk, + newContent: newChunkText, + orderInDocument: currentOrder, + needsUpdate: !oldChunk || oldChunk.contentHash !== newChunkHash, + }); + currentOrder++; + } + } + + // Handle any remaining old chunks that weren't processed + for (const [order, oldChunk] of existingChunksMap) { + if (!processedOldChunks.has(order)) { + updates.push({ + oldChunk, + orderInDocument: order, + needsUpdate: true, // Mark for deletion since it wasn't used in new content + }); + } + } + + // Sort updates by order to ensure proper sequence + return updates.sort((a, b) => a.orderInDocument - b.orderInDocument); +}; // TODO: handle errors properly here. @@ -24,16 +113,17 @@ export class ContentWorkflow extends WorkflowEntrypoint<Env, WorkflowParams> { async run(event: WorkflowEvent<WorkflowParams>, step: WorkflowStep) { // Step 0: Check if user has reached memory limit await step.do("check memory limit", async () => { - const existingMemories = await database(this.env.HYPERDRIVE.connectionString) + const existingMemories = await database( + this.env.HYPERDRIVE.connectionString + ) .select() .from(documents) .where(eq(documents.userId, event.payload.userId)); if (existingMemories.length >= 2000) { - await database(this.env.HYPERDRIVE.connectionString) - .delete(documents) - .where(eq(documents.uuid, event.payload.uuid)); - throw new NonRetryableError("You have reached the maximum limit of 2000 memories"); + throw new NonRetryableError( + "You have reached the maximum limit of 2000 memories" + ); } }); @@ -53,29 +143,59 @@ export class ContentWorkflow extends WorkflowEntrypoint<Env, WorkflowParams> { throw new NonRetryableError("The content is too big (maximum 20 pages)"); } - const chunked = await step.do("chunk content", async () => - chunkText(rawContent.contentToVectorize, 768) - ); + // Generate content hash + const contentHash = generateHash(rawContent.contentToVectorize); + + // Step 2: Check for existing document by URL + const existingDocument = await step.do( + "check existing document", + async () => { + if (!event.payload.url) return null; - // Step 2: Create the document in the database. - const document = await step.do("create document", async () => { - try { - // First check if document exists - const existingDoc = await database(this.env.HYPERDRIVE.connectionString) + console.log( + "[Workflow] Checking for existing document with URL:", + event.payload.url + ); + const docs = await database(this.env.HYPERDRIVE.connectionString) .select() .from(documents) - .where(eq(documents.uuid, event.payload.uuid)) + .where( + and( + eq(documents.userId, event.payload.userId), + eq(documents.url, event.payload.url), + sql`${documents.url} IS NOT NULL` + ) + ) .limit(1); - return await database(this.env.HYPERDRIVE.connectionString) - .insert(documents) - .values({ - userId: event.payload.userId, - type: event.payload.type, - uuid: event.payload.uuid, - ...(event.payload.url && { url: event.payload.url }), + if (docs[0]) { + console.log("[Workflow] Found existing document:", { + id: docs[0].id, + uuid: docs[0].uuid, + url: docs[0].url, + }); + } else { + console.log("[Workflow] No existing document found for URL"); + } + + return docs[0] || null; + } + ); + + // Step 3: Update or create document + const document = await step.do("update or create document", async () => { + const db = database(this.env.HYPERDRIVE.connectionString); + + if (existingDocument) { + console.log("[Workflow] Updating existing document:", { + id: existingDocument.id, + uuid: existingDocument.uuid, + }); + // Update existing document + await db + .update(documents) + .set({ title: rawContent.title, - content: rawContent.contentToSave, description: "description" in rawContent ? (rawContent.description ?? "") @@ -85,62 +205,56 @@ export class ContentWorkflow extends WorkflowEntrypoint<Env, WorkflowParams> { ? (rawContent.image ?? "") : (event.payload.prefetched?.ogImage ?? undefined), raw: rawContent.contentToVectorize, + content: rawContent.contentToSave, + contentHash, isSuccessfullyProcessed: false, updatedAt: new Date(), - ...(event.payload.createdAt && { - createdAt: new Date(event.payload.createdAt), - }), }) - .onConflictDoUpdate({ - target: documents.uuid, - set: { - title: rawContent.title, - content: rawContent.contentToSave, - description: - "description" in rawContent - ? (rawContent.description ?? "") - : (event.payload.prefetched?.description ?? undefined), - ogImage: - "image" in rawContent - ? (rawContent.image ?? "") - : (event.payload.prefetched?.ogImage ?? undefined), - raw: rawContent.contentToVectorize, - isSuccessfullyProcessed: false, - updatedAt: new Date(), - }, - }) - .returning(); - } catch (error) { - console.log("here's the error", error); - // Check if error is a unique constraint violation - if ( - error instanceof Error && - error.message.includes("document_url_user_id_idx") - ) { - // Document already exists for this user, stop workflow - await database(this.env.HYPERDRIVE.connectionString) - .delete(documents) - .where(eq(documents.uuid, event.payload.uuid)); - throw new NonRetryableError("Document already exists for this user"); - } - if ( - error instanceof Error && - error.message.includes("document_raw_user_idx") - ) { - await database(this.env.HYPERDRIVE.connectionString) - .delete(documents) - .where(eq(documents.uuid, event.payload.uuid)); - throw new NonRetryableError("The exact same document already exists"); - } - throw error; // Re-throw other errors + .where(eq(documents.id, existingDocument.id)); + console.log("[Workflow] Document updated successfully"); + + return [existingDocument]; } - }); - if (!document || document.length === 0) { - throw new Error( - "Failed to create/update document - no document returned" + console.log( + "[Workflow] Updating document with UUID:", + event.payload.uuid ); - } + // Create new document + const updated = await db + .update(documents) + .set({ + title: rawContent.title, + description: + "description" in rawContent + ? (rawContent.description ?? "") + : (event.payload.prefetched?.description ?? undefined), + ogImage: + "image" in rawContent + ? (rawContent.image ?? "") + : (event.payload.prefetched?.ogImage ?? undefined), + content: rawContent.contentToSave, + contentHash, + isSuccessfullyProcessed: false, + updatedAt: new Date(), + }) + .where(eq(documents.uuid, event.payload.uuid)) + .returning(); + console.log("[Workflow] Document update result:", { + updatedId: updated[0]?.id, + updatedUuid: updated[0]?.uuid, + }); + return updated; + }); + + // Step 4: Process content + console.log("[Workflow] Processing content for document:", { + id: document[0].id, + uuid: document[0].uuid, + }); + const chunked = await step.do("chunk content", async () => + chunkText(rawContent.contentToVectorize, 768) + ); const model = openai(this.env, this.env.OPEN_AI_API_KEY).embedding( "text-embedding-3-large", @@ -149,7 +263,7 @@ export class ContentWorkflow extends WorkflowEntrypoint<Env, WorkflowParams> { } ); - // Step 3: Create chunks from the content. + // Create embeddings for chunks const embeddings = await step.do( "create embeddings", { @@ -167,52 +281,60 @@ export class ContentWorkflow extends WorkflowEntrypoint<Env, WorkflowParams> { values: chunked, } ); - return embeddings; } ); - // Step 4: Prepare chunk data - const chunkInsertData: ChunkInsert[] = await step.do( - "prepare chunk data", - async () => - chunked.map((chunk, index) => ({ + // Step 5: Update chunks + await step.do("update chunks", async () => { + const db = database(this.env.HYPERDRIVE.connectionString); + + // Delete existing chunks if any + await db.delete(chunk).where(eq(chunk.documentId, document[0].id)); + + // Insert new chunks + const chunkInsertData: ChunkInsert[] = chunked.map( + (chunkText, index) => ({ documentId: document[0].id, - textContent: chunk, + textContent: chunkText, + contentHash: generateHash(chunkText), orderInDocument: index, embeddings: embeddings[index], - })) - ); - - console.log(chunkInsertData); + }) + ); - // Step 5: Insert chunks - if (chunkInsertData.length > 0) { - await step.do("insert chunks", async () => - database(this.env.HYPERDRIVE.connectionString).transaction( - async (trx) => { - await trx.insert(chunk).values(chunkInsertData); + if (chunkInsertData.length > 0) { + await db.transaction(async (trx) => { + for (const chunkData of chunkInsertData) { + await trx + .insert(chunk) + .values(chunkData) + .onConflictDoNothing({ target: chunk.contentHash }); } - ) - ); - } + }); + } + }); - // step 6: add content to spaces + // Step 6: Mark document as processed + await step.do("mark document as processed", async () => { + await database(this.env.HYPERDRIVE.connectionString) + .update(documents) + .set({ isSuccessfullyProcessed: true }) + .where(eq(documents.id, document[0].id)); + }); + + // Step 7: Add content to spaces if specified if (event.payload.spaces) { await step.do("add content to spaces", async () => { await database(this.env.HYPERDRIVE.connectionString).transaction( async (trx) => { - // First get the space IDs from the UUIDs const spaceIds = await trx .select({ id: spaces.id }) .from(spaces) .where(inArray(spaces.uuid, event.payload.spaces ?? [])); - if (spaceIds.length === 0) { - return; - } + if (spaceIds.length === 0) return; - // Then insert the content-space mappings using the actual space IDs await trx.insert(contentToSpace).values( spaceIds.map((space) => ({ contentId: document[0].id, @@ -223,15 +345,5 @@ export class ContentWorkflow extends WorkflowEntrypoint<Env, WorkflowParams> { ); }); } - - // Step 7: Mark the document as successfully processed - await step.do("mark document as successfully processed", async () => { - await database(this.env.HYPERDRIVE.connectionString) - .update(documents) - .set({ - isSuccessfullyProcessed: true, - }) - .where(eq(documents.id, document[0].id)); - }); } } diff --git a/apps/backend/test/routes/memories.test.ts b/apps/backend/test/routes/memories.test.ts new file mode 100644 index 00000000..c7a8f21d --- /dev/null +++ b/apps/backend/test/routes/memories.test.ts @@ -0,0 +1,247 @@ +import { describe, it, expect, beforeAll, afterAll } from "vitest"; +import { app } from "../../src"; +import type { Document } from "@supermemory/db/schema"; + +import { testClient } from "hono/testing"; +import { env, SELF } from "cloudflare:test"; + +const API_KEY = + "sm_4y1M2QIpRtKJMMfXWCqAhD_NVXOqLqOzOOuIZ1qhL1Gj0BFWFPuRlng5TPvZ1OIu1Zn1G8_xuuz59M4o4l-sAkSieZgaW09COg"; + +interface MemoryResponse { + items: Array<Document>; + total: number; +} + +interface SuccessResponse { + success: boolean; +} + +describe("Memories Routes", () => { + const client = testClient(app, env); + + describe("GET /api/memories", () => { + it("should list memories with default pagination", async () => { + const res = await client.api.memories.$get( + { query: {} }, + { + headers: { + Authorization: `Bearer ${API_KEY}`, + }, + } + ); + + expect(res.status).toBe(200); + const data = (await res.json()) as MemoryResponse; + expect(data).toHaveProperty("items"); + expect(data).toHaveProperty("total"); + expect(Array.isArray(data.items)).toBe(true); + }); + + it("should respect pagination parameters", async () => { + const res = await client.api.memories.$get( + { + query: { + start: "1", + count: "5", + }, + }, + { + headers: { + Authorization: `Bearer ${API_KEY}`, + }, + } + ); + expect(res.status).toBe(200); + const data = (await res.json()) as MemoryResponse; + expect(data.items.length).toBeLessThanOrEqual(5); + }); + + it("should filter by space ID", async () => { + const res = await client.api.memories.$get( + { + query: { + spaceId: "test-space-uuid", + }, + }, + { + headers: { + Authorization: `Bearer ${API_KEY}`, + }, + } + ); + expect(res.status).toBe(200); + const data = (await res.json()) as MemoryResponse; + expect(data).toHaveProperty("items"); + expect(data).toHaveProperty("total"); + }); + + it("should handle unauthorized space access", async () => { + const res = await client.api.memories.$get( + { + query: { + spaceId: "private-space-uuid", + }, + }, + { + headers: { + Authorization: `Bearer ${API_KEY}`, + }, + } + ); + expect(res.status).toBe(401); + }); + + it("should reject requests without API key", async () => { + const res = await client.api.memories.$get( + { query: {} }, + { + headers: { + Authorization: `Bearer ${API_KEY}`, + }, + } + ); + expect(res.status).toBe(401); + }); + + it("should respect ETag caching", async () => { + // First request to get ETag + const res1 = await client.api.memories.$get( + { query: {} }, + { + headers: { + Authorization: `Bearer ${API_KEY}`, + }, + } + ); + const etag = res1.headers.get("ETag"); + expect(etag).toBeDefined(); + + // Second request with ETag + const res2 = await client.api.memories.$get( + { query: {} }, + { + headers: { + Authorization: `Bearer ${API_KEY}`, + "If-None-Match": etag!, + }, + } + ); + expect(res2.status).toBe(304); + }); + }); + + describe("GET /api/memories/:id", () => { + let testMemoryId: string; + + beforeAll(async () => { + const res = await client.api.memories.$get( + { query: {} }, + { + headers: { + Authorization: `Bearer ${API_KEY}`, + }, + } + ); + const data = (await res.json()) as MemoryResponse; + if (data.items.length > 0) { + testMemoryId = data.items[0].uuid; + } + }); + + it("should retrieve a specific memory", async () => { + if (!testMemoryId) { + console.warn("No test memory available, skipping test"); + return; + } + + const res = await client.api.memories[":id"].$get( + { + param: { + id: testMemoryId, + }, + }, + { + headers: { + Authorization: `Bearer ${API_KEY}`, + }, + } + ); + expect(res.status).toBe(200); + const memory = await res.json(); + expect(memory).toHaveProperty("uuid", testMemoryId); + }); + + it("should reject unauthorized access", async () => { + const res = await client.api.memories[":id"].$get( + { + param: { + id: testMemoryId, + }, + }, + { + headers: { + Authorization: `Bearer ${API_KEY}`, + }, + } + ); + expect(res.status).toBe(401); + }); + }); + + describe("DELETE /api/memories/:id", () => { + let memoryId: string; + + beforeAll(async () => { + const res = await client.api.memories.$get( + { query: {} }, + { + headers: { + Authorization: `Bearer ${API_KEY}`, + }, + } + ); + const data = (await res.json()) as MemoryResponse; + if (data.items.length > 0) { + memoryId = data.items[0].uuid; + } + }); + + it("should delete a memory", async () => { + const res = await client.api.memories[":id"].$delete( + { + param: { id: memoryId }, + }, + { + headers: { Authorization: `Bearer ${API_KEY}` }, + } + ); + + expect(res.status).toBe(200); + expect(await res.json()).toEqual({ success: true }); + + // Verify deletion + const getRes = await client.api.memories[":id"].$get( + { + param: { id: memoryId }, + }, + { + headers: { Authorization: `Bearer ${API_KEY}` }, + } + ); + expect(getRes.status).toBe(404); + }); + + it("should reject unauthorized deletion", async () => { + const res = await client.api.memories[":id"].$delete( + { + param: { id: memoryId }, + }, + { + headers: { Authorization: `Bearer ${API_KEY}` }, + } + ); + expect(res.status).toBe(401); + }); + }); +}); diff --git a/apps/backend/test/setup.ts b/apps/backend/test/setup.ts new file mode 100644 index 00000000..62c993c7 --- /dev/null +++ b/apps/backend/test/setup.ts @@ -0,0 +1,32 @@ +import { Hono } from "hono"; +import type { Env } from "../src/types"; +import { describe, it, expect, beforeAll, afterAll } from "vitest"; + +// Helper to generate random test content +export function generateTestContent(length: number = 1000): string { + return Array(length) + .fill(0) + .map(() => Math.random().toString(36).charAt(2)) + .join(""); +} + +// Helper to measure response time +export async function measureResponseTime( + fn: () => Promise<Response> +): Promise<number> { + const start = performance.now(); + await fn(); + return performance.now() - start; +} + +// Common test expectations +export const expect200 = (response: Response) => + expect(response.status).toBe(200); +export const expect401 = (response: Response) => + expect(response.status).toBe(401); +export const expect403 = (response: Response) => + expect(response.status).toBe(403); +export const expect404 = (response: Response) => + expect(response.status).toBe(404); +export const expect429 = (response: Response) => + expect(response.status).toBe(429); diff --git a/apps/backend/tsconfig.json b/apps/backend/tsconfig.json index a45a87e9..54772d5f 100644 --- a/apps/backend/tsconfig.json +++ b/apps/backend/tsconfig.json @@ -8,7 +8,8 @@ "lib": ["ESNext"], "types": [ "@cloudflare/workers-types/experimental", - "@cloudflare/workers-types" + "@cloudflare/workers-types", + "@cloudflare/vitest-pool-workers" ], "jsx": "react-jsx", "jsxImportSource": "hono/jsx" diff --git a/apps/backend/vitest.config.ts b/apps/backend/vitest.config.ts new file mode 100644 index 00000000..17964ab8 --- /dev/null +++ b/apps/backend/vitest.config.ts @@ -0,0 +1,11 @@ +import { defineWorkersConfig } from "@cloudflare/vitest-pool-workers/config"; + +export default defineWorkersConfig({ + test: { + poolOptions: { + workers: { + wrangler: { configPath: "./wrangler.toml" }, + }, + } + }, +}); diff --git a/apps/backend/wrangler.toml b/apps/backend/wrangler.toml index 6b7d053d..e87b1b0a 100644 --- a/apps/backend/wrangler.toml +++ b/apps/backend/wrangler.toml @@ -19,7 +19,6 @@ name = "content-workflow-supermemory" binding = "CONTENT_WORKFLOW" class_name = "ContentWorkflow" - [[kv_namespaces]] binding= "MD_CACHE" id = "3186489f943d409a9b772d876a58a73e" diff --git a/apps/web/app/components/memories/Integrations.tsx b/apps/web/app/components/memories/Integrations.tsx index 9b01796b..2b9d7215 100644 --- a/apps/web/app/components/memories/Integrations.tsx +++ b/apps/web/app/components/memories/Integrations.tsx @@ -13,8 +13,6 @@ import { AlertCircle, BookIcon, CheckCircle, - Clipboard, - ClipboardCheckIcon, FileUpIcon, X, } from "lucide-react"; @@ -220,50 +218,66 @@ function Integrations() { </p> </div> - <div className="max-w-full bg-neutral-50 dark:bg-neutral-900 p-4 rounded-lg border border-neutral-200 dark:border-neutral-800 mb-8"> - <div className="flex items-center gap-2"> - <span className="text-sm font-medium text-neutral-600 dark:text-neutral-400"> - Your API Key - </span> - {apiKey ? ( - <button - onClick={() => { - navigator.clipboard.writeText(apiKey); - setCopied(true); - toast.success("API key copied to clipboard!"); - }} - className="flex-1 flex items-center gap-2 font-mono text-sm bg-white dark:bg-neutral-800 px-3 py-1.5 rounded group hover:bg-neutral-100 dark:hover:bg-neutral-700 transition-all overflow-hidden" + <div className="flex flex-col gap-2 mb-4"> + <div className="flex justify-end"> + <a + href="https://docs.supermemory.ai" + target="_blank" + rel="noopener noreferrer" + className="inline-flex items-center gap-1.5 px-2 py-1 text-xs font-medium text-neutral-600 dark:text-neutral-400 hover:text-neutral-900 dark:hover:text-neutral-200 transition-colors" > - <span className="flex-shrink-0 text-neutral-400 dark:text-neutral-500 group-hover:text-neutral-600 dark:group-hover:text-neutral-300 transition-all"> - {copied ? ( - <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24"> - <path - strokeLinecap="round" - strokeLinejoin="round" - strokeWidth={2} - d="M5 13l4 4L19 7" - /> - </svg> - ) : ( - <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24"> - <path - strokeLinecap="round" - strokeLinejoin="round" - strokeWidth={2} - d="M8 16H6a2 2 0 01-2-2V6a2 2 0 012-2h8a2 2 0 012 2v2m-6 12h8a2 2 0 002-2v-8a2 2 0 00-2-2h-8a2 2 0 00-2 2v8a2 2 0 002 2z" - /> - </svg> - )} - </span> - <span className="blur-sm group-hover:blur-none transition-all truncate max-w-[500px]"> - {apiKey} - </span> - </button> - ) : ( - <div className="flex-1 text-sm text-neutral-600 dark:text-neutral-400">Loading...</div> - )} + <BookIcon className="w-3 h-3" /> + API Documentation + </a> + </div> + <div className="flex items-center gap-2"> + <span className="text-sm font-medium text-neutral-600 dark:text-neutral-400"> + Your API Key + </span> + {apiKey ? ( + <button + onClick={() => { + navigator.clipboard.writeText(apiKey); + setCopied(true); + toast.success("API key copied to clipboard!"); + }} + className="flex-1 flex items-center gap-2 font-mono text-sm bg-white dark:bg-neutral-800 px-3 py-1.5 rounded group hover:bg-neutral-100 dark:hover:bg-neutral-700 transition-all overflow-hidden" + > + <span className="flex-shrink-0 text-neutral-400 dark:text-neutral-500 group-hover:text-neutral-600 dark:group-hover:text-neutral-300 transition-all"> + {copied ? ( + <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24"> + <path + strokeLinecap="round" + strokeLinejoin="round" + strokeWidth={2} + d="M5 13l4 4L19 7" + /> + </svg> + ) : ( + <svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24"> + <path + strokeLinecap="round" + strokeLinejoin="round" + strokeWidth={2} + d="M8 16H6a2 2 0 01-2-2V6a2 2 0 012-2h8a2 2 0 012 2v2m-6 12h8a2 2 0 002-2v-8a2 2 0 00-2-2h-8a2 2 0 00-2 2v8a2 2 0 002 2z" + /> + </svg> + )} + </span> + <div className="relative flex-1"> + <span className="absolute inset-0 flex items-center justify-center text-neutral-400 dark:text-neutral-500 opacity-0 group-hover:opacity-0 transition-opacity"> + Hover to reveal + </span> + <span className="blur-sm group-hover:blur-none transition-all truncate max-w-[500px]"> + {apiKey} + </span> + </div> + </button> + ) : ( + <div className="flex-1 text-sm text-neutral-600 dark:text-neutral-400">Loading...</div> + )} + </div> </div> - </div> <div className="flex flex-wrap gap-4 overflow-x-auto"> <Card diff --git a/apps/web/tsconfig.json b/apps/web/tsconfig.json index 47889f62..80362c5c 100644 --- a/apps/web/tsconfig.json +++ b/apps/web/tsconfig.json @@ -10,7 +10,7 @@ ], "compilerOptions": { "lib": ["DOM", "DOM.Iterable", "ES2022"], - "types": ["@remix-run/cloudflare", "vite/client", "@cloudflare/workers-types/2023-07-01", "chrome"], + "types": ["@remix-run/cloudflare", "vite/client", "@cloudflare/workers-types/2023-07-01", "chrome" ], "isolatedModules": true, "esModuleInterop": true, "jsx": "react-jsx", diff --git a/package.json b/package.json index 5031ce4d..53663ca4 100644 --- a/package.json +++ b/package.json @@ -78,6 +78,7 @@ "resend": "^4.0.1", "shiki": "^1.22.1", "sonner": "^1.7.0", + "vitest": "^3.0.4", "web": "^0.0.2", "wrangler": "latest" }, diff --git a/packages/db/schema.ts b/packages/db/schema.ts index a2f24b06..8ffa8992 100644 --- a/packages/db/schema.ts +++ b/packages/db/schema.ts @@ -146,6 +146,30 @@ export const documentType = pgTable("document_type", { type: text("type").primaryKey(), }); +export const processedContent = pgTable( + "processed_content", + { + id: serial("id").primaryKey(), + contentHash: text("content_hash").notNull().unique(), + content: text("content").notNull(), + isSuccessfullyProcessed: boolean("is_successfully_processed").default( + false + ), + errorMessage: text("error_message"), + createdAt: timestamp("created_at", { withTimezone: true }) + .notNull() + .defaultNow(), + updatedAt: timestamp("updated_at", { withTimezone: true }) + .notNull() + .defaultNow(), + }, + (table) => ({ + contentHashIdx: uniqueIndex("processed_content_hash_idx").on( + table.contentHash + ), + }) +); + export const documents = pgTable( "documents", { @@ -167,11 +191,11 @@ export const documents = pgTable( .notNull() .references(() => users.id, { onDelete: "cascade" }), content: text("content"), + contentHash: text("content_hash"), isSuccessfullyProcessed: boolean("is_successfully_processed").default( false ), errorMessage: text("error_message"), - contentHash: text("content_hash"), }, (document) => ({ documentsIdIdx: uniqueIndex("document_id_idx").on(document.id), @@ -181,6 +205,13 @@ export const documents = pgTable( document.raw, document.userId ), + documentContentHashIdx: index("document_content_hash_idx").on( + document.contentHash + ), + documentUrlUserIdx: uniqueIndex("document_url_user_idx").on( + document.url, + document.userId + ), }) ); @@ -213,7 +244,8 @@ export const chunk = pgTable( documentId: integer("document_id") .references(() => documents.id, { onDelete: "cascade" }) .notNull(), - textContent: text("text_content"), + textContent: text("text_content").notNull(), + contentHash: text("content_hash"), orderInDocument: integer("order_in_document").notNull(), embeddings: vector("embeddings", { dimensions: 1536 }), metadata: jsonb("metadata").$type<Metadata>(), @@ -222,11 +254,12 @@ export const chunk = pgTable( .defaultNow(), updated_at: timestamp("updated_at", { withTimezone: true }) .notNull() - .defaultNow(), // handle deletion on application layer + .defaultNow(), }, (chunk) => ({ chunkIdIdx: uniqueIndex("chunk_id_idx").on(chunk.id), chunkDocumentIdIdx: index("chunk_document_id_idx").on(chunk.documentId), + chunkContentHashIdx: index("chunk_content_hash_idx").on(chunk.contentHash), embeddingIndex: index("embeddingIndex").using( "hnsw", chunk.embeddings.op("vector_cosine_ops") |