about | summary | refs | log | tree | commit | diff
path: root/apps/backend
diff options
context:
space:
mode:
Diffstat (limited to 'apps/backend')
-rw-r--r-- apps/backend/drizzle.config.prod.ts 10
-rw-r--r-- apps/backend/drizzle.config.ts 4
-rw-r--r-- apps/backend/drizzle/0015_messy_karma.sql 16
-rw-r--r-- apps/backend/drizzle/meta/0015_snapshot.json 1322
-rw-r--r-- apps/backend/drizzle/meta/_journal.json 7
-rw-r--r-- apps/backend/package.json 12
-rw-r--r-- apps/backend/src/auth.ts 2
-rw-r--r-- apps/backend/src/index.tsx 4
-rw-r--r-- apps/backend/src/routes/actions.ts 169
-rw-r--r-- apps/backend/src/utils/extractor.ts 4
-rw-r--r-- apps/backend/src/workflow/index.ts 334
-rw-r--r-- apps/backend/test/routes/memories.test.ts 247
-rw-r--r-- apps/backend/test/setup.ts 32
-rw-r--r-- apps/backend/tsconfig.json 3
-rw-r--r-- apps/backend/vitest.config.ts 11
-rw-r--r-- apps/backend/wrangler.toml 1
16 files changed, 1997 insertions, 181 deletions
diff --git a/apps/backend/drizzle.config.prod.ts b/apps/backend/drizzle.config.prod.ts
deleted file mode 100644
index 4707bf25..00000000
--- a/apps/backend/drizzle.config.prod.ts
+++ /dev/null
@@ -1,10 +0,0 @@
-import { defineConfig } from "drizzle-kit";
-
-export default defineConfig({
- dialect: "postgresql",
- schema: "../../packages/db",
- out: "./drizzle",
- dbCredentials: {
- url: process.env.PROD_DATABASE_URL!,
- },
-});
diff --git a/apps/backend/drizzle.config.ts b/apps/backend/drizzle.config.ts
index 3f198a9f..0636e53f 100644
--- a/apps/backend/drizzle.config.ts
+++ b/apps/backend/drizzle.config.ts
@@ -6,6 +6,8 @@ config();
if (process.env.NODE_ENV !== "production" && !process.env.DATABASE_URL) {
throw new Error("DATABASE_URL is not set");
+} else if (process.env.NODE_ENV === "production" && !process.env.PROD_DATABASE_URL) {
+ throw new Error("PROD_DATABASE_URL is not set");
}
export default defineConfig({
@@ -13,6 +15,6 @@ export default defineConfig({
schema: "../../packages/db",
out: "./drizzle",
dbCredentials: {
- url: process.env.DATABASE_URL!,
+ url: process.env.NODE_ENV === "production" ? process.env.PROD_DATABASE_URL! : process.env.DATABASE_URL!,
},
});
diff --git a/apps/backend/drizzle/0015_messy_karma.sql b/apps/backend/drizzle/0015_messy_karma.sql
new file mode 100644
index 00000000..23c763f5
--- /dev/null
+++ b/apps/backend/drizzle/0015_messy_karma.sql
@@ -0,0 +1,16 @@
+CREATE TABLE IF NOT EXISTS "processed_content" (
+ "id" serial PRIMARY KEY NOT NULL,
+ "content_hash" text NOT NULL,
+ "content" text NOT NULL,
+ "is_successfully_processed" boolean DEFAULT false,
+ "error_message" text,
+ "created_at" timestamp with time zone DEFAULT now() NOT NULL,
+ "updated_at" timestamp with time zone DEFAULT now() NOT NULL,
+ CONSTRAINT "processed_content_content_hash_unique" UNIQUE("content_hash")
+);
+--> statement-breakpoint
+ALTER TABLE "chunks" ALTER COLUMN "text_content" SET NOT NULL;--> statement-breakpoint
+ALTER TABLE "chunks" ADD COLUMN "content_hash" text;--> statement-breakpoint
+CREATE UNIQUE INDEX IF NOT EXISTS "processed_content_hash_idx" ON "processed_content" USING btree ("content_hash");--> statement-breakpoint
+CREATE INDEX IF NOT EXISTS "chunk_content_hash_idx" ON "chunks" USING btree ("content_hash");--> statement-breakpoint
+CREATE INDEX IF NOT EXISTS "document_content_hash_idx" ON "documents" USING btree ("content_hash");
\ No newline at end of file
diff --git a/apps/backend/drizzle/meta/0015_snapshot.json b/apps/backend/drizzle/meta/0015_snapshot.json
new file mode 100644
index 00000000..ab108e86
--- /dev/null
+++ b/apps/backend/drizzle/meta/0015_snapshot.json
@@ -0,0 +1,1322 @@
+{
+ "id": "eee9f91b-d7a4-4dab-8ce9-30dde703e6f7",
+ "prevId": "f5cef50a-50ec-4b49-99c2-4da3a0f6a098",
+ "version": "7",
+ "dialect": "postgresql",
+ "tables": {
+ "public.chat_threads": {
+ "name": "chat_threads",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "bigserial",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "uuid": {
+ "name": "uuid",
+ "type": "varchar(36)",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "firstMessage": {
+ "name": "firstMessage",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "user_id": {
+ "name": "user_id",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "messages": {
+ "name": "messages",
+ "type": "jsonb",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {
+ "chat_threads_user_idx": {
+ "name": "chat_threads_user_idx",
+ "columns": [
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "chat_threads_user_id_users_id_fk": {
+ "name": "chat_threads_user_id_users_id_fk",
+ "tableFrom": "chat_threads",
+ "tableTo": "users",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {
+ "chat_threads_uuid_unique": {
+ "name": "chat_threads_uuid_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "uuid"
+ ]
+ }
+ }
+ },
+ "public.chunks": {
+ "name": "chunks",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "serial",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "document_id": {
+ "name": "document_id",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "text_content": {
+ "name": "text_content",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "content_hash": {
+ "name": "content_hash",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "order_in_document": {
+ "name": "order_in_document",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "embeddings": {
+ "name": "embeddings",
+ "type": "vector(1536)",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "metadata": {
+ "name": "metadata",
+ "type": "jsonb",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "updated_at": {
+ "name": "updated_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {
+ "chunk_id_idx": {
+ "name": "chunk_id_idx",
+ "columns": [
+ {
+ "expression": "id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "chunk_document_id_idx": {
+ "name": "chunk_document_id_idx",
+ "columns": [
+ {
+ "expression": "document_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "chunk_content_hash_idx": {
+ "name": "chunk_content_hash_idx",
+ "columns": [
+ {
+ "expression": "content_hash",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "embeddingIndex": {
+ "name": "embeddingIndex",
+ "columns": [
+ {
+ "expression": "embeddings",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last",
+ "opclass": "vector_cosine_ops"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "hnsw",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "chunks_document_id_documents_id_fk": {
+ "name": "chunks_document_id_documents_id_fk",
+ "tableFrom": "chunks",
+ "tableTo": "documents",
+ "columnsFrom": [
+ "document_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "public.content_to_space": {
+ "name": "content_to_space",
+ "schema": "",
+ "columns": {
+ "content_id": {
+ "name": "content_id",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "space_id": {
+ "name": "space_id",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ }
+ },
+ "indexes": {
+ "content_id_space_id_unique": {
+ "name": "content_id_space_id_unique",
+ "columns": [
+ {
+ "expression": "content_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "space_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "content_to_space_content_id_documents_id_fk": {
+ "name": "content_to_space_content_id_documents_id_fk",
+ "tableFrom": "content_to_space",
+ "tableTo": "documents",
+ "columnsFrom": [
+ "content_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "content_to_space_space_id_spaces_id_fk": {
+ "name": "content_to_space_space_id_spaces_id_fk",
+ "tableFrom": "content_to_space",
+ "tableTo": "spaces",
+ "columnsFrom": [
+ "space_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "public.document_type": {
+ "name": "document_type",
+ "schema": "",
+ "columns": {
+ "type": {
+ "name": "type",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {},
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "public.documents": {
+ "name": "documents",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "bigserial",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "uuid": {
+ "name": "uuid",
+ "type": "varchar(36)",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "url": {
+ "name": "url",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "updated_at": {
+ "name": "updated_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "type": {
+ "name": "type",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "title": {
+ "name": "title",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "description": {
+ "name": "description",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "og_image": {
+ "name": "og_image",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "raw": {
+ "name": "raw",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "user_id": {
+ "name": "user_id",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "content": {
+ "name": "content",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "content_hash": {
+ "name": "content_hash",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "is_successfully_processed": {
+ "name": "is_successfully_processed",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": false,
+ "default": false
+ },
+ "error_message": {
+ "name": "error_message",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ }
+ },
+ "indexes": {
+ "document_id_idx": {
+ "name": "document_id_idx",
+ "columns": [
+ {
+ "expression": "id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "document_uuid_idx": {
+ "name": "document_uuid_idx",
+ "columns": [
+ {
+ "expression": "uuid",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "document_type_idx": {
+ "name": "document_type_idx",
+ "columns": [
+ {
+ "expression": "type",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "document_raw_user_idx": {
+ "name": "document_raw_user_idx",
+ "columns": [
+ {
+ "expression": "raw",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "document_content_hash_idx": {
+ "name": "document_content_hash_idx",
+ "columns": [
+ {
+ "expression": "content_hash",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "documents_type_document_type_type_fk": {
+ "name": "documents_type_document_type_type_fk",
+ "tableFrom": "documents",
+ "tableTo": "document_type",
+ "columnsFrom": [
+ "type"
+ ],
+ "columnsTo": [
+ "type"
+ ],
+ "onDelete": "no action",
+ "onUpdate": "no action"
+ },
+ "documents_user_id_users_id_fk": {
+ "name": "documents_user_id_users_id_fk",
+ "tableFrom": "documents",
+ "tableTo": "users",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {
+ "documents_uuid_unique": {
+ "name": "documents_uuid_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "uuid"
+ ]
+ }
+ }
+ },
+ "public.job": {
+ "name": "job",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "serial",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "user_id": {
+ "name": "user_id",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "url": {
+ "name": "url",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "status": {
+ "name": "status",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "attempts": {
+ "name": "attempts",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "default": 0
+ },
+ "lastAttemptAt": {
+ "name": "lastAttemptAt",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "error": {
+ "name": "error",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "updated_at": {
+ "name": "updated_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": false
+ }
+ },
+ "indexes": {
+ "user_id_url_idx": {
+ "name": "user_id_url_idx",
+ "columns": [
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "url",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "job_user_id_users_id_fk": {
+ "name": "job_user_id_users_id_fk",
+ "tableFrom": "job",
+ "tableTo": "users",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "public.processed_content": {
+ "name": "processed_content",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "serial",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "content_hash": {
+ "name": "content_hash",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "content": {
+ "name": "content",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "is_successfully_processed": {
+ "name": "is_successfully_processed",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": false,
+ "default": false
+ },
+ "error_message": {
+ "name": "error_message",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "updated_at": {
+ "name": "updated_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {
+ "processed_content_hash_idx": {
+ "name": "processed_content_hash_idx",
+ "columns": [
+ {
+ "expression": "content_hash",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {},
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {
+ "processed_content_content_hash_unique": {
+ "name": "processed_content_content_hash_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "content_hash"
+ ]
+ }
+ }
+ },
+ "public.saved_spaces": {
+ "name": "saved_spaces",
+ "schema": "",
+ "columns": {
+ "user_id": {
+ "name": "user_id",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "space_id": {
+ "name": "space_id",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "saved_at": {
+ "name": "saved_at",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {
+ "saved_spaces_user_space_idx": {
+ "name": "saved_spaces_user_space_idx",
+ "columns": [
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "space_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "saved_spaces_user_id_users_id_fk": {
+ "name": "saved_spaces_user_id_users_id_fk",
+ "tableFrom": "saved_spaces",
+ "tableTo": "users",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "saved_spaces_space_id_spaces_id_fk": {
+ "name": "saved_spaces_space_id_spaces_id_fk",
+ "tableFrom": "saved_spaces",
+ "tableTo": "spaces",
+ "columnsFrom": [
+ "space_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "public.space_access": {
+ "name": "space_access",
+ "schema": "",
+ "columns": {
+ "space_id": {
+ "name": "space_id",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "user_email": {
+ "name": "user_email",
+ "type": "varchar(512)",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "status": {
+ "name": "status",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "access_type": {
+ "name": "access_type",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "'read'"
+ }
+ },
+ "indexes": {
+ "space_id_user_email_idx": {
+ "name": "space_id_user_email_idx",
+ "columns": [
+ {
+ "expression": "space_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "user_email",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "space_access_space_id_spaces_id_fk": {
+ "name": "space_access_space_id_spaces_id_fk",
+ "tableFrom": "space_access",
+ "tableTo": "spaces",
+ "columnsFrom": [
+ "space_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "cascade",
+ "onUpdate": "no action"
+ },
+ "space_access_status_space_access_status_status_fk": {
+ "name": "space_access_status_space_access_status_status_fk",
+ "tableFrom": "space_access",
+ "tableTo": "space_access_status",
+ "columnsFrom": [
+ "status"
+ ],
+ "columnsTo": [
+ "status"
+ ],
+ "onDelete": "no action",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "public.space_access_status": {
+ "name": "space_access_status",
+ "schema": "",
+ "columns": {
+ "status": {
+ "name": "status",
+ "type": "text",
+ "primaryKey": true,
+ "notNull": true
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {},
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "public.space_members": {
+ "name": "space_members",
+ "schema": "",
+ "columns": {
+ "spaceId": {
+ "name": "spaceId",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "user_id": {
+ "name": "user_id",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ }
+ },
+ "indexes": {
+ "space_members_space_user_idx": {
+ "name": "space_members_space_user_idx",
+ "columns": [
+ {
+ "expression": "spaceId",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "user_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {
+ "space_members_spaceId_users_id_fk": {
+ "name": "space_members_spaceId_users_id_fk",
+ "tableFrom": "space_members",
+ "tableTo": "users",
+ "columnsFrom": [
+ "spaceId"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "restrict",
+ "onUpdate": "no action"
+ },
+ "space_members_user_id_users_id_fk": {
+ "name": "space_members_user_id_users_id_fk",
+ "tableFrom": "space_members",
+ "tableTo": "users",
+ "columnsFrom": [
+ "user_id"
+ ],
+ "columnsTo": [
+ "id"
+ ],
+ "onDelete": "restrict",
+ "onUpdate": "no action"
+ }
+ },
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ },
+ "public.spaces": {
+ "name": "spaces",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "bigserial",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "uuid": {
+ "name": "uuid",
+ "type": "varchar(36)",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "name": {
+ "name": "name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "updated_at": {
+ "name": "updated_at",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "ownerId": {
+ "name": "ownerId",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "is_public": {
+ "name": "is_public",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": false
+ }
+ },
+ "indexes": {
+ "spaces_id_idx": {
+ "name": "spaces_id_idx",
+ "columns": [
+ {
+ "expression": "id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "spaces_owner_id_idx": {
+ "name": "spaces_owner_id_idx",
+ "columns": [
+ {
+ "expression": "ownerId",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "spaces_name_idx": {
+ "name": "spaces_name_idx",
+ "columns": [
+ {
+ "expression": "name",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {},
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {
+ "spaces_uuid_unique": {
+ "name": "spaces_uuid_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "uuid"
+ ]
+ }
+ }
+ },
+ "public.users": {
+ "name": "users",
+ "schema": "",
+ "columns": {
+ "id": {
+ "name": "id",
+ "type": "serial",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "uuid": {
+ "name": "uuid",
+ "type": "varchar(36)",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "email": {
+ "name": "email",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true
+ },
+ "first_name": {
+ "name": "first_name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "last_name": {
+ "name": "last_name",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "email_verified": {
+ "name": "email_verified",
+ "type": "boolean",
+ "primaryKey": false,
+ "notNull": true,
+ "default": false
+ },
+ "profile_picture_url": {
+ "name": "profile_picture_url",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "telegram_id": {
+ "name": "telegram_id",
+ "type": "varchar(255)",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "has_onboarded": {
+ "name": "has_onboarded",
+ "type": "integer",
+ "primaryKey": false,
+ "notNull": true,
+ "default": 0
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "updated_at": {
+ "name": "updated_at",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ },
+ "last_api_key_generated_at": {
+ "name": "last_api_key_generated_at",
+ "type": "timestamp",
+ "primaryKey": false,
+ "notNull": false,
+ "default": "now()"
+ },
+ "stripe_customer_id": {
+ "name": "stripe_customer_id",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": false
+ },
+ "tier": {
+ "name": "tier",
+ "type": "text",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "'free'"
+ }
+ },
+ "indexes": {
+ "users_id_idx": {
+ "name": "users_id_idx",
+ "columns": [
+ {
+ "expression": "id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "users_uuid_idx": {
+ "name": "users_uuid_idx",
+ "columns": [
+ {
+ "expression": "uuid",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "users_email_idx": {
+ "name": "users_email_idx",
+ "columns": [
+ {
+ "expression": "email",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "users_name_idx": {
+ "name": "users_name_idx",
+ "columns": [
+ {
+ "expression": "first_name",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ },
+ {
+ "expression": "last_name",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "users_created_at_idx": {
+ "name": "users_created_at_idx",
+ "columns": [
+ {
+ "expression": "created_at",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": false,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ },
+ "users_telegram_id_idx": {
+ "name": "users_telegram_id_idx",
+ "columns": [
+ {
+ "expression": "telegram_id",
+ "isExpression": false,
+ "asc": true,
+ "nulls": "last"
+ }
+ ],
+ "isUnique": true,
+ "concurrently": false,
+ "method": "btree",
+ "with": {}
+ }
+ },
+ "foreignKeys": {},
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {
+ "users_uuid_unique": {
+ "name": "users_uuid_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "uuid"
+ ]
+ },
+ "users_email_unique": {
+ "name": "users_email_unique",
+ "nullsNotDistinct": false,
+ "columns": [
+ "email"
+ ]
+ }
+ }
+ },
+ "public.waitlist": {
+ "name": "waitlist",
+ "schema": "",
+ "columns": {
+ "email": {
+ "name": "email",
+ "type": "varchar(512)",
+ "primaryKey": true,
+ "notNull": true
+ },
+ "created_at": {
+ "name": "created_at",
+ "type": "timestamp with time zone",
+ "primaryKey": false,
+ "notNull": true,
+ "default": "now()"
+ }
+ },
+ "indexes": {},
+ "foreignKeys": {},
+ "compositePrimaryKeys": {},
+ "uniqueConstraints": {}
+ }
+ },
+ "enums": {},
+ "schemas": {},
+ "sequences": {},
+ "_meta": {
+ "columns": {},
+ "schemas": {},
+ "tables": {}
+ }
+}
\ No newline at end of file
diff --git a/apps/backend/drizzle/meta/_journal.json b/apps/backend/drizzle/meta/_journal.json
index 2ef7aaa9..a7927929 100644
--- a/apps/backend/drizzle/meta/_journal.json
+++ b/apps/backend/drizzle/meta/_journal.json
@@ -106,6 +106,13 @@
"when": 1736852938881,
"tag": "0014_mighty_the_captain",
"breakpoints": true
+ },
+ {
+ "idx": 15,
+ "version": "7",
+ "when": 1737670732529,
+ "tag": "0015_messy_karma",
+ "breakpoints": true
}
]
}
\ No newline at end of file
diff --git a/apps/backend/package.json b/apps/backend/package.json
index 7b961b8d..81ba67c5 100644
--- a/apps/backend/package.json
+++ b/apps/backend/package.json
@@ -1,15 +1,19 @@
{
- "name": "supermemory-backend",
+ "name": "@supermemory/backend",
+ "type": "module",
"scripts": {
"dev": "bunx wrangler -v && wrangler dev",
"deploy": "bunx wrangler deploy --minify",
"generate-migration": "dotenv -- npx drizzle-kit generate",
"migrate:local": "bun run ./scripts/migrate.ts",
- "tail": "bunx wrangler tail"
+ "tail": "bunx wrangler tail",
+ "test": "vitest",
+ "test:watch": "vitest watch"
},
"dependencies": {
"@ai-sdk/google": "^0.0.51",
"@ai-sdk/openai": "^0.0.70",
+ "@cloudflare/vitest-pool-workers": "^0.6.6",
"@hono/zod-validator": "^0.4.1",
"@supermemory/db": "workspace:*",
"ai": "4.0.16",
@@ -21,10 +25,12 @@
"openai": "^4.68.4",
"postgres": "^3.4.4",
"uuid": "^11.0.1",
+ "vitest": "2.1.8",
"zod": "^3.23.8"
},
"devDependencies": {
- "@cloudflare/workers-types": "^4.20240925.0"
+ "@cloudflare/workers-types": "^4.20240925.0",
+ "vitest-environment-miniflare": "^2.14.4"
},
"overrides": {
"iron-webcrypto": "^1.2.1"
diff --git a/apps/backend/src/auth.ts b/apps/backend/src/auth.ts
index b66bba39..206624d3 100644
--- a/apps/backend/src/auth.ts
+++ b/apps/backend/src/auth.ts
@@ -93,7 +93,6 @@ export const auth = async (
};
const session = await getSessionFromRequest(c.req.raw, context);
- console.log("Session", session);
c.set("session", session);
if (session?.user?.id) {
@@ -129,7 +128,6 @@ export const auth = async (
user = Array.isArray(user) ? user[0] : user;
c.set("user", user);
- console.log("User", user);
}
}
}
diff --git a/apps/backend/src/index.tsx b/apps/backend/src/index.tsx
index cb63e53e..dea96448 100644
--- a/apps/backend/src/index.tsx
+++ b/apps/backend/src/index.tsx
@@ -48,6 +48,10 @@ export const app = new Hono<{ Variables: Variables; Bindings: Env }>()
.use("/api/*", (c, next) => {
const user = c.get("user");
+ if (c.env.NODE_ENV === "development") {
+ return next();
+ }
+
// RATELIMITS
const rateLimitConfig = {
// Endpoints that bypass rate limiting
diff --git a/apps/backend/src/routes/actions.ts b/apps/backend/src/routes/actions.ts
index 4de1d339..28124347 100644
--- a/apps/backend/src/routes/actions.ts
+++ b/apps/backend/src/routes/actions.ts
@@ -717,40 +717,6 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>()
const db = database(c.env.HYPERDRIVE.connectionString);
- // Calculate document hash early to enable faster duplicate detection
- const content = body.prefetched?.contentToVectorize || body.content;
- const encoder = new TextEncoder();
- const data = encoder.encode(content);
- const hashBuffer = await crypto.subtle.digest("SHA-256", data);
- const hashArray = Array.from(new Uint8Array(hashBuffer));
- const documentHash = hashArray
- .map((b) => b.toString(16).padStart(2, "0"))
- .join("");
-
- // Check for duplicates using hash
- const existingDocs = await db
- .select()
- .from(documents)
- .where(
- and(
- eq(documents.userId, user.id),
- or(
- eq(documents.contentHash, documentHash),
- and(
- eq(documents.type, type.value),
- or(eq(documents.url, body.content), eq(documents.raw, content))
- )
- )
- )
- );
-
- if (existingDocs.length > 0) {
- return c.json(
- { error: `That ${type.value} already exists in your memories` },
- 409
- );
- }
-
// Check space permissions if spaces are specified
if (body.spaces && body.spaces.length > 0) {
const spacePermissions = await Promise.all(
@@ -828,38 +794,90 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>()
? body.content
: `https://supermemory.ai/content/${contentId}`;
- // Insert into documents table with hash
+ // Insert minimal document record
try {
- await db.insert(documents).values({
- uuid: contentId,
- userId: user.id,
- type: type.value,
+ // Check if document with same URL exists
+ console.log(
+ "[Add] Checking for existing document with URL:",
+ indexedUrl
+ );
+ const existingDoc = await db
+ .select()
+ .from(documents)
+ .where(
+ and(
+ eq(documents.userId, user.id),
+ eq(documents.url, indexedUrl),
+ sql`${documents.url} IS NOT NULL`
+ )
+ )
+ .limit(1);
+
+ let documentId = contentId;
+
+ if (existingDoc.length > 0) {
+ console.log("[Add] Found existing document:", {
+ id: existingDoc[0].id,
+ uuid: existingDoc[0].uuid,
+ url: existingDoc[0].url,
+ });
+ documentId = existingDoc[0].uuid;
+ // Update the raw content of existing document
+ console.log("[Add] Updating existing document content");
+ await db
+ .update(documents)
+ .set({
+ raw:
+ (body.prefetched ?? body.content) +
+ "\n\n" +
+ body.spaces?.join(" "),
+ updatedAt: new Date(),
+ })
+ .where(eq(documents.id, existingDoc[0].id));
+ console.log("[Add] Document updated successfully");
+ } else {
+ console.log("[Add] No existing document found, creating new one");
+ // Insert new document
+ await db.insert(documents).values({
+ uuid: contentId,
+ userId: user.id,
+ type: type.value,
+ url: indexedUrl,
+ raw:
+ (body.prefetched ?? body.content) +
+ "\n\n" +
+ body.spaces?.join(" "),
+ });
+ console.log("[Add] New document created successfully");
+ }
+
+ console.log("[Add] Starting workflow with params:", {
+ documentId,
url: indexedUrl,
- title: body.prefetched?.title,
- description: body.prefetched?.description,
- ogImage: body.prefetched?.ogImage,
- contentHash: documentHash,
- raw:
- (body.prefetched ?? body.content) + "\n\n" + body.spaces?.join(" "),
+ isUpdate: existingDoc.length > 0,
});
-
+ // Start the workflow which will handle everything else
await c.env.CONTENT_WORKFLOW.create({
params: {
userId: user.id,
content: body.content,
spaces: body.spaces,
type: type.value,
- uuid: contentId,
+ uuid: documentId,
url: indexedUrl,
prefetched: body.prefetched,
},
- id: contentId,
+ id: documentId,
});
return c.json({
- message: "Content added successfully",
- id: contentId,
+ message:
+ existingDoc.length > 0
+ ? "Content update started"
+ : "Content processing started",
+ id: documentId,
type: type.value,
+ updated: existingDoc.length > 0,
});
} catch (error) {
console.error("[Add Content Error]", error);
@@ -1054,6 +1072,57 @@ const actions = new Hono<{ Variables: Variables; Bindings: Env }>()
? content
: `https://supermemory.ai/content/${contentId}`;
+ // Check for existing document with same URL
+ const existingDoc = await db
+ .select()
+ .from(documents)
+ .where(
+ and(
+ eq(documents.userId, user.id),
+ eq(documents.url, url),
+ sql`${documents.url} IS NOT NULL`
+ )
+ )
+ .limit(1);
+
+ if (existingDoc.length > 0) {
+ // Update existing document
+ await db
+ .update(documents)
+ .set({
+ title,
+ contentHash: documentHash,
+ raw: content + "\n\n" + spaces?.join(" "),
+ updatedAt: new Date(),
+ })
+ .where(eq(documents.id, existingDoc[0].id));
+
+ // Create workflow for updating
+ await c.env.CONTENT_WORKFLOW.create({
+ params: {
+ userId: user.id,
+ content,
+ spaces,
+ type: type.value,
+ uuid: existingDoc[0].uuid,
+ url,
+ },
+ id: existingDoc[0].uuid,
+ });
+
+ succeeded++;
+ sendMessage({
+ progress: Math.round((processed / total) * 100),
+ status: "updated",
+ title: typeof item === "string" ? item : item.title,
+ processed,
+ total,
+ succeeded,
+ failed,
+ });
+ continue;
+ }
+
// Insert into documents table
await db.insert(documents).values({
uuid: contentId,
diff --git a/apps/backend/src/utils/extractor.ts b/apps/backend/src/utils/extractor.ts
index 9bf76181..8201cd28 100644
--- a/apps/backend/src/utils/extractor.ts
+++ b/apps/backend/src/utils/extractor.ts
@@ -2,7 +2,7 @@ import { Env } from "../types";
export const extractPageContent = async (content: string, env: Env) => {
console.log("content", content);
- const resp = await fetch(`https://md.dhr.wtf?url=${content}`);
+ const resp = await fetch(`https://md.dhr.wtf?url=${content}?nocache`);
if (!resp.ok) {
throw new Error(
@@ -10,7 +10,7 @@ export const extractPageContent = async (content: string, env: Env) => {
);
}
- const metadataResp = await fetch(`https://md.dhr.wtf/metadata?url=${content}`);
+ const metadataResp = await fetch(`https://md.dhr.wtf/metadata?url=${content}?nocache`);
if (!metadataResp.ok) {
throw new Error(
diff --git a/apps/backend/src/workflow/index.ts b/apps/backend/src/workflow/index.ts
index 41c73015..fe1f4143 100644
--- a/apps/backend/src/workflow/index.ts
+++ b/apps/backend/src/workflow/index.ts
@@ -6,17 +6,106 @@ import {
import { Env, WorkflowParams } from "../types";
import { fetchContent } from "../utils/fetchers";
import chunkText from "../utils/chunkers";
-import { database, eq, inArray } from "@supermemory/db";
+import { database, eq, inArray, and, or, sql } from "@supermemory/db";
import {
ChunkInsert,
contentToSpace,
documents,
spaces,
+ chunk,
+ Document,
} from "@supermemory/db/schema";
import { embedMany } from "ai";
import { openai } from "../providers";
-import { chunk } from "@supermemory/db/schema";
import { NonRetryableError } from "cloudflare:workflows";
+import { createHash } from "crypto";
+
+/**
+ * Produces the SHA-256 digest of `content`, encoded as a hex string.
+ */
+const generateHash = (content: string) => {
+  const hasher = createHash("sha256");
+  hasher.update(content);
+  return hasher.digest("hex");
+};
+
+/**
+ * One entry of the plan returned by analyzeContentChanges: what should
+ * happen at a single chunk position of a document.
+ */
+interface ChunkUpdate {
+ // Previously stored chunk paired with this position, when there is one.
+ oldChunk?: typeof chunk.$inferSelect;
+ // Text for this position; left unset on entries emitted for stored chunks
+ // that no new chunk was paired with.
+ newContent?: string;
+ // Position of the chunk within the document.
+ orderInDocument: number;
+ // True when there is no old chunk, the hashes differ, the chunk came from a
+ // split, or the entry represents a leftover stored chunk.
+ needsUpdate: boolean;
+}
+
+/**
+ * Compares freshly chunked content against the chunks already stored for a
+ * document and returns one ChunkUpdate per chunk position.
+ *
+ * New chunks are paired with stored chunks by `orderInDocument`. A new chunk
+ * longer than `chunkSize` is re-chunked; only its first piece keeps the
+ * paired stored chunk and every piece is flagged `needsUpdate`. Stored chunks
+ * that were never paired are appended as entries without `newContent`.
+ *
+ * NOTE(review): `oldContent` is accepted but never read; it is kept so the
+ * signature stays unchanged.
+ *
+ * @param oldContent - Previous document text (currently unused).
+ * @param newContent - New document text to chunk.
+ * @param existingChunks - Chunk rows currently stored for the document.
+ * @param chunkSize - Size limit passed to chunkText (default 768).
+ * @returns The update entries sorted by ascending `orderInDocument`.
+ */
+const analyzeContentChanges = async (
+  oldContent: string,
+  newContent: string,
+  existingChunks: (typeof chunk.$inferSelect)[],
+  chunkSize: number = 768
+): Promise<ChunkUpdate[]> => {
+  const newChunks = chunkText(newContent, chunkSize);
+  const updates: ChunkUpdate[] = [];
+
+  // Index stored chunks by position for constant-time pairing.
+  const existingChunksMap = new Map(
+    existingChunks.map((c) => [c.orderInDocument, c])
+  );
+
+  // Positions whose stored chunk has been paired with a new chunk.
+  const processedOldChunks = new Set<number>();
+
+  let currentOrder = 0;
+  for (const newChunkText of newChunks) {
+    const oldChunk = existingChunksMap.get(currentOrder);
+    if (oldChunk) {
+      processedOldChunks.add(currentOrder);
+    }
+
+    if (newChunkText.length > chunkSize) {
+      // Oversized chunk: split it again so every piece respects the limit.
+      const subChunks = chunkText(newChunkText, chunkSize);
+
+      subChunks.forEach((subChunk, i) => {
+        updates.push({
+          // Only the first piece inherits the stored chunk at this position.
+          oldChunk: i === 0 ? oldChunk : undefined,
+          newContent: subChunk,
+          orderInDocument: currentOrder + i,
+          // A split always changes the stored layout, so no hash is needed.
+          needsUpdate: true,
+        });
+      });
+
+      currentOrder += subChunks.length;
+    } else {
+      updates.push({
+        oldChunk,
+        newContent: newChunkText,
+        orderInDocument: currentOrder,
+        // Hash only when there is a stored chunk to compare against.
+        needsUpdate:
+          !oldChunk || oldChunk.contentHash !== generateHash(newChunkText),
+      });
+      currentOrder++;
+    }
+  }
+
+  // Stored chunks that no new chunk was paired with.
+  for (const [order, oldChunk] of existingChunksMap) {
+    if (!processedOldChunks.has(order)) {
+      updates.push({
+        oldChunk,
+        orderInDocument: order,
+        needsUpdate: true,
+      });
+    }
+  }
+
+  // Sort updates by order to ensure proper sequence.
+  return updates.sort((a, b) => a.orderInDocument - b.orderInDocument);
+};
// TODO: handle errors properly here.
@@ -24,16 +113,17 @@ export class ContentWorkflow extends WorkflowEntrypoint<Env, WorkflowParams> {
async run(event: WorkflowEvent<WorkflowParams>, step: WorkflowStep) {
// Step 0: Check if user has reached memory limit
await step.do("check memory limit", async () => {
- const existingMemories = await database(this.env.HYPERDRIVE.connectionString)
+ const existingMemories = await database(
+ this.env.HYPERDRIVE.connectionString
+ )
.select()
.from(documents)
.where(eq(documents.userId, event.payload.userId));
if (existingMemories.length >= 2000) {
- await database(this.env.HYPERDRIVE.connectionString)
- .delete(documents)
- .where(eq(documents.uuid, event.payload.uuid));
- throw new NonRetryableError("You have reached the maximum limit of 2000 memories");
+ throw new NonRetryableError(
+ "You have reached the maximum limit of 2000 memories"
+ );
}
});
@@ -53,29 +143,59 @@ export class ContentWorkflow extends WorkflowEntrypoint<Env, WorkflowParams> {
throw new NonRetryableError("The content is too big (maximum 20 pages)");
}
- const chunked = await step.do("chunk content", async () =>
- chunkText(rawContent.contentToVectorize, 768)
- );
+ // Generate content hash
+ const contentHash = generateHash(rawContent.contentToVectorize);
+
+ // Step 2: Check for existing document by URL
+ const existingDocument = await step.do(
+ "check existing document",
+ async () => {
+ if (!event.payload.url) return null;
- // Step 2: Create the document in the database.
- const document = await step.do("create document", async () => {
- try {
- // First check if document exists
- const existingDoc = await database(this.env.HYPERDRIVE.connectionString)
+ console.log(
+ "[Workflow] Checking for existing document with URL:",
+ event.payload.url
+ );
+ const docs = await database(this.env.HYPERDRIVE.connectionString)
.select()
.from(documents)
- .where(eq(documents.uuid, event.payload.uuid))
+ .where(
+ and(
+ eq(documents.userId, event.payload.userId),
+ eq(documents.url, event.payload.url),
+ sql`${documents.url} IS NOT NULL`
+ )
+ )
.limit(1);
- return await database(this.env.HYPERDRIVE.connectionString)
- .insert(documents)
- .values({
- userId: event.payload.userId,
- type: event.payload.type,
- uuid: event.payload.uuid,
- ...(event.payload.url && { url: event.payload.url }),
+ if (docs[0]) {
+ console.log("[Workflow] Found existing document:", {
+ id: docs[0].id,
+ uuid: docs[0].uuid,
+ url: docs[0].url,
+ });
+ } else {
+ console.log("[Workflow] No existing document found for URL");
+ }
+
+ return docs[0] || null;
+ }
+ );
+
+ // Step 3: Update or create document
+ const document = await step.do("update or create document", async () => {
+ const db = database(this.env.HYPERDRIVE.connectionString);
+
+ if (existingDocument) {
+ console.log("[Workflow] Updating existing document:", {
+ id: existingDocument.id,
+ uuid: existingDocument.uuid,
+ });
+ // Update existing document
+ await db
+ .update(documents)
+ .set({
title: rawContent.title,
- content: rawContent.contentToSave,
description:
"description" in rawContent
? (rawContent.description ?? "")
@@ -85,62 +205,56 @@ export class ContentWorkflow extends WorkflowEntrypoint<Env, WorkflowParams> {
? (rawContent.image ?? "")
: (event.payload.prefetched?.ogImage ?? undefined),
raw: rawContent.contentToVectorize,
+ content: rawContent.contentToSave,
+ contentHash,
isSuccessfullyProcessed: false,
updatedAt: new Date(),
- ...(event.payload.createdAt && {
- createdAt: new Date(event.payload.createdAt),
- }),
})
- .onConflictDoUpdate({
- target: documents.uuid,
- set: {
- title: rawContent.title,
- content: rawContent.contentToSave,
- description:
- "description" in rawContent
- ? (rawContent.description ?? "")
- : (event.payload.prefetched?.description ?? undefined),
- ogImage:
- "image" in rawContent
- ? (rawContent.image ?? "")
- : (event.payload.prefetched?.ogImage ?? undefined),
- raw: rawContent.contentToVectorize,
- isSuccessfullyProcessed: false,
- updatedAt: new Date(),
- },
- })
- .returning();
- } catch (error) {
- console.log("here's the error", error);
- // Check if error is a unique constraint violation
- if (
- error instanceof Error &&
- error.message.includes("document_url_user_id_idx")
- ) {
- // Document already exists for this user, stop workflow
- await database(this.env.HYPERDRIVE.connectionString)
- .delete(documents)
- .where(eq(documents.uuid, event.payload.uuid));
- throw new NonRetryableError("Document already exists for this user");
- }
- if (
- error instanceof Error &&
- error.message.includes("document_raw_user_idx")
- ) {
- await database(this.env.HYPERDRIVE.connectionString)
- .delete(documents)
- .where(eq(documents.uuid, event.payload.uuid));
- throw new NonRetryableError("The exact same document already exists");
- }
- throw error; // Re-throw other errors
+ .where(eq(documents.id, existingDocument.id));
+ console.log("[Workflow] Document updated successfully");
+
+ return [existingDocument];
}
- });
- if (!document || document.length === 0) {
- throw new Error(
- "Failed to create/update document - no document returned"
+ console.log(
+ "[Workflow] Updating document with UUID:",
+ event.payload.uuid
);
- }
+ // Create new document
+ const updated = await db
+ .update(documents)
+ .set({
+ title: rawContent.title,
+ description:
+ "description" in rawContent
+ ? (rawContent.description ?? "")
+ : (event.payload.prefetched?.description ?? undefined),
+ ogImage:
+ "image" in rawContent
+ ? (rawContent.image ?? "")
+ : (event.payload.prefetched?.ogImage ?? undefined),
+ content: rawContent.contentToSave,
+ contentHash,
+ isSuccessfullyProcessed: false,
+ updatedAt: new Date(),
+ })
+ .where(eq(documents.uuid, event.payload.uuid))
+ .returning();
+ console.log("[Workflow] Document update result:", {
+ updatedId: updated[0]?.id,
+ updatedUuid: updated[0]?.uuid,
+ });
+ return updated;
+ });
+
+ // Step 4: Process content
+ console.log("[Workflow] Processing content for document:", {
+ id: document[0].id,
+ uuid: document[0].uuid,
+ });
+ const chunked = await step.do("chunk content", async () =>
+ chunkText(rawContent.contentToVectorize, 768)
+ );
const model = openai(this.env, this.env.OPEN_AI_API_KEY).embedding(
"text-embedding-3-large",
@@ -149,7 +263,7 @@ export class ContentWorkflow extends WorkflowEntrypoint<Env, WorkflowParams> {
}
);
- // Step 3: Create chunks from the content.
+ // Create embeddings for chunks
const embeddings = await step.do(
"create embeddings",
{
@@ -167,52 +281,60 @@ export class ContentWorkflow extends WorkflowEntrypoint<Env, WorkflowParams> {
values: chunked,
}
);
-
return embeddings;
}
);
- // Step 4: Prepare chunk data
- const chunkInsertData: ChunkInsert[] = await step.do(
- "prepare chunk data",
- async () =>
- chunked.map((chunk, index) => ({
+ // Step 5: Update chunks
+ await step.do("update chunks", async () => {
+ const db = database(this.env.HYPERDRIVE.connectionString);
+
+ // Delete existing chunks if any
+ await db.delete(chunk).where(eq(chunk.documentId, document[0].id));
+
+ // Insert new chunks
+ const chunkInsertData: ChunkInsert[] = chunked.map(
+ (chunkText, index) => ({
documentId: document[0].id,
- textContent: chunk,
+ textContent: chunkText,
+ contentHash: generateHash(chunkText),
orderInDocument: index,
embeddings: embeddings[index],
- }))
- );
-
- console.log(chunkInsertData);
+ })
+ );
- // Step 5: Insert chunks
- if (chunkInsertData.length > 0) {
- await step.do("insert chunks", async () =>
- database(this.env.HYPERDRIVE.connectionString).transaction(
- async (trx) => {
- await trx.insert(chunk).values(chunkInsertData);
+ if (chunkInsertData.length > 0) {
+ await db.transaction(async (trx) => {
+ for (const chunkData of chunkInsertData) {
+ await trx
+ .insert(chunk)
+ .values(chunkData)
+ .onConflictDoNothing({ target: chunk.contentHash });
}
- )
- );
- }
+ });
+ }
+ });
- // step 6: add content to spaces
+ // Step 6: Mark document as processed
+ await step.do("mark document as processed", async () => {
+ await database(this.env.HYPERDRIVE.connectionString)
+ .update(documents)
+ .set({ isSuccessfullyProcessed: true })
+ .where(eq(documents.id, document[0].id));
+ });
+
+ // Step 7: Add content to spaces if specified
if (event.payload.spaces) {
await step.do("add content to spaces", async () => {
await database(this.env.HYPERDRIVE.connectionString).transaction(
async (trx) => {
- // First get the space IDs from the UUIDs
const spaceIds = await trx
.select({ id: spaces.id })
.from(spaces)
.where(inArray(spaces.uuid, event.payload.spaces ?? []));
- if (spaceIds.length === 0) {
- return;
- }
+ if (spaceIds.length === 0) return;
- // Then insert the content-space mappings using the actual space IDs
await trx.insert(contentToSpace).values(
spaceIds.map((space) => ({
contentId: document[0].id,
@@ -223,15 +345,5 @@ export class ContentWorkflow extends WorkflowEntrypoint<Env, WorkflowParams> {
);
});
}
-
- // Step 7: Mark the document as successfully processed
- await step.do("mark document as successfully processed", async () => {
- await database(this.env.HYPERDRIVE.connectionString)
- .update(documents)
- .set({
- isSuccessfullyProcessed: true,
- })
- .where(eq(documents.id, document[0].id));
- });
}
}
diff --git a/apps/backend/test/routes/memories.test.ts b/apps/backend/test/routes/memories.test.ts
new file mode 100644
index 00000000..c7a8f21d
--- /dev/null
+++ b/apps/backend/test/routes/memories.test.ts
@@ -0,0 +1,247 @@
+import { describe, it, expect, beforeAll, afterAll } from "vitest";
+import { app } from "../../src";
+import type { Document } from "@supermemory/db/schema";
+
+import { testClient } from "hono/testing";
+import { env, SELF } from "cloudflare:test";
+
+// Bearer credential sent in the Authorization header by every test below.
+// NOTE(review): this is a hard-coded key committed with the tests - confirm
+// it is a throwaway fixture; if it is a real key, rotate it and load it from
+// the test environment instead.
+const API_KEY =
+ "sm_4y1M2QIpRtKJMMfXWCqAhD_NVXOqLqOzOOuIZ1qhL1Gj0BFWFPuRlng5TPvZ1OIu1Zn1G8_xuuz59M4o4l-sAkSieZgaW09COg";
+
+// Shape the tests cast the GET /api/memories JSON body to.
+interface MemoryResponse {
+ items: Array<Document>;
+ total: number;
+}
+
+// Declared for success payloads; not referenced by any test in this file.
+interface SuccessResponse {
+ success: boolean;
+}
+
+/**
+ * Route tests for /api/memories, driven through Hono's typed test client
+ * against the Workers test environment.
+ *
+ * Authenticated calls pass `authed`; the "reject" cases deliberately send no
+ * Authorization header so they exercise the anonymous path rather than
+ * repeating the authenticated request.
+ */
+describe("Memories Routes", () => {
+  const client = testClient(app, env);
+
+  // Request options carrying the test credential.
+  const authed = { headers: { Authorization: `Bearer ${API_KEY}` } };
+
+  // Placeholder id for anonymous requests when no fixture memory exists.
+  const fallbackId = "missing-memory-id";
+
+  describe("GET /api/memories", () => {
+    it("should list memories with default pagination", async () => {
+      const res = await client.api.memories.$get({ query: {} }, authed);
+
+      expect(res.status).toBe(200);
+      const data = (await res.json()) as MemoryResponse;
+      expect(data).toHaveProperty("items");
+      expect(data).toHaveProperty("total");
+      expect(Array.isArray(data.items)).toBe(true);
+    });
+
+    it("should respect pagination parameters", async () => {
+      const res = await client.api.memories.$get(
+        { query: { start: "1", count: "5" } },
+        authed
+      );
+      expect(res.status).toBe(200);
+      const data = (await res.json()) as MemoryResponse;
+      expect(data.items.length).toBeLessThanOrEqual(5);
+    });
+
+    it("should filter by space ID", async () => {
+      const res = await client.api.memories.$get(
+        { query: { spaceId: "test-space-uuid" } },
+        authed
+      );
+      expect(res.status).toBe(200);
+      const data = (await res.json()) as MemoryResponse;
+      expect(data).toHaveProperty("items");
+      expect(data).toHaveProperty("total");
+    });
+
+    it("should handle unauthorized space access", async () => {
+      const res = await client.api.memories.$get(
+        { query: { spaceId: "private-space-uuid" } },
+        authed
+      );
+      expect(res.status).toBe(401);
+    });
+
+    it("should reject requests without API key", async () => {
+      // No Authorization header: this must differ from the authenticated
+      // listing request above, which expects 200.
+      const res = await client.api.memories.$get({ query: {} });
+      expect(res.status).toBe(401);
+    });
+
+    it("should respect ETag caching", async () => {
+      // First request to get ETag
+      const res1 = await client.api.memories.$get({ query: {} }, authed);
+      const etag = res1.headers.get("ETag");
+      // Headers.get() returns null for a missing header, which would still
+      // satisfy toBeDefined(); assert against null explicitly.
+      expect(etag).not.toBeNull();
+
+      // Second request with ETag
+      const res2 = await client.api.memories.$get(
+        { query: {} },
+        {
+          headers: {
+            Authorization: `Bearer ${API_KEY}`,
+            "If-None-Match": etag ?? "",
+          },
+        }
+      );
+      expect(res2.status).toBe(304);
+    });
+  });
+
+  describe("GET /api/memories/:id", () => {
+    let testMemoryId: string | undefined;
+
+    beforeAll(async () => {
+      const res = await client.api.memories.$get({ query: {} }, authed);
+      const data = (await res.json()) as MemoryResponse;
+      if (data.items.length > 0) {
+        testMemoryId = data.items[0].uuid;
+      }
+    });
+
+    it("should retrieve a specific memory", async () => {
+      if (!testMemoryId) {
+        console.warn("No test memory available, skipping test");
+        return;
+      }
+
+      const res = await client.api.memories[":id"].$get(
+        { param: { id: testMemoryId } },
+        authed
+      );
+      expect(res.status).toBe(200);
+      const memory = await res.json();
+      expect(memory).toHaveProperty("uuid", testMemoryId);
+    });
+
+    it("should reject unauthorized access", async () => {
+      // Anonymous request: no Authorization header is sent.
+      const res = await client.api.memories[":id"].$get({
+        param: { id: testMemoryId ?? fallbackId },
+      });
+      expect(res.status).toBe(401);
+    });
+  });
+
+  describe("DELETE /api/memories/:id", () => {
+    let memoryId: string | undefined;
+
+    beforeAll(async () => {
+      const res = await client.api.memories.$get({ query: {} }, authed);
+      const data = (await res.json()) as MemoryResponse;
+      if (data.items.length > 0) {
+        memoryId = data.items[0].uuid;
+      }
+    });
+
+    it("should delete a memory", async () => {
+      // Same guard as the retrieve test: nothing to delete without a fixture.
+      if (!memoryId) {
+        console.warn("No test memory available, skipping test");
+        return;
+      }
+
+      const res = await client.api.memories[":id"].$delete(
+        { param: { id: memoryId } },
+        authed
+      );
+
+      expect(res.status).toBe(200);
+      expect(await res.json()).toEqual({ success: true });
+
+      // Verify deletion
+      const getRes = await client.api.memories[":id"].$get(
+        { param: { id: memoryId } },
+        authed
+      );
+      expect(getRes.status).toBe(404);
+    });
+
+    it("should reject unauthorized deletion", async () => {
+      // Anonymous request: no Authorization header is sent.
+      const res = await client.api.memories[":id"].$delete({
+        param: { id: memoryId ?? fallbackId },
+      });
+      expect(res.status).toBe(401);
+    });
+  });
+});
diff --git a/apps/backend/test/setup.ts b/apps/backend/test/setup.ts
new file mode 100644
index 00000000..62c993c7
--- /dev/null
+++ b/apps/backend/test/setup.ts
@@ -0,0 +1,32 @@
+import { Hono } from "hono";
+import type { Env } from "../src/types";
+import { describe, it, expect, beforeAll, afterAll } from "vitest";
+
+/**
+ * Builds a random lowercase alphanumeric string for use as test content.
+ *
+ * Characters are picked by index from a fixed alphabet. Taking the third
+ * character of `Math.random().toString(36)` can yield an empty string (for
+ * example when the random value is 0), which made the result shorter than
+ * requested.
+ *
+ * @param length - Number of characters to produce (default 1000).
+ * @returns A string of exactly `length` characters drawn from 0-9 and a-z.
+ */
+export function generateTestContent(length: number = 1000): string {
+  const alphabet = "0123456789abcdefghijklmnopqrstuvwxyz";
+  return Array.from({ length }, () =>
+    alphabet.charAt(Math.floor(Math.random() * alphabet.length))
+  ).join("");
+}
+
+/**
+ * Times a single invocation of `fn`.
+ *
+ * @param fn - Async callback producing a Response; its result is discarded.
+ * @returns Elapsed time between two `performance.now()` readings taken
+ *   before and after `fn` settles.
+ */
+export async function measureResponseTime(
+  fn: () => Promise<Response>
+): Promise<number> {
+  const startedAt = performance.now();
+  await fn();
+  const finishedAt = performance.now();
+  return finishedAt - startedAt;
+}
+
+// Common test expectations: each exported helper asserts that a response
+// carries one specific HTTP status code.
+const expectStatus = (status: number) => (response: Response) =>
+  expect(response.status).toBe(status);
+
+export const expect200 = expectStatus(200);
+export const expect401 = expectStatus(401);
+export const expect403 = expectStatus(403);
+export const expect404 = expectStatus(404);
+export const expect429 = expectStatus(429);
diff --git a/apps/backend/tsconfig.json b/apps/backend/tsconfig.json
index a45a87e9..54772d5f 100644
--- a/apps/backend/tsconfig.json
+++ b/apps/backend/tsconfig.json
@@ -8,7 +8,8 @@
"lib": ["ESNext"],
"types": [
"@cloudflare/workers-types/experimental",
- "@cloudflare/workers-types"
+ "@cloudflare/workers-types",
+ "@cloudflare/vitest-pool-workers"
],
"jsx": "react-jsx",
"jsxImportSource": "hono/jsx"
diff --git a/apps/backend/vitest.config.ts b/apps/backend/vitest.config.ts
new file mode 100644
index 00000000..17964ab8
--- /dev/null
+++ b/apps/backend/vitest.config.ts
@@ -0,0 +1,11 @@
+import { defineWorkersConfig } from "@cloudflare/vitest-pool-workers/config";
+
+// Vitest configuration built with the Workers pool helper; the pool's
+// wrangler option is pointed at this app's wrangler.toml.
+export default defineWorkersConfig({
+ test: {
+ poolOptions: {
+ workers: {
+ wrangler: { configPath: "./wrangler.toml" },
+ },
+ }
+ },
+});
diff --git a/apps/backend/wrangler.toml b/apps/backend/wrangler.toml
index 6b7d053d..e87b1b0a 100644
--- a/apps/backend/wrangler.toml
+++ b/apps/backend/wrangler.toml
@@ -19,7 +19,6 @@ name = "content-workflow-supermemory"
binding = "CONTENT_WORKFLOW"
class_name = "ContentWorkflow"
-
[[kv_namespaces]]
binding= "MD_CACHE"
id = "3186489f943d409a9b772d876a58a73e"