aboutsummaryrefslogtreecommitdiff
path: root/apps/backend/src
diff options
context:
space:
mode:
authorMahesh Sanikommmu <[email protected]>2025-08-16 18:50:10 -0700
committerMahesh Sanikommmu <[email protected]>2025-08-16 18:50:10 -0700
commit39003aff23d64ff1d96074d71521f6023c9bec01 (patch)
tree3f870c04b3dce315bba1b21aa2da158494e71774 /apps/backend/src
parentMerge pull request #355 from supermemoryai/archive (diff)
downloadsupermemory-39003aff23d64ff1d96074d71521f6023c9bec01.tar.xz
supermemory-39003aff23d64ff1d96074d71521f6023c9bec01.zip
New Version of Supermemory Consumer App
Diffstat (limited to 'apps/backend/src')
-rw-r--r--apps/backend/src/auth.ts151
-rw-r--r--apps/backend/src/components/landing.tsx234
-rw-r--r--apps/backend/src/errors/baseError.ts45
-rw-r--r--apps/backend/src/errors/results.ts31
-rw-r--r--apps/backend/src/globals.css3
-rw-r--r--apps/backend/src/index.tsx255
-rw-r--r--apps/backend/src/providers.ts19
-rw-r--r--apps/backend/src/routes/actions.ts1171
-rw-r--r--apps/backend/src/routes/integrations.ts180
-rw-r--r--apps/backend/src/routes/memories.ts293
-rw-r--r--apps/backend/src/routes/spaces.ts709
-rw-r--r--apps/backend/src/routes/user.ts193
-rw-r--r--apps/backend/src/types.ts79
-rw-r--r--apps/backend/src/utils/chunkers.ts116
-rw-r--r--apps/backend/src/utils/cipher.ts79
-rw-r--r--apps/backend/src/utils/extractDocumentContent.ts87
-rw-r--r--apps/backend/src/utils/extractor.ts50
-rw-r--r--apps/backend/src/utils/fetchers.ts143
-rw-r--r--apps/backend/src/utils/notion.ts239
-rw-r--r--apps/backend/src/utils/tweetsToThreads.ts108
-rw-r--r--apps/backend/src/utils/typeDecider.ts41
-rw-r--r--apps/backend/src/workflow/index.ts217
22 files changed, 0 insertions, 4443 deletions
diff --git a/apps/backend/src/auth.ts b/apps/backend/src/auth.ts
deleted file mode 100644
index 0ceeb0c1..00000000
--- a/apps/backend/src/auth.ts
+++ /dev/null
@@ -1,151 +0,0 @@
-import { Context, Next } from "hono";
-import { getSessionFromRequest } from "@supermemory/authkit-remix-cloudflare/src/session";
-import { and, database, eq, sql } from "@supermemory/db";
-import { User, users } from "@supermemory/db/schema";
-import { Env, Variables } from "./types";
-import { encrypt, decrypt } from "./utils/cipher";
-
-interface EncryptedData {
- userId: string;
- lastApiKeyGeneratedAt: string;
-}
-
-export const getApiKey = async (
- userId: string,
- lastApiKeyGeneratedAt: string,
- c: Context<{ Variables: Variables; Bindings: Env }>
-) => {
- const data = `${userId}-${lastApiKeyGeneratedAt}`;
- return "sm_" + (await encrypt(data, c.env.WORKOS_COOKIE_PASSWORD));
-};
-
-export const decryptApiKey = async (
- encryptedKey: string,
- c: Context<{ Variables: Variables; Bindings: Env }>
-): Promise<EncryptedData> => {
- const ourKey = encryptedKey.slice(3);
- const decrypted = await decrypt(ourKey, c.env.WORKOS_COOKIE_PASSWORD);
- const [userId, lastApiKeyGeneratedAt] = decrypted.split("-");
-
- return {
- userId,
- lastApiKeyGeneratedAt,
- };
-};
-
-export const auth = async (
- c: Context<{ Variables: Variables; Bindings: Env }>,
- next: Next
-) => {
- // Handle CORS preflight requests
- if (c.req.method === "OPTIONS") {
- return next()
- }
-
- // Set cache control headers
- c.header("Cache-Control", "private, no-cache, no-store, must-revalidate");
- c.header("Pragma", "no-cache");
- c.header("Expires", "0");
-
- let user: User | User[] | undefined;
-
- // Check for API key authentication first
- const authHeader = c.req.raw.headers.get("Authorization");
- if (authHeader?.startsWith("Bearer ")) {
- const apiKey = authHeader.slice(7);
- try {
- const { userId, lastApiKeyGeneratedAt } = await decryptApiKey(apiKey, c);
-
- // Look up user with matching id and lastApiKeyGeneratedAt
- user = await database(c.env.HYPERDRIVE.connectionString)
- .select()
- .from(users)
- .where(
- and(
- eq(users.uuid, userId)
- )
- )
- .limit(1);
-
- if (user && Array.isArray(user)) {
- user = user[0];
- if (user && user.lastApiKeyGeneratedAt?.getTime() === Number(lastApiKeyGeneratedAt)) {
- c.set("user", user);
- } else {
- return c.json({ error: "Invalid API key - user not found" }, 401);
- }
- }
- } catch (err) {
- console.error("API key authentication failed:", err);
- return c.json({ error: "Invalid API key format" }, 401);
- }
- }
-
- // If no user found via API key, try cookie authentication
- if (!user) {
- const cookies = c.req.raw.headers.get("Cookie");
- if (cookies) {
- // Fake remix context object. this just works.
- const context = {
- cloudflare: {
- env: c.env,
- },
- };
-
- const session = await getSessionFromRequest(c.req.raw, context);
- console.log("Session", session);
- c.set("session", session);
-
- if (session?.user?.id) {
- user = await database(c.env.HYPERDRIVE.connectionString)
- .select()
- .from(users)
- .where(eq(users.uuid, session.user.id))
- .limit(1);
-
- if ((!user || user.length === 0) && session?.user?.id) {
- const newUser = await database(c.env.HYPERDRIVE.connectionString)
- .insert(users)
- .values({
- uuid: session.user?.id,
- email: session.user?.email,
- firstName: session.user?.firstName,
- lastName: session.user?.lastName,
- createdAt: new Date(),
- updatedAt: new Date(),
- emailVerified: false,
- profilePictureUrl: session.user?.profilePictureUrl ?? "",
- })
- .returning()
- .onConflictDoUpdate({
- target: [users.email],
- set: {
- uuid: session.user.id,
- },
- });
-
- user = newUser[0];
- }
-
- user = Array.isArray(user) ? user[0] : user;
- c.set("user", user);
- console.log("User", user);
- }
- }
- }
-
- // Check if request requires authentication
- const isPublicSpaceRequest =
- c.req.url.includes("/v1/spaces/") || c.req.url.includes("/v1/memories");
-
- if (!isPublicSpaceRequest && !c.get("user")) {
- console.log("Unauthorized access to", c.req.url);
- if (authHeader) {
- return c.json({ error: "Invalid authentication credentials" }, 401);
- } else {
- return c.json({ error: "Authentication required" }, 401);
- }
- }
-
- return next();
-};
diff --git a/apps/backend/src/components/landing.tsx b/apps/backend/src/components/landing.tsx
deleted file mode 100644
index 87bbdb69..00000000
--- a/apps/backend/src/components/landing.tsx
+++ /dev/null
@@ -1,234 +0,0 @@
-import { html } from "hono/html";
-
-export function LandingPage() {
- return (
- <html lang="en">
- <head>
- <meta charset="UTF-8" />
- <meta name="viewport" content="width=device-width, initial-scale=1.0" />
- <link href="/output.css" rel="stylesheet" />
- <title>Supermemory API</title>
- </head>
- <body>
- <div className="gradient-dark">
- <header className="bg-gray-900/50 dot-pattern">
- <nav className="container mx-auto px-6 py-4">
- <div className="flex items-center justify-between">
- <div className="text-2xl font-bold text-white">
- Supermemory API
- </div>
- <div className="hidden md:flex space-x-8">
- <a
- href="#features"
- className="text-gray-300 hover:text-white"
- >
- Features
- </a>
- <a
- href="https://docs.supermemory.ai/"
- target="_blank"
- className="text-gray-300 hover:text-white"
- rel="noreferrer"
- >
- Documentation
- </a>
- </div>
- <a
- href="https://docs.supermemory.ai/"
- target="_blank"
- className="bg-blue-600 text-white px-6 py-2 rounded-lg hover:bg-blue-700"
- rel="noreferrer"
- >
- Get Started
- </a>
- </div>
- </nav>
-
- <div className="container mx-auto px-6 py-16 text-center">
- <h1 className="text-4xl font-bold text-white mb-4">
- The Modern API for Knowledge Management
- </h1>
- <p className="text-xl text-gray-300 mb-8">
- Build powerful search and AI applications with our flexible,
- production-ready API
- </p>
- <div className="flex justify-center space-x-4">
- <a
- href="https://docs.supermemory.ai/"
- target="_blank"
- className="bg-blue-600 text-white px-8 py-3 rounded-lg hover:bg-blue-700"
- rel="noreferrer"
- >
- Get Started Free
- </a>
- <a
- href="https://docs.supermemory.ai/"
- target="_blank"
- className="border border-gray-600 text-gray-300 px-8 py-3 rounded-lg hover:bg-gray-700"
- rel="noreferrer"
- >
- View Docs
- </a>
- </div>
- </div>
- </header>
-
- <section id="features" className="py-20 bg-gray-900/50 dot-pattern">
- <div className="container mx-auto px-6">
- <h2 className="text-3xl font-bold text-center text-white mb-12">
- Key Features
- </h2>
- <div className="grid md:grid-cols-3 gap-8">
- <div className="p-6 border border-gray-600 rounded-lg bg-gray-900 hover:bg-gray-800 transition duration-300">
- <h3 className="text-xl font-semibold mb-4 text-white">
- Battle-Tested RAG Stack
- </h3>
- <p className="text-gray-300">
- Production-ready retrieval augmented generation architecture
- for reliable and scalable information retrieval.
- </p>
- </div>
- <div className="p-6 border border-gray-600 rounded-lg bg-gray-900 hover:bg-gray-800 transition duration-300">
- <h3 className="text-xl font-semibold mb-4 text-white">
- Flexible LLM Integration
- </h3>
- <p className="text-gray-300">
- Use any LLM of your choice or operate in search-only mode
- for maximum flexibility and control.
- </p>
- </div>
- <div className="p-6 border border-gray-600 rounded-lg bg-gray-900 hover:bg-gray-800 transition duration-300">
- <h3 className="text-xl font-semibold mb-4 text-white">
- Advanced Access Control
- </h3>
- <p className="text-gray-300">
- Comprehensive collection filtering and permission management
- for secure data access.
- </p>
- </div>
- <div className="p-6 border border-gray-600 rounded-lg bg-gray-900 hover:bg-gray-800 transition duration-300">
- <h3 className="text-xl font-semibold mb-4 text-white">
- Seamless Data Import
- </h3>
- <p className="text-gray-300">
- Magic link import and platform synchronization for
- effortless data integration.
- </p>
- </div>
- <div className="p-6 border border-gray-600 rounded-lg bg-gray-900 hover:bg-gray-800 transition duration-300">
- <h3 className="text-xl font-semibold mb-4 text-white">
- Real-time Monitoring
- </h3>
- <p className="text-gray-300">
- Track and analyze memory usage patterns in real-time with
- detailed metrics.
- </p>
- </div>
- <div className="p-6 border border-gray-600 rounded-lg bg-gray-900 hover:bg-gray-800 transition duration-300">
- <h3 className="text-xl font-semibold mb-4 text-white">
- Easy Integration
- </h3>
- <p className="text-gray-300">
- Simple API endpoints that integrate seamlessly with your
- existing infrastructure.
- </p>
- </div>
- </div>
- </div>
- </section>
-
- <footer className="bg-black/50 dot-pattern">
- <div className="container mx-auto px-6">
- <div className="grid md:grid-cols-4 gap-8">
- <div>
- <h4 className="text-lg font-semibold mb-4">
- Supermemory API
- </h4>
- <p className="text-gray-400">
- Making memory management simple and efficient for developers
- worldwide.
- </p>
- </div>
- <div>
- <h4 className="text-lg font-semibold mb-4">Product</h4>
- <ul className="space-y-2 text-gray-400">
- <li>
- <a href="#features" className="hover:text-white">
- Features
- </a>
- </li>
- <li>
- <a
- href="https://docs.supermemory.ai/"
- target="_blank"
- className="hover:text-white"
- rel="noreferrer"
- >
- Documentation
- </a>
- </li>
- </ul>
- </div>
- <div>
- <h4 className="text-lg font-semibold mb-4">Connect</h4>
- <ul className="space-y-2 text-gray-400">
- <li>
- <a
- href="https://x.com/supermemoryai"
- target="_blank"
- className="hover:text-white"
- rel="noreferrer"
- >
- X (formerly Twitter)
- </a>
- </li>
- <li>
- <a
- href="https://github.com/supermemoryai"
- target="_blank"
- className="hover:text-white"
- rel="noreferrer"
- >
- GitHub
- </a>
- </li>
- <li>
- <a
- href="https://discord.gg/b3BgKWpbtR"
- target="_blank"
- className="hover:text-white"
- rel="noreferrer"
- >
- Discord
- </a>
- </li>
- </ul>
- </div>
- </div>
- <div className="border-t border-gray-800 mt-8 pt-8 text-center text-gray-400">
- <p>&copy; 2024 Supermemory API. All rights reserved.</p>
- </div>
- </div>
- </footer>
-
- <style
- dangerouslySetInnerHTML={{
- __html: `
- .dot-pattern {
- background-image: radial-gradient(
- rgba(255, 255, 255, 0.1) 1px,
- transparent 1px
- );
- background-size: 24px 24px;
- }
- .gradient-dark {
- background: linear-gradient(to bottom right, rgb(17 24 39), rgb(0 0 0));
- }
- `,
- }}
- />
- </div>
- </body>
- </html>
- );
-}
diff --git a/apps/backend/src/errors/baseError.ts b/apps/backend/src/errors/baseError.ts
deleted file mode 100644
index bccc54df..00000000
--- a/apps/backend/src/errors/baseError.ts
+++ /dev/null
@@ -1,45 +0,0 @@
-export class BaseHttpError extends Error {
- public status: number;
- public message: string;
-
- constructor(status: number, message: string) {
- super(message);
- this.status = status;
- this.message = message;
- Object.setPrototypeOf(this, new.target.prototype); // Restore prototype chain
- }
- }
-
-
- export class BaseError extends Error {
- type: string;
- message: string;
- source: string;
- ignoreLog: boolean;
-
- constructor(
- type: string,
- message?: string,
- source?: string,
- ignoreLog = false
- ) {
- super();
-
- Object.setPrototypeOf(this, new.target.prototype);
-
- this.type = type;
- this.message =
- message ??
- "An unknown error occurred. If this persists, please contact us.";
- this.source = source ?? "unspecified";
- this.ignoreLog = ignoreLog;
- }
-
- toJSON(): Record<PropertyKey, string> {
- return {
- type: this.type,
- message: this.message,
- source: this.source,
- };
- }
- } \ No newline at end of file
diff --git a/apps/backend/src/errors/results.ts b/apps/backend/src/errors/results.ts
deleted file mode 100644
index c5ab115b..00000000
--- a/apps/backend/src/errors/results.ts
+++ /dev/null
@@ -1,31 +0,0 @@
-import { BaseError } from "./baseError";
-
-export type Result<T, E extends Error> =
- | { ok: true; value: T }
- | { ok: false; error: E };
-
-export const Ok = <T>(data: T): Result<T, never> => {
- return { ok: true, value: data };
-};
-
-export const Err = <E extends BaseError>(error: E): Result<never, E> => {
- return { ok: false, error };
-};
-
-export async function wrap<T, E extends BaseError>(
- p: Promise<T>,
- errorFactory: (err: Error, source: string) => E,
- source: string = "unspecified"
- ): Promise<Result<T, E>> {
- try {
- return Ok(await p);
- } catch (e) {
- return Err(errorFactory(e as Error, source));
- }
- }
-
-export function isErr<T, E extends Error>(
- result: Result<T, E>,
-): result is { ok: false; error: E } {
- return !result.ok;
-} \ No newline at end of file
diff --git a/apps/backend/src/globals.css b/apps/backend/src/globals.css
deleted file mode 100644
index b5c61c95..00000000
--- a/apps/backend/src/globals.css
+++ /dev/null
@@ -1,3 +0,0 @@
-@tailwind base;
-@tailwind components;
-@tailwind utilities;
diff --git a/apps/backend/src/index.tsx b/apps/backend/src/index.tsx
deleted file mode 100644
index 4ccbb4c0..00000000
--- a/apps/backend/src/index.tsx
+++ /dev/null
@@ -1,255 +0,0 @@
-import { z } from "zod";
-import { type Context, Hono } from "hono";
-import { auth } from "./auth";
-import { logger } from "hono/logger";
-import { timing } from "hono/timing";
-import type { Env, Variables } from "./types";
-import { zValidator } from "@hono/zod-validator";
-import { database } from "@supermemory/db";
-import { waitlist } from "@supermemory/db/schema";
-import { cors } from "hono/cors";
-import { ContentWorkflow } from "./workflow";
-import { Resend } from "resend";
-import { LandingPage } from "./components/landing";
-import user from "./routes/user";
-import spacesRoute from "./routes/spaces";
-import actions from "./routes/actions";
-import memories from "./routes/memories";
-import integrations from "./routes/integrations";
-import { fromHono } from "chanfana";
-import {
- DurableObjectRateLimiter,
- DurableObjectStore,
-} from "@hono-rate-limiter/cloudflare";
-import type { ConfigType, GeneralConfigType, rateLimiter } from "hono-rate-limiter";
-
-// Create base Hono app first
-const honoApp = new Hono<{ Variables: Variables; Bindings: Env }>();
-
-const app = fromHono(honoApp);
-
-// Add all middleware and routes
-app.use("*", timing());
-app.use("*", logger());
-app.use(
- "*",
- cors({
- origin: [
- "http://localhost:3000",
- "https://supermemory.ai",
- "https://*.supermemory.ai",
- "https://*.supermemory.com",
- "https://supermemory.com",
- "chrome-extension://*",
- ],
- allowHeaders: ["*"],
- allowMethods: ["*"],
- credentials: true,
- exposeHeaders: ["*"],
- })
-);
-
-app.use("/v1/*", auth);
-app.use("/v1/*", (c, next) => {
- const user = c.get("user");
-
- if (c.env.NODE_ENV === "development") {
- return next();
- }
-
- // RATELIMITS
- const rateLimitConfig = {
- // Endpoints that bypass rate limiting
- excludedPaths: [
- "/v1/add",
- "/v1/chat",
- "/v1/suggested-learnings",
- "/v1/recommended-questions",
- ] as (string | RegExp)[],
-
- // Custom rate limits for specific endpoints
- customLimits: {
- notionImport: {
- paths: ["/v1/integrations/notion/import", "/v1/integrations/notion"],
- windowMs: 10 * 60 * 1000, // 10 minutes
- limit: 5, // 5 requests per 10 minutes
- },
- inviteSpace: {
- paths: [/^\v1\/spaces\/[^/]+\/invite$/],
- windowMs: 60 * 1000, // 1 minute
- limit: 5, // 5 requests per minute
- },
- } as Record<
- string,
- { paths: (string | RegExp)[]; windowMs: number; limit: number }
- >,
-
- default: {
- windowMs: 60 * 1000, // 1 minute
- limit: 100, // 100 requests per minute
- },
-
- common: {
- standardHeaders: "draft-6",
- keyGenerator: (c: Context) =>
- `${user?.uuid ?? c.req.header("cf-connecting-ip")}-${new Date().getDate()}`, // day so that limit gets reset every day
- store: new DurableObjectStore({ namespace: c.env.RATE_LIMITER }),
- } as GeneralConfigType<ConfigType>,
- };
-
- if (
- c.req.path &&
- rateLimitConfig.excludedPaths.some((path) =>
- typeof path === "string" ? c.req.path === path : path.test(c.req.path)
- )
- ) {
- return next();
- }
-
- // Check for custom rate limits
- for (const [_, config] of Object.entries(rateLimitConfig.customLimits)) {
- if (
- config.paths.some((path) =>
- typeof path === "string" ? c.req.path === path : path.test(c.req.path)
- )
- ) {
- return rateLimiter({
- windowMs: config.windowMs,
- limit: config.limit,
- ...rateLimitConfig.common,
- })(c as any, next);
- }
- }
-
- // Apply default rate limit
- return rateLimiter({
- windowMs: rateLimitConfig.default.windowMs,
- limit: rateLimitConfig.default.limit,
- ...rateLimitConfig.common,
- })(c as any, next);
-});
-
-app.get("/", (c) => {
- return c.html(<LandingPage />);
-});
-
-// TEMPORARY REDIRECT
-app.all("/api/*", async (c) => {
- // Get the full URL and path
- const url = new URL(c.req.url);
- const path = url.pathname;
- const newPath = path.replace("/api", "/v1");
-
- // Preserve query parameters and build target URL
- const redirectUrl = `https://api.supermemory.ai${newPath}${url.search}`;
-
- // Use c.redirect() for a proper redirect
- return c.redirect(redirectUrl);
-});
-
-app.route("/v1/user", user);
-app.route("/v1/spaces", spacesRoute);
-app.route("/v1", actions);
-app.route("/v1/integrations", integrations);
-app.route("/v1/memories", memories);
-
-app.get("/v1/session", (c) => {
- const user = c.get("user");
-
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- return c.json({
- user,
- });
-});
-
-app.post(
- "/waitlist",
- zValidator(
- "json",
- z.object({ email: z.string().email(), token: z.string() })
- ),
- async (c) => {
- const { email, token } = c.req.valid("json");
-
- const address = c.req.raw.headers.get("CF-Connecting-IP");
-
- const idempotencyKey = crypto.randomUUID();
- const url = "https://challenges.cloudflare.com/turnstile/v0/siteverify";
- const firstResult = await fetch(url, {
- body: JSON.stringify({
- secret: c.env.TURNSTILE_SECRET_KEY,
- response: token,
- remoteip: address,
- idempotency_key: idempotencyKey,
- }),
- method: "POST",
- headers: {
- "Content-Type": "application/json",
- },
- });
-
- const firstOutcome = (await firstResult.json()) as { success: boolean };
-
- if (!firstOutcome.success) {
- console.info("Turnstile verification failed", firstOutcome);
- return c.json(
- { error: "Turnstile verification failed" },
- 439 as StatusCode
- );
- }
-
- const resend = new Resend(c.env.RESEND_API_KEY);
-
- const db = database(c.env.HYPERDRIVE.connectionString);
-
- const ip =
- c.req.header("cf-connecting-ip") ||
- `${c.req.raw.cf?.asn}-${c.req.raw.cf?.country}-${c.req.raw.cf?.city}-${c.req.raw.cf?.region}-${c.req.raw.cf?.postalCode}`;
-
- const { success } = await c.env.EMAIL_LIMITER.limit({ key: ip });
-
- if (!success) {
- return c.json({ error: "Rate limit exceeded" }, 429);
- }
-
- const message = `Supermemory started as a side project a few months ago when I built it as a hackathon project.
- <br></br>
- you guys loved it too much. like wayy too much. it was embarrassing, because this was not it - it was nothing but a hackathon project.
- <br></br>
- I launched on github too. <a href="https://git.new/memory">https://github.com/supermemoryai/supermemory</a>, and we were somehow one of the fastest growing open source repositories in Q3 2024.
- <br></br><br></br>
- So, it's time to make this good. My vision is to make supermemory the best memory tool on the internet.
- `;
-
- try {
- await db.insert(waitlist).values({ email });
- await resend.emails.send({
- from: "Dhravya From Supermemory <[email protected]>",
- to: email,
- subject: "You're in the waitlist - A personal note from Dhravya",
- html: `<p>Hi. I'm Dhravya. I'm building Supermemory to help people remember everything.<br></br> ${message} <br></br><br></br>I'll be in touch when we launch! Till then, just reply to this email if you wanna talk :)<br></br>If you want to follow me on X, here's my handle: <a href='https://x.com/dhravyashah'>@dhravyashah</a><br></br><br></br>- Dhravya</p>`,
- });
- } catch (e) {
- console.error(e);
- return c.json({ error: "Failed to add to waitlist" }, 400);
- }
-
- return c.json({ success: true });
- }
-);
-
-app.onError((err, c) => {
- console.error(err);
- return c.json({ error: "Internal server error" }, 500);
-});
-
-export default {
- fetch: app.fetch,
-};
-
-export { ContentWorkflow, DurableObjectRateLimiter };
-
-export type AppType = typeof app;
diff --git a/apps/backend/src/providers.ts b/apps/backend/src/providers.ts
deleted file mode 100644
index ed9644a3..00000000
--- a/apps/backend/src/providers.ts
+++ /dev/null
@@ -1,19 +0,0 @@
-import { createOpenAI, OpenAIProvider } from "@ai-sdk/openai";
-import { createGoogleGenerativeAI } from "@ai-sdk/google";
-import { Env } from "./types";
-
-export function openai(
- env: Env,
- apiKey?: string
-): ReturnType<typeof createOpenAI> {
- return createOpenAI({
- apiKey: apiKey || env.OPEN_AI_API_KEY,
- baseURL: "https://gateway.ai.cloudflare.com/v1/47c2b4d598af9d423c06fc9f936226d5/supermemory/openai"
- });
-}
-
-export function google(securityKey: string) {
- return createGoogleGenerativeAI({
- apiKey: securityKey,
- });
-}
diff --git a/apps/backend/src/routes/actions.ts b/apps/backend/src/routes/actions.ts
deleted file mode 100644
index 581e7787..00000000
--- a/apps/backend/src/routes/actions.ts
+++ /dev/null
@@ -1,1171 +0,0 @@
-import { Hono } from "hono";
-import { Variables, Env, recommendedQuestionsSchema } from "../types";
-import { zValidator } from "@hono/zod-validator";
-import { z } from "zod";
-import {
- AISDKError,
- convertToCoreMessages,
- embed,
- generateObject,
- InvalidPromptError,
- Message,
- smoothStream,
- StreamData,
- streamText,
- TextPart,
-} from "ai";
-import {
- chatThreads,
- documents,
- chunk,
- spaces as spaceInDb,
- spaceAccess,
- type Space,
- contentToSpace,
-} from "@supermemory/db/schema";
-import { google, openai } from "../providers";
-import { randomId } from "@supermemory/shared";
-import {
- and,
- cosineDistance,
- database,
- desc,
- eq,
- exists,
- inArray,
- or,
- sql,
-} from "@supermemory/db";
-import { typeDecider } from "../utils/typeDecider";
-import { isErr, Ok } from "../errors/results";
-import { fromHono } from "chanfana";
-
-const actions = fromHono(new Hono<{ Variables: Variables; Bindings: Env }>())
- .post(
- "/chat",
- zValidator(
- "json",
- z.object({
- messages: z.array(z.any()).min(1, "At least one message is required"),
- threadId: z.string().optional(),
- })
- ),
- async (c) => {
- const user = c.get("user");
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- const { messages, threadId } = await c.req.valid("json");
-
- const unfilteredCoreMessages = convertToCoreMessages(
- (messages as Message[])
- .filter((m) => m.content.length > 0)
- .map((m) => ({
- ...m,
- content:
- m.content +
- (m.annotations
- ? `<context>${JSON.stringify(m.annotations)}</context>`
- : ""),
- experimental_attachments:
- m.experimental_attachments?.length &&
- m.experimental_attachments?.length > 0
- ? m.experimental_attachments
- : (m.data as { files: [] })?.files,
- }))
- );
-
- const coreMessages = unfilteredCoreMessages.filter(
- (message) => message.content.length > 0
- );
-
- const db = database(c.env.HYPERDRIVE.connectionString);
- const { initLogger, wrapAISDKModel } = await import("braintrust");
-
- // Initialize clients and loggers
- const logger = initLogger({
- projectName: "supermemory",
- apiKey: c.env.BRAINTRUST_API_KEY,
- });
-
- const googleClient = wrapAISDKModel(
- openai(c.env).chat("gpt-4o-mini-2024-07-18")
- );
-
- // Get last user message and generate embedding in parallel with thread creation
- let lastUserMessage = coreMessages.findLast((i) => i.role === "user");
- const queryText =
- typeof lastUserMessage?.content === "string"
- ? lastUserMessage.content
- : lastUserMessage?.content.map((c) => (c as TextPart).text).join("");
-
- if (!queryText || queryText.length === 0) {
- return c.json({ error: "Empty query" }, 400);
- }
-
- // Run embedding generation and thread creation in parallel
- const [{ data: embedding }, thread] = await Promise.all([
- c.env.AI.run("@cf/baai/bge-base-en-v1.5", { text: queryText }),
- !threadId
- ? db
- .insert(chatThreads)
- .values({
- firstMessage: messages[0].content,
- userId: user.id,
- uuid: randomId(),
- messages: coreMessages,
- })
- .returning()
- : null,
- ]);
-
- const threadUuid = threadId || thread?.[0].uuid;
-
- if (!embedding) {
- return c.json({ error: "Failed to generate embedding" }, 500);
- }
-
- try {
- const data = new StreamData();
-
- // Pre-compute the vector similarity expression
- const vectorSimilarity = sql<number>`1 - (embeddings <=> ${JSON.stringify(embedding[0])}::vector)`;
-
- // Get matching chunks with document info
- const matchingChunks = await db
- .select({
- chunkId: chunk.id,
- documentId: chunk.documentId,
- textContent: chunk.textContent,
- orderInDocument: chunk.orderInDocument,
- metadata: chunk.metadata,
- similarity: vectorSimilarity,
- // Document fields
- docId: documents.id,
- docUuid: documents.uuid,
- docContent: documents.content,
- docType: documents.type,
- docUrl: documents.url,
- docTitle: documents.title,
- docDescription: documents.description,
- docOgImage: documents.ogImage,
- })
- .from(chunk)
- .innerJoin(documents, eq(chunk.documentId, documents.id))
- .where(
- and(eq(documents.userId, user.id), sql`${vectorSimilarity} > 0.3`)
- )
- .orderBy(desc(vectorSimilarity))
- .limit(25);
-
- // Get unique document IDs from matching chunks
- const uniqueDocIds = [
- ...new Set(matchingChunks.map((c) => c.documentId)),
- ];
-
- // Fetch all chunks for these documents to get context
- const contextChunks = await db
- .select({
- id: chunk.id,
- documentId: chunk.documentId,
- textContent: chunk.textContent,
- orderInDocument: chunk.orderInDocument,
- metadata: chunk.metadata,
- })
- .from(chunk)
- .where(inArray(chunk.documentId, uniqueDocIds))
- .orderBy(chunk.documentId, chunk.orderInDocument);
-
- // Group chunks by document
- const chunksByDocument = new Map<number, typeof contextChunks>();
- for (const chunk of contextChunks) {
- const docChunks = chunksByDocument.get(chunk.documentId) || [];
- docChunks.push(chunk);
- chunksByDocument.set(chunk.documentId, docChunks);
- }
-
- // Create context with surrounding chunks
- const contextualResults = matchingChunks.map((match) => {
- const docChunks = chunksByDocument.get(match.documentId) || [];
- const matchIndex = docChunks.findIndex((c) => c.id === match.chunkId);
-
- // Get surrounding chunks (2 before and 2 after for more context)
- const start = Math.max(0, matchIndex - 2);
- const end = Math.min(docChunks.length, matchIndex + 3);
- const relevantChunks = docChunks.slice(start, end);
-
- return {
- id: match.docId,
- title: match.docTitle,
- description: match.docDescription,
- url: match.docUrl,
- type: match.docType,
- content: relevantChunks.map((c) => c.textContent).join("\n"),
- similarity: Number(match.similarity.toFixed(4)),
- chunks: relevantChunks.map((c) => ({
- id: c.id,
- content: c.textContent,
- orderInDocument: c.orderInDocument,
- metadata: c.metadata,
- isMatch: c.id === match.chunkId,
- })),
- };
- });
-
- // Sort by similarity and take top results
- const topResults = contextualResults
- .sort((a, b) => b.similarity - a.similarity)
- .slice(0, 10);
-
- data.appendMessageAnnotation(topResults);
-
- if (lastUserMessage) {
- lastUserMessage.content =
- typeof lastUserMessage.content === "string"
- ? lastUserMessage.content +
- `<context>${JSON.stringify(topResults)}</context>`
- : [
- ...lastUserMessage.content,
- {
- type: "text",
- text: `<context>${JSON.stringify(topResults)}</context>`,
- },
- ];
- coreMessages[coreMessages.length - 1] = lastUserMessage;
- }
-
- const result = await streamText({
- model: googleClient,
- experimental_providerMetadata: {
- metadata: { userId: user.id, chatThreadId: threadUuid ?? "" },
- },
- experimental_transform: smoothStream(),
- messages: [
- {
- role: "system",
- content: `You are a knowledgeable and helpful AI assistant for Supermemory, a personal knowledge management app. Your goal is to help users explore and understand their saved content.
-
- Key guidelines:
- - Maintain natural, engaging conversation while seamlessly incorporating relevant information from the user's knowledge base
- - Build on previous messages in the conversation to provide coherent, contextual responses
- - Be concise but thorough, focusing on the most relevant details
- - When appropriate, make connections between different pieces of information
- - If you're not sure about something, be honest and say so
- - Feel free to ask clarifying questions if needed
- - Make it easy to read for the user!
- - Use markdown to format your responses but dont make your answers TOO long include any and all information related to context in the response if possible.
- - only talk about the context if the right answer is in the context.
- - You are Supermemory - a personal knowledge management app.
- - You are built by Dhravya Shah (https://dhravya.dev). And the supermemory team (https://supermemory.ai).
-
- The user's saved content is provided in <context> tags. Use this information naturally without explicitly referencing it.`,
- },
- ...coreMessages,
- ],
- async onFinish(completion) {
- try {
- if (lastUserMessage) {
- lastUserMessage.content =
- typeof lastUserMessage.content === "string"
- ? lastUserMessage.content.replace(
- /<context>[\s\S]*?<\/context>/g,
- ""
- )
- : lastUserMessage.content.filter(
- (part) =>
- !(
- part.type === "text" &&
- part.text.startsWith("<context>")
- )
- );
- coreMessages[coreMessages.length - 1] = lastUserMessage;
- }
-
- const newMessages = [
- ...coreMessages,
- {
- role: "assistant",
- content:
- completion.text +
- `<context>[${JSON.stringify(topResults)}]</context>`,
- },
- ];
-
- if (threadUuid) {
- await db
- .update(chatThreads)
- .set({ messages: newMessages })
- .where(eq(chatThreads.uuid, threadUuid));
- }
- } catch (error) {
- console.error("Failed to update thread:", error);
- } finally {
- await data.close();
- }
- },
- });
-
- return result.toDataStreamResponse({
- headers: {
- "Supermemory-Thread-Uuid": threadUuid ?? "",
- "Content-Type": "text/x-unknown",
- "content-encoding": "identity",
- "transfer-encoding": "chunked",
- },
- data,
- });
- } catch (error) {
- console.error("Chat error:", error);
-
- if (error instanceof InvalidPromptError) {
- return c.json(
- { error: "Invalid prompt - please rephrase your message" },
- 400
- );
- }
-
- if ((error as AISDKError).cause === "ECONNREFUSED") {
- return c.json({ error: "Database connection failed" }, 503);
- }
-
- return c.json(
- {
- error: "An unexpected error occurred",
- details:
- c.env.NODE_ENV === "development"
- ? (error as Error).message
- : undefined,
- },
- 500
- );
- }
- }
- )
- .get(
- "/chat/:threadUuid",
- zValidator(
- "param",
- z.object({
- threadUuid: z.string(),
- })
- ),
- async (c) => {
- const user = c.get("user");
- const threadUuid = c.req.valid("param").threadUuid;
-
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- const chatHistory = await database(c.env.HYPERDRIVE.connectionString)
- .select()
- .from(chatThreads)
- .where(
- and(eq(chatThreads.userId, user.id), eq(chatThreads.uuid, threadUuid))
- );
-
- if (!chatHistory) {
- return c.json({ error: "Chat history not found" }, 404);
- }
-
- return c.json({ chatHistory: chatHistory[0].messages });
- }
- )
- .get("/recommended-questions", async (c) => {
- const user = c.get("user");
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- const db = database(c.env.HYPERDRIVE.connectionString);
- const recentDocuments = await db
- .select()
- .from(documents)
- .where(eq(documents.userId, user.id))
- .orderBy(sql`RANDOM()`)
- .limit(3);
-
- if (recentDocuments.length === 0) {
- return c.json({ questions: [] });
- }
-
- const cachedQuestions = await c.env.MD_CACHE.get(`rq:${user.id}`);
- if (cachedQuestions) {
- const randomQuestions = JSON.parse(cachedQuestions)
- .questions.sort(() => Math.random() - 0.5)
- .slice(0, 3);
- return c.json({ questions: randomQuestions });
- }
-
- const { initLogger, wrapAISDKModel } = await import("braintrust");
-
- const logger = initLogger({
- projectName: "supermemory",
- apiKey: c.env.BRAINTRUST_API_KEY,
- });
-
- const model = wrapAISDKModel(openai(c.env).chat("gpt-4o-mini-2024-07-18"));
-
- const aiResponse = await generateObject({
- schema: z.object({
- questions: recommendedQuestionsSchema,
- }),
- model,
- prompt: `You are helping generate search suggestions for a user's personal knowledge base.
-
- Generate 10 specific, focused questions based on the following documents. The questions should:
- - Be highly specific and reference concrete details from the documents
- - Focus on key insights, important facts, or interesting relationships
- - Be phrased naturally, as if the user is trying to recall something they learned
- - Be 2-8 words long
- - Not include generic questions that could apply to any document
-
- Documents:
- ${recentDocuments.map((d) => d.content).join("\n\n")}`,
- });
-
- await c.env.MD_CACHE.put(
- `rq:${user.id}`,
- JSON.stringify(aiResponse.object),
- {
- // 3 hours
- expirationTtl: 10800,
- }
- );
-
- const questions = aiResponse.object.questions;
- const randomQuestions = questions
- .sort(() => Math.random() - 0.5)
- .slice(0, 3);
- return c.json({ questions: randomQuestions });
- })
- .get("/suggested-learnings", async (c) => {
- const user = await c.get("user");
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- const db = database(c.env.HYPERDRIVE.connectionString);
-
- // Try to get from cache first
- const cacheKey = `sl:${user.id}`;
- const cached = await c.env.MD_CACHE.get(cacheKey);
- if (cached) {
- return c.json({
- suggestedLearnings: JSON.parse(cached) as { [x: string]: string },
- });
- }
-
- // Get random sample of user's documents that are well-distributed
- const recentLearnings = await db
- .select()
- .from(documents)
- .where(eq(documents.userId, user.id))
- // Use random() to get distributed sample
- .orderBy(sql`RANDOM()`)
- .limit(7);
-
- if (recentLearnings.length === 0 || recentLearnings.length < 3) {
- return c.json({ suggestedLearnings: [] });
- }
-
- // for each document, i want to generate a list of
- // small markdown tweet-like things that the user might want to remember about
- const suggestedLearnings = await Promise.all(
- recentLearnings.map(async (document) => {
- const model = openai(c.env).chat("gpt-4o-mini-2024-07-18");
- const prompt = `Generate a concise topic recall card for this document. The card should:
- - Have a clear title that captures the main topic
- - based on when the document was saved, include a brief "Last (week/month/...), you saved notes on..." intro (do something different every time.)
- - List 2-3 key points from the content in simple bullet points
- - Keep the total length under 280 characters
- - Focus on the core concepts worth remembering
- - Be in markdown format
- - if you don't have a good suggestions, just skip that document.
-
- Here's the document content: ${document.content}, Document saved at: ${document.updatedAt}, Today's date: ${new Date().toLocaleDateString()}`;
- const response = await generateObject({
- schema: z.object({
- [document.uuid]: z.string(),
- }),
- // @ts-ignore
- model,
- prompt,
- });
- return response.object;
- })
- );
-
- // Cache the results
- await c.env.MD_CACHE.put(cacheKey, JSON.stringify(suggestedLearnings), {
- expirationTtl: 60 * 60 * 3, // 3 hours
- });
-
- return c.json({ suggestedLearnings });
- })
- .post(
- "/search",
- zValidator(
- "json",
- z.object({
- query: z.string().min(1, "Search query cannot be empty"),
- limit: z.number().min(1).max(50).default(10),
- threshold: z.number().min(0).max(1).default(0),
- spaces: z.array(z.string()).optional(),
- })
- ),
- async (c) => {
- const { query, limit, threshold, spaces } = c.req.valid("json");
- const user = c.get("user");
-
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- const db = database(c.env.HYPERDRIVE.connectionString);
-
- if (spaces && spaces.length > 0) {
- const spaceDetails = await Promise.all(
- spaces.map(async (spaceId) => {
- const space = await db
- .select()
- .from(spaceInDb)
- .where(eq(spaceInDb.uuid, spaceId))
- .limit(1);
-
- if (space.length === 0) return null;
- return {
- id: space[0].id,
- ownerId: space[0].ownerId,
- uuid: space[0].uuid,
- };
- })
- );
-
- // Filter out any null values and check permissions
- const validSpaces = spaceDetails.filter(
- (s): s is NonNullable<typeof s> => s !== null
- );
- const unauthorized = validSpaces.filter((s) => s.ownerId !== user.id);
-
- if (unauthorized.length > 0) {
- return c.json(
- {
- error: "Space permission denied",
- details: unauthorized.map((s) => s.uuid).join(", "),
- },
- 403
- );
- }
-
- // Replace UUIDs with IDs for the database query
- spaces.splice(
- 0,
- spaces.length,
- ...validSpaces.map((s) => s.id.toString())
- );
- }
-
- try {
- // Generate embedding for the search query
- const embeddings = await c.env.AI.run("@cf/baai/bge-base-en-v1.5", {
- text: query,
- });
-
- if (!embeddings.data) {
- return c.json(
- { error: "Failed to generate embedding for query" },
- 500
- );
- }
-
- // Pre-compute the vector similarity expression
- const vectorSimilarity = sql<number>`1 - (embeddings <=> ${JSON.stringify(embeddings.data[0])}::vector)`;
-
- // Get matching chunks
- const results = await db
- .select({
- chunkId: chunk.id,
- documentId: chunk.documentId,
- textContent: chunk.textContent,
- orderInDocument: chunk.orderInDocument,
- metadata: chunk.metadata,
- similarity: vectorSimilarity,
- // Document fields
- docUuid: documents.uuid,
- docContent: documents.content,
- docType: documents.type,
- docUrl: documents.url,
- docTitle: documents.title,
- docCreatedAt: documents.createdAt,
- docUpdatedAt: documents.updatedAt,
- docUserId: documents.userId,
- docDescription: documents.description,
- docOgImage: documents.ogImage,
- })
- .from(chunk)
- .innerJoin(documents, eq(chunk.documentId, documents.id))
- .where(
- and(
- eq(documents.userId, user.id),
- sql`${vectorSimilarity} > ${threshold}`,
- ...(spaces && spaces.length > 0
- ? [
- exists(
- db
- .select()
- .from(contentToSpace)
- .where(
- and(
- eq(contentToSpace.contentId, documents.id),
- inArray(
- contentToSpace.spaceId,
- spaces.map((spaceId) => Number(spaceId))
- )
- )
- )
- ),
- ]
- : [])
- )
- )
- .orderBy(desc(vectorSimilarity))
- .limit(limit);
-
- // Group results by document and take the best matching chunk
- const documentResults = new Map<number, (typeof results)[0]>();
- for (const result of results) {
- const existingResult = documentResults.get(result.documentId);
- if (
- !existingResult ||
- result.similarity > existingResult.similarity
- ) {
- documentResults.set(result.documentId, result);
- }
- }
-
- // Convert back to array and format response
- const finalResults = Array.from(documentResults.values())
- .sort((a, b) => b.similarity - a.similarity)
- .map((r) => ({
- id: r.documentId,
- uuid: r.docUuid,
- content: r.docContent,
- type: r.docType,
- url: r.docUrl,
- title: r.docTitle,
- createdAt: r.docCreatedAt,
- updatedAt: r.docUpdatedAt,
- userId: r.docUserId,
- description: r.docDescription,
- ogImage: r.docOgImage,
- similarity: Number(r.similarity.toFixed(4)),
- matchingChunk: {
- id: r.chunkId,
- content: r.textContent,
- orderInDocument: r.orderInDocument,
- metadata: r.metadata,
- },
- }));
-
- return c.json({ results: finalResults });
- } catch (error) {
- console.error("[Search Error]", error);
- return c.json(
- {
- error: "Search failed",
- details:
- c.env.NODE_ENV === "development"
- ? (error as Error).message
- : undefined,
- },
- 500
- );
- }
- }
- )
- .post(
- "/add",
- zValidator(
- "json",
- z.object({
- content: z.string().min(1, "Content cannot be empty"),
- spaces: z.array(z.string()).max(5).optional(),
- id: z.string().optional(),
- // any type of metadata. must be json serializable.
- metadata: z.any().optional(),
- images: z.array(z.string()).optional(),
- prefetched: z
- .object({
- contentToVectorize: z.string(),
- contentToSave: z.string(),
- title: z.string(),
- type: z.string(),
- description: z.string().optional(),
- ogImage: z.string().optional(),
- })
- .optional(),
- })
- ),
- async (c) => {
- const body = c.req.valid("json");
-
- const user = c.get("user");
-
- if (!user) {
- return c.json({ error: "You must be logged in to add content" }, 401);
- }
-
- // Do not perform seperate check for prefetched type, breaks tweet addition from extension
- const type = typeDecider(body.content);
-
- if (isErr(type)) {
- return c.json(
- {
- error: "Could not determine content type",
- details: type.error.message,
- },
- 400
- );
- }
-
- if (type.value === "page" && !body.content.startsWith("http")) {
- body.content = `https://${body.content}`;
- }
-
- const uuid = body.id ?? randomId();
- const contentId = `add-${user.id}-${uuid}`;
-
- const db = database(c.env.HYPERDRIVE.connectionString);
-
- // Calculate document hash early to enable faster duplicate detection
- const content = body.prefetched?.contentToVectorize || body.content;
- const encoder = new TextEncoder();
- const data = encoder.encode(content);
- const hashBuffer = await crypto.subtle.digest("SHA-256", data);
- const hashArray = Array.from(new Uint8Array(hashBuffer));
- const documentHash = hashArray
- .map((b) => b.toString(16).padStart(2, "0"))
- .join("");
-
- // Check for duplicates using hash
- const existingDocs = await db
- .select()
- .from(documents)
- .where(
- and(
- eq(documents.userId, user.id),
- or(
- eq(documents.contentHash, documentHash),
- and(
- eq(documents.type, type.value),
- or(eq(documents.url, body.content), eq(documents.raw, content))
- )
- )
- )
- );
-
- if (existingDocs.length > 0) {
- return c.json(
- { error: `That ${type.value} already exists in your memories` },
- 409
- );
- }
-
- // Check space permissions if spaces are specified
- if (body.spaces && body.spaces.length > 0) {
- const spacePermissions = await Promise.all(
- body.spaces.map(async (spaceId) => {
- const space = await db
- .select()
- .from(spaceInDb)
- .where(eq(spaceInDb.uuid, spaceId))
- .limit(1);
-
- if (!space[0]) {
- // create a new space for the user with the given id
- const newSpace = await db
- .insert(spaceInDb)
- .values({
- uuid: spaceId,
- name: spaceId,
- isPublic: false,
- ownerId: user.id,
- createdAt: new Date(),
- updatedAt: new Date(),
- })
- .returning();
-
- return {
- spaceId: newSpace[0].id,
- allowed: true,
- error: null,
- };
- }
-
- const spaceData = space[0] as Space;
-
- // If public space, only owner can add content
- if (spaceData.isPublic) {
- return {
- spaceId,
- allowed: spaceData.ownerId === user.id,
- error:
- spaceData.ownerId !== user.id
- ? "Only space owner can add to public spaces"
- : null,
- };
- }
-
- // For private spaces, check if user is owner or in allowlist
- const spaceAccessCheck = await db
- .select()
- .from(spaceAccess)
- .where(
- and(
- eq(spaceAccess.spaceId, spaceData.id),
- eq(spaceAccess.userEmail, user.email),
- eq(spaceAccess.status, "accepted")
- )
- )
- .limit(1);
-
- return {
- spaceId,
- allowed:
- spaceData.ownerId === user.id || spaceAccessCheck.length > 0,
- error:
- spaceData.ownerId !== user.id && !spaceAccessCheck.length
- ? "Not authorized to add to this space"
- : null,
- };
- })
- );
-
- const unauthorized = spacePermissions.filter((p) => !p.allowed);
- if (unauthorized.length > 0) {
- return c.json(
- {
- error: "Space permission denied",
- details: unauthorized
- .map((u) => `${u.spaceId}: ${u.error}`)
- .join(", "),
- },
- 403
- );
- }
- }
-
- const isExternalContent = [
- "page",
- "tweet",
- "document",
- "notion",
- ].includes(type.value);
- const indexedUrl = isExternalContent
- ? body.content
- : `https://supermemory.ai/content/${contentId}`;
-
- // Insert into documents table with hash
- try {
- await db.insert(documents).values({
- uuid: contentId,
- userId: user.id,
- type: type.value,
- url: indexedUrl,
- title: body.prefetched?.title,
- description: body.prefetched?.description,
- ogImage: body.prefetched?.ogImage,
- contentHash: documentHash,
- raw:
- (body.prefetched ?? body.content) + "\n\n" + body.spaces?.join(" "),
- metadata: body.metadata,
- });
-
- await c.env.CONTENT_WORKFLOW.create({
- params: {
- userId: user.id,
- content: body.content,
- spaces: body.spaces,
- type: type.value,
- uuid: contentId,
- url: indexedUrl,
- prefetched: body.prefetched,
- },
- id: contentId,
- });
-
- return c.json({
- message: "Content added successfully",
- id: contentId,
- type: type.value,
- });
- } catch (error) {
- console.error("[Add Content Error]", error);
- return c.json({ error: "Failed to process content" }, 500);
- }
- }
- )
- .post(
- "/batch-add",
- zValidator(
- "json",
- z
- .object({
- urls: z
- .array(z.string())
- .min(1, "At least one URL is required")
- .optional(),
- contents: z
- .array(
- z.object({
- content: z.string(),
- title: z.string(),
- type: z.string(),
- })
- )
- .optional(),
- spaces: z.array(z.string()).max(5).optional(),
- })
- .refine((data) => data.urls || data.contents, {
- message: "Either urls or contents must be provided",
- })
- ),
- async (c) => {
- const user = c.get("user");
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- const { urls, contents, spaces } = await c.req.valid("json");
-
- // Check space permissions if spaces are specified
- if (spaces && spaces.length > 0) {
- const db = database(c.env.HYPERDRIVE.connectionString);
- const spacePermissions = await Promise.all(
- spaces.map(async (spaceId) => {
- const space = await db
- .select()
- .from(spaceInDb)
- .where(eq(spaceInDb.uuid, spaceId))
- .limit(1);
-
- if (!space[0]) {
- return { spaceId, allowed: false, error: "Space not found" };
- }
-
- const spaceData = space[0] as Space;
-
- // If public space, only owner can add content
- if (spaceData.isPublic) {
- return {
- spaceId,
- allowed: spaceData.ownerId === user.id,
- error:
- spaceData.ownerId !== user.id
- ? "Only space owner can add to public spaces"
- : null,
- };
- }
-
- // For private spaces, check if user is owner or in allowlist
- const spaceAccessCheck = await db
- .select()
- .from(spaceAccess)
- .where(
- and(
- eq(spaceAccess.spaceId, spaceData.id),
- eq(spaceAccess.userEmail, user.email),
- eq(spaceAccess.status, "accepted")
- )
- )
- .limit(1);
-
- return {
- spaceId,
- allowed:
- spaceData.ownerId === user.id || spaceAccessCheck.length > 0,
- error:
- spaceData.ownerId !== user.id && !spaceAccessCheck.length
- ? "Not authorized to add to this space"
- : null,
- };
- })
- );
-
- const unauthorized = spacePermissions.filter((p) => !p.allowed);
- if (unauthorized.length > 0) {
- return c.json(
- {
- error: "Space permission denied",
- details: unauthorized
- .map((u) => `${u.spaceId}: ${u.error}`)
- .join(", "),
- },
- 403
- );
- }
- }
-
- // Create a new ReadableStream for progress updates
- const encoder = new TextEncoder();
- const stream = new ReadableStream({
- async start(controller) {
- const db = database(c.env.HYPERDRIVE.connectionString);
- const items = urls || contents || [];
- const total = items.length;
- let processed = 0;
- let failed = 0;
- let succeeded = 0;
-
- const sendMessage = (data: any) => {
- const message = encoder.encode(`data: ${JSON.stringify(data)}\n\n`);
- controller.enqueue(message);
- };
-
- for (const item of items) {
- try {
- processed++;
-
- // Handle both URL and markdown content
- const content = typeof item === "string" ? item : item.content;
- const title = typeof item === "string" ? null : item.title;
- const type =
- typeof item === "string" ? typeDecider(item) : Ok(item.type);
-
- if (isErr(type)) {
- failed++;
- sendMessage({
- progress: Math.round((processed / total) * 100),
- status: "error",
- url: typeof item === "string" ? item : item.title,
- error: type.error.message,
- processed,
- total,
- succeeded,
- failed,
- });
- continue;
- }
-
- // Calculate document hash
- const encoder = new TextEncoder();
- const data = encoder.encode(content);
- const hashBuffer = await crypto.subtle.digest("SHA-256", data);
- const hashArray = Array.from(new Uint8Array(hashBuffer));
- const documentHash = hashArray
- .map((b) => b.toString(16).padStart(2, "0"))
- .join("");
-
- // Check for duplicates
- const existingDocs = await db
- .select()
- .from(documents)
- .where(
- and(
- eq(documents.userId, user.id),
- or(
- eq(documents.contentHash, documentHash),
- eq(documents.raw, content)
- )
- )
- );
-
- if (existingDocs.length > 0) {
- failed++;
- sendMessage({
- progress: Math.round((processed / total) * 100),
- status: "duplicate",
- title: typeof item === "string" ? item : item.title,
- processed,
- total,
- succeeded,
- failed,
- });
- continue;
- }
-
- const contentId = `add-${user.id}-${randomId()}`;
- const isExternalContent =
- typeof item === "string" &&
- ["page", "tweet", "document", "notion"].includes(type.value);
- const url = isExternalContent
- ? content
- : `https://supermemory.ai/content/${contentId}`;
-
- // Insert into documents table
- await db.insert(documents).values({
- uuid: contentId,
- userId: user.id,
- type: type.value,
- url,
- title,
- contentHash: documentHash,
- raw: content + "\n\n" + spaces?.join(" "),
- });
-
- // Create workflow for processing
- await c.env.CONTENT_WORKFLOW.create({
- params: {
- userId: user.id,
- content,
- spaces,
- type: type.value,
- uuid: contentId,
- url,
- },
- id: contentId,
- });
-
- succeeded++;
- sendMessage({
- progress: Math.round((processed / total) * 100),
- status: "success",
- title: typeof item === "string" ? item : item.title,
- processed,
- total,
- succeeded,
- failed,
- });
-
- // Add a small delay between requests
- await new Promise((resolve) => setTimeout(resolve, 100));
- } catch (error) {
- failed++;
- sendMessage({
- progress: Math.round((processed / total) * 100),
- status: "error",
- title: typeof item === "string" ? item : item.title,
- error: error instanceof Error ? error.message : "Unknown error",
- processed,
- total,
- succeeded,
- failed,
- });
- }
- }
-
- sendMessage({
- progress: 100,
- status: "complete",
- processed,
- total,
- succeeded,
- failed,
- });
- controller.close();
- },
- });
-
- return new Response(stream, {
- headers: {
- "Content-Type": "text/event-stream",
- "Cache-Control": "no-cache",
- Connection: "keep-alive",
- },
- });
- }
- );
-
-export default actions;
diff --git a/apps/backend/src/routes/integrations.ts b/apps/backend/src/routes/integrations.ts
deleted file mode 100644
index 9aa369ea..00000000
--- a/apps/backend/src/routes/integrations.ts
+++ /dev/null
@@ -1,180 +0,0 @@
-import { Hono } from "hono";
-import type { Env, Variables } from "../types";
-import { getDecryptedKV } from "encrypt-workers-kv";
-import { getAllNotionPageContents } from "../utils/notion";
-import { and, eq, or } from "@supermemory/db";
-import { documents } from "@supermemory/db/schema";
-import { database } from "@supermemory/db";
-import { fromHono } from "chanfana";
-
-const integrations = fromHono(
- new Hono<{ Variables: Variables; Bindings: Env }>()).get("/notion/import", async (c) => {
- const user = c.get("user");
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- // Create SSE stream
- const stream = new TransformStream();
- const writer = stream.writable.getWriter();
- const encoder = new TextEncoder();
-
- // Create response first so client gets headers immediately
- const response = new Response(stream.readable, {
- headers: {
- "Content-Type": "text/event-stream",
- "Cache-Control": "no-cache",
- Connection: "keep-alive",
- // Required CORS headers for SSE
- "Access-Control-Allow-Origin": "*",
- "Access-Control-Allow-Credentials": "true",
- },
- });
-
- const sendMessage = async (data: Record<string, any>) => {
- // Proper SSE format requires "data: " prefix and double newline
- const formattedData = `data: ${JSON.stringify(data)}\n\n`;
- await writer.write(encoder.encode(formattedData));
- };
-
- // Start processing in background
- c.executionCtx.waitUntil(
- (async () => {
- try {
- // Send initial heartbeat
- await sendMessage({ type: "connected" });
-
- const token = await getDecryptedKV(
- c.env.ENCRYPTED_TOKENS,
- `${user.uuid}-notion`,
- `${c.env.WORKOS_COOKIE_PASSWORD}-${user.uuid}`
- );
-
- const stringToken = new TextDecoder().decode(token);
- if (!stringToken) {
- await sendMessage({ type: "error", error: "No token found" });
- await writer.close();
- return;
- }
-
- await sendMessage({ type: "progress", progress: 5 });
-
- // Fetch pages with progress updates
- const pages = await getAllNotionPageContents(
- stringToken,
- async (progress) => {
- // Map progress from 0-100 to 5-40 range
- const scaledProgress = Math.floor(5 + (progress * 35) / 100);
- await sendMessage({ type: "progress", progress: scaledProgress });
- }
- );
-
- await sendMessage({ type: "progress", progress: 40 });
-
- let processed = 0;
- const totalPages = pages.length;
-
- const db = database(c.env.HYPERDRIVE.connectionString);
-
- for (const page of pages) {
- // Calculate document hash for duplicate detection
- const encoder = new TextEncoder();
- const data = encoder.encode(page.content);
- const hashBuffer = await crypto.subtle.digest("SHA-256", data);
- const hashArray = Array.from(new Uint8Array(hashBuffer));
- const documentHash = hashArray
- .map((b) => b.toString(16).padStart(2, "0"))
- .join("");
-
- // Check for duplicates using hash
- const existingDocs = await db
- .select()
- .from(documents)
- .where(
- and(
- eq(documents.userId, user.id),
- or(
- eq(documents.contentHash, documentHash),
- and(
- eq(documents.type, "notion"),
- or(
- eq(documents.url, page.url),
- eq(documents.raw, page.content)
- )
- )
- )
- )
- );
-
- if (existingDocs.length > 0) {
- await sendMessage({
- type: "warning",
- message: `Skipping duplicate page: ${page.title}`,
- });
- processed++;
- continue;
- }
-
- // Insert into documents table first
- try {
- await db.insert(documents).values({
- uuid: page.id,
- userId: user.id,
- type: "notion",
- url: page.url,
- title: page.title,
- contentHash: documentHash,
- raw: page.content,
- });
-
- await c.env.CONTENT_WORKFLOW.create({
- params: {
- userId: user.id,
- content: page.url,
- spaces: [],
- type: "notion",
- uuid: page.id,
- url: page.url,
- prefetched: {
- contentToVectorize: page.content,
- contentToSave: page.content,
- title: page.title,
- type: "notion",
- },
- createdAt: page.createdAt,
- },
- id: `${user.id}-${page.id}-${new Date().getTime()}`,
- });
-
- processed++;
- const progress = 50 + Math.floor((processed / totalPages) * 50);
- await sendMessage({ type: "progress", progress, page: page.title });
- } catch (error) {
- console.error(`Failed to process page ${page.title}:`, error);
- await sendMessage({
- type: "warning",
- message: `Failed to process page: ${page.title}`,
- error: error instanceof Error ? error.message : "Unknown error",
- });
- processed++;
- continue;
- }
- }
-
- await sendMessage({ type: "complete", progress: 100 });
- await writer.close();
- } catch (error) {
- console.error("Import error:", error);
- await sendMessage({
- type: "error",
- error: error instanceof Error ? error.message : "Import failed",
- });
- await writer.close();
- }
- })()
- );
-
- return response;
-});
-
-export default integrations;
diff --git a/apps/backend/src/routes/memories.ts b/apps/backend/src/routes/memories.ts
deleted file mode 100644
index f79da8ce..00000000
--- a/apps/backend/src/routes/memories.ts
+++ /dev/null
@@ -1,293 +0,0 @@
-import { Hono } from "hono";
-import { Variables, Env } from "../types";
-import { zValidator } from "@hono/zod-validator";
-import { z } from "zod";
-import {
- documents,
- spaces,
- spaceAccess,
- contentToSpace,
-} from "@supermemory/db/schema";
-import { and, database, desc, eq, or, sql, isNull } from "@supermemory/db";
-import { fromHono } from "chanfana";
-
-const memories = fromHono(new Hono<{ Variables: Variables; Bindings: Env }>())
- .get(
- "/",
- zValidator(
- "query",
- z.object({
- start: z.string().default("0").transform(Number),
- count: z.string().default("10").transform(Number),
- spaceId: z.string().optional(),
- })
- ),
- async (c) => {
- const { start, count, spaceId } = c.req.valid("query");
- const user = c.get("user");
- const db = database(c.env.HYPERDRIVE.connectionString);
-
- console.log("Fetching memories with spaceId", spaceId);
- console.log(c.req.url);
- // If spaceId provided, verify access
- if (spaceId) {
- console.log("SpaceID provided", spaceId);
- const space = await db
- .select()
- .from(spaces)
- .where(eq(spaces.uuid, spaceId.split("---")[0]))
- .limit(1);
-
- if (!space[0]) {
- return c.json({ error: "Space not found" }, 404);
- }
-
- // Check access - allow if public, user owns the space, or has access through spaceAccess
- if (!space[0].isPublic && !user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- if (!space[0].isPublic && space[0].ownerId !== user?.id) {
- const access = await db
- .select()
- .from(spaceAccess)
- .where(
- and(
- eq(spaceAccess.spaceId, space[0].id),
- eq(spaceAccess.userEmail, user?.email ?? ""),
- eq(spaceAccess.status, "accepted")
- )
- )
- .limit(1);
-
- if (access.length === 0) {
- console.log("Unauthorized access to", c.req.url);
- return c.json({ error: "Unauthorized" }, 401);
- }
- }
-
- // Get documents for space
- const [items, totalResult] = await Promise.all([
- db
- .select({
- documents,
- })
- .from(documents)
- .innerJoin(
- contentToSpace,
- eq(documents.id, contentToSpace.contentId)
- )
- .where(eq(contentToSpace.spaceId, space[0].id))
- .orderBy(desc(documents.createdAt))
- .limit(count)
- .offset(start),
- db
- .select({
- total: sql<number>`count(*)`.as("total"),
- })
- .from(documents)
- .innerJoin(
- contentToSpace,
- eq(documents.id, contentToSpace.contentId)
- )
- .where(eq(contentToSpace.spaceId, space[0].id)),
- ]);
-
- const total = totalResult[0]?.total ?? 0;
-
- return c.json({
- items: items.map((item) => ({
- ...item.documents,
- id: item.documents.uuid,
- })),
- total,
- });
- }
-
- // Regular user memories endpoint
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- // Set cache control headers for 5 minutes
- c.header("Cache-Control", "private, max-age=300");
- c.header("Vary", "Cookie"); // Vary on Cookie since response depends on user
-
- // Generate ETag based on user ID, start, count
- const etag = `"${user.id}-${start}-${count}"`;
- c.header("ETag", etag);
-
- // Check if client has matching ETag
- const ifNoneMatch = c.req.header("If-None-Match");
- if (ifNoneMatch === etag) {
- return new Response(null, { status: 304 });
- }
-
- const [items, [{ total }]] = await Promise.all([
- db
- .select({
- documents: documents,
- })
- .from(documents)
- .leftJoin(contentToSpace, eq(documents.id, contentToSpace.contentId))
- .where(
- and(eq(documents.userId, user.id), isNull(contentToSpace.contentId))
- )
- .orderBy(desc(documents.createdAt))
- .limit(count)
- .offset(start),
- db
- .select({
- total: sql<number>`count(*)`.as("total"),
- })
- .from(documents)
- .leftJoin(contentToSpace, eq(documents.id, contentToSpace.contentId))
- .where(
- and(eq(documents.userId, user.id), isNull(contentToSpace.contentId))
- ),
- ]);
-
- return c.json({
- items: items.map((item) => ({
- ...item.documents,
- id: item.documents.uuid,
- })),
- total,
- });
- }
- )
- .get("/:id", zValidator("param", z.object({ id: z.string() })), async (c) => {
- const { id } = c.req.valid("param");
- const user = c.get("user");
-
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- const memory = await database(c.env.HYPERDRIVE.connectionString)
- .select()
- .from(documents)
- .where(and(eq(documents.uuid, id), eq(documents.userId, user.id)))
- .limit(1);
-
- return c.json(memory[0]);
- })
- .delete(
- "/:id",
- zValidator("param", z.object({ id: z.string() })),
- async (c) => {
- const { id } = c.req.valid("param");
- const user = c.get("user");
-
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- const db = database(c.env.HYPERDRIVE.connectionString);
-
- let documentIdNum;
-
- try {
- documentIdNum = Number(id);
- } catch (e) {
- documentIdNum = null;
- }
-
- const doc = await db
- .select()
- .from(documents)
- .where(
- and(
- documentIdNum
- ? or(eq(documents.uuid, id), eq(documents.id, documentIdNum))
- : eq(documents.uuid, id),
- eq(documents.userId, user.id)
- )
- )
- .limit(1);
-
- if (!doc[0]) {
- return c.json({ error: "Document not found" }, 404);
- }
-
- const [document, contentToSpacei] = await Promise.all([
- db
- .delete(documents)
- .where(and(eq(documents.uuid, id), eq(documents.userId, user.id))),
- db
- .delete(contentToSpace)
- .where(eq(contentToSpace.contentId, doc[0].id)),
- ]);
-
- return c.json({ success: true });
- }
- )
- .post(
- "/batch-delete",
- zValidator(
- "json",
- z.object({
- ids: z.array(z.string()),
- })
- ),
- async (c) => {
- const { ids } = c.req.valid("json");
- const user = c.get("user");
-
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- const db = database(c.env.HYPERDRIVE.connectionString);
-
- try {
- // First get all valid documents that belong to the user
- const docs = await db
- .select()
- .from(documents)
- .where(
- and(
- eq(documents.userId, user.id),
- sql`${documents.uuid} = ANY(ARRAY[${ids}]::text[])`
- )
- );
-
- if (docs.length === 0) {
- return c.json({ error: "No valid documents found" }, 404);
- }
-
- const docIds = docs.map((doc) => doc.id);
-
- // Delete in transaction to ensure consistency
- await db.transaction(async (tx) => {
- await Promise.all([
- // Delete document entries
- tx
- .delete(documents)
- .where(
- and(
- eq(documents.userId, user.id),
- sql`${documents.uuid} = ANY(ARRAY[${ids}]::text[])`
- )
- ),
- // Delete space connections
- tx
- .delete(contentToSpace)
- .where(
- sql`${contentToSpace.contentId} = ANY(ARRAY[${docIds}]::int[])`
- ),
- ]);
- });
-
- return c.json({
- success: true,
- deletedCount: docs.length,
- });
- } catch (error) {
- console.error("Batch delete error:", error);
- return c.json({ error: "Failed to delete documents" }, 500);
- }
- }
- );
-
-export default memories;
diff --git a/apps/backend/src/routes/spaces.ts b/apps/backend/src/routes/spaces.ts
deleted file mode 100644
index e004ce78..00000000
--- a/apps/backend/src/routes/spaces.ts
+++ /dev/null
@@ -1,709 +0,0 @@
-import { Hono } from "hono";
-import { Env, Variables } from "../types";
-import { and, database, desc, eq, isNotNull, or, sql } from "@supermemory/db";
-import {
- contentToSpace,
- documents,
- savedSpaces,
- spaceAccess,
- spaces,
- users,
-} from "@supermemory/db/schema";
-import { zValidator } from "@hono/zod-validator";
-import { z } from "zod";
-import { randomId } from "@supermemory/shared";
-import { fromHono } from "chanfana";
-
-const spacesRoute = fromHono(
- new Hono<{ Variables: Variables; Bindings: Env }>())
- .get("/", async (c) => {
- const user = c.get("user");
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- const db = database(c.env.HYPERDRIVE.connectionString);
-
- const [allSpaces, savedSpacesList, spaceOwners] = await Promise.all([
- db
- .select({
- id: spaces.id,
- uuid: spaces.uuid,
- name: sql<string>`REGEXP_REPLACE(${spaces.name}, E'[\\n\\r]+', ' ', 'g')`.as(
- "name"
- ),
- ownerId: spaces.ownerId,
- isPublic: spaces.isPublic,
- createdAt: spaces.createdAt,
- accessType: spaceAccess.accessType,
- })
- .from(spaces)
- .leftJoin(
- spaceAccess,
- and(
- eq(spaces.id, spaceAccess.spaceId),
- eq(spaceAccess.userEmail, user.email),
- eq(spaceAccess.status, "accepted")
- )
- )
- .where(or(eq(spaces.ownerId, user.id), isNotNull(spaceAccess.spaceId)))
- .orderBy(desc(spaces.createdAt)),
-
- db
- .select({
- spaceId: savedSpaces.spaceId,
- })
- .from(savedSpaces)
- .where(eq(savedSpaces.userId, user.id)),
-
- db
- .select({
- id: users.id,
- uuid: users.uuid,
- name: users.firstName,
- email: users.email,
- profileImage: users.profilePictureUrl,
- })
- .from(users)
- .innerJoin(spaces, eq(spaces.ownerId, users.id)),
- ]);
-
- const savedSpaceIds = new Set(savedSpacesList.map((s) => s.spaceId));
- const ownerMap = new Map(spaceOwners.map((owner) => [owner.id, owner]));
-
- const spacesWithDetails = allSpaces.map((space) => {
- const isOwner = space.ownerId === user.id;
- const owner = ownerMap.get(space.ownerId);
-
- return {
- ...space,
- favorited: savedSpaceIds.has(space.id),
- permissions: {
- canRead: space.isPublic || isOwner || space.accessType != null,
- canEdit: isOwner || space.accessType === "edit",
- isOwner,
- },
- owner: isOwner
- ? null
- : {
- id: owner?.uuid,
- name: owner?.name,
- email: owner?.email,
- profileImage: owner?.profileImage,
- },
- };
- });
-
- return c.json({ spaces: spacesWithDetails });
- })
- .get("/:spaceId", async (c) => {
- const user = c.get("user");
- const spaceId = c.req.param("spaceId");
- const db = database(c.env.HYPERDRIVE.connectionString);
-
- const space = await db
- .select()
- .from(spaces)
- .where(eq(spaces.uuid, spaceId))
- .limit(1);
-
- if (!space[0]) {
- return c.json({ error: "Space not found" }, 404);
- }
-
- // For public spaces, anyone can read but only owner can edit
- if (space[0].isPublic) {
- const canEdit = user?.id === space[0].ownerId;
- return c.json({
- ...space[0],
- permissions: {
- canRead: true,
- canEdit,
- isOwner: space[0].ownerId === user?.id,
- isPublic: space[0].isPublic,
- },
- });
- }
-
- // For private spaces, require authentication
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- // Check if user is owner or has access via spaceAccess
- const isOwner = space[0].ownerId === user.id;
- let canEdit = isOwner;
- if (!isOwner) {
- const spaceAccessCheck = await db
- .select()
- .from(spaceAccess)
- .where(
- and(
- eq(spaceAccess.spaceId, space[0].id),
- eq(spaceAccess.userEmail, user.email),
- eq(spaceAccess.status, "accepted")
- )
- )
- .limit(1);
-
- if (spaceAccessCheck.length === 0) {
- return c.json({ error: "Access denied" }, 403);
- }
-
- canEdit = spaceAccessCheck[0].accessType === "edit";
- }
-
- return c.json({
- ...space[0],
- permissions: {
- canRead: true,
- canEdit,
- isOwner: space[0].ownerId === user.id,
- isPublic: space[0].isPublic,
- },
- });
- })
- .post(
- "/create",
- zValidator(
- "json",
- z.object({
- spaceName: z.string().min(1, "Space name cannot be empty").max(100),
- isPublic: z.boolean(), // keep this explicit please
- })
- ),
- async (c) => {
- const body = c.req.valid("json");
- const user = c.get("user");
-
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- if (body.spaceName.trim() === "<HOME>") {
- return c.json({ error: "Cannot create space with name <HOME>" }, 400);
- }
-
- const db = database(c.env.HYPERDRIVE.connectionString);
- const uuid = randomId();
-
- try {
- const space = await db
- .insert(spaces)
- .values({
- name: body.spaceName.trim(),
- ownerId: user.id,
- uuid,
- isPublic: body.isPublic,
- createdAt: new Date(),
- })
- .returning();
-
- return c.json({
- message: "Space created successfully",
- space: {
- uuid: space[0].uuid,
- name: space[0].name,
- ownerId: space[0].ownerId,
- isPublic: space[0].isPublic,
- createdAt: space[0].createdAt,
- },
- });
- } catch (error) {
- console.error("[Space Creation Error]", error);
- return c.json({ error: "Failed to create space" }, 500);
- }
- }
- )
- .post(
- "/:spaceId/favorite",
- zValidator(
- "param",
- z.object({
- spaceId: z.string(),
- })
- ),
- async (c) => {
- const user = c.get("user");
- const { spaceId } = c.req.valid("param");
-
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- const db = database(c.env.HYPERDRIVE.connectionString);
-
- // Get space details
- const space = await db
- .select()
- .from(spaces)
- .where(eq(spaces.uuid, spaceId))
- .limit(1);
-
- if (!space[0]) {
- return c.json({ error: "Space not found" }, 404);
- }
-
- // Check if it's user's own space
- if (space[0].ownerId === user.id) {
- return c.json({ error: "Cannot favorite your own space" }, 400);
- }
-
- try {
- await db.insert(savedSpaces).values({
- userId: user.id,
- spaceId: space[0].id,
- savedAt: new Date(),
- });
-
- return c.json({ message: "Space favorited successfully" });
- } catch (error) {
- if (
- error instanceof Error &&
- error.message.includes("saved_spaces_user_space_idx")
- ) {
- // Space is already favorited
- return c.json({ message: "Space already favorited" });
- }
- throw error;
- }
- }
- )
- .post(
- "/moveContent",
- zValidator(
- "json",
- z.object({
- spaceId: z.string(),
- documentId: z.string(),
- })
- ),
- async (c) => {
- const body = c.req.valid("json");
- const user = c.get("user");
- const { spaceId, documentId } = body;
-
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- const db = database(c.env.HYPERDRIVE.connectionString);
-
- try {
- await db.transaction(async (tx) => {
- // If moving to <HOME>, just remove all space connections
- if (spaceId === "<HOME>") {
- const doc = await tx
- .select()
- .from(documents)
- .where(eq(documents.uuid, documentId))
- .limit(1);
-
- if (!doc[0]) {
- return c.json({ error: "Document not found" }, 404);
- }
-
- await tx
- .delete(contentToSpace)
- .where(eq(contentToSpace.contentId, doc[0].id));
- return;
- }
-
- // Get space and document, verify space ownership
- const results = (
- await tx
- .select({
- spaceId: spaces.id,
- documentId: documents.id,
- ownerId: spaces.ownerId,
- spaceName: spaces.name,
- })
- .from(spaces)
- .innerJoin(
- documents,
- and(eq(spaces.uuid, spaceId), eq(documents.uuid, documentId))
- )
- .limit(1)
- )[0];
-
- if (!results) {
- return c.json({ error: "Space or document not found" }, 404);
- }
-
- if (results.ownerId !== user.id) {
- return c.json(
- { error: "Not authorized to modify this space" },
- 403
- );
- }
-
- // Delete existing space relations for this document
- await tx
- .delete(contentToSpace)
- .where(eq(contentToSpace.contentId, results.documentId));
-
- // Add new space relation
- await tx.insert(contentToSpace).values({
- contentId: results.documentId,
- spaceId: results.spaceId,
- });
- });
-
- return c.json({ success: true, spaceId });
- } catch (e) {
- console.error("Failed to move content to space:", e);
- return c.json(
- {
- error: "Failed to move content to space",
- details: e instanceof Error ? e.message : "Unknown error",
- },
- 500
- );
- }
- }
- )
- .post(
- "/addContent",
- zValidator(
- "json",
- z.object({
- spaceId: z.string(),
- documentId: z.string(),
- })
- ),
- async (c) => {
- const body = c.req.valid("json");
- const user = c.get("user");
- const { spaceId, documentId } = body;
-
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- const db = database(c.env.HYPERDRIVE.connectionString);
-
- try {
- await db.transaction(async (tx) => {
- // If adding to <HOME>, just remove all space connections
- if (spaceId === "<HOME>") {
- const doc = await tx
- .select()
- .from(documents)
- .where(eq(documents.uuid, documentId))
- .limit(1);
-
- if (!doc[0]) {
- return c.json({ error: "Document not found" }, 404);
- }
-
- await tx
- .delete(contentToSpace)
- .where(eq(contentToSpace.contentId, doc[0].id));
- return;
- }
-
- // Get space and document, verify space ownership
- const results = (
- await tx
- .select({
- spaceId: spaces.id,
- documentId: documents.id,
- ownerId: spaces.ownerId,
- })
- .from(spaces)
- .innerJoin(
- documents,
- and(eq(spaces.uuid, spaceId), eq(documents.uuid, documentId))
- )
- .limit(1)
- )[0];
-
- if (!results) {
- return c.json({ error: "Space or document not found" }, 404);
- }
-
- if (results.ownerId !== user.id) {
- return c.json(
- { error: "Not authorized to modify this space" },
- 403
- );
- }
-
- // Check if mapping already exists to avoid duplicates
- const existing = await tx
- .select()
- .from(contentToSpace)
- .where(
- and(
- eq(contentToSpace.contentId, results.documentId),
- eq(contentToSpace.spaceId, results.spaceId)
- )
- )
- .limit(1);
-
- if (existing.length > 0) {
- return c.json({ error: "Content already exists in space" }, 409);
- }
-
- await tx.insert(contentToSpace).values({
- contentId: results.documentId,
- spaceId: results.spaceId,
- });
- });
-
- return c.json({ success: true });
- } catch (e) {
- console.error("Failed to add content to space:", e);
- return c.json(
- {
- error: "Failed to add content to space",
- details: e instanceof Error ? e.message : "Unknown error",
- },
- 500
- );
- }
- }
- )
- .post(
- "/:spaceId/invite",
- zValidator(
- "json",
- z.object({
- email: z.string().email("Invalid email address"),
- accessType: z.enum(["read", "edit"], {
- errorMap: () => ({
- message: "Access type must be either 'read' or 'edit'",
- }),
- }),
- })
- ),
- async (c) => {
- const user = c.get("user");
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- const { spaceId } = c.req.param();
- const { email, accessType } = c.req.valid("json");
-
- const db = database(c.env.HYPERDRIVE.connectionString);
-
- // Check if space exists and user has permission to invite
- const space = await db
- .select()
- .from(spaces)
- .where(eq(spaces.uuid, spaceId))
- .limit(1);
-
- if (space.length === 0) {
- return c.json({ error: "Space not found" }, 404);
- }
-
- // Only space owner can invite others
- if (space[0].ownerId !== user.id) {
- return c.json({ error: "Only space owner can invite users" }, 403);
- }
-
- // Check if invite already exists
- const existingInvite = await db
- .select()
- .from(spaceAccess)
- .where(
- and(
- eq(spaceAccess.spaceId, space[0].id),
- eq(spaceAccess.userEmail, email)
- )
- )
- .limit(1);
-
- if (existingInvite.length > 0) {
- return c.json(
- { error: "User already has access or pending invite" },
- 400
- );
- }
-
- // Create invite
- await db.insert(spaceAccess).values({
- spaceId: space[0].id,
- userEmail: email,
- accessType,
- status: "pending",
- });
-
- // TODO: send email to the user
-
- return c.json({ success: true });
- }
- )
- .get(
- "/:spaceId/invitation",
- zValidator("param", z.object({ spaceId: z.string() })),
- async (c) => {
- const { spaceId } = c.req.valid("param");
- const user = c.get("user");
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- const db = database(c.env.HYPERDRIVE.connectionString);
-
- const space = await db
- .select()
- .from(spaces)
- .where(eq(spaces.uuid, spaceId))
- .limit(1);
-
- if (space.length === 0) {
- console.log("Space not found", spaceId);
- return c.json({ error: "Space not found" }, 401);
- }
-
- // Get pending invitation with access type
- const invitation = await db
- .select()
- .from(spaceAccess)
- .where(
- and(
- eq(spaceAccess.spaceId, space[0].id),
- eq(spaceAccess.userEmail, user.email),
- eq(spaceAccess.status, "pending")
- )
- )
- .limit(1);
-
- if (invitation.length === 0) {
- return c.json({ error: "No pending invitation found" }, 403);
- }
-
- return c.json({
- space: space[0],
- accessType: invitation[0].accessType,
- });
- }
- )
- .post(
- "/invites/:action",
- zValidator(
- "json",
- z.object({
- spaceId: z.string().min(5, "Invalid space ID format"),
- })
- ),
- async (c) => {
- const user = c.get("user");
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- const db = database(c.env.HYPERDRIVE.connectionString);
-
- const { action } = c.req.param();
- if (action !== "accept" && action !== "reject") {
- return c.json({ error: "Invalid action" }, 400);
- }
-
- const { spaceId } = c.req.valid("json");
- console.log("space ID", spaceId);
-
- // Get space
- const space = await db
- .select()
- .from(spaces)
- .where(eq(spaces.uuid, spaceId))
- .limit(1);
-
- if (space.length === 0) {
- return c.json({ error: "Space not found" }, 404);
- }
-
- // Update invite status
- const updateResult = await db
- .update(spaceAccess)
- .set({ status: action === "accept" ? "accepted" : "rejected" })
- .where(
- and(
- eq(spaceAccess.spaceId, space[0].id),
- eq(spaceAccess.userEmail, user.email),
- eq(spaceAccess.status, "pending")
- )
- );
-
- if (updateResult.length === 0) {
- return c.json({ error: "No pending invite found" }, 404);
- }
-
- return c.json({ success: true });
- }
- )
- .patch(
- "/:spaceId",
- zValidator(
- "json",
- z.object({
- name: z.string().min(1, "Space name cannot be empty").max(100),
- })
- ),
- async (c) => {
- const user = c.get("user");
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- const { spaceId } = c.req.param();
- const { name } = c.req.valid("json");
- const db = database(c.env.HYPERDRIVE.connectionString);
-
- // Get space and verify ownership
- const space = await db
- .select()
- .from(spaces)
- .where(eq(spaces.uuid, spaceId))
- .limit(1);
-
- if (space.length === 0) {
- return c.json({ error: "Space not found" }, 404);
- }
-
- if (space[0].ownerId !== user.id) {
- return c.json({ error: "Only space owner can edit space name" }, 403);
- }
-
- if (name.trim() === "<HOME>") {
- return c.json({ error: "Cannot use reserved name <HOME>" }, 400);
- }
-
- // Update space name
- await db
- .update(spaces)
- .set({ name: name.trim() })
- .where(eq(spaces.uuid, spaceId));
-
- return c.json({ success: true, name: name.trim() });
- }
- )
- .delete("/:spaceId", async (c) => {
- const user = c.get("user");
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- const { spaceId } = c.req.param();
- const db = database(c.env.HYPERDRIVE.connectionString);
-
- const space = await db
- .select()
- .from(spaces)
- .where(eq(spaces.uuid, spaceId))
- .limit(1);
-
- if (space.length === 0) {
- return c.json({ error: "Space not found" }, 404);
- }
-
- await db.delete(spaces).where(eq(spaces.uuid, spaceId));
-
- return c.json({ success: true });
- });
-
-export default spacesRoute;
diff --git a/apps/backend/src/routes/user.ts b/apps/backend/src/routes/user.ts
deleted file mode 100644
index 4905989f..00000000
--- a/apps/backend/src/routes/user.ts
+++ /dev/null
@@ -1,193 +0,0 @@
-import { Hono } from "hono";
-import { Env, Variables } from "../types";
-import { and, database, desc, eq, isNotNull, or, sql } from "@supermemory/db";
-import {
- chatThreads,
- savedSpaces,
- spaceAccess,
- spaces,
- users,
-} from "@supermemory/db/schema";
-import { decryptApiKey, getApiKey } from "../auth";
-import { DurableObjectStore } from "@hono-rate-limiter/cloudflare";
-import { rateLimiter } from "hono-rate-limiter";
-import { fromHono } from "chanfana";
-
-const user = fromHono(new Hono<{ Variables: Variables; Bindings: Env }>())
- .get("/", (c) => {
- return c.json(c.get("user"));
- })
- .get("/spaces", async (c) => {
- const user = c.get("user");
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- const db = database(c.env.HYPERDRIVE.connectionString);
-
- const [allSpaces, savedSpacesList, spaceOwners] = await Promise.all([
- db
- .select({
- id: spaces.id,
- uuid: spaces.uuid,
- name: sql<string>`REGEXP_REPLACE(${spaces.name}, E'[\\n\\r]+', ' ', 'g')`.as(
- "name"
- ),
- ownerId: spaces.ownerId,
- isPublic: spaces.isPublic,
- createdAt: spaces.createdAt,
- accessType: spaceAccess.accessType,
- })
- .from(spaces)
- .leftJoin(
- spaceAccess,
- and(
- eq(spaces.id, spaceAccess.spaceId),
- eq(spaceAccess.userEmail, user.email),
- eq(spaceAccess.status, "accepted")
- )
- )
- .where(or(eq(spaces.ownerId, user.id), isNotNull(spaceAccess.spaceId)))
- .orderBy(desc(spaces.createdAt)),
-
- db
- .select({
- spaceId: savedSpaces.spaceId,
- })
- .from(savedSpaces)
- .where(eq(savedSpaces.userId, user.id)),
-
- db
- .select({
- id: users.id,
- uuid: users.uuid,
- name: users.firstName,
- email: users.email,
- profileImage: users.profilePictureUrl,
- })
- .from(users)
- .innerJoin(spaces, eq(spaces.ownerId, users.id)),
- ]);
-
- const savedSpaceIds = new Set(savedSpacesList.map((s) => s.spaceId));
- const ownerMap = new Map(spaceOwners.map((owner) => [owner.id, owner]));
-
- const spacesWithDetails = allSpaces.map((space) => {
- const isOwner = space.ownerId === user.id;
- const owner = ownerMap.get(space.ownerId);
-
- return {
- ...space,
- favorited: savedSpaceIds.has(space.id),
- permissions: {
- canRead: space.isPublic || isOwner || space.accessType != null,
- canEdit: isOwner || space.accessType === "edit",
- isOwner,
- },
- owner: isOwner
- ? null
- : {
- id: owner?.uuid,
- name: owner?.name,
- email: owner?.email,
- profileImage: owner?.profileImage,
- },
- };
- });
-
- return c.json({ spaces: spacesWithDetails });
- })
- .get("/history", async (c) => {
- const user = c.get("user");
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- const history = await database(c.env.HYPERDRIVE.connectionString)
- .select()
- .from(chatThreads)
- .where(eq(chatThreads.userId, user.id))
- .orderBy(desc(chatThreads.createdAt))
- .limit(10);
-
- return c.json({ history });
- })
- .get("/invitations", async (c) => {
- const user = c.get("user");
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- const db = database(c.env.HYPERDRIVE.connectionString);
-
- const invitations = await db
- .select({
- spaceAccess: spaceAccess,
- spaceUuid: spaces.uuid,
- spaceName: spaces.name,
- })
- .from(spaceAccess)
- .innerJoin(spaces, eq(spaceAccess.spaceId, spaces.id))
- .where(eq(spaceAccess.userEmail, user.email))
- .limit(100);
-
- return c.json({ invitations });
- })
- .get("/key", async (c) => {
- const user = c.get("user");
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- // we need user.id and user.lastApiKeyGeneratedAt
- const lastApiKeyGeneratedAt = user.lastApiKeyGeneratedAt?.getTime();
- if (!lastApiKeyGeneratedAt) {
- return c.json({ error: "No API key generated" }, 400);
- }
-
- const key = await getApiKey(user.uuid, lastApiKeyGeneratedAt.toString(), c);
-
- const decrypted = await decryptApiKey(key, c);
- return c.json({ key, decrypted });
- })
- .post("/update", async (c) => {
- const user = c.get("user");
- if (!user) {
- return c.json({ error: "Unauthorized" }, 401);
- }
-
- const body = await c.req.json();
-
- // Only allow updating specific safe fields
- const allowedFields = {
- firstName: true,
- lastName: true,
- profilePictureUrl: true,
- hasOnboarded: true,
- };
-
- const updateData: Record<string, unknown> = {};
- for (const [key, value] of Object.entries(body)) {
- if (allowedFields[key as keyof typeof allowedFields]) {
- updateData[key] = value;
- }
- }
-
- if (Object.keys(updateData).length === 0) {
- return c.json({ error: "No valid fields to update" }, 400);
- }
-
- const db = database(c.env.HYPERDRIVE.connectionString);
-
- await db
- .update(users)
- .set({
- ...updateData,
- updatedAt: new Date(),
- })
- .where(eq(users.id, user.id));
-
- return c.json({ success: true });
- });
-
-export default user;
diff --git a/apps/backend/src/types.ts b/apps/backend/src/types.ts
deleted file mode 100644
index 0323a9e6..00000000
--- a/apps/backend/src/types.ts
+++ /dev/null
@@ -1,79 +0,0 @@
-import { DurableObjectRateLimiter } from "@hono-rate-limiter/cloudflare";
-import { Session } from "@supermemory/authkit-remix-cloudflare/src/interfaces";
-import { User } from "@supermemory/db/schema";
-import { z } from "zod";
-
-export type Variables = {
- user: User | null;
- session: Session | null;
-};
-
-export type WorkflowParams = {
- userId: number;
- content: string;
- spaces?: string[];
- type: string;
- uuid: string;
- url?: string;
- prefetched?: {
- contentToVectorize: string;
- contentToSave: string;
- title: string;
- type: string;
- description: string;
- ogImage: string;
- };
- createdAt: string;
-};
-
-export type Env = {
- WORKOS_API_KEY: string;
- WORKOS_CLIENT_ID: string;
- WORKOS_COOKIE_PASSWORD: string;
- DATABASE_URL: string;
- CONTENT_WORKFLOW: Workflow;
- GEMINI_API_KEY: string;
- NODE_ENV: string;
- OPEN_AI_API_KEY: string;
- BRAINTRUST_API_KEY: string;
- RESEND_API_KEY: string;
- TURNSTILE_SECRET_KEY: string;
-
- MD_CACHE: KVNamespace;
- HYPERDRIVE: Hyperdrive;
- EMAIL_LIMITER: {
- limit: (params: { key: string }) => Promise<{ success: boolean }>;
- };
- ENCRYPTED_TOKENS: KVNamespace;
- RATE_LIMITER: DurableObjectNamespace<DurableObjectRateLimiter>;
- AI: Ai
-};
-
-export type JobData = {
- content: string;
- spaces?: Array<string>;
- user: number;
- type: string;
-};
-
-type BaseChunks = {
- type: "tweet" | "page" | "note" | "image";
-};
-
-export type PageOrNoteChunks = BaseChunks & {
- type: "page" | "note";
- chunks: string[];
-};
-
-export type Metadata = {
- media?: Array<string>;
- links?: Array<string>; // idk how ideal this is will figure out after plate js thing
-};
-
-export type SpaceStatus = {
- type: "inviting" | "invited" | "pending" | "accepted";
-};
-
-export const recommendedQuestionsSchema = z
- .array(z.string().max(200))
- .length(10);
diff --git a/apps/backend/src/utils/chunkers.ts b/apps/backend/src/utils/chunkers.ts
deleted file mode 100644
index ce345d29..00000000
--- a/apps/backend/src/utils/chunkers.ts
+++ /dev/null
@@ -1,116 +0,0 @@
-import nlp from "compromise";
-
-export default function chunkText(
- text: string,
- maxChunkSize: number,
- overlap: number = 0.2
-): string[] {
- // Pre-process text to remove excessive whitespace
- text = text.replace(/\s+/g, " ").trim();
-
- const sentences = nlp(text).sentences().out("array");
- const chunks: {
- text: string;
- start: number;
- end: number;
- metadata?: {
- position: string;
- context?: string;
- };
- }[] = [];
-
- let currentChunk: string[] = [];
- let currentSize = 0;
-
- for (let i = 0; i < sentences.length; i++) {
- const sentence = sentences[i].trim();
-
- // Skip empty sentences
- if (!sentence) continue;
-
- // If a single sentence is longer than maxChunkSize, split it
- if (sentence.length > maxChunkSize) {
- if (currentChunk.length > 0) {
- chunks.push({
- text: currentChunk.join(" "),
- start: i - currentChunk.length,
- end: i - 1,
- metadata: {
- position: `${i - currentChunk.length}-${i - 1}`,
- context: currentChunk[0].substring(0, 100), // First 100 chars for context
- },
- });
- currentChunk = [];
- currentSize = 0;
- }
-
- // Split long sentence into smaller chunks
- const words = sentence.split(" ");
- let tempChunk: string[] = [];
-
- for (const word of words) {
- if (tempChunk.join(" ").length + word.length > maxChunkSize) {
- chunks.push({
- text: tempChunk.join(" "),
- start: i,
- end: i,
- metadata: {
- position: `${i}`,
- context: "Split sentence",
- },
- });
- tempChunk = [];
- }
- tempChunk.push(word);
- }
-
- if (tempChunk.length > 0) {
- chunks.push({
- text: tempChunk.join(" "),
- start: i,
- end: i,
- metadata: {
- position: `${i}`,
- context: "Split sentence remainder",
- },
- });
- }
- continue;
- }
-
- currentChunk.push(sentence);
- currentSize += sentence.length;
-
- if (currentSize >= maxChunkSize) {
- const overlapSize = Math.floor(currentChunk.length * overlap);
- chunks.push({
- text: currentChunk.join(" "),
- start: i - currentChunk.length + 1,
- end: i,
- metadata: {
- position: `${i - currentChunk.length + 1}-${i}`,
- context: currentChunk[0].substring(0, 100),
- },
- });
-
- // Keep overlap sentences for next chunk
- currentChunk = currentChunk.slice(-overlapSize);
- currentSize = currentChunk.reduce((sum, s) => sum + s.length, 0);
- }
- }
-
- // Handle remaining sentences
- if (currentChunk.length > 0) {
- chunks.push({
- text: currentChunk.join(" "),
- start: sentences.length - currentChunk.length,
- end: sentences.length - 1,
- metadata: {
- position: `${sentences.length - currentChunk.length}-${sentences.length - 1}`,
- context: currentChunk[0].substring(0, 100),
- },
- });
- }
-
- return chunks.map((chunk) => chunk.text);
-}
diff --git a/apps/backend/src/utils/cipher.ts b/apps/backend/src/utils/cipher.ts
deleted file mode 100644
index 3ba2e905..00000000
--- a/apps/backend/src/utils/cipher.ts
+++ /dev/null
@@ -1,79 +0,0 @@
-async function encrypt(data: string, key: string): Promise<string> {
- try {
- const encoder = new TextEncoder();
- const encodedData = encoder.encode(data);
-
- const baseForIv = encoder.encode(data + key);
- const ivHash = await crypto.subtle.digest('SHA-256', baseForIv);
- const iv = new Uint8Array(ivHash).slice(0, 12);
-
- const cryptoKey = await crypto.subtle.importKey(
- "raw",
- encoder.encode(key),
- { name: "AES-GCM", length: 256 },
- false,
- ["encrypt", "decrypt"]
- );
-
- const encrypted = await crypto.subtle.encrypt(
- { name: "AES-GCM", iv: new Uint8Array(iv).buffer as ArrayBuffer },
- cryptoKey,
- encodedData
- );
-
- const combined = new Uint8Array([...iv, ...new Uint8Array(encrypted)]);
-
- // Convert to base64 safely
- const base64 = Buffer.from(combined).toString("base64");
-
- // Make URL-safe
- return base64.replace(/\+/g, "-").replace(/\//g, "_").replace(/=+$/, "");
- } catch (err) {
- console.error("Encryption error:", err);
- throw err;
- }
-}
-
-async function decrypt(encryptedData: string, key: string): Promise<string> {
- try {
- // Restore base64 padding and convert URL-safe chars
- const base64 = encryptedData
- .replace(/-/g, "+")
- .replace(/_/g, "/")
- .padEnd(
- encryptedData.length + ((4 - (encryptedData.length % 4)) % 4),
- "="
- );
-
- // Use Buffer for safer base64 decoding
- const combined = Buffer.from(base64, "base64");
- const combinedArray = new Uint8Array(combined);
-
- // Extract the IV that was used for encryption
- const iv = combinedArray.slice(0, 12);
- const encrypted = combinedArray.slice(12);
-
- // Import the same key used for encryption
- const cryptoKey = await crypto.subtle.importKey(
- "raw",
- new TextEncoder().encode(key),
- { name: "AES-GCM", length: 256 },
- false,
- ["encrypt", "decrypt"]
- );
-
- // Use the extracted IV and key to decrypt
- const decrypted = await crypto.subtle.decrypt(
- { name: "AES-GCM", iv: new Uint8Array(iv).buffer as ArrayBuffer },
- cryptoKey,
- encrypted.buffer as ArrayBuffer
- );
-
- return new TextDecoder().decode(decrypted);
- } catch (err) {
- console.error("Decryption error:", err);
- throw err;
- }
-}
-
-export { encrypt, decrypt }; \ No newline at end of file
diff --git a/apps/backend/src/utils/extractDocumentContent.ts b/apps/backend/src/utils/extractDocumentContent.ts
deleted file mode 100644
index 8b7d9256..00000000
--- a/apps/backend/src/utils/extractDocumentContent.ts
+++ /dev/null
@@ -1,87 +0,0 @@
-import * as mammoth from "mammoth";
-import { NonRetryableError } from "cloudflare:workflows";
-import { resolvePDFJS } from 'pdfjs-serverless';
-
-interface DocumentContent {
- content: string;
- error?: string;
-}
-
-export const extractDocumentContent = async (
- url: string
-): Promise<DocumentContent> => {
- try {
- const fileExtension = url.split(".").pop()?.toLowerCase();
-
- if (!fileExtension) {
- throw new Error("Invalid file URL");
- }
-
- console.log("file", fileExtension);
-
- switch (fileExtension) {
- case "pdf":
- return await extractPdfContent(url);
- case "md":
- case "txt":
- return await extractTextContent(url);
- case "doc":
- case "docx":
- return await extractWordContent(url);
- default:
- throw new NonRetryableError(`Unsupported file type: ${fileExtension}`);
- }
- } catch (error) {
- return {
- content: "",
- error: error instanceof Error ? error.message : "Unknown error occurred",
- };
- }
-};
-
-async function extractPdfContent(url: string): Promise<DocumentContent> {
- try {
- const response = await fetch(url);
- const arrayBuffer = await response.arrayBuffer();
-
- // Initialize PDF.js with serverless compatibility
- const { getDocument } = await resolvePDFJS();
-
- // Load the PDF document
- const pdf = await getDocument({
- data: arrayBuffer,
- useSystemFonts: true,
- }).promise;
-
- let fullText = "";
-
- // Extract text from each page
- for (let i = 1; i <= pdf.numPages; i++) {
- const page = await pdf.getPage(i);
- const textContent = await page.getTextContent();
- const pageText = textContent.items.map((item: any) => item.str).join(" ");
- fullText += pageText + "\n";
- }
-
- return { content: fullText };
- } catch (error) {
- console.error("Error extracting PDF content:", error);
- return {
- content: "",
- error: error instanceof Error ? error.message : "Failed to extract PDF content",
- };
- }
-}
-
-async function extractTextContent(url: string): Promise<DocumentContent> {
- const response = await fetch(url);
- const text = await response.text();
- return { content: text };
-}
-
-async function extractWordContent(url: string): Promise<DocumentContent> {
- const response = await fetch(url);
- const arrayBuffer = await response.arrayBuffer();
- const result = await mammoth.extractRawText({ arrayBuffer });
- return { content: result.value };
-}
diff --git a/apps/backend/src/utils/extractor.ts b/apps/backend/src/utils/extractor.ts
deleted file mode 100644
index f033f8e1..00000000
--- a/apps/backend/src/utils/extractor.ts
+++ /dev/null
@@ -1,50 +0,0 @@
-import { Env } from "../types";
-
-export const extractPageContent = async (content: string, env: Env) => {
- const resp = await fetch(`https://r.jina.ai/${content}`);
-
- if (!resp.ok) {
- throw new Error(
- `Failed to fetch ${content}: ${resp.statusText}` + (await resp.text())
- );
- }
-
- const metadataResp = await fetch(`https://md.dhr.wtf/metadata?url=${content}`);
-
- if (!metadataResp.ok) {
- throw new Error(
- `Failed to fetch metadata for ${content}: ${metadataResp.statusText}` +
- (await metadataResp.text())
- );
- }
-
- const metadata = await metadataResp.json() as {
- title?: string;
- description?: string;
- image?: string;
- favicon?: string;
- };
-
- const responseText = await resp.text();
-
- try {
- const json: {
- contentToVectorize: string;
- contentToSave: string;
- title?: string;
- description?: string;
- image?: string;
- favicon?: string;
- } = {
- contentToSave: responseText,
- contentToVectorize: responseText,
- title: metadata.title,
- description: metadata.description,
- image: metadata.image,
- favicon: metadata.favicon,
- };
- return json;
- } catch (e) {
- throw new Error(`Failed to parse JSON from ${content}: ${e}`);
- }
-};
diff --git a/apps/backend/src/utils/fetchers.ts b/apps/backend/src/utils/fetchers.ts
deleted file mode 100644
index 2329f48a..00000000
--- a/apps/backend/src/utils/fetchers.ts
+++ /dev/null
@@ -1,143 +0,0 @@
-import { WorkflowStep } from "cloudflare:workers";
-import { isErr, Ok } from "../errors/results";
-import { typeDecider } from "./typeDecider";
-import { Env, WorkflowParams } from "../types";
-import { unrollTweets } from "./tweetsToThreads";
-import { Tweet } from "react-tweet/api";
-import { NonRetryableError } from "cloudflare:workflows";
-import { extractPageContent } from "./extractor";
-import { extractDocumentContent } from "./extractDocumentContent";
-
-export const fetchContent = async (
- params: WorkflowParams,
- env: Env,
- step: WorkflowStep
-) => {
- const type = typeDecider(params.content);
-
- if (isErr(type)) {
- throw type.error;
- }
-
- switch (type.value) {
- case "page":
- const pageContent = await step?.do(
- "extract page content",
- async () => await extractPageContent(params.content, env)
- );
- return {
- ...pageContent,
- type: "page",
- };
-
- case "tweet":
- const tweetUrl = new URL(params.content);
- tweetUrl.search = ""; // Remove all search params
- const tweetId = tweetUrl.pathname.split("/").pop();
-
- const rawBaseTweetContent = await step.do(
- "extract tweet content",
- async () => {
- const url = `https://cdn.syndication.twimg.com/tweet-result?id=${tweetId}&lang=en&features=tfw_timeline_list%3A%3Btfw_follower_count_sunset%3Atrue%3Btfw_tweet_edit_backend%3Aon%3Btfw_refsrc_session%3Aon%3Btfw_fosnr_soft_interventions_enabled%3Aon%3Btfw_show_birdwatch_pivots_enabled%3Aon%3Btfw_show_business_verified_badge%3Aon%3Btfw_duplicate_scribes_to_settings%3Aon%3Btfw_use_profile_image_shape_enabled%3Aon%3Btfw_show_blue_verified_badge%3Aon%3Btfw_legacy_timeline_sunset%3Atrue%3Btfw_show_gov_verified_badge%3Aon%3Btfw_show_business_affiliate_badge%3Aon%3Btfw_tweet_edit_frontend%3Aon&token=4c2mmul6mnh`;
-
- const resp = await fetch(url, {
- headers: {
- "User-Agent":
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
- Accept: "application/json",
- "Accept-Language": "en-US,en;q=0.5",
- "Accept-Encoding": "gzip, deflate, br",
- Connection: "keep-alive",
- "Upgrade-Insecure-Requests": "1",
- "Cache-Control": "max-age=0",
- TE: "Trailers",
- },
- });
-
- const data = (await resp.json()) as Tweet;
- return data;
- }
- );
-
- let tweetContent: {
- text: string;
- metadata: {
- media?: string[] | undefined;
- links?: string[] | undefined;
- };
- raw: string;
- };
- const unrolledTweetContent = {
- value: [rawBaseTweetContent],
- };
- if (true) {
- console.error("Can't get thread, reverting back to single tweet");
- tweetContent = {
- text: rawBaseTweetContent.text,
- metadata: {
- media: [
- ...(rawBaseTweetContent.photos?.map((url) => url.expandedUrl) ??
- []),
- ...(rawBaseTweetContent.video?.variants[0].src ?? []),
- ],
- },
- raw: `<raw>${JSON.stringify(rawBaseTweetContent)}</raw>`,
- };
- } else {
- tweetContent = {
- text: unrolledTweetContent.value
- .map((tweet) => tweet.text)
- .join("\n"),
- metadata: {
- media: unrolledTweetContent.value.flatMap((tweet) => [
- ...tweet.videos,
- ...tweet.images,
- ]),
- links: unrolledTweetContent.value.flatMap((tweet) => tweet.links),
- },
- raw: `<raw>${JSON.stringify(rawBaseTweetContent)}</raw>`,
- };
- }
-
- // make it the same type as the page content
- const pageContentType: Awaited<ReturnType<typeof extractPageContent>> & {
- type: string;
- } = {
- contentToVectorize:
- tweetContent.text +
- "\n\nMetadata for this tweet:\n" +
- JSON.stringify(tweetContent.metadata) +
- "\n\nRaw tweet data:\n" +
- tweetContent.raw,
- contentToSave: tweetContent.raw,
- title: "",
- description: JSON.stringify(tweetContent.metadata),
- image: "",
- favicon: "",
- type: "tweet",
- };
- return pageContentType;
- case "note":
- const noteContent = {
- contentToVectorize: params.content,
- // TODO: different when using platejs
- contentToSave: params.content,
- // title is the first 30 characters of the first line
- title: params.content.split("\n")[0].slice(0, 30),
- type: "note",
- };
- return noteContent;
- case "document":
- const documentContent = await step.do(
- "extract document content",
- async () => await extractDocumentContent(params.content)
- );
- return {
- contentToVectorize: documentContent.content,
- contentToSave: documentContent.content,
- type: "document",
- };
- default:
- throw new NonRetryableError("Unknown content type");
- }
-};
diff --git a/apps/backend/src/utils/notion.ts b/apps/backend/src/utils/notion.ts
deleted file mode 100644
index ebe559e1..00000000
--- a/apps/backend/src/utils/notion.ts
+++ /dev/null
@@ -1,239 +0,0 @@
-interface PageContent {
- content: string;
- url: string;
- title: string;
- id: string;
- createdAt: string;
-}
-
-interface NotionBlock {
- type: string;
- [key: string]: any;
-}
-
-interface SearchResponse {
- results: {
- id: string;
- object: string;
- url: string;
- created_time: string;
- properties: {
- title?: {
- title: Array<{
- plain_text: string;
- }>;
- };
- Name?: {
- title: Array<{
- plain_text: string;
- }>;
- };
- };
- }[];
- next_cursor: string | undefined;
- has_more: boolean;
-}
-
-interface BlockResponse {
- results: NotionBlock[];
- next_cursor: string | undefined;
- has_more: boolean;
-}
-
-export const getAllNotionPageContents = async (
- token: string,
- onProgress: (progress: number) => Promise<void>
-): Promise<PageContent[]> => {
- const pages: PageContent[] = [];
- const NOTION_API_VERSION = "2022-06-28";
- const BASE_URL = "https://api.notion.com/v1";
- const MAX_RETRIES = 3;
- const BATCH_SIZE = 10; // Number of concurrent requests
- const PAGE_SIZE = 100; // Number of pages to fetch per search request
-
- const delay = (ms: number) =>
- new Promise((resolve) => setTimeout(resolve, ms));
-
- const notionFetch = async (
- endpoint: string,
- options: RequestInit = {},
- retries = 0
- ): Promise<any> => {
- try {
- const response = await fetch(`${BASE_URL}${endpoint}`, {
- ...options,
- headers: {
- Authorization: `Bearer ${token}`,
- "Notion-Version": NOTION_API_VERSION,
- "Content-Type": "application/json",
- ...((options.headers || {}) as Record<string, string>),
- },
- });
-
- if (response.status === 429) {
- // Rate limit error
- const retryAfter = parseInt(response.headers.get("Retry-After") || "5");
- if (retries < MAX_RETRIES) {
- await delay(retryAfter * 1000);
- return notionFetch(endpoint, options, retries + 1);
- }
- }
-
- if (!response.ok) {
- const errorText = await response.text();
- throw new Error(
- `Notion API error: ${response.statusText}\n${errorText}`
- );
- }
-
- return response.json();
- } catch (error) {
- if (retries < MAX_RETRIES) {
- await delay(2000 * (retries + 1)); // Exponential backoff
- return notionFetch(endpoint, options, retries + 1);
- }
- throw error;
- }
- };
-
- const convertBlockToMarkdown = (block: NotionBlock): string => {
- switch (block.type) {
- case "paragraph":
- return (
- block.paragraph?.rich_text
- ?.map((text: any) => text.plain_text)
- .join("") || ""
- );
- case "heading_1":
- return `# ${block.heading_1?.rich_text
- ?.map((text: any) => text.plain_text)
- .join("")}\n`;
- case "heading_2":
- return `## ${block.heading_2?.rich_text
- ?.map((text: any) => text.plain_text)
- .join("")}\n`;
- case "heading_3":
- return `### ${block.heading_3?.rich_text
- ?.map((text: any) => text.plain_text)
- .join("")}\n`;
- case "bulleted_list_item":
- return `* ${block.bulleted_list_item?.rich_text
- ?.map((text: any) => text.plain_text)
- .join("")}\n`;
- case "numbered_list_item":
- return `1. ${block.numbered_list_item?.rich_text
- ?.map((text: any) => text.plain_text)
- .join("")}\n`;
- case "to_do":
- const checked = block.to_do?.checked ? "x" : " ";
- return `- [${checked}] ${block.to_do?.rich_text
- ?.map((text: any) => text.plain_text)
- .join("")}\n`;
- case "code":
- return `\`\`\`${block.code?.language || ""}\n${block.code?.rich_text
- ?.map((text: any) => text.plain_text)
- .join("")}\n\`\`\`\n`;
- case "quote":
- return `> ${block.quote?.rich_text
- ?.map((text: any) => text.plain_text)
- .join("")}\n`;
- default:
- return "";
- }
- };
-
- const getAllBlocks = async (pageId: string): Promise<NotionBlock[]> => {
- const blocks: NotionBlock[] = [];
- let cursor: string | undefined = undefined;
-
- do {
- const endpoint = `/blocks/${pageId}/children${
- cursor ? `?start_cursor=${cursor}` : ""
- }`;
- const response = (await notionFetch(endpoint)) as BlockResponse;
- blocks.push(...response.results);
- cursor = response.next_cursor;
- } while (cursor);
-
- return blocks;
- };
-
- try {
- let hasMore = true;
- let cursor: string | undefined = undefined;
- let allPages: SearchResponse["results"] = [];
-
- // First, collect all pages
- while (hasMore) {
- const searchResponse = (await notionFetch("/search", {
- method: "POST",
- body: JSON.stringify({
- filter: {
- value: "page",
- property: "object",
- },
- sort: {
- direction: "ascending",
- timestamp: "last_edited_time",
- },
- start_cursor: cursor,
- page_size: PAGE_SIZE,
- }),
- })) as SearchResponse;
-
- allPages = [...allPages, ...searchResponse.results];
- cursor = searchResponse.next_cursor;
- hasMore = searchResponse.has_more;
-
- // Report progress for page collection (0-30%)
- const progressPercent = (allPages.length / (allPages.length + searchResponse.results.length)) * 30;
- await onProgress(progressPercent);
- }
-
- // Process pages in parallel batches
- for (let i = 0; i < allPages.length; i += BATCH_SIZE) {
- const batch = allPages.slice(i, i + BATCH_SIZE);
- const batchResults = await Promise.all(
- batch.map(async (page) => {
- try {
- const blocks = await getAllBlocks(page.id);
- const pageContent = {
- content: blocks.map(convertBlockToMarkdown).join("\n"),
- url: page.url || `https://notion.so/${page.id.replace(/-/g, "")}`,
- title:
- page.properties?.Name?.title?.[0]?.plain_text ||
- page.properties?.title?.title?.[0]?.plain_text ||
- "Untitled",
- id: page.id,
- createdAt: page.created_time,
- };
- return pageContent.content.length > 10 ? pageContent : null;
- } catch (error) {
- console.error(`Error processing page ${page.id}:`, error);
- return null;
- }
- })
- );
-
- pages.push(
- ...batchResults.filter(
- (result): result is PageContent => result !== null
- )
- );
-
- // Report progress for page processing (30-100%)
- const progressPercent = 30 + ((i + BATCH_SIZE) / allPages.length) * 70;
- await onProgress(Math.min(progressPercent, 100));
-
- // Add a small delay between batches to respect rate limits
- if (i + BATCH_SIZE < allPages.length) {
- await delay(1000);
- }
- }
-
- return pages.filter((page) => page.content.length > 10);
- } catch (error) {
- console.error("Error fetching Notion pages:", error);
- throw error;
- }
-};
diff --git a/apps/backend/src/utils/tweetsToThreads.ts b/apps/backend/src/utils/tweetsToThreads.ts
deleted file mode 100644
index 85f69b87..00000000
--- a/apps/backend/src/utils/tweetsToThreads.ts
+++ /dev/null
@@ -1,108 +0,0 @@
-import * as cheerio from "cheerio";
-import { BaseError } from "../errors/baseError";
-import { Ok, Result } from "../errors/results";
-
-interface Tweet {
- id: string;
- text: string;
- links: Array<string>;
- images: Array<string>;
- videos: Array<string>;
-}
-
-class ProcessTweetsError extends BaseError {
- constructor(message?: string, source?: string) {
- super("[Thread Proceessing Error]", message, source);
- }
-}
-
-type TweetProcessResult = Array<Tweet>;
-
-// there won't be a need for url caching right?
-export async function unrollTweets(
- url: string
-): Promise<Result<TweetProcessResult, ProcessTweetsError>> {
- const tweetId = url.split("/").pop();
- const response = await fetch(`https://unrollnow.com/status/${tweetId}`, {
- headers: {
- "User-Agent":
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
- "Cache-Control": "max-age=3600",
- },
- });
-
- if (!response.ok) {
- const error = await response.text();
- console.error(error);
- throw new Error(`HTTP error! status: ${response.status} - ${error}`);
- }
-
- const html = await response.text();
- const $ = cheerio.load(html);
- const tweets: Array<Tweet> = [];
-
- const urlRegex = /(https?:\/\/\S+)/g;
- const paragraphs = $(".mainarticle p").toArray();
-
- const processedTweets = await Promise.all(
- paragraphs.map(async (element, i) => {
- const $tweet = $(element);
- let tweetText = $tweet.text().trim();
- if (tweetText.length < 1) {
- return null;
- }
-
- if (i === paragraphs.length - 1 && tweetText.toLowerCase() === "yes") {
- return null;
- }
-
- const shortUrls = tweetText.match(urlRegex) || [];
- console.log("SHORT_URLS_LEN", shortUrls.length);
- console.log("SHORT_URLS", shortUrls);
-
- const expandedUrls = await Promise.all(shortUrls.map(expandShortUrl));
-
- tweetText = tweetText.replace(urlRegex, "").trim().replace(/\s+/g, " ");
-
- const images = $tweet
- .nextUntil("p")
- .find("img.tweetimg")
- .map((i, img) => $(img).attr("src"))
- .get();
-
- const videos = $tweet
- .nextUntil("p")
- .find("video > source")
- .map((i, vid) => $(vid).attr("src"))
- .get();
-
- return {
- id: `${tweetId}_${i}`,
- text: tweetText,
- links: expandedUrls,
- images: images,
- videos: videos,
- };
- })
- );
-
- tweets.push(
- ...processedTweets.filter((tweet): tweet is Tweet => tweet !== null)
- );
-
- return Ok(tweets);
-}
-
-async function expandShortUrl(shortUrl: string): Promise<string> {
- try {
- const response = await fetch(shortUrl, {
- method: "HEAD",
- redirect: "follow",
- });
- const expandedUrl = response.url;
- return expandedUrl;
- } catch (error) {
- console.error(`Failed to expand URL: ${shortUrl}`, error);
- return shortUrl;
- }
-}
diff --git a/apps/backend/src/utils/typeDecider.ts b/apps/backend/src/utils/typeDecider.ts
deleted file mode 100644
index 642b178e..00000000
--- a/apps/backend/src/utils/typeDecider.ts
+++ /dev/null
@@ -1,41 +0,0 @@
-import { Result, Ok, Err } from "../errors/results";
-import { BaseError } from "../errors/baseError";
-
-export type contentType = "page" | "tweet" | "note" | "document" | "notion";
-
-class GetTypeError extends BaseError {
- constructor(message?: string, source?: string) {
- super("[Decide Type Error]", message, source);
- }
-}
-export const typeDecider = (
- content: string
-): Result<contentType, GetTypeError> => {
- try {
- // if the content is a URL, then it's a page. if its a URL with https://x.com/user/status/123, then it's a tweet.
- // if it ends with .pdf etc then it's a document. else, it's a note.
- // do strict checking with regex
- if (
- content.match(/https?:\/\/(x\.com|twitter\.com)\/[\w]+\/[\w]+\/[\d]+/)
- ) {
- return Ok("tweet");
- } else if (content.match(/\.(pdf|doc|docx|txt|rtf|odt|md)/i)) {
- return Ok("document");
- } else if (
- content.match(/https?:\/\/(www\.)?notion\.so\/.*/)
- ) {
- return Ok("notion");
- } else if (
- content.match(
- /^(https?:\/\/)?(www\.)?[a-z0-9]+([-.]{1}[a-z0-9]+)*\.[a-z]{2,5}(\/.*)?$/i
- )
- ) {
- return Ok("page");
- } else {
- return Ok("note");
- }
- } catch (e) {
- console.error("[Decide Type Error]", e);
- return Err(new GetTypeError((e as Error).message, "typeDecider"));
- }
-};
diff --git a/apps/backend/src/workflow/index.ts b/apps/backend/src/workflow/index.ts
deleted file mode 100644
index 8efcfacc..00000000
--- a/apps/backend/src/workflow/index.ts
+++ /dev/null
@@ -1,217 +0,0 @@
-import {
- WorkflowEntrypoint,
- WorkflowStep,
- WorkflowEvent,
-} from "cloudflare:workers";
-import { Env, WorkflowParams } from "../types";
-import { fetchContent } from "../utils/fetchers";
-import chunkText from "../utils/chunkers";
-import { database, eq, inArray } from "@supermemory/db";
-import {
- ChunkInsert,
- contentToSpace,
- documents,
- spaces,
-} from "@supermemory/db/schema";
-import { embedMany } from "ai";
-import { openai } from "../providers";
-import { chunk } from "@supermemory/db/schema";
-import { NonRetryableError } from "cloudflare:workflows";
-
-// TODO: handle errors properly here.
-
-export class ContentWorkflow extends WorkflowEntrypoint<Env, WorkflowParams> {
- async run(event: WorkflowEvent<WorkflowParams>, step: WorkflowStep) {
- // Step 0: Check if user has reached memory limit
- await step.do("check memory limit", async () => {
- const existingMemories = await database(
- this.env.HYPERDRIVE.connectionString
- )
- .select()
- .from(documents)
- .where(eq(documents.userId, event.payload.userId));
-
- if (existingMemories.length >= 2000) {
- await database(this.env.HYPERDRIVE.connectionString)
- .delete(documents)
- .where(eq(documents.uuid, event.payload.uuid));
- throw new NonRetryableError(
- "You have reached the maximum limit of 2000 memories"
- );
- }
- });
-
- // Step 1: Get and format the content.
- const rawContent =
- event.payload.prefetched ??
- (await step.do(
- "fetch content",
- async () => await fetchContent(event.payload, this.env, step)
- ));
-
- // check that the rawcontent is not too big
- if (rawContent.contentToVectorize.length > 100000) {
- await database(this.env.HYPERDRIVE.connectionString)
- .delete(documents)
- .where(eq(documents.uuid, event.payload.uuid));
- throw new NonRetryableError("The content is too big (maximum 20 pages)");
- }
-
- const chunked = await step.do("chunk content", async () =>
- chunkText(rawContent.contentToVectorize, 768)
- );
-
- // Step 2: Create the document in the database.
- const document = await step.do("create document", async () => {
- try {
- // First check if document exists
- const existingDoc = await database(this.env.HYPERDRIVE.connectionString)
- .select()
- .from(documents)
- .where(eq(documents.uuid, event.payload.uuid))
- .limit(1);
-
- return await database(this.env.HYPERDRIVE.connectionString)
- .insert(documents)
- .values({
- userId: event.payload.userId,
- type: event.payload.type,
- uuid: event.payload.uuid,
- ...(event.payload.url && { url: event.payload.url }),
- title: rawContent.title,
- content: rawContent.contentToSave,
- description:
- "description" in rawContent
- ? (rawContent.description ?? "")
- : (event.payload.prefetched?.description ?? undefined),
- ogImage:
- "image" in rawContent
- ? (rawContent.image ?? "")
- : (event.payload.prefetched?.ogImage ?? undefined),
- raw: rawContent.contentToVectorize,
- isSuccessfullyProcessed: false,
- updatedAt: new Date(),
- ...(event.payload.createdAt && {
- createdAt: new Date(event.payload.createdAt),
- }),
- })
- .onConflictDoUpdate({
- target: documents.uuid,
- set: {
- title: rawContent.title,
- content: rawContent.contentToSave,
- description:
- "description" in rawContent
- ? (rawContent.description ?? "")
- : (event.payload.prefetched?.description ?? undefined),
- ogImage:
- "image" in rawContent
- ? (rawContent.image ?? "")
- : (event.payload.prefetched?.ogImage ?? undefined),
- raw: rawContent.contentToVectorize,
- isSuccessfullyProcessed: false,
- updatedAt: new Date(),
- },
- })
- .returning();
- } catch (error) {
- console.log("here's the error", error);
- // Check if error is a unique constraint violation
- if (
- error instanceof Error &&
- error.message.includes("document_url_user_id_idx")
- ) {
- // Document already exists for this user, stop workflow
- await database(this.env.HYPERDRIVE.connectionString)
- .delete(documents)
- .where(eq(documents.uuid, event.payload.uuid));
- throw new NonRetryableError("Document already exists for this user");
- }
- if (
- error instanceof Error &&
- error.message.includes("document_raw_user_idx")
- ) {
- await database(this.env.HYPERDRIVE.connectionString)
- .delete(documents)
- .where(eq(documents.uuid, event.payload.uuid));
- throw new NonRetryableError("The exact same document already exists");
- }
- throw error; // Re-throw other errors
- }
- });
-
- if (!document || document.length === 0) {
- throw new Error(
- "Failed to create/update document - no document returned"
- );
- }
-
- // Step 3: Generate embeddings
- const { data: embeddings } = await this.env.AI.run(
- "@cf/baai/bge-base-en-v1.5",
- {
- text: chunked,
- }
- );
-
- // Step 4: Prepare chunk data
- const chunkInsertData: ChunkInsert[] = await step.do(
- "prepare chunk data",
- async () =>
- chunked.map((chunk, index) => ({
- documentId: document[0].id,
- textContent: chunk,
- orderInDocument: index,
- embeddings: embeddings[index],
- }))
- );
-
- // Step 5: Insert chunks
- if (chunkInsertData.length > 0) {
- await step.do("insert chunks", async () =>
- database(this.env.HYPERDRIVE.connectionString).transaction(
- async (trx) => {
- await trx.insert(chunk).values(chunkInsertData);
- }
- )
- );
- }
-
- // step 6: add content to spaces
- if (event.payload.spaces) {
- await step.do("add content to spaces", async () => {
- await database(this.env.HYPERDRIVE.connectionString).transaction(
- async (trx) => {
- // First get the space IDs from the UUIDs
- const spaceIds = await trx
- .select({ id: spaces.id })
- .from(spaces)
- .where(inArray(spaces.uuid, event.payload.spaces ?? []));
-
- if (spaceIds.length === 0) {
- return;
- }
-
- // Then insert the content-space mappings using the actual space IDs
- await trx.insert(contentToSpace).values(
- spaceIds.map((space) => ({
- contentId: document[0].id,
- spaceId: space.id,
- }))
- );
- }
- );
- });
- }
-
- // Step 7: Mark the document as successfully processed
- await step.do("mark document as successfully processed", async () => {
- await database(this.env.HYPERDRIVE.connectionString)
- .update(documents)
- .set({
- isSuccessfullyProcessed: true,
- })
- .where(eq(documents.id, document[0].id));
- });
- }
-}