diff options
| author | Mahesh Sanikommmu <[email protected]> | 2025-08-16 18:50:10 -0700 |
|---|---|---|
| committer | Mahesh Sanikommmu <[email protected]> | 2025-08-16 18:50:10 -0700 |
| commit | 39003aff23d64ff1d96074d71521f6023c9bec01 (patch) | |
| tree | 3f870c04b3dce315bba1b21aa2da158494e71774 /apps/backend/src | |
| parent | Merge pull request #355 from supermemoryai/archive (diff) | |
| download | supermemory-39003aff23d64ff1d96074d71521f6023c9bec01.tar.xz supermemory-39003aff23d64ff1d96074d71521f6023c9bec01.zip | |
New Version of Supermemory Consumer App
Diffstat (limited to 'apps/backend/src')
22 files changed, 0 insertions, 4443 deletions
diff --git a/apps/backend/src/auth.ts b/apps/backend/src/auth.ts deleted file mode 100644 index 0ceeb0c1..00000000 --- a/apps/backend/src/auth.ts +++ /dev/null @@ -1,151 +0,0 @@ -import { Context, Next } from "hono"; -import { getSessionFromRequest } from "@supermemory/authkit-remix-cloudflare/src/session"; -import { and, database, eq, sql } from "@supermemory/db"; -import { User, users } from "@supermemory/db/schema"; -import { Env, Variables } from "./types"; -import { encrypt, decrypt } from "./utils/cipher"; - -interface EncryptedData { - userId: string; - lastApiKeyGeneratedAt: string; -} - -export const getApiKey = async ( - userId: string, - lastApiKeyGeneratedAt: string, - c: Context<{ Variables: Variables; Bindings: Env }> -) => { - const data = `${userId}-${lastApiKeyGeneratedAt}`; - return "sm_" + (await encrypt(data, c.env.WORKOS_COOKIE_PASSWORD)); -}; - -export const decryptApiKey = async ( - encryptedKey: string, - c: Context<{ Variables: Variables; Bindings: Env }> -): Promise<EncryptedData> => { - const ourKey = encryptedKey.slice(3); - const decrypted = await decrypt(ourKey, c.env.WORKOS_COOKIE_PASSWORD); - const [userId, lastApiKeyGeneratedAt] = decrypted.split("-"); - - return { - userId, - lastApiKeyGeneratedAt, - }; -}; - -export const auth = async ( - c: Context<{ Variables: Variables; Bindings: Env }>, - next: Next -) => { - // Handle CORS preflight requests - if (c.req.method === "OPTIONS") { - return next() - } - - // Set cache control headers - c.header("Cache-Control", "private, no-cache, no-store, must-revalidate"); - c.header("Pragma", "no-cache"); - c.header("Expires", "0"); - - let user: User | User[] | undefined; - - // Check for API key authentication first - const authHeader = c.req.raw.headers.get("Authorization"); - if (authHeader?.startsWith("Bearer ")) { - const apiKey = authHeader.slice(7); - try { - const { userId, lastApiKeyGeneratedAt } = await decryptApiKey(apiKey, c); - - // Look up user with matching id and lastApiKeyGeneratedAt - user = await database(c.env.HYPERDRIVE.connectionString) - .select() - .from(users) - .where( - and( - eq(users.uuid, userId) - ) - ) - .limit(1); - - if (user && Array.isArray(user)) { - user = user[0]; - if (user && user.lastApiKeyGeneratedAt?.getTime() === Number(lastApiKeyGeneratedAt)) { - c.set("user", user); - } else { - return c.json({ error: "Invalid API key - user not found" }, 401); - } - } - } catch (err) { - console.error("API key authentication failed:", err); - return c.json({ error: "Invalid API key format" }, 401); - } - } - - // If no user found via API key, try cookie authentication - if (!user) { - const cookies = c.req.raw.headers.get("Cookie"); - if (cookies) { - // Fake remix context object. this just works. - const context = { - cloudflare: { - env: c.env, - }, - }; - - const session = await getSessionFromRequest(c.req.raw, context); - console.log("Session", session); - c.set("session", session); - - if (session?.user?.id) { - user = await database(c.env.HYPERDRIVE.connectionString) - .select() - .from(users) - .where(eq(users.uuid, session.user.id)) - .limit(1); - - if ((!user || user.length === 0) && session?.user?.id) { - const newUser = await database(c.env.HYPERDRIVE.connectionString) - .insert(users) - .values({ - uuid: session.user?.id, - email: session.user?.email, - firstName: session.user?.firstName, - lastName: session.user?.lastName, - createdAt: new Date(), - updatedAt: new Date(), - emailVerified: false, - profilePictureUrl: session.user?.profilePictureUrl ?? "", - }) - .returning() - .onConflictDoUpdate({ - target: [users.email], - set: { - uuid: session.user.id, - }, - }); - - user = newUser[0]; - } - - user = Array.isArray(user) ? user[0] : user; - c.set("user", user); - console.log("User", user); - } - } - } - - // Check if request requires authentication - const isPublicSpaceRequest = - c.req.url.includes("/v1/spaces/") || c.req.url.includes("/v1/memories"); - - if (!isPublicSpaceRequest && !c.get("user")) { - console.log("Unauthorized access to", c.req.url); - if (authHeader) { - return c.json({ error: "Invalid authentication credentials" }, 401); - } else { - return c.json({ error: "Authentication required" }, 401); - } - } - - return next(); -}; diff --git a/apps/backend/src/components/landing.tsx b/apps/backend/src/components/landing.tsx deleted file mode 100644 index 87bbdb69..00000000 --- a/apps/backend/src/components/landing.tsx +++ /dev/null @@ -1,234 +0,0 @@ -import { html } from "hono/html"; - -export function LandingPage() { - return ( - <html lang="en"> - <head> - <meta charset="UTF-8" /> - <meta name="viewport" content="width=device-width, initial-scale=1.0" /> - <link href="/output.css" rel="stylesheet" /> - <title>Supermemory API</title> - </head> - <body> - <div className="gradient-dark"> - <header className="bg-gray-900/50 dot-pattern"> - <nav className="container mx-auto px-6 py-4"> - <div className="flex items-center justify-between"> - <div className="text-2xl font-bold text-white"> - Supermemory API - </div> - <div className="hidden md:flex space-x-8"> - <a - href="#features" - className="text-gray-300 hover:text-white" - > - Features - </a> - <a - href="https://docs.supermemory.ai/" - target="_blank" - className="text-gray-300 hover:text-white" - rel="noreferrer" - > - Documentation - </a> - </div> - <a - href="https://docs.supermemory.ai/" - target="_blank" - className="bg-blue-600 text-white px-6 py-2 rounded-lg hover:bg-blue-700" - rel="noreferrer" - > - Get Started - </a> - </div> - </nav> - - <div className="container mx-auto px-6 py-16 text-center"> - <h1 className="text-4xl font-bold text-white mb-4"> - The Modern API for Knowledge Management - </h1> - <p className="text-xl text-gray-300 mb-8"> - Build powerful search and AI applications with our flexible, - production-ready API - </p> - <div className="flex justify-center space-x-4"> - <a - href="https://docs.supermemory.ai/" - target="_blank" - className="bg-blue-600 text-white px-8 py-3 rounded-lg hover:bg-blue-700" - rel="noreferrer" - > - Get Started Free - </a> - <a - href="https://docs.supermemory.ai/" - target="_blank" - className="border border-gray-600 text-gray-300 px-8 py-3 rounded-lg hover:bg-gray-700" - rel="noreferrer" - > - View Docs - </a> - </div> - </div> - </header> - - <section id="features" className="py-20 bg-gray-900/50 dot-pattern"> - <div className="container mx-auto px-6"> - <h2 className="text-3xl font-bold text-center text-white mb-12"> - Key Features - </h2> - <div className="grid md:grid-cols-3 gap-8"> - <div className="p-6 border border-gray-600 rounded-lg bg-gray-900 hover:bg-gray-800 transition duration-300"> - <h3 className="text-xl font-semibold mb-4 text-white"> - Battle-Tested RAG Stack - </h3> - <p className="text-gray-300"> - Production-ready retrieval augmented generation architecture - for reliable and scalable information retrieval. - </p> - </div> - <div className="p-6 border border-gray-600 rounded-lg bg-gray-900 hover:bg-gray-800 transition duration-300"> - <h3 className="text-xl font-semibold mb-4 text-white"> - Flexible LLM Integration - </h3> - <p className="text-gray-300"> - Use any LLM of your choice or operate in search-only mode - for maximum flexibility and control. - </p> - </div> - <div className="p-6 border border-gray-600 rounded-lg bg-gray-900 hover:bg-gray-800 transition duration-300"> - <h3 className="text-xl font-semibold mb-4 text-white"> - Advanced Access Control - </h3> - <p className="text-gray-300"> - Comprehensive collection filtering and permission management - for secure data access. - </p> - </div> - <div className="p-6 border border-gray-600 rounded-lg bg-gray-900 hover:bg-gray-800 transition duration-300"> - <h3 className="text-xl font-semibold mb-4 text-white"> - Seamless Data Import - </h3> - <p className="text-gray-300"> - Magic link import and platform synchronization for - effortless data integration. - </p> - </div> - <div className="p-6 border border-gray-600 rounded-lg bg-gray-900 hover:bg-gray-800 transition duration-300"> - <h3 className="text-xl font-semibold mb-4 text-white"> - Real-time Monitoring - </h3> - <p className="text-gray-300"> - Track and analyze memory usage patterns in real-time with - detailed metrics. - </p> - </div> - <div className="p-6 border border-gray-600 rounded-lg bg-gray-900 hover:bg-gray-800 transition duration-300"> - <h3 className="text-xl font-semibold mb-4 text-white"> - Easy Integration - </h3> - <p className="text-gray-300"> - Simple API endpoints that integrate seamlessly with your - existing infrastructure. - </p> - </div> - </div> - </div> - </section> - - <footer className="bg-black/50 dot-pattern"> - <div className="container mx-auto px-6"> - <div className="grid md:grid-cols-4 gap-8"> - <div> - <h4 className="text-lg font-semibold mb-4"> - Supermemory API - </h4> - <p className="text-gray-400"> - Making memory management simple and efficient for developers - worldwide. - </p> - </div> - <div> - <h4 className="text-lg font-semibold mb-4">Product</h4> - <ul className="space-y-2 text-gray-400"> - <li> - <a href="#features" className="hover:text-white"> - Features - </a> - </li> - <li> - <a - href="https://docs.supermemory.ai/" - target="_blank" - className="hover:text-white" - rel="noreferrer" - > - Documentation - </a> - </li> - </ul> - </div> - <div> - <h4 className="text-lg font-semibold mb-4">Connect</h4> - <ul className="space-y-2 text-gray-400"> - <li> - <a - href="https://x.com/supermemoryai" - target="_blank" - className="hover:text-white" - rel="noreferrer" - > - X (formerly Twitter) - </a> - </li> - <li> - <a - href="https://github.com/supermemoryai" - target="_blank" - className="hover:text-white" - rel="noreferrer" - > - GitHub - </a> - </li> - <li> - <a - href="https://discord.gg/b3BgKWpbtR" - target="_blank" - className="hover:text-white" - rel="noreferrer" - > - Discord - </a> - </li> - </ul> - </div> - </div> - <div className="border-t border-gray-800 mt-8 pt-8 text-center text-gray-400"> - <p>© 2024 Supermemory API. All rights reserved.</p> - </div> - </div> - </footer> - - <style - dangerouslySetInnerHTML={{ - __html: ` - .dot-pattern { - background-image: radial-gradient( - rgba(255, 255, 255, 0.1) 1px, - transparent 1px - ); - background-size: 24px 24px; - } - .gradient-dark { - background: linear-gradient(to bottom right, rgb(17 24 39), rgb(0 0 0)); - } - `, - }} - /> - </div> - </body> - </html> - ); -} diff --git a/apps/backend/src/errors/baseError.ts b/apps/backend/src/errors/baseError.ts deleted file mode 100644 index bccc54df..00000000 --- a/apps/backend/src/errors/baseError.ts +++ /dev/null @@ -1,45 +0,0 @@ -export class BaseHttpError extends Error { - public status: number; - public message: string; - - constructor(status: number, message: string) { - super(message); - this.status = status; - this.message = message; - Object.setPrototypeOf(this, new.target.prototype); // Restore prototype chain - } - } - - - export class BaseError extends Error { - type: string; - message: string; - source: string; - ignoreLog: boolean; - - constructor( - type: string, - message?: string, - source?: string, - ignoreLog = false - ) { - super(); - - Object.setPrototypeOf(this, new.target.prototype); - - this.type = type; - this.message = - message ?? - "An unknown error occurred. If this persists, please contact us."; - this.source = source ?? "unspecified"; - this.ignoreLog = ignoreLog; - } - - toJSON(): Record<PropertyKey, string> { - return { - type: this.type, - message: this.message, - source: this.source, - }; - } - }
\ No newline at end of file diff --git a/apps/backend/src/errors/results.ts b/apps/backend/src/errors/results.ts deleted file mode 100644 index c5ab115b..00000000 --- a/apps/backend/src/errors/results.ts +++ /dev/null @@ -1,31 +0,0 @@ -import { BaseError } from "./baseError"; - -export type Result<T, E extends Error> = - | { ok: true; value: T } - | { ok: false; error: E }; - -export const Ok = <T>(data: T): Result<T, never> => { - return { ok: true, value: data }; -}; - -export const Err = <E extends BaseError>(error: E): Result<never, E> => { - return { ok: false, error }; -}; - -export async function wrap<T, E extends BaseError>( - p: Promise<T>, - errorFactory: (err: Error, source: string) => E, - source: string = "unspecified" - ): Promise<Result<T, E>> { - try { - return Ok(await p); - } catch (e) { - return Err(errorFactory(e as Error, source)); - } - } - -export function isErr<T, E extends Error>( - result: Result<T, E>, -): result is { ok: false; error: E } { - return !result.ok; -}
\ No newline at end of file diff --git a/apps/backend/src/globals.css b/apps/backend/src/globals.css deleted file mode 100644 index b5c61c95..00000000 --- a/apps/backend/src/globals.css +++ /dev/null @@ -1,3 +0,0 @@ -@tailwind base; -@tailwind components; -@tailwind utilities; diff --git a/apps/backend/src/index.tsx b/apps/backend/src/index.tsx deleted file mode 100644 index 4ccbb4c0..00000000 --- a/apps/backend/src/index.tsx +++ /dev/null @@ -1,255 +0,0 @@ -import { z } from "zod"; -import { type Context, Hono } from "hono"; -import { auth } from "./auth"; -import { logger } from "hono/logger"; -import { timing } from "hono/timing"; -import type { Env, Variables } from "./types"; -import { zValidator } from "@hono/zod-validator"; -import { database } from "@supermemory/db"; -import { waitlist } from "@supermemory/db/schema"; -import { cors } from "hono/cors"; -import { ContentWorkflow } from "./workflow"; -import { Resend } from "resend"; -import { LandingPage } from "./components/landing"; -import user from "./routes/user"; -import spacesRoute from "./routes/spaces"; -import actions from "./routes/actions"; -import memories from "./routes/memories"; -import integrations from "./routes/integrations"; -import { fromHono } from "chanfana"; -import { - DurableObjectRateLimiter, - DurableObjectStore, -} from "@hono-rate-limiter/cloudflare"; -import type { ConfigType, GeneralConfigType, rateLimiter } from "hono-rate-limiter"; - -// Create base Hono app first -const honoApp = new Hono<{ Variables: Variables; Bindings: Env }>(); - -const app = fromHono(honoApp); - -// Add all middleware and routes -app.use("*", timing()); -app.use("*", logger()); -app.use( - "*", - cors({ - origin: [ - "http://localhost:3000", - "https://supermemory.ai", - "https://*.supermemory.ai", - "https://*.supermemory.com", - "https://supermemory.com", - "chrome-extension://*", - ], - allowHeaders: ["*"], - allowMethods: ["*"], - credentials: true, - exposeHeaders: ["*"], - }) -); - -app.use("/v1/*", auth); -app.use("/v1/*", (c, next) => { - const user = c.get("user"); - - if (c.env.NODE_ENV === "development") { - return next(); - } - - // RATELIMITS - const rateLimitConfig = { - // Endpoints that bypass rate limiting - excludedPaths: [ - "/v1/add", - "/v1/chat", - "/v1/suggested-learnings", - "/v1/recommended-questions", - ] as (string | RegExp)[], - - // Custom rate limits for specific endpoints - customLimits: { - notionImport: { - paths: ["/v1/integrations/notion/import", "/v1/integrations/notion"], - windowMs: 10 * 60 * 1000, // 10 minutes - limit: 5, // 5 requests per 10 minutes - }, - inviteSpace: { - paths: [/^\v1\/spaces\/[^/]+\/invite$/], - windowMs: 60 * 1000, // 1 minute - limit: 5, // 5 requests per minute - }, - } as Record< - string, - { paths: (string | RegExp)[]; windowMs: number; limit: number } - >, - - default: { - windowMs: 60 * 1000, // 1 minute - limit: 100, // 100 requests per minute - }, - - common: { - standardHeaders: "draft-6", - keyGenerator: (c: Context) => - `${user?.uuid ?? c.req.header("cf-connecting-ip")}-${new Date().getDate()}`, // day so that limit gets reset every day - store: new DurableObjectStore({ namespace: c.env.RATE_LIMITER }), - } as GeneralConfigType<ConfigType>, - }; - - if ( - c.req.path && - rateLimitConfig.excludedPaths.some((path) => - typeof path === "string" ? c.req.path === path : path.test(c.req.path) - ) - ) { - return next(); - } - - // Check for custom rate limits - for (const [_, config] of Object.entries(rateLimitConfig.customLimits)) { - if ( - config.paths.some((path) => - typeof path === "string" ? c.req.path === path : path.test(c.req.path) - ) - ) { - return rateLimiter({ - windowMs: config.windowMs, - limit: config.limit, - ...rateLimitConfig.common, - })(c as any, next); - } - } - - // Apply default rate limit - return rateLimiter({ - windowMs: rateLimitConfig.default.windowMs, - limit: rateLimitConfig.default.limit, - ...rateLimitConfig.common, - })(c as any, next); -}); - -app.get("/", (c) => { - return c.html(<LandingPage />); -}); - -// TEMPORARY REDIRECT -app.all("/api/*", async (c) => { - // Get the full URL and path - const url = new URL(c.req.url); - const path = url.pathname; - const newPath = path.replace("/api", "/v1"); - - // Preserve query parameters and build target URL - const redirectUrl = `https://api.supermemory.ai${newPath}${url.search}`; - - // Use c.redirect() for a proper redirect - return c.redirect(redirectUrl); -}); - -app.route("/v1/user", user); -app.route("/v1/spaces", spacesRoute); -app.route("/v1", actions); -app.route("/v1/integrations", integrations); -app.route("/v1/memories", memories); - -app.get("/v1/session", (c) => { - const user = c.get("user"); - - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - return c.json({ - user, - }); -}); - -app.post( - "/waitlist", - zValidator( - "json", - z.object({ email: z.string().email(), token: z.string() }) - ), - async (c) => { - const { email, token } = c.req.valid("json"); - - const address = c.req.raw.headers.get("CF-Connecting-IP"); - - const idempotencyKey = crypto.randomUUID(); - const url = "https://challenges.cloudflare.com/turnstile/v0/siteverify"; - const firstResult = await fetch(url, { - body: JSON.stringify({ - secret: c.env.TURNSTILE_SECRET_KEY, - response: token, - remoteip: address, - idempotency_key: idempotencyKey, - }), - method: "POST", - headers: { - "Content-Type": "application/json", - }, - }); - - const firstOutcome = (await firstResult.json()) as { success: boolean }; - - if (!firstOutcome.success) { - console.info("Turnstile verification failed", firstOutcome); - return c.json( - { error: "Turnstile verification failed" }, - 439 as StatusCode - ); - } - - const resend = new Resend(c.env.RESEND_API_KEY); - - const db = database(c.env.HYPERDRIVE.connectionString); - - const ip = - c.req.header("cf-connecting-ip") || - `${c.req.raw.cf?.asn}-${c.req.raw.cf?.country}-${c.req.raw.cf?.city}-${c.req.raw.cf?.region}-${c.req.raw.cf?.postalCode}`; - - const { success } = await c.env.EMAIL_LIMITER.limit({ key: ip }); - - if (!success) { - return c.json({ error: "Rate limit exceeded" }, 429); - } - - const message = `Supermemory started as a side project a few months ago when I built it as a hackathon project. - <br></br> - you guys loved it too much. like wayy too much. it was embarrassing, because this was not it - it was nothing but a hackathon project. - <br></br> - I launched on github too. <a href="https://git.new/memory">https://github.com/supermemoryai/supermemory</a>, and we were somehow one of the fastest growing open source repositories in Q3 2024. - <br></br><br></br> - So, it's time to make this good. My vision is to make supermemory the best memory tool on the internet. - `; - - try { - await db.insert(waitlist).values({ email }); - await resend.emails.send({ - from: "Dhravya From Supermemory <[email protected]>", - to: email, - subject: "You're in the waitlist - A personal note from Dhravya", - html: `<p>Hi. I'm Dhravya. I'm building Supermemory to help people remember everything.<br></br> ${message} <br></br><br></br>I'll be in touch when we launch! Till then, just reply to this email if you wanna talk :)<br></br>If you want to follow me on X, here's my handle: <a href='https://x.com/dhravyashah'>@dhravyashah</a><br></br><br></br>- Dhravya</p>`, - }); - } catch (e) { - console.error(e); - return c.json({ error: "Failed to add to waitlist" }, 400); - } - - return c.json({ success: true }); - } -); - -app.onError((err, c) => { - console.error(err); - return c.json({ error: "Internal server error" }, 500); -}); - -export default { - fetch: app.fetch, -}; - -export { ContentWorkflow, DurableObjectRateLimiter }; - -export type AppType = typeof app; diff --git a/apps/backend/src/providers.ts b/apps/backend/src/providers.ts deleted file mode 100644 index ed9644a3..00000000 --- a/apps/backend/src/providers.ts +++ /dev/null @@ -1,19 +0,0 @@ -import { createOpenAI, OpenAIProvider } from "@ai-sdk/openai"; -import { createGoogleGenerativeAI } from "@ai-sdk/google"; -import { Env } from "./types"; - -export function openai( - env: Env, - apiKey?: string -): ReturnType<typeof createOpenAI> { - return createOpenAI({ - apiKey: apiKey || env.OPEN_AI_API_KEY, - baseURL: "https://gateway.ai.cloudflare.com/v1/47c2b4d598af9d423c06fc9f936226d5/supermemory/openai" - }); -} - -export function google(securityKey: string) { - return createGoogleGenerativeAI({ - apiKey: securityKey, - }); -} diff --git a/apps/backend/src/routes/actions.ts b/apps/backend/src/routes/actions.ts deleted file mode 100644 index 581e7787..00000000 --- a/apps/backend/src/routes/actions.ts +++ /dev/null @@ -1,1171 +0,0 @@ -import { Hono } from "hono"; -import { Variables, Env, recommendedQuestionsSchema } from "../types"; -import { zValidator } from "@hono/zod-validator"; -import { z } from "zod"; -import { - AISDKError, - convertToCoreMessages, - embed, - generateObject, - InvalidPromptError, - Message, - smoothStream, - StreamData, - streamText, - TextPart, -} from "ai"; -import { - chatThreads, - documents, - chunk, - spaces as spaceInDb, - spaceAccess, - type Space, - contentToSpace, -} from "@supermemory/db/schema"; -import { google, openai } from "../providers"; -import { randomId } from "@supermemory/shared"; -import { - and, - cosineDistance, - database, - desc, - eq, - exists, - inArray, - or, - sql, -} from "@supermemory/db"; -import { typeDecider } from "../utils/typeDecider"; -import { isErr, Ok } from "../errors/results"; -import { fromHono } from "chanfana"; - -const actions = fromHono(new Hono<{ Variables: Variables; Bindings: Env }>()) - .post( - "/chat", - zValidator( - "json", - z.object({ - messages: z.array(z.any()).min(1, "At least one message is required"), - threadId: z.string().optional(), - }) - ), - async (c) => { - const user = c.get("user"); - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - const { messages, threadId } = await c.req.valid("json"); - - const unfilteredCoreMessages = convertToCoreMessages( - (messages as Message[]) - .filter((m) => m.content.length > 0) - .map((m) => ({ - ...m, - content: - m.content + - (m.annotations - ? `<context>${JSON.stringify(m.annotations)}</context>` - : ""), - experimental_attachments: - m.experimental_attachments?.length && - m.experimental_attachments?.length > 0 - ? m.experimental_attachments - : (m.data as { files: [] })?.files, - })) - ); - - const coreMessages = unfilteredCoreMessages.filter( - (message) => message.content.length > 0 - ); - - const db = database(c.env.HYPERDRIVE.connectionString); - const { initLogger, wrapAISDKModel } = await import("braintrust"); - - // Initialize clients and loggers - const logger = initLogger({ - projectName: "supermemory", - apiKey: c.env.BRAINTRUST_API_KEY, - }); - - const googleClient = wrapAISDKModel( - openai(c.env).chat("gpt-4o-mini-2024-07-18") - ); - - // Get last user message and generate embedding in parallel with thread creation - let lastUserMessage = coreMessages.findLast((i) => i.role === "user"); - const queryText = - typeof lastUserMessage?.content === "string" - ? lastUserMessage.content - : lastUserMessage?.content.map((c) => (c as TextPart).text).join(""); - - if (!queryText || queryText.length === 0) { - return c.json({ error: "Empty query" }, 400); - } - - // Run embedding generation and thread creation in parallel - const [{ data: embedding }, thread] = await Promise.all([ - c.env.AI.run("@cf/baai/bge-base-en-v1.5", { text: queryText }), - !threadId - ? db - .insert(chatThreads) - .values({ - firstMessage: messages[0].content, - userId: user.id, - uuid: randomId(), - messages: coreMessages, - }) - .returning() - : null, - ]); - - const threadUuid = threadId || thread?.[0].uuid; - - if (!embedding) { - return c.json({ error: "Failed to generate embedding" }, 500); - } - - try { - const data = new StreamData(); - - // Pre-compute the vector similarity expression - const vectorSimilarity = sql<number>`1 - (embeddings <=> ${JSON.stringify(embedding[0])}::vector)`; - - // Get matching chunks with document info - const matchingChunks = await db - .select({ - chunkId: chunk.id, - documentId: chunk.documentId, - textContent: chunk.textContent, - orderInDocument: chunk.orderInDocument, - metadata: chunk.metadata, - similarity: vectorSimilarity, - // Document fields - docId: documents.id, - docUuid: documents.uuid, - docContent: documents.content, - docType: documents.type, - docUrl: documents.url, - docTitle: documents.title, - docDescription: documents.description, - docOgImage: documents.ogImage, - }) - .from(chunk) - .innerJoin(documents, eq(chunk.documentId, documents.id)) - .where( - and(eq(documents.userId, user.id), sql`${vectorSimilarity} > 0.3`) - ) - .orderBy(desc(vectorSimilarity)) - .limit(25); - - // Get unique document IDs from matching chunks - const uniqueDocIds = [ - ...new Set(matchingChunks.map((c) => c.documentId)), - ]; - - // Fetch all chunks for these documents to get context - const contextChunks = await db - .select({ - id: chunk.id, - documentId: chunk.documentId, - textContent: chunk.textContent, - orderInDocument: chunk.orderInDocument, - metadata: chunk.metadata, - }) - .from(chunk) - .where(inArray(chunk.documentId, uniqueDocIds)) - .orderBy(chunk.documentId, chunk.orderInDocument); - - // Group chunks by document - const chunksByDocument = new Map<number, typeof contextChunks>(); - for (const chunk of contextChunks) { - const docChunks = chunksByDocument.get(chunk.documentId) || []; - docChunks.push(chunk); - chunksByDocument.set(chunk.documentId, docChunks); - } - - // Create context with surrounding chunks - const contextualResults = matchingChunks.map((match) => { - const docChunks = chunksByDocument.get(match.documentId) || []; - const matchIndex = docChunks.findIndex((c) => c.id === match.chunkId); - - // Get surrounding chunks (2 before and 2 after for more context) - const start = Math.max(0, matchIndex - 2); - const end = Math.min(docChunks.length, matchIndex + 3); - const relevantChunks = docChunks.slice(start, end); - - return { - id: match.docId, - title: match.docTitle, - description: match.docDescription, - url: match.docUrl, - type: match.docType, - content: relevantChunks.map((c) => c.textContent).join("\n"), - similarity: Number(match.similarity.toFixed(4)), - chunks: relevantChunks.map((c) => ({ - id: c.id, - content: c.textContent, - orderInDocument: c.orderInDocument, - metadata: c.metadata, - isMatch: c.id === match.chunkId, - })), - }; - }); - - // Sort by similarity and take top results - const topResults = contextualResults - .sort((a, b) => b.similarity - a.similarity) - .slice(0, 10); - - data.appendMessageAnnotation(topResults); - - if (lastUserMessage) { - lastUserMessage.content = - typeof lastUserMessage.content === "string" - ? lastUserMessage.content + - `<context>${JSON.stringify(topResults)}</context>` - : [ - ...lastUserMessage.content, - { - type: "text", - text: `<context>${JSON.stringify(topResults)}</context>`, - }, - ]; - coreMessages[coreMessages.length - 1] = lastUserMessage; - } - - const result = await streamText({ - model: googleClient, - experimental_providerMetadata: { - metadata: { userId: user.id, chatThreadId: threadUuid ?? "" }, - }, - experimental_transform: smoothStream(), - messages: [ - { - role: "system", - content: `You are a knowledgeable and helpful AI assistant for Supermemory, a personal knowledge management app. Your goal is to help users explore and understand their saved content. - - Key guidelines: - - Maintain natural, engaging conversation while seamlessly incorporating relevant information from the user's knowledge base - - Build on previous messages in the conversation to provide coherent, contextual responses - - Be concise but thorough, focusing on the most relevant details - - When appropriate, make connections between different pieces of information - - If you're not sure about something, be honest and say so - - Feel free to ask clarifying questions if needed - - Make it easy to read for the user! - - Use markdown to format your responses but dont make your answers TOO long include any and all information related to context in the response if possible. - - only talk about the context if the right answer is in the context. - - You are Supermemory - a personal knowledge management app. - - You are built by Dhravya Shah (https://dhravya.dev). And the supermemory team (https://supermemory.ai). - - The user's saved content is provided in <context> tags. Use this information naturally without explicitly referencing it.`, - }, - ...coreMessages, - ], - async onFinish(completion) { - try { - if (lastUserMessage) { - lastUserMessage.content = - typeof lastUserMessage.content === "string" - ? lastUserMessage.content.replace( - /<context>[\s\S]*?<\/context>/g, - "" - ) - : lastUserMessage.content.filter( - (part) => - !( - part.type === "text" && - part.text.startsWith("<context>") - ) - ); - coreMessages[coreMessages.length - 1] = lastUserMessage; - } - - const newMessages = [ - ...coreMessages, - { - role: "assistant", - content: - completion.text + - `<context>[${JSON.stringify(topResults)}]</context>`, - }, - ]; - - if (threadUuid) { - await db - .update(chatThreads) - .set({ messages: newMessages }) - .where(eq(chatThreads.uuid, threadUuid)); - } - } catch (error) { - console.error("Failed to update thread:", error); - } finally { - await data.close(); - } - }, - }); - - return result.toDataStreamResponse({ - headers: { - "Supermemory-Thread-Uuid": threadUuid ?? "", - "Content-Type": "text/x-unknown", - "content-encoding": "identity", - "transfer-encoding": "chunked", - }, - data, - }); - } catch (error) { - console.error("Chat error:", error); - - if (error instanceof InvalidPromptError) { - return c.json( - { error: "Invalid prompt - please rephrase your message" }, - 400 - ); - } - - if ((error as AISDKError).cause === "ECONNREFUSED") { - return c.json({ error: "Database connection failed" }, 503); - } - - return c.json( - { - error: "An unexpected error occurred", - details: - c.env.NODE_ENV === "development" - ? (error as Error).message - : undefined, - }, - 500 - ); - } - } - ) - .get( - "/chat/:threadUuid", - zValidator( - "param", - z.object({ - threadUuid: z.string(), - }) - ), - async (c) => { - const user = c.get("user"); - const threadUuid = c.req.valid("param").threadUuid; - - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - const chatHistory = await database(c.env.HYPERDRIVE.connectionString) - .select() - .from(chatThreads) - .where( - and(eq(chatThreads.userId, user.id), eq(chatThreads.uuid, threadUuid)) - ); - - if (!chatHistory) { - return c.json({ error: "Chat history not found" }, 404); - } - - return c.json({ chatHistory: chatHistory[0].messages }); - } - ) - .get("/recommended-questions", async (c) => { - const user = c.get("user"); - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - const db = database(c.env.HYPERDRIVE.connectionString); - const recentDocuments = await db - .select() - .from(documents) - .where(eq(documents.userId, user.id)) - .orderBy(sql`RANDOM()`) - .limit(3); - - if (recentDocuments.length === 0) { - return c.json({ questions: [] }); - } - - const cachedQuestions = await c.env.MD_CACHE.get(`rq:${user.id}`); - if (cachedQuestions) { - const randomQuestions = JSON.parse(cachedQuestions) - .questions.sort(() => Math.random() - 0.5) - .slice(0, 3); - return c.json({ questions: randomQuestions }); - } - - const { initLogger, wrapAISDKModel } = await import("braintrust"); - - const logger = initLogger({ - projectName: "supermemory", - apiKey: c.env.BRAINTRUST_API_KEY, - }); - - const model = wrapAISDKModel(openai(c.env).chat("gpt-4o-mini-2024-07-18")); - - const aiResponse = await generateObject({ - schema: z.object({ - questions: recommendedQuestionsSchema, - }), - model, - prompt: `You are helping generate search suggestions for a user's personal knowledge base. - - Generate 10 specific, focused questions based on the following documents. The questions should: - - Be highly specific and reference concrete details from the documents - - Focus on key insights, important facts, or interesting relationships - - Be phrased naturally, as if the user is trying to recall something they learned - - Be 2-8 words long - - Not include generic questions that could apply to any document - - Documents: - ${recentDocuments.map((d) => d.content).join("\n\n")}`, - }); - - await c.env.MD_CACHE.put( - `rq:${user.id}`, - JSON.stringify(aiResponse.object), - { - // 3 hours - expirationTtl: 10800, - } - ); - - const questions = aiResponse.object.questions; - const randomQuestions = questions - .sort(() => Math.random() - 0.5) - .slice(0, 3); - return c.json({ questions: randomQuestions }); - }) - .get("/suggested-learnings", async (c) => { - const user = await c.get("user"); - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - const db = database(c.env.HYPERDRIVE.connectionString); - - // Try to get from cache first - const cacheKey = `sl:${user.id}`; - const cached = await c.env.MD_CACHE.get(cacheKey); - if (cached) { - return c.json({ - suggestedLearnings: JSON.parse(cached) as { [x: string]: string }, - }); - } - - // Get random sample of user's documents that are well-distributed - const recentLearnings = await db - .select() - .from(documents) - .where(eq(documents.userId, user.id)) - // Use random() to get distributed sample - .orderBy(sql`RANDOM()`) - .limit(7); - - if (recentLearnings.length === 0 || recentLearnings.length < 3) { - return c.json({ suggestedLearnings: [] }); - } - - // for each document, i want to generate a list of - // small markdown tweet-like things that the user might want to remember about - const suggestedLearnings = await Promise.all( - recentLearnings.map(async (document) => { - const model = openai(c.env).chat("gpt-4o-mini-2024-07-18"); - const prompt = `Generate a concise topic recall card for this document. The card should: - - Have a clear title that captures the main topic - - based on when the document was saved, include a brief "Last (week/month/...), you saved notes on..." intro (do something different every time.) - - List 2-3 key points from the content in simple bullet points - - Keep the total length under 280 characters - - Focus on the core concepts worth remembering - - Be in markdown format - - if you don't have a good suggestions, just skip that document. - - Here's the document content: ${document.content}, Document saved at: ${document.updatedAt}, Today's date: ${new Date().toLocaleDateString()}`; - const response = await generateObject({ - schema: z.object({ - [document.uuid]: z.string(), - }), - // @ts-ignore - model, - prompt, - }); - return response.object; - }) - ); - - // Cache the results - await c.env.MD_CACHE.put(cacheKey, JSON.stringify(suggestedLearnings), { - expirationTtl: 60 * 60 * 3, // 3 hours - }); - - return c.json({ suggestedLearnings }); - }) - .post( - "/search", - zValidator( - "json", - z.object({ - query: z.string().min(1, "Search query cannot be empty"), - limit: z.number().min(1).max(50).default(10), - threshold: z.number().min(0).max(1).default(0), - spaces: z.array(z.string()).optional(), - }) - ), - async (c) => { - const { query, limit, threshold, spaces } = c.req.valid("json"); - const user = c.get("user"); - - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - const db = database(c.env.HYPERDRIVE.connectionString); - - if (spaces && spaces.length > 0) { - const spaceDetails = await Promise.all( - spaces.map(async (spaceId) => { - const space = await db - .select() - .from(spaceInDb) - .where(eq(spaceInDb.uuid, spaceId)) - .limit(1); - - if (space.length === 0) return null; - return { - id: space[0].id, - ownerId: space[0].ownerId, - uuid: space[0].uuid, - }; - }) - ); - - // Filter out any null values and check permissions - const validSpaces = spaceDetails.filter( - (s): s is NonNullable<typeof s> => s !== null - ); - const unauthorized = validSpaces.filter((s) => s.ownerId !== user.id); - - if (unauthorized.length > 0) { - return c.json( - { - error: "Space permission denied", - details: unauthorized.map((s) => s.uuid).join(", "), - }, - 403 - ); - } - - // Replace UUIDs with IDs for the database query - spaces.splice( - 0, - spaces.length, - ...validSpaces.map((s) => s.id.toString()) - ); - } - - try { - // Generate embedding for the search query - const embeddings = await c.env.AI.run("@cf/baai/bge-base-en-v1.5", { - text: query, - }); - - if (!embeddings.data) { - return c.json( - { error: "Failed to generate embedding for query" }, - 500 - ); - } - - // Pre-compute the vector similarity expression - const vectorSimilarity = sql<number>`1 - (embeddings <=> ${JSON.stringify(embeddings.data[0])}::vector)`; - - // Get matching chunks - const results = await db - .select({ - chunkId: chunk.id, - documentId: chunk.documentId, - textContent: chunk.textContent, - orderInDocument: chunk.orderInDocument, - metadata: chunk.metadata, - similarity: vectorSimilarity, - // Document fields - docUuid: documents.uuid, - docContent: documents.content, - docType: documents.type, - docUrl: documents.url, - docTitle: documents.title, - docCreatedAt: documents.createdAt, - docUpdatedAt: documents.updatedAt, - docUserId: documents.userId, - docDescription: documents.description, - docOgImage: documents.ogImage, - }) - .from(chunk) - .innerJoin(documents, eq(chunk.documentId, documents.id)) - .where( - and( - eq(documents.userId, user.id), - sql`${vectorSimilarity} > ${threshold}`, - ...(spaces && spaces.length > 0 - ? [ - exists( - db - .select() - .from(contentToSpace) - .where( - and( - eq(contentToSpace.contentId, documents.id), - inArray( - contentToSpace.spaceId, - spaces.map((spaceId) => Number(spaceId)) - ) - ) - ) - ), - ] - : []) - ) - ) - .orderBy(desc(vectorSimilarity)) - .limit(limit); - - // Group results by document and take the best matching chunk - const documentResults = new Map<number, (typeof results)[0]>(); - for (const result of results) { - const existingResult = documentResults.get(result.documentId); - if ( - !existingResult || - result.similarity > existingResult.similarity - ) { - documentResults.set(result.documentId, result); - } - } - - // Convert back to array and format response - const finalResults = Array.from(documentResults.values()) - .sort((a, b) => b.similarity - a.similarity) - .map((r) => ({ - id: r.documentId, - uuid: r.docUuid, - content: r.docContent, - type: r.docType, - url: r.docUrl, - title: r.docTitle, - createdAt: r.docCreatedAt, - updatedAt: r.docUpdatedAt, - userId: r.docUserId, - description: r.docDescription, - ogImage: r.docOgImage, - similarity: Number(r.similarity.toFixed(4)), - matchingChunk: { - id: r.chunkId, - content: r.textContent, - orderInDocument: r.orderInDocument, - metadata: r.metadata, - }, - })); - - return c.json({ results: finalResults }); - } catch (error) { - console.error("[Search Error]", error); - return c.json( - { - error: "Search failed", - details: - c.env.NODE_ENV === "development" - ? (error as Error).message - : undefined, - }, - 500 - ); - } - } - ) - .post( - "/add", - zValidator( - "json", - z.object({ - content: z.string().min(1, "Content cannot be empty"), - spaces: z.array(z.string()).max(5).optional(), - id: z.string().optional(), - // any type of metadata. must be json serializable. - metadata: z.any().optional(), - images: z.array(z.string()).optional(), - prefetched: z - .object({ - contentToVectorize: z.string(), - contentToSave: z.string(), - title: z.string(), - type: z.string(), - description: z.string().optional(), - ogImage: z.string().optional(), - }) - .optional(), - }) - ), - async (c) => { - const body = c.req.valid("json"); - - const user = c.get("user"); - - if (!user) { - return c.json({ error: "You must be logged in to add content" }, 401); - } - - // Do not perform seperate check for prefetched type, breaks tweet addition from extension - const type = typeDecider(body.content); - - if (isErr(type)) { - return c.json( - { - error: "Could not determine content type", - details: type.error.message, - }, - 400 - ); - } - - if (type.value === "page" && !body.content.startsWith("http")) { - body.content = `https://${body.content}`; - } - - const uuid = body.id ?? randomId(); - const contentId = `add-${user.id}-${uuid}`; - - const db = database(c.env.HYPERDRIVE.connectionString); - - // Calculate document hash early to enable faster duplicate detection - const content = body.prefetched?.contentToVectorize || body.content; - const encoder = new TextEncoder(); - const data = encoder.encode(content); - const hashBuffer = await crypto.subtle.digest("SHA-256", data); - const hashArray = Array.from(new Uint8Array(hashBuffer)); - const documentHash = hashArray - .map((b) => b.toString(16).padStart(2, "0")) - .join(""); - - // Check for duplicates using hash - const existingDocs = await db - .select() - .from(documents) - .where( - and( - eq(documents.userId, user.id), - or( - eq(documents.contentHash, documentHash), - and( - eq(documents.type, type.value), - or(eq(documents.url, body.content), eq(documents.raw, content)) - ) - ) - ) - ); - - if (existingDocs.length > 0) { - return c.json( - { error: `That ${type.value} already exists in your memories` }, - 409 - ); - } - - // Check space permissions if spaces are specified - if (body.spaces && body.spaces.length > 0) { - const spacePermissions = await Promise.all( - body.spaces.map(async (spaceId) => { - const space = await db - .select() - .from(spaceInDb) - .where(eq(spaceInDb.uuid, spaceId)) - .limit(1); - - if (!space[0]) { - // create a new space for the user with the given id - const newSpace = await db - .insert(spaceInDb) - .values({ - uuid: spaceId, - name: spaceId, - isPublic: false, - ownerId: user.id, - createdAt: new Date(), - updatedAt: new Date(), - }) - .returning(); - - return { - spaceId: newSpace[0].id, - allowed: true, - error: null, - }; - } - - const spaceData = space[0] as Space; - - // If public space, only owner can add content - if (spaceData.isPublic) { - return { - spaceId, - allowed: spaceData.ownerId === user.id, - error: - spaceData.ownerId !== user.id - ? "Only space owner can add to public spaces" - : null, - }; - } - - // For private spaces, check if user is owner or in allowlist - const spaceAccessCheck = await db - .select() - .from(spaceAccess) - .where( - and( - eq(spaceAccess.spaceId, spaceData.id), - eq(spaceAccess.userEmail, user.email), - eq(spaceAccess.status, "accepted") - ) - ) - .limit(1); - - return { - spaceId, - allowed: - spaceData.ownerId === user.id || spaceAccessCheck.length > 0, - error: - spaceData.ownerId !== user.id && !spaceAccessCheck.length - ? "Not authorized to add to this space" - : null, - }; - }) - ); - - const unauthorized = spacePermissions.filter((p) => !p.allowed); - if (unauthorized.length > 0) { - return c.json( - { - error: "Space permission denied", - details: unauthorized - .map((u) => `${u.spaceId}: ${u.error}`) - .join(", "), - }, - 403 - ); - } - } - - const isExternalContent = [ - "page", - "tweet", - "document", - "notion", - ].includes(type.value); - const indexedUrl = isExternalContent - ? body.content - : `https://supermemory.ai/content/${contentId}`; - - // Insert into documents table with hash - try { - await db.insert(documents).values({ - uuid: contentId, - userId: user.id, - type: type.value, - url: indexedUrl, - title: body.prefetched?.title, - description: body.prefetched?.description, - ogImage: body.prefetched?.ogImage, - contentHash: documentHash, - raw: - (body.prefetched ?? body.content) + "\n\n" + body.spaces?.join(" "), - metadata: body.metadata, - }); - - await c.env.CONTENT_WORKFLOW.create({ - params: { - userId: user.id, - content: body.content, - spaces: body.spaces, - type: type.value, - uuid: contentId, - url: indexedUrl, - prefetched: body.prefetched, - }, - id: contentId, - }); - - return c.json({ - message: "Content added successfully", - id: contentId, - type: type.value, - }); - } catch (error) { - console.error("[Add Content Error]", error); - return c.json({ error: "Failed to process content" }, 500); - } - } - ) - .post( - "/batch-add", - zValidator( - "json", - z - .object({ - urls: z - .array(z.string()) - .min(1, "At least one URL is required") - .optional(), - contents: z - .array( - z.object({ - content: z.string(), - title: z.string(), - type: z.string(), - }) - ) - .optional(), - spaces: z.array(z.string()).max(5).optional(), - }) - .refine((data) => data.urls || data.contents, { - message: "Either urls or contents must be provided", - }) - ), - async (c) => { - const user = c.get("user"); - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - const { urls, contents, spaces } = await c.req.valid("json"); - - // Check space permissions if spaces are specified - if (spaces && spaces.length > 0) { - const db = database(c.env.HYPERDRIVE.connectionString); - const spacePermissions = await Promise.all( - spaces.map(async (spaceId) => { - const space = await db - .select() - .from(spaceInDb) - .where(eq(spaceInDb.uuid, spaceId)) - .limit(1); - - if (!space[0]) { - return { spaceId, allowed: false, error: "Space not found" }; - } - - const spaceData = space[0] as Space; - - // If public space, only owner can add content - if (spaceData.isPublic) { - return { - spaceId, - allowed: spaceData.ownerId === user.id, - error: - spaceData.ownerId !== user.id - ? "Only space owner can add to public spaces" - : null, - }; - } - - // For private spaces, check if user is owner or in allowlist - const spaceAccessCheck = await db - .select() - .from(spaceAccess) - .where( - and( - eq(spaceAccess.spaceId, spaceData.id), - eq(spaceAccess.userEmail, user.email), - eq(spaceAccess.status, "accepted") - ) - ) - .limit(1); - - return { - spaceId, - allowed: - spaceData.ownerId === user.id || spaceAccessCheck.length > 0, - error: - spaceData.ownerId !== user.id && !spaceAccessCheck.length - ? "Not authorized to add to this space" - : null, - }; - }) - ); - - const unauthorized = spacePermissions.filter((p) => !p.allowed); - if (unauthorized.length > 0) { - return c.json( - { - error: "Space permission denied", - details: unauthorized - .map((u) => `${u.spaceId}: ${u.error}`) - .join(", "), - }, - 403 - ); - } - } - - // Create a new ReadableStream for progress updates - const encoder = new TextEncoder(); - const stream = new ReadableStream({ - async start(controller) { - const db = database(c.env.HYPERDRIVE.connectionString); - const items = urls || contents || []; - const total = items.length; - let processed = 0; - let failed = 0; - let succeeded = 0; - - const sendMessage = (data: any) => { - const message = encoder.encode(`data: ${JSON.stringify(data)}\n\n`); - controller.enqueue(message); - }; - - for (const item of items) { - try { - processed++; - - // Handle both URL and markdown content - const content = typeof item === "string" ? item : item.content; - const title = typeof item === "string" ? null : item.title; - const type = - typeof item === "string" ? typeDecider(item) : Ok(item.type); - - if (isErr(type)) { - failed++; - sendMessage({ - progress: Math.round((processed / total) * 100), - status: "error", - url: typeof item === "string" ? item : item.title, - error: type.error.message, - processed, - total, - succeeded, - failed, - }); - continue; - } - - // Calculate document hash - const encoder = new TextEncoder(); - const data = encoder.encode(content); - const hashBuffer = await crypto.subtle.digest("SHA-256", data); - const hashArray = Array.from(new Uint8Array(hashBuffer)); - const documentHash = hashArray - .map((b) => b.toString(16).padStart(2, "0")) - .join(""); - - // Check for duplicates - const existingDocs = await db - .select() - .from(documents) - .where( - and( - eq(documents.userId, user.id), - or( - eq(documents.contentHash, documentHash), - eq(documents.raw, content) - ) - ) - ); - - if (existingDocs.length > 0) { - failed++; - sendMessage({ - progress: Math.round((processed / total) * 100), - status: "duplicate", - title: typeof item === "string" ? item : item.title, - processed, - total, - succeeded, - failed, - }); - continue; - } - - const contentId = `add-${user.id}-${randomId()}`; - const isExternalContent = - typeof item === "string" && - ["page", "tweet", "document", "notion"].includes(type.value); - const url = isExternalContent - ? content - : `https://supermemory.ai/content/${contentId}`; - - // Insert into documents table - await db.insert(documents).values({ - uuid: contentId, - userId: user.id, - type: type.value, - url, - title, - contentHash: documentHash, - raw: content + "\n\n" + spaces?.join(" "), - }); - - // Create workflow for processing - await c.env.CONTENT_WORKFLOW.create({ - params: { - userId: user.id, - content, - spaces, - type: type.value, - uuid: contentId, - url, - }, - id: contentId, - }); - - succeeded++; - sendMessage({ - progress: Math.round((processed / total) * 100), - status: "success", - title: typeof item === "string" ? item : item.title, - processed, - total, - succeeded, - failed, - }); - - // Add a small delay between requests - await new Promise((resolve) => setTimeout(resolve, 100)); - } catch (error) { - failed++; - sendMessage({ - progress: Math.round((processed / total) * 100), - status: "error", - title: typeof item === "string" ? item : item.title, - error: error instanceof Error ? error.message : "Unknown error", - processed, - total, - succeeded, - failed, - }); - } - } - - sendMessage({ - progress: 100, - status: "complete", - processed, - total, - succeeded, - failed, - }); - controller.close(); - }, - }); - - return new Response(stream, { - headers: { - "Content-Type": "text/event-stream", - "Cache-Control": "no-cache", - Connection: "keep-alive", - }, - }); - } - ); - -export default actions; diff --git a/apps/backend/src/routes/integrations.ts b/apps/backend/src/routes/integrations.ts deleted file mode 100644 index 9aa369ea..00000000 --- a/apps/backend/src/routes/integrations.ts +++ /dev/null @@ -1,180 +0,0 @@ -import { Hono } from "hono"; -import type { Env, Variables } from "../types"; -import { getDecryptedKV } from "encrypt-workers-kv"; -import { getAllNotionPageContents } from "../utils/notion"; -import { and, eq, or } from "@supermemory/db"; -import { documents } from "@supermemory/db/schema"; -import { database } from "@supermemory/db"; -import { fromHono } from "chanfana"; - -const integrations = fromHono( - new Hono<{ Variables: Variables; Bindings: Env }>()).get("/notion/import", async (c) => { - const user = c.get("user"); - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - // Create SSE stream - const stream = new TransformStream(); - const writer = stream.writable.getWriter(); - const encoder = new TextEncoder(); - - // Create response first so client gets headers immediately - const response = new Response(stream.readable, { - headers: { - "Content-Type": "text/event-stream", - "Cache-Control": "no-cache", - Connection: "keep-alive", - // Required CORS headers for SSE - "Access-Control-Allow-Origin": "*", - "Access-Control-Allow-Credentials": "true", - }, - }); - - const sendMessage = async (data: Record<string, any>) => { - // Proper SSE format requires "data: " prefix and double newline - const formattedData = `data: ${JSON.stringify(data)}\n\n`; - await writer.write(encoder.encode(formattedData)); - }; - - // Start processing in background - c.executionCtx.waitUntil( - (async () => { - try { - // Send initial heartbeat - await sendMessage({ type: "connected" }); - - const token = await getDecryptedKV( - c.env.ENCRYPTED_TOKENS, - `${user.uuid}-notion`, - `${c.env.WORKOS_COOKIE_PASSWORD}-${user.uuid}` - ); - - const stringToken = new TextDecoder().decode(token); - if (!stringToken) { - await sendMessage({ type: "error", error: "No token found" }); - await writer.close(); - return; - } - - await sendMessage({ type: "progress", progress: 5 }); - - // Fetch pages with progress updates - const pages = await getAllNotionPageContents( - stringToken, - async (progress) => { - // Map progress from 0-100 to 5-40 range - const scaledProgress = Math.floor(5 + (progress * 35) / 100); - await sendMessage({ type: "progress", progress: scaledProgress }); - } - ); - - await sendMessage({ type: "progress", progress: 40 }); - - let processed = 0; - const totalPages = pages.length; - - const db = database(c.env.HYPERDRIVE.connectionString); - - for (const page of pages) { - // Calculate document hash for duplicate detection - const encoder = new TextEncoder(); - const data = encoder.encode(page.content); - const hashBuffer = await crypto.subtle.digest("SHA-256", data); - const hashArray = Array.from(new Uint8Array(hashBuffer)); - const documentHash = hashArray - .map((b) => b.toString(16).padStart(2, "0")) - .join(""); - - // Check for duplicates using hash - const existingDocs = await db - .select() - .from(documents) - .where( - and( - eq(documents.userId, user.id), - or( - eq(documents.contentHash, documentHash), - and( - eq(documents.type, "notion"), - or( - eq(documents.url, page.url), - eq(documents.raw, page.content) - ) - ) - ) - ) - ); - - if (existingDocs.length > 0) { - await sendMessage({ - type: "warning", - message: `Skipping duplicate page: ${page.title}`, - }); - processed++; - continue; - } - - // Insert into documents table first - try { - await db.insert(documents).values({ - uuid: page.id, - userId: user.id, - type: "notion", - url: page.url, - title: page.title, - contentHash: documentHash, - raw: page.content, - }); - - await c.env.CONTENT_WORKFLOW.create({ - params: { - userId: user.id, - content: page.url, - spaces: [], - type: "notion", - uuid: page.id, - url: page.url, - prefetched: { - contentToVectorize: page.content, - contentToSave: page.content, - title: page.title, - type: "notion", - }, - createdAt: page.createdAt, - }, - id: `${user.id}-${page.id}-${new Date().getTime()}`, - }); - - processed++; - const progress = 50 + Math.floor((processed / totalPages) * 50); - await sendMessage({ type: "progress", progress, page: page.title }); - } catch (error) { - console.error(`Failed to process page ${page.title}:`, error); - await sendMessage({ - type: "warning", - message: `Failed to process page: ${page.title}`, - error: error instanceof Error ? error.message : "Unknown error", - }); - processed++; - continue; - } - } - - await sendMessage({ type: "complete", progress: 100 }); - await writer.close(); - } catch (error) { - console.error("Import error:", error); - await sendMessage({ - type: "error", - error: error instanceof Error ? error.message : "Import failed", - }); - await writer.close(); - } - })() - ); - - return response; -}); - -export default integrations; diff --git a/apps/backend/src/routes/memories.ts b/apps/backend/src/routes/memories.ts deleted file mode 100644 index f79da8ce..00000000 --- a/apps/backend/src/routes/memories.ts +++ /dev/null @@ -1,293 +0,0 @@ -import { Hono } from "hono"; -import { Variables, Env } from "../types"; -import { zValidator } from "@hono/zod-validator"; -import { z } from "zod"; -import { - documents, - spaces, - spaceAccess, - contentToSpace, -} from "@supermemory/db/schema"; -import { and, database, desc, eq, or, sql, isNull } from "@supermemory/db"; -import { fromHono } from "chanfana"; - -const memories = fromHono(new Hono<{ Variables: Variables; Bindings: Env }>()) - .get( - "/", - zValidator( - "query", - z.object({ - start: z.string().default("0").transform(Number), - count: z.string().default("10").transform(Number), - spaceId: z.string().optional(), - }) - ), - async (c) => { - const { start, count, spaceId } = c.req.valid("query"); - const user = c.get("user"); - const db = database(c.env.HYPERDRIVE.connectionString); - - console.log("Fetching memories with spaceId", spaceId); - console.log(c.req.url); - // If spaceId provided, verify access - if (spaceId) { - console.log("SpaceID provided", spaceId); - const space = await db - .select() - .from(spaces) - .where(eq(spaces.uuid, spaceId.split("---")[0])) - .limit(1); - - if (!space[0]) { - return c.json({ error: "Space not found" }, 404); - } - - // Check access - allow if public, user owns the space, or has access through spaceAccess - if (!space[0].isPublic && !user) { - return c.json({ error: "Unauthorized" }, 401); - } - - if (!space[0].isPublic && space[0].ownerId !== user?.id) { - const access = await db - .select() - .from(spaceAccess) - .where( - and( - eq(spaceAccess.spaceId, space[0].id), - eq(spaceAccess.userEmail, user?.email ?? ""), - eq(spaceAccess.status, "accepted") - ) - ) - .limit(1); - - if (access.length === 0) { - console.log("Unauthorized access to", c.req.url); - return c.json({ error: "Unauthorized" }, 401); - } - } - - // Get documents for space - const [items, totalResult] = await Promise.all([ - db - .select({ - documents, - }) - .from(documents) - .innerJoin( - contentToSpace, - eq(documents.id, contentToSpace.contentId) - ) - .where(eq(contentToSpace.spaceId, space[0].id)) - .orderBy(desc(documents.createdAt)) - .limit(count) - .offset(start), - db - .select({ - total: sql<number>`count(*)`.as("total"), - }) - .from(documents) - .innerJoin( - contentToSpace, - eq(documents.id, contentToSpace.contentId) - ) - .where(eq(contentToSpace.spaceId, space[0].id)), - ]); - - const total = totalResult[0]?.total ?? 0; - - return c.json({ - items: items.map((item) => ({ - ...item.documents, - id: item.documents.uuid, - })), - total, - }); - } - - // Regular user memories endpoint - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - // Set cache control headers for 5 minutes - c.header("Cache-Control", "private, max-age=300"); - c.header("Vary", "Cookie"); // Vary on Cookie since response depends on user - - // Generate ETag based on user ID, start, count - const etag = `"${user.id}-${start}-${count}"`; - c.header("ETag", etag); - - // Check if client has matching ETag - const ifNoneMatch = c.req.header("If-None-Match"); - if (ifNoneMatch === etag) { - return new Response(null, { status: 304 }); - } - - const [items, [{ total }]] = await Promise.all([ - db - .select({ - documents: documents, - }) - .from(documents) - .leftJoin(contentToSpace, eq(documents.id, contentToSpace.contentId)) - .where( - and(eq(documents.userId, user.id), isNull(contentToSpace.contentId)) - ) - .orderBy(desc(documents.createdAt)) - .limit(count) - .offset(start), - db - .select({ - total: sql<number>`count(*)`.as("total"), - }) - .from(documents) - .leftJoin(contentToSpace, eq(documents.id, contentToSpace.contentId)) - .where( - and(eq(documents.userId, user.id), isNull(contentToSpace.contentId)) - ), - ]); - - return c.json({ - items: items.map((item) => ({ - ...item.documents, - id: item.documents.uuid, - })), - total, - }); - } - ) - .get("/:id", zValidator("param", z.object({ id: z.string() })), async (c) => { - const { id } = c.req.valid("param"); - const user = c.get("user"); - - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - const memory = await database(c.env.HYPERDRIVE.connectionString) - .select() - .from(documents) - .where(and(eq(documents.uuid, id), eq(documents.userId, user.id))) - .limit(1); - - return c.json(memory[0]); - }) - .delete( - "/:id", - zValidator("param", z.object({ id: z.string() })), - async (c) => { - const { id } = c.req.valid("param"); - const user = c.get("user"); - - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - const db = database(c.env.HYPERDRIVE.connectionString); - - let documentIdNum; - - try { - documentIdNum = Number(id); - } catch (e) { - documentIdNum = null; - } - - const doc = await db - .select() - .from(documents) - .where( - and( - documentIdNum - ? or(eq(documents.uuid, id), eq(documents.id, documentIdNum)) - : eq(documents.uuid, id), - eq(documents.userId, user.id) - ) - ) - .limit(1); - - if (!doc[0]) { - return c.json({ error: "Document not found" }, 404); - } - - const [document, contentToSpacei] = await Promise.all([ - db - .delete(documents) - .where(and(eq(documents.uuid, id), eq(documents.userId, user.id))), - db - .delete(contentToSpace) - .where(eq(contentToSpace.contentId, doc[0].id)), - ]); - - return c.json({ success: true }); - } - ) - .post( - "/batch-delete", - zValidator( - "json", - z.object({ - ids: z.array(z.string()), - }) - ), - async (c) => { - const { ids } = c.req.valid("json"); - const user = c.get("user"); - - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - const db = database(c.env.HYPERDRIVE.connectionString); - - try { - // First get all valid documents that belong to the user - const docs = await db - .select() - .from(documents) - .where( - and( - eq(documents.userId, user.id), - sql`${documents.uuid} = ANY(ARRAY[${ids}]::text[])` - ) - ); - - if (docs.length === 0) { - return c.json({ error: "No valid documents found" }, 404); - } - - const docIds = docs.map((doc) => doc.id); - - // Delete in transaction to ensure consistency - await db.transaction(async (tx) => { - await Promise.all([ - // Delete document entries - tx - .delete(documents) - .where( - and( - eq(documents.userId, user.id), - sql`${documents.uuid} = ANY(ARRAY[${ids}]::text[])` - ) - ), - // Delete space connections - tx - .delete(contentToSpace) - .where( - sql`${contentToSpace.contentId} = ANY(ARRAY[${docIds}]::int[])` - ), - ]); - }); - - return c.json({ - success: true, - deletedCount: docs.length, - }); - } catch (error) { - console.error("Batch delete error:", error); - return c.json({ error: "Failed to delete documents" }, 500); - } - } - ); - -export default memories; diff --git a/apps/backend/src/routes/spaces.ts b/apps/backend/src/routes/spaces.ts deleted file mode 100644 index e004ce78..00000000 --- a/apps/backend/src/routes/spaces.ts +++ /dev/null @@ -1,709 +0,0 @@ -import { Hono } from "hono"; -import { Env, Variables } from "../types"; -import { and, database, desc, eq, isNotNull, or, sql } from "@supermemory/db"; -import { - contentToSpace, - documents, - savedSpaces, - spaceAccess, - spaces, - users, -} from "@supermemory/db/schema"; -import { zValidator } from "@hono/zod-validator"; -import { z } from "zod"; -import { randomId } from "@supermemory/shared"; -import { fromHono } from "chanfana"; - -const spacesRoute = fromHono( - new Hono<{ Variables: Variables; Bindings: Env }>()) - .get("/", async (c) => { - const user = c.get("user"); - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - const db = database(c.env.HYPERDRIVE.connectionString); - - const [allSpaces, savedSpacesList, spaceOwners] = await Promise.all([ - db - .select({ - id: spaces.id, - uuid: spaces.uuid, - name: sql<string>`REGEXP_REPLACE(${spaces.name}, E'[\\n\\r]+', ' ', 'g')`.as( - "name" - ), - ownerId: spaces.ownerId, - isPublic: spaces.isPublic, - createdAt: spaces.createdAt, - accessType: spaceAccess.accessType, - }) - .from(spaces) - .leftJoin( - spaceAccess, - and( - eq(spaces.id, spaceAccess.spaceId), - eq(spaceAccess.userEmail, user.email), - eq(spaceAccess.status, "accepted") - ) - ) - .where(or(eq(spaces.ownerId, user.id), isNotNull(spaceAccess.spaceId))) - .orderBy(desc(spaces.createdAt)), - - db - .select({ - spaceId: savedSpaces.spaceId, - }) - .from(savedSpaces) - .where(eq(savedSpaces.userId, user.id)), - - db - .select({ - id: users.id, - uuid: users.uuid, - name: users.firstName, - email: users.email, - profileImage: users.profilePictureUrl, - }) - .from(users) - .innerJoin(spaces, eq(spaces.ownerId, users.id)), - ]); - - const savedSpaceIds = new Set(savedSpacesList.map((s) => s.spaceId)); - const ownerMap = new Map(spaceOwners.map((owner) => [owner.id, owner])); - - const spacesWithDetails = allSpaces.map((space) => { - const isOwner = space.ownerId === user.id; - const owner = ownerMap.get(space.ownerId); - - return { - ...space, - favorited: savedSpaceIds.has(space.id), - permissions: { - canRead: space.isPublic || isOwner || space.accessType != null, - canEdit: isOwner || space.accessType === "edit", - isOwner, - }, - owner: isOwner - ? null - : { - id: owner?.uuid, - name: owner?.name, - email: owner?.email, - profileImage: owner?.profileImage, - }, - }; - }); - - return c.json({ spaces: spacesWithDetails }); - }) - .get("/:spaceId", async (c) => { - const user = c.get("user"); - const spaceId = c.req.param("spaceId"); - const db = database(c.env.HYPERDRIVE.connectionString); - - const space = await db - .select() - .from(spaces) - .where(eq(spaces.uuid, spaceId)) - .limit(1); - - if (!space[0]) { - return c.json({ error: "Space not found" }, 404); - } - - // For public spaces, anyone can read but only owner can edit - if (space[0].isPublic) { - const canEdit = user?.id === space[0].ownerId; - return c.json({ - ...space[0], - permissions: { - canRead: true, - canEdit, - isOwner: space[0].ownerId === user?.id, - isPublic: space[0].isPublic, - }, - }); - } - - // For private spaces, require authentication - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - // Check if user is owner or has access via spaceAccess - const isOwner = space[0].ownerId === user.id; - let canEdit = isOwner; - if (!isOwner) { - const spaceAccessCheck = await db - .select() - .from(spaceAccess) - .where( - and( - eq(spaceAccess.spaceId, space[0].id), - eq(spaceAccess.userEmail, user.email), - eq(spaceAccess.status, "accepted") - ) - ) - .limit(1); - - if (spaceAccessCheck.length === 0) { - return c.json({ error: "Access denied" }, 403); - } - - canEdit = spaceAccessCheck[0].accessType === "edit"; - } - - return c.json({ - ...space[0], - permissions: { - canRead: true, - canEdit, - isOwner: space[0].ownerId === user.id, - isPublic: space[0].isPublic, - }, - }); - }) - .post( - "/create", - zValidator( - "json", - z.object({ - spaceName: z.string().min(1, "Space name cannot be empty").max(100), - isPublic: z.boolean(), // keep this explicit please - }) - ), - async (c) => { - const body = c.req.valid("json"); - const user = c.get("user"); - - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - if (body.spaceName.trim() === "<HOME>") { - return c.json({ error: "Cannot create space with name <HOME>" }, 400); - } - - const db = database(c.env.HYPERDRIVE.connectionString); - const uuid = randomId(); - - try { - const space = await db - .insert(spaces) - .values({ - name: body.spaceName.trim(), - ownerId: user.id, - uuid, - isPublic: body.isPublic, - createdAt: new Date(), - }) - .returning(); - - return c.json({ - message: "Space created successfully", - space: { - uuid: space[0].uuid, - name: space[0].name, - ownerId: space[0].ownerId, - isPublic: space[0].isPublic, - createdAt: space[0].createdAt, - }, - }); - } catch (error) { - console.error("[Space Creation Error]", error); - return c.json({ error: "Failed to create space" }, 500); - } - } - ) - .post( - "/:spaceId/favorite", - zValidator( - "param", - z.object({ - spaceId: z.string(), - }) - ), - async (c) => { - const user = c.get("user"); - const { spaceId } = c.req.valid("param"); - - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - const db = database(c.env.HYPERDRIVE.connectionString); - - // Get space details - const space = await db - .select() - .from(spaces) - .where(eq(spaces.uuid, spaceId)) - .limit(1); - - if (!space[0]) { - return c.json({ error: "Space not found" }, 404); - } - - // Check if it's user's own space - if (space[0].ownerId === user.id) { - return c.json({ error: "Cannot favorite your own space" }, 400); - } - - try { - await db.insert(savedSpaces).values({ - userId: user.id, - spaceId: space[0].id, - savedAt: new Date(), - }); - - return c.json({ message: "Space favorited successfully" }); - } catch (error) { - if ( - error instanceof Error && - error.message.includes("saved_spaces_user_space_idx") - ) { - // Space is already favorited - return c.json({ message: "Space already favorited" }); - } - throw error; - } - } - ) - .post( - "/moveContent", - zValidator( - "json", - z.object({ - spaceId: z.string(), - documentId: z.string(), - }) - ), - async (c) => { - const body = c.req.valid("json"); - const user = c.get("user"); - const { spaceId, documentId } = body; - - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - const db = database(c.env.HYPERDRIVE.connectionString); - - try { - await db.transaction(async (tx) => { - // If moving to <HOME>, just remove all space connections - if (spaceId === "<HOME>") { - const doc = await tx - .select() - .from(documents) - .where(eq(documents.uuid, documentId)) - .limit(1); - - if (!doc[0]) { - return c.json({ error: "Document not found" }, 404); - } - - await tx - .delete(contentToSpace) - .where(eq(contentToSpace.contentId, doc[0].id)); - return; - } - - // Get space and document, verify space ownership - const results = ( - await tx - .select({ - spaceId: spaces.id, - documentId: documents.id, - ownerId: spaces.ownerId, - spaceName: spaces.name, - }) - .from(spaces) - .innerJoin( - documents, - and(eq(spaces.uuid, spaceId), eq(documents.uuid, documentId)) - ) - .limit(1) - )[0]; - - if (!results) { - return c.json({ error: "Space or document not found" }, 404); - } - - if (results.ownerId !== user.id) { - return c.json( - { error: "Not authorized to modify this space" }, - 403 - ); - } - - // Delete existing space relations for this document - await tx - .delete(contentToSpace) - .where(eq(contentToSpace.contentId, results.documentId)); - - // Add new space relation - await tx.insert(contentToSpace).values({ - contentId: results.documentId, - spaceId: results.spaceId, - }); - }); - - return c.json({ success: true, spaceId }); - } catch (e) { - console.error("Failed to move content to space:", e); - return c.json( - { - error: "Failed to move content to space", - details: e instanceof Error ? e.message : "Unknown error", - }, - 500 - ); - } - } - ) - .post( - "/addContent", - zValidator( - "json", - z.object({ - spaceId: z.string(), - documentId: z.string(), - }) - ), - async (c) => { - const body = c.req.valid("json"); - const user = c.get("user"); - const { spaceId, documentId } = body; - - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - const db = database(c.env.HYPERDRIVE.connectionString); - - try { - await db.transaction(async (tx) => { - // If adding to <HOME>, just remove all space connections - if (spaceId === "<HOME>") { - const doc = await tx - .select() - .from(documents) - .where(eq(documents.uuid, documentId)) - .limit(1); - - if (!doc[0]) { - return c.json({ error: "Document not found" }, 404); - } - - await tx - .delete(contentToSpace) - .where(eq(contentToSpace.contentId, doc[0].id)); - return; - } - - // Get space and document, verify space ownership - const results = ( - await tx - .select({ - spaceId: spaces.id, - documentId: documents.id, - ownerId: spaces.ownerId, - }) - .from(spaces) - .innerJoin( - documents, - and(eq(spaces.uuid, spaceId), eq(documents.uuid, documentId)) - ) - .limit(1) - )[0]; - - if (!results) { - return c.json({ error: "Space or document not found" }, 404); - } - - if (results.ownerId !== user.id) { - return c.json( - { error: "Not authorized to modify this space" }, - 403 - ); - } - - // Check if mapping already exists to avoid duplicates - const existing = await tx - .select() - .from(contentToSpace) - .where( - and( - eq(contentToSpace.contentId, results.documentId), - eq(contentToSpace.spaceId, results.spaceId) - ) - ) - .limit(1); - - if (existing.length > 0) { - return c.json({ error: "Content already exists in space" }, 409); - } - - await tx.insert(contentToSpace).values({ - contentId: results.documentId, - spaceId: results.spaceId, - }); - }); - - return c.json({ success: true }); - } catch (e) { - console.error("Failed to add content to space:", e); - return c.json( - { - error: "Failed to add content to space", - details: e instanceof Error ? e.message : "Unknown error", - }, - 500 - ); - } - } - ) - .post( - "/:spaceId/invite", - zValidator( - "json", - z.object({ - email: z.string().email("Invalid email address"), - accessType: z.enum(["read", "edit"], { - errorMap: () => ({ - message: "Access type must be either 'read' or 'edit'", - }), - }), - }) - ), - async (c) => { - const user = c.get("user"); - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - const { spaceId } = c.req.param(); - const { email, accessType } = c.req.valid("json"); - - const db = database(c.env.HYPERDRIVE.connectionString); - - // Check if space exists and user has permission to invite - const space = await db - .select() - .from(spaces) - .where(eq(spaces.uuid, spaceId)) - .limit(1); - - if (space.length === 0) { - return c.json({ error: "Space not found" }, 404); - } - - // Only space owner can invite others - if (space[0].ownerId !== user.id) { - return c.json({ error: "Only space owner can invite users" }, 403); - } - - // Check if invite already exists - const existingInvite = await db - .select() - .from(spaceAccess) - .where( - and( - eq(spaceAccess.spaceId, space[0].id), - eq(spaceAccess.userEmail, email) - ) - ) - .limit(1); - - if (existingInvite.length > 0) { - return c.json( - { error: "User already has access or pending invite" }, - 400 - ); - } - - // Create invite - await db.insert(spaceAccess).values({ - spaceId: space[0].id, - userEmail: email, - accessType, - status: "pending", - }); - - // TODO: send email to the user - - return c.json({ success: true }); - } - ) - .get( - "/:spaceId/invitation", - zValidator("param", z.object({ spaceId: z.string() })), - async (c) => { - const { spaceId } = c.req.valid("param"); - const user = c.get("user"); - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - const db = database(c.env.HYPERDRIVE.connectionString); - - const space = await db - .select() - .from(spaces) - .where(eq(spaces.uuid, spaceId)) - .limit(1); - - if (space.length === 0) { - console.log("Space not found", spaceId); - return c.json({ error: "Space not found" }, 401); - } - - // Get pending invitation with access type - const invitation = await db - .select() - .from(spaceAccess) - .where( - and( - eq(spaceAccess.spaceId, space[0].id), - eq(spaceAccess.userEmail, user.email), - eq(spaceAccess.status, "pending") - ) - ) - .limit(1); - - if (invitation.length === 0) { - return c.json({ error: "No pending invitation found" }, 403); - } - - return c.json({ - space: space[0], - accessType: invitation[0].accessType, - }); - } - ) - .post( - "/invites/:action", - zValidator( - "json", - z.object({ - spaceId: z.string().min(5, "Invalid space ID format"), - }) - ), - async (c) => { - const user = c.get("user"); - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - const db = database(c.env.HYPERDRIVE.connectionString); - - const { action } = c.req.param(); - if (action !== "accept" && action !== "reject") { - return c.json({ error: "Invalid action" }, 400); - } - - const { spaceId } = c.req.valid("json"); - console.log("space ID", spaceId); - - // Get space - const space = await db - .select() - .from(spaces) - .where(eq(spaces.uuid, spaceId)) - .limit(1); - - if (space.length === 0) { - return c.json({ error: "Space not found" }, 404); - } - - // Update invite status - const updateResult = await db - .update(spaceAccess) - .set({ status: action === "accept" ? "accepted" : "rejected" }) - .where( - and( - eq(spaceAccess.spaceId, space[0].id), - eq(spaceAccess.userEmail, user.email), - eq(spaceAccess.status, "pending") - ) - ); - - if (updateResult.length === 0) { - return c.json({ error: "No pending invite found" }, 404); - } - - return c.json({ success: true }); - } - ) - .patch( - "/:spaceId", - zValidator( - "json", - z.object({ - name: z.string().min(1, "Space name cannot be empty").max(100), - }) - ), - async (c) => { - const user = c.get("user"); - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - const { spaceId } = c.req.param(); - const { name } = c.req.valid("json"); - const db = database(c.env.HYPERDRIVE.connectionString); - - // Get space and verify ownership - const space = await db - .select() - .from(spaces) - .where(eq(spaces.uuid, spaceId)) - .limit(1); - - if (space.length === 0) { - return c.json({ error: "Space not found" }, 404); - } - - if (space[0].ownerId !== user.id) { - return c.json({ error: "Only space owner can edit space name" }, 403); - } - - if (name.trim() === "<HOME>") { - return c.json({ error: "Cannot use reserved name <HOME>" }, 400); - } - - // Update space name - await db - .update(spaces) - .set({ name: name.trim() }) - .where(eq(spaces.uuid, spaceId)); - - return c.json({ success: true, name: name.trim() }); - } - ) - .delete("/:spaceId", async (c) => { - const user = c.get("user"); - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - const { spaceId } = c.req.param(); - const db = database(c.env.HYPERDRIVE.connectionString); - - const space = await db - .select() - .from(spaces) - .where(eq(spaces.uuid, spaceId)) - .limit(1); - - if (space.length === 0) { - return c.json({ error: "Space not found" }, 404); - } - - await db.delete(spaces).where(eq(spaces.uuid, spaceId)); - - return c.json({ success: true }); - }); - -export default spacesRoute; diff --git a/apps/backend/src/routes/user.ts b/apps/backend/src/routes/user.ts deleted file mode 100644 index 4905989f..00000000 --- a/apps/backend/src/routes/user.ts +++ /dev/null @@ -1,193 +0,0 @@ -import { Hono } from "hono"; -import { Env, Variables } from "../types"; -import { and, database, desc, eq, isNotNull, or, sql } from "@supermemory/db"; -import { - chatThreads, - savedSpaces, - spaceAccess, - spaces, - users, -} from "@supermemory/db/schema"; -import { decryptApiKey, getApiKey } from "../auth"; -import { DurableObjectStore } from "@hono-rate-limiter/cloudflare"; -import { rateLimiter } from "hono-rate-limiter"; -import { fromHono } from "chanfana"; - -const user = fromHono(new Hono<{ Variables: Variables; Bindings: Env }>()) - .get("/", (c) => { - return c.json(c.get("user")); - }) - .get("/spaces", async (c) => { - const user = c.get("user"); - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - const db = database(c.env.HYPERDRIVE.connectionString); - - const [allSpaces, savedSpacesList, spaceOwners] = await Promise.all([ - db - .select({ - id: spaces.id, - uuid: spaces.uuid, - name: sql<string>`REGEXP_REPLACE(${spaces.name}, E'[\\n\\r]+', ' ', 'g')`.as( - "name" - ), - ownerId: spaces.ownerId, - isPublic: spaces.isPublic, - createdAt: spaces.createdAt, - accessType: spaceAccess.accessType, - }) - .from(spaces) - .leftJoin( - spaceAccess, - and( - eq(spaces.id, spaceAccess.spaceId), - eq(spaceAccess.userEmail, user.email), - eq(spaceAccess.status, "accepted") - ) - ) - .where(or(eq(spaces.ownerId, user.id), isNotNull(spaceAccess.spaceId))) - .orderBy(desc(spaces.createdAt)), - - db - .select({ - spaceId: savedSpaces.spaceId, - }) - .from(savedSpaces) - .where(eq(savedSpaces.userId, user.id)), - - db - .select({ - id: users.id, - uuid: users.uuid, - name: users.firstName, - email: users.email, - profileImage: users.profilePictureUrl, - }) - .from(users) - .innerJoin(spaces, eq(spaces.ownerId, users.id)), - ]); - - const savedSpaceIds = new Set(savedSpacesList.map((s) => s.spaceId)); - const ownerMap = new Map(spaceOwners.map((owner) => [owner.id, owner])); - - const spacesWithDetails = allSpaces.map((space) => { - const isOwner = space.ownerId === user.id; - const owner = ownerMap.get(space.ownerId); - - return { - ...space, - favorited: savedSpaceIds.has(space.id), - permissions: { - canRead: space.isPublic || isOwner || space.accessType != null, - canEdit: isOwner || space.accessType === "edit", - isOwner, - }, - owner: isOwner - ? null - : { - id: owner?.uuid, - name: owner?.name, - email: owner?.email, - profileImage: owner?.profileImage, - }, - }; - }); - - return c.json({ spaces: spacesWithDetails }); - }) - .get("/history", async (c) => { - const user = c.get("user"); - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - const history = await database(c.env.HYPERDRIVE.connectionString) - .select() - .from(chatThreads) - .where(eq(chatThreads.userId, user.id)) - .orderBy(desc(chatThreads.createdAt)) - .limit(10); - - return c.json({ history }); - }) - .get("/invitations", async (c) => { - const user = c.get("user"); - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - const db = database(c.env.HYPERDRIVE.connectionString); - - const invitations = await db - .select({ - spaceAccess: spaceAccess, - spaceUuid: spaces.uuid, - spaceName: spaces.name, - }) - .from(spaceAccess) - .innerJoin(spaces, eq(spaceAccess.spaceId, spaces.id)) - .where(eq(spaceAccess.userEmail, user.email)) - .limit(100); - - return c.json({ invitations }); - }) - .get("/key", async (c) => { - const user = c.get("user"); - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - // we need user.id and user.lastApiKeyGeneratedAt - const lastApiKeyGeneratedAt = user.lastApiKeyGeneratedAt?.getTime(); - if (!lastApiKeyGeneratedAt) { - return c.json({ error: "No API key generated" }, 400); - } - - const key = await getApiKey(user.uuid, lastApiKeyGeneratedAt.toString(), c); - - const decrypted = await decryptApiKey(key, c); - return c.json({ key, decrypted }); - }) - .post("/update", async (c) => { - const user = c.get("user"); - if (!user) { - return c.json({ error: "Unauthorized" }, 401); - } - - const body = await c.req.json(); - - // Only allow updating specific safe fields - const allowedFields = { - firstName: true, - lastName: true, - profilePictureUrl: true, - hasOnboarded: true, - }; - - const updateData: Record<string, unknown> = {}; - for (const [key, value] of Object.entries(body)) { - if (allowedFields[key as keyof typeof allowedFields]) { - updateData[key] = value; - } - } - - if (Object.keys(updateData).length === 0) { - return c.json({ error: "No valid fields to update" }, 400); - } - - const db = database(c.env.HYPERDRIVE.connectionString); - - await db - .update(users) - .set({ - ...updateData, - updatedAt: new Date(), - }) - .where(eq(users.id, user.id)); - - return c.json({ success: true }); - }); - -export default user; diff --git a/apps/backend/src/types.ts b/apps/backend/src/types.ts deleted file mode 100644 index 0323a9e6..00000000 --- a/apps/backend/src/types.ts +++ /dev/null @@ -1,79 +0,0 @@ -import { DurableObjectRateLimiter } from "@hono-rate-limiter/cloudflare"; -import { Session } from "@supermemory/authkit-remix-cloudflare/src/interfaces"; -import { User } from "@supermemory/db/schema"; -import { z } from "zod"; - -export type Variables = { - user: User | null; - session: Session | null; -}; - -export type WorkflowParams = { - userId: number; - content: string; - spaces?: string[]; - type: string; - uuid: string; - url?: string; - prefetched?: { - contentToVectorize: string; - contentToSave: string; - title: string; - type: string; - description: string; - ogImage: string; - }; - createdAt: string; -}; - -export type Env = { - WORKOS_API_KEY: string; - WORKOS_CLIENT_ID: string; - WORKOS_COOKIE_PASSWORD: string; - DATABASE_URL: string; - CONTENT_WORKFLOW: Workflow; - GEMINI_API_KEY: string; - NODE_ENV: string; - OPEN_AI_API_KEY: string; - BRAINTRUST_API_KEY: string; - RESEND_API_KEY: string; - TURNSTILE_SECRET_KEY: string; - - MD_CACHE: KVNamespace; - HYPERDRIVE: Hyperdrive; - EMAIL_LIMITER: { - limit: (params: { key: string }) => Promise<{ success: boolean }>; - }; - ENCRYPTED_TOKENS: KVNamespace; - RATE_LIMITER: DurableObjectNamespace<DurableObjectRateLimiter>; - AI: Ai -}; - -export type JobData = { - content: string; - spaces?: Array<string>; - user: number; - type: string; -}; - -type BaseChunks = { - type: "tweet" | "page" | "note" | "image"; -}; - -export type PageOrNoteChunks = BaseChunks & { - type: "page" | "note"; - chunks: string[]; -}; - -export type Metadata = { - media?: Array<string>; - links?: Array<string>; // idk how ideal this is will figure out after plate js thing -}; - -export type SpaceStatus = { - type: "inviting" | "invited" | "pending" | "accepted"; -}; - -export const recommendedQuestionsSchema = z - .array(z.string().max(200)) - .length(10); diff --git a/apps/backend/src/utils/chunkers.ts b/apps/backend/src/utils/chunkers.ts deleted file mode 100644 index ce345d29..00000000 --- a/apps/backend/src/utils/chunkers.ts +++ /dev/null @@ -1,116 +0,0 @@ -import nlp from "compromise"; - -export default function chunkText( - text: string, - maxChunkSize: number, - overlap: number = 0.2 -): string[] { - // Pre-process text to remove excessive whitespace - text = text.replace(/\s+/g, " ").trim(); - - const sentences = nlp(text).sentences().out("array"); - const chunks: { - text: string; - start: number; - end: number; - metadata?: { - position: string; - context?: string; - }; - }[] = []; - - let currentChunk: string[] = []; - let currentSize = 0; - - for (let i = 0; i < sentences.length; i++) { - const sentence = sentences[i].trim(); - - // Skip empty sentences - if (!sentence) continue; - - // If a single sentence is longer than maxChunkSize, split it - if (sentence.length > maxChunkSize) { - if (currentChunk.length > 0) { - chunks.push({ - text: currentChunk.join(" "), - start: i - currentChunk.length, - end: i - 1, - metadata: { - position: `${i - currentChunk.length}-${i - 1}`, - context: currentChunk[0].substring(0, 100), // First 100 chars for context - }, - }); - currentChunk = []; - currentSize = 0; - } - - // Split long sentence into smaller chunks - const words = sentence.split(" "); - let tempChunk: string[] = []; - - for (const word of words) { - if (tempChunk.join(" ").length + word.length > maxChunkSize) { - chunks.push({ - text: tempChunk.join(" "), - start: i, - end: i, - metadata: { - position: `${i}`, - context: "Split sentence", - }, - }); - tempChunk = []; - } - tempChunk.push(word); - } - - if (tempChunk.length > 0) { - chunks.push({ - text: tempChunk.join(" "), - start: i, - end: i, - metadata: { - position: `${i}`, - context: "Split sentence remainder", - }, - }); - } - continue; - } - - currentChunk.push(sentence); - currentSize += sentence.length; - - if (currentSize >= maxChunkSize) { - const overlapSize = Math.floor(currentChunk.length * overlap); - chunks.push({ - text: currentChunk.join(" "), - start: i - currentChunk.length + 1, - end: i, - metadata: { - position: `${i - currentChunk.length + 1}-${i}`, - context: currentChunk[0].substring(0, 100), - }, - }); - - // Keep overlap sentences for next chunk - currentChunk = currentChunk.slice(-overlapSize); - currentSize = currentChunk.reduce((sum, s) => sum + s.length, 0); - } - } - - // Handle remaining sentences - if (currentChunk.length > 0) { - chunks.push({ - text: currentChunk.join(" "), - start: sentences.length - currentChunk.length, - end: sentences.length - 1, - metadata: { - position: `${sentences.length - currentChunk.length}-${sentences.length - 1}`, - context: currentChunk[0].substring(0, 100), - }, - }); - } - - return chunks.map((chunk) => chunk.text); -} diff --git a/apps/backend/src/utils/cipher.ts b/apps/backend/src/utils/cipher.ts deleted file mode 100644 index 3ba2e905..00000000 --- a/apps/backend/src/utils/cipher.ts +++ /dev/null @@ -1,79 +0,0 @@ -async function encrypt(data: string, key: string): Promise<string> { - try { - const encoder = new TextEncoder(); - const encodedData = encoder.encode(data); - - const baseForIv = encoder.encode(data + key); - const ivHash = await crypto.subtle.digest('SHA-256', baseForIv); - const iv = new Uint8Array(ivHash).slice(0, 12); - - const cryptoKey = await crypto.subtle.importKey( - "raw", - encoder.encode(key), - { name: "AES-GCM", length: 256 }, - false, - ["encrypt", "decrypt"] - ); - - const encrypted = await crypto.subtle.encrypt( - { name: "AES-GCM", iv: new Uint8Array(iv).buffer as ArrayBuffer }, - cryptoKey, - encodedData - ); - - const combined = new Uint8Array([...iv, ...new Uint8Array(encrypted)]); - - // Convert to base64 safely - const base64 = Buffer.from(combined).toString("base64"); - - // Make URL-safe - return base64.replace(/\+/g, "-").replace(/\//g, "_").replace(/=+$/, ""); - } catch (err) { - console.error("Encryption error:", err); - throw err; - } -} - -async function decrypt(encryptedData: string, key: string): Promise<string> { - try { - // Restore base64 padding and convert URL-safe chars - const base64 = encryptedData - .replace(/-/g, "+") - .replace(/_/g, "/") - .padEnd( - encryptedData.length + ((4 - (encryptedData.length % 4)) % 4), - "=" - ); - - // Use Buffer for safer base64 decoding - const combined = Buffer.from(base64, "base64"); - const combinedArray = new Uint8Array(combined); - - // Extract the IV that was used for encryption - const iv = combinedArray.slice(0, 12); - const encrypted = combinedArray.slice(12); - - // Import the same key used for encryption - const cryptoKey = await crypto.subtle.importKey( - "raw", - new TextEncoder().encode(key), - { name: "AES-GCM", length: 256 }, - false, - ["encrypt", "decrypt"] - ); - - // Use the extracted IV and key to decrypt - const decrypted = await crypto.subtle.decrypt( - { name: "AES-GCM", iv: new Uint8Array(iv).buffer as ArrayBuffer }, - cryptoKey, - encrypted.buffer as ArrayBuffer - ); - - return new TextDecoder().decode(decrypted); - } catch (err) { - console.error("Decryption error:", err); - throw err; - } -} - -export { encrypt, decrypt };
\ No newline at end of file diff --git a/apps/backend/src/utils/extractDocumentContent.ts b/apps/backend/src/utils/extractDocumentContent.ts deleted file mode 100644 index 8b7d9256..00000000 --- a/apps/backend/src/utils/extractDocumentContent.ts +++ /dev/null @@ -1,87 +0,0 @@ -import * as mammoth from "mammoth"; -import { NonRetryableError } from "cloudflare:workflows"; -import { resolvePDFJS } from 'pdfjs-serverless'; - -interface DocumentContent { - content: string; - error?: string; -} - -export const extractDocumentContent = async ( - url: string -): Promise<DocumentContent> => { - try { - const fileExtension = url.split(".").pop()?.toLowerCase(); - - if (!fileExtension) { - throw new Error("Invalid file URL"); - } - - console.log("file", fileExtension); - - switch (fileExtension) { - case "pdf": - return await extractPdfContent(url); - case "md": - case "txt": - return await extractTextContent(url); - case "doc": - case "docx": - return await extractWordContent(url); - default: - throw new NonRetryableError(`Unsupported file type: ${fileExtension}`); - } - } catch (error) { - return { - content: "", - error: error instanceof Error ? error.message : "Unknown error occurred", - }; - } -}; - -async function extractPdfContent(url: string): Promise<DocumentContent> { - try { - const response = await fetch(url); - const arrayBuffer = await response.arrayBuffer(); - - // Initialize PDF.js with serverless compatibility - const { getDocument } = await resolvePDFJS(); - - // Load the PDF document - const pdf = await getDocument({ - data: arrayBuffer, - useSystemFonts: true, - }).promise; - - let fullText = ""; - - // Extract text from each page - for (let i = 1; i <= pdf.numPages; i++) { - const page = await pdf.getPage(i); - const textContent = await page.getTextContent(); - const pageText = textContent.items.map((item: any) => item.str).join(" "); - fullText += pageText + "\n"; - } - - return { content: fullText }; - } catch (error) { - console.error("Error extracting PDF content:", error); - return { - content: "", - error: error instanceof Error ? error.message : "Failed to extract PDF content", - }; - } -} - -async function extractTextContent(url: string): Promise<DocumentContent> { - const response = await fetch(url); - const text = await response.text(); - return { content: text }; -} - -async function extractWordContent(url: string): Promise<DocumentContent> { - const response = await fetch(url); - const arrayBuffer = await response.arrayBuffer(); - const result = await mammoth.extractRawText({ arrayBuffer }); - return { content: result.value }; -} diff --git a/apps/backend/src/utils/extractor.ts b/apps/backend/src/utils/extractor.ts deleted file mode 100644 index f033f8e1..00000000 --- a/apps/backend/src/utils/extractor.ts +++ /dev/null @@ -1,50 +0,0 @@ -import { Env } from "../types"; - -export const extractPageContent = async (content: string, env: Env) => { - const resp = await fetch(`https://r.jina.ai/${content}`); - - if (!resp.ok) { - throw new Error( - `Failed to fetch ${content}: ${resp.statusText}` + (await resp.text()) - ); - } - - const metadataResp = await fetch(`https://md.dhr.wtf/metadata?url=${content}`); - - if (!metadataResp.ok) { - throw new Error( - `Failed to fetch metadata for ${content}: ${metadataResp.statusText}` + - (await metadataResp.text()) - ); - } - - const metadata = await metadataResp.json() as { - title?: string; - description?: string; - image?: string; - favicon?: string; - }; - - const responseText = await resp.text(); - - try { - const json: { - contentToVectorize: string; - contentToSave: string; - title?: string; - description?: string; - image?: string; - favicon?: string; - } = { - contentToSave: responseText, - contentToVectorize: responseText, - title: metadata.title, - description: metadata.description, - image: metadata.image, - favicon: metadata.favicon, - }; - return json; - } catch (e) { - throw new Error(`Failed to parse JSON from ${content}: ${e}`); - } -}; diff --git a/apps/backend/src/utils/fetchers.ts b/apps/backend/src/utils/fetchers.ts deleted file mode 100644 index 2329f48a..00000000 --- a/apps/backend/src/utils/fetchers.ts +++ /dev/null @@ -1,143 +0,0 @@ -import { WorkflowStep } from "cloudflare:workers"; -import { isErr, Ok } from "../errors/results"; -import { typeDecider } from "./typeDecider"; -import { Env, WorkflowParams } from "../types"; -import { unrollTweets } from "./tweetsToThreads"; -import { Tweet } from "react-tweet/api"; -import { NonRetryableError } from "cloudflare:workflows"; -import { extractPageContent } from "./extractor"; -import { extractDocumentContent } from "./extractDocumentContent"; - -export const fetchContent = async ( - params: WorkflowParams, - env: Env, - step: WorkflowStep -) => { - const type = typeDecider(params.content); - - if (isErr(type)) { - throw type.error; - } - - switch (type.value) { - case "page": - const pageContent = await step?.do( - "extract page content", - async () => await extractPageContent(params.content, env) - ); - return { - ...pageContent, - type: "page", - }; - - case "tweet": - const tweetUrl = new URL(params.content); - tweetUrl.search = ""; // Remove all search params - const tweetId = tweetUrl.pathname.split("/").pop(); - - const rawBaseTweetContent = await step.do( - "extract tweet content", - async () => { - const url = `https://cdn.syndication.twimg.com/tweet-result?id=${tweetId}&lang=en&features=tfw_timeline_list%3A%3Btfw_follower_count_sunset%3Atrue%3Btfw_tweet_edit_backend%3Aon%3Btfw_refsrc_session%3Aon%3Btfw_fosnr_soft_interventions_enabled%3Aon%3Btfw_show_birdwatch_pivots_enabled%3Aon%3Btfw_show_business_verified_badge%3Aon%3Btfw_duplicate_scribes_to_settings%3Aon%3Btfw_use_profile_image_shape_enabled%3Aon%3Btfw_show_blue_verified_badge%3Aon%3Btfw_legacy_timeline_sunset%3Atrue%3Btfw_show_gov_verified_badge%3Aon%3Btfw_show_business_affiliate_badge%3Aon%3Btfw_tweet_edit_frontend%3Aon&token=4c2mmul6mnh`; - - const resp = await fetch(url, { - headers: { - "User-Agent": - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3", - Accept: "application/json", - "Accept-Language": "en-US,en;q=0.5", - "Accept-Encoding": "gzip, deflate, br", - Connection: "keep-alive", - "Upgrade-Insecure-Requests": "1", - "Cache-Control": "max-age=0", - TE: "Trailers", - }, - }); - - const data = (await resp.json()) as Tweet; - return data; - } - ); - - let tweetContent: { - text: string; - metadata: { - media?: string[] | undefined; - links?: string[] | undefined; - }; - raw: string; - }; - const unrolledTweetContent = { - value: [rawBaseTweetContent], - }; - if (true) { - console.error("Can't get thread, reverting back to single tweet"); - tweetContent = { - text: rawBaseTweetContent.text, - metadata: { - media: [ - ...(rawBaseTweetContent.photos?.map((url) => url.expandedUrl) ?? - []), - ...(rawBaseTweetContent.video?.variants[0].src ?? []), - ], - }, - raw: `<raw>${JSON.stringify(rawBaseTweetContent)}</raw>`, - }; - } else { - tweetContent = { - text: unrolledTweetContent.value - .map((tweet) => tweet.text) - .join("\n"), - metadata: { - media: unrolledTweetContent.value.flatMap((tweet) => [ - ...tweet.videos, - ...tweet.images, - ]), - links: unrolledTweetContent.value.flatMap((tweet) => tweet.links), - }, - raw: `<raw>${JSON.stringify(rawBaseTweetContent)}</raw>`, - }; - } - - // make it the same type as the page content - const pageContentType: Awaited<ReturnType<typeof extractPageContent>> & { - type: string; - } = { - contentToVectorize: - tweetContent.text + - "\n\nMetadata for this tweet:\n" + - JSON.stringify(tweetContent.metadata) + - "\n\nRaw tweet data:\n" + - tweetContent.raw, - contentToSave: tweetContent.raw, - title: "", - description: JSON.stringify(tweetContent.metadata), - image: "", - favicon: "", - type: "tweet", - }; - return pageContentType; - case "note": - const noteContent = { - contentToVectorize: params.content, - // TODO: different when using platejs - contentToSave: params.content, - // title is the first 30 characters of the first line - title: params.content.split("\n")[0].slice(0, 30), - type: "note", - }; - return noteContent; - case "document": - const documentContent = await step.do( - "extract document content", - async () => await extractDocumentContent(params.content) - ); - return { - contentToVectorize: documentContent.content, - contentToSave: documentContent.content, - type: "document", - }; - default: - throw new NonRetryableError("Unknown content type"); - } -}; diff --git a/apps/backend/src/utils/notion.ts b/apps/backend/src/utils/notion.ts deleted file mode 100644 index ebe559e1..00000000 --- a/apps/backend/src/utils/notion.ts +++ /dev/null @@ -1,239 +0,0 @@ -interface PageContent { - content: string; - url: string; - title: string; - id: string; - createdAt: string; -} - -interface NotionBlock { - type: string; - [key: string]: any; -} - -interface SearchResponse { - results: { - id: string; - object: string; - url: string; - created_time: string; - properties: { - title?: { - title: Array<{ - plain_text: string; - }>; - }; - Name?: { - title: Array<{ - plain_text: string; - }>; - }; - }; - }[]; - next_cursor: string | undefined; - has_more: boolean; -} - -interface BlockResponse { - results: NotionBlock[]; - next_cursor: string | undefined; - has_more: boolean; -} - -export const getAllNotionPageContents = async ( - token: string, - onProgress: (progress: number) => Promise<void> -): Promise<PageContent[]> => { - const pages: PageContent[] = []; - const NOTION_API_VERSION = "2022-06-28"; - const BASE_URL = "https://api.notion.com/v1"; - const MAX_RETRIES = 3; - const BATCH_SIZE = 10; // Number of concurrent requests - const PAGE_SIZE = 100; // Number of pages to fetch per search request - - const delay = (ms: number) => - new Promise((resolve) => setTimeout(resolve, ms)); - - const notionFetch = async ( - endpoint: string, - options: RequestInit = {}, - retries = 0 - ): Promise<any> => { - try { - const response = await fetch(`${BASE_URL}${endpoint}`, { - ...options, - headers: { - Authorization: `Bearer ${token}`, - "Notion-Version": NOTION_API_VERSION, - "Content-Type": "application/json", - ...((options.headers || {}) as Record<string, string>), - }, - }); - - if (response.status === 429) { - // Rate limit error - const retryAfter = parseInt(response.headers.get("Retry-After") || "5"); - if (retries < MAX_RETRIES) { - await delay(retryAfter * 1000); - return notionFetch(endpoint, options, retries + 1); - } - } - - if (!response.ok) { - const errorText = await response.text(); - throw new Error( - `Notion API error: ${response.statusText}\n${errorText}` - ); - } - - return response.json(); - } catch (error) { - if (retries < MAX_RETRIES) { - await delay(2000 * (retries + 1)); // Exponential backoff - return notionFetch(endpoint, options, retries + 1); - } - throw error; - } - }; - - const convertBlockToMarkdown = (block: NotionBlock): string => { - switch (block.type) { - case "paragraph": - return ( - block.paragraph?.rich_text - ?.map((text: any) => text.plain_text) - .join("") || "" - ); - case "heading_1": - return `# ${block.heading_1?.rich_text - ?.map((text: any) => text.plain_text) - .join("")}\n`; - case "heading_2": - return `## ${block.heading_2?.rich_text - ?.map((text: any) => text.plain_text) - .join("")}\n`; - case "heading_3": - return `### ${block.heading_3?.rich_text - ?.map((text: any) => text.plain_text) - .join("")}\n`; - case "bulleted_list_item": - return `* ${block.bulleted_list_item?.rich_text - ?.map((text: any) => text.plain_text) - .join("")}\n`; - case "numbered_list_item": - return `1. ${block.numbered_list_item?.rich_text - ?.map((text: any) => text.plain_text) - .join("")}\n`; - case "to_do": - const checked = block.to_do?.checked ? "x" : " "; - return `- [${checked}] ${block.to_do?.rich_text - ?.map((text: any) => text.plain_text) - .join("")}\n`; - case "code": - return `\`\`\`${block.code?.language || ""}\n${block.code?.rich_text - ?.map((text: any) => text.plain_text) - .join("")}\n\`\`\`\n`; - case "quote": - return `> ${block.quote?.rich_text - ?.map((text: any) => text.plain_text) - .join("")}\n`; - default: - return ""; - } - }; - - const getAllBlocks = async (pageId: string): Promise<NotionBlock[]> => { - const blocks: NotionBlock[] = []; - let cursor: string | undefined = undefined; - - do { - const endpoint = `/blocks/${pageId}/children${ - cursor ? `?start_cursor=${cursor}` : "" - }`; - const response = (await notionFetch(endpoint)) as BlockResponse; - blocks.push(...response.results); - cursor = response.next_cursor; - } while (cursor); - - return blocks; - }; - - try { - let hasMore = true; - let cursor: string | undefined = undefined; - let allPages: SearchResponse["results"] = []; - - // First, collect all pages - while (hasMore) { - const searchResponse = (await notionFetch("/search", { - method: "POST", - body: JSON.stringify({ - filter: { - value: "page", - property: "object", - }, - sort: { - direction: "ascending", - timestamp: "last_edited_time", - }, - start_cursor: cursor, - page_size: PAGE_SIZE, - }), - })) as SearchResponse; - - allPages = [...allPages, ...searchResponse.results]; - cursor = searchResponse.next_cursor; - hasMore = searchResponse.has_more; - - // Report progress for page collection (0-30%) - const progressPercent = (allPages.length / (allPages.length + searchResponse.results.length)) * 30; - await onProgress(progressPercent); - } - - // Process pages in parallel batches - for (let i = 0; i < allPages.length; i += BATCH_SIZE) { - const batch = allPages.slice(i, i + BATCH_SIZE); - const batchResults = await Promise.all( - batch.map(async (page) => { - try { - const blocks = await getAllBlocks(page.id); - const pageContent = { - content: blocks.map(convertBlockToMarkdown).join("\n"), - url: page.url || `https://notion.so/${page.id.replace(/-/g, "")}`, - title: - page.properties?.Name?.title?.[0]?.plain_text || - page.properties?.title?.title?.[0]?.plain_text || - "Untitled", - id: page.id, - createdAt: page.created_time, - }; - return pageContent.content.length > 10 ? pageContent : null; - } catch (error) { - console.error(`Error processing page ${page.id}:`, error); - return null; - } - }) - ); - - pages.push( - ...batchResults.filter( - (result): result is PageContent => result !== null - ) - ); - - // Report progress for page processing (30-100%) - const progressPercent = 30 + ((i + BATCH_SIZE) / allPages.length) * 70; - await onProgress(Math.min(progressPercent, 100)); - - // Add a small delay between batches to respect rate limits - if (i + BATCH_SIZE < allPages.length) { - await delay(1000); - } - } - - return pages.filter((page) => page.content.length > 10); - } catch (error) { - console.error("Error fetching Notion pages:", error); - throw error; - } -}; diff --git a/apps/backend/src/utils/tweetsToThreads.ts b/apps/backend/src/utils/tweetsToThreads.ts deleted file mode 100644 index 85f69b87..00000000 --- a/apps/backend/src/utils/tweetsToThreads.ts +++ /dev/null @@ -1,108 +0,0 @@ -import * as cheerio from "cheerio"; -import { BaseError } from "../errors/baseError"; -import { Ok, Result } from "../errors/results"; - -interface Tweet { - id: string; - text: string; - links: Array<string>; - images: Array<string>; - videos: Array<string>; -} - -class ProcessTweetsError extends BaseError { - constructor(message?: string, source?: string) { - super("[Thread Proceessing Error]", message, source); - } -} - -type TweetProcessResult = Array<Tweet>; - -// there won't be a need for url caching right? -export async function unrollTweets( - url: string -): Promise<Result<TweetProcessResult, ProcessTweetsError>> { - const tweetId = url.split("/").pop(); - const response = await fetch(`https://unrollnow.com/status/${tweetId}`, { - headers: { - "User-Agent": - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", - "Cache-Control": "max-age=3600", - }, - }); - - if (!response.ok) { - const error = await response.text(); - console.error(error); - throw new Error(`HTTP error! status: ${response.status} - ${error}`); - } - - const html = await response.text(); - const $ = cheerio.load(html); - const tweets: Array<Tweet> = []; - - const urlRegex = /(https?:\/\/\S+)/g; - const paragraphs = $(".mainarticle p").toArray(); - - const processedTweets = await Promise.all( - paragraphs.map(async (element, i) => { - const $tweet = $(element); - let tweetText = $tweet.text().trim(); - if (tweetText.length < 1) { - return null; - } - - if (i === paragraphs.length - 1 && tweetText.toLowerCase() === "yes") { - return null; - } - - const shortUrls = tweetText.match(urlRegex) || []; - console.log("SHORT_URLS_LEN", shortUrls.length); - console.log("SHORT_URLS", shortUrls); - - const expandedUrls = await Promise.all(shortUrls.map(expandShortUrl)); - - tweetText = tweetText.replace(urlRegex, "").trim().replace(/\s+/g, " "); - - const images = $tweet - .nextUntil("p") - .find("img.tweetimg") - .map((i, img) => $(img).attr("src")) - .get(); - - const videos = $tweet - .nextUntil("p") - .find("video > source") - .map((i, vid) => $(vid).attr("src")) - .get(); - - return { - id: `${tweetId}_${i}`, - text: tweetText, - links: expandedUrls, - images: images, - videos: videos, - }; - }) - ); - - tweets.push( - ...processedTweets.filter((tweet): tweet is Tweet => tweet !== null) - ); - - return Ok(tweets); -} - -async function expandShortUrl(shortUrl: string): Promise<string> { - try { - const response = await fetch(shortUrl, { - method: "HEAD", - redirect: "follow", - }); - const expandedUrl = response.url; - return expandedUrl; - } catch (error) { - console.error(`Failed to expand URL: ${shortUrl}`, error); - return shortUrl; - } -} diff --git a/apps/backend/src/utils/typeDecider.ts b/apps/backend/src/utils/typeDecider.ts deleted file mode 100644 index 642b178e..00000000 --- a/apps/backend/src/utils/typeDecider.ts +++ /dev/null @@ -1,41 +0,0 @@ -import { Result, Ok, Err } from "../errors/results"; -import { BaseError } from "../errors/baseError"; - -export type contentType = "page" | "tweet" | "note" | "document" | "notion"; - -class GetTypeError extends BaseError { - constructor(message?: string, source?: string) { - super("[Decide Type Error]", message, source); - } -} -export const typeDecider = ( - content: string -): Result<contentType, GetTypeError> => { - try { - // if the content is a URL, then it's a page. if its a URL with https://x.com/user/status/123, then it's a tweet. - // if it ends with .pdf etc then it's a document. else, it's a note. - // do strict checking with regex - if ( - content.match(/https?:\/\/(x\.com|twitter\.com)\/[\w]+\/[\w]+\/[\d]+/) - ) { - return Ok("tweet"); - } else if (content.match(/\.(pdf|doc|docx|txt|rtf|odt|md)/i)) { - return Ok("document"); - } else if ( - content.match(/https?:\/\/(www\.)?notion\.so\/.*/) - ) { - return Ok("notion"); - } else if ( - content.match( - /^(https?:\/\/)?(www\.)?[a-z0-9]+([-.]{1}[a-z0-9]+)*\.[a-z]{2,5}(\/.*)?$/i - ) - ) { - return Ok("page"); - } else { - return Ok("note"); - } - } catch (e) { - console.error("[Decide Type Error]", e); - return Err(new GetTypeError((e as Error).message, "typeDecider")); - } -}; diff --git a/apps/backend/src/workflow/index.ts b/apps/backend/src/workflow/index.ts deleted file mode 100644 index 8efcfacc..00000000 --- a/apps/backend/src/workflow/index.ts +++ /dev/null @@ -1,217 +0,0 @@ -import { - WorkflowEntrypoint, - WorkflowStep, - WorkflowEvent, -} from "cloudflare:workers"; -import { Env, WorkflowParams } from "../types"; -import { fetchContent } from "../utils/fetchers"; -import chunkText from "../utils/chunkers"; -import { database, eq, inArray } from "@supermemory/db"; -import { - ChunkInsert, - contentToSpace, - documents, - spaces, -} from "@supermemory/db/schema"; -import { embedMany } from "ai"; -import { openai } from "../providers"; -import { chunk } from "@supermemory/db/schema"; -import { NonRetryableError } from "cloudflare:workflows"; - -// TODO: handle errors properly here. - -export class ContentWorkflow extends WorkflowEntrypoint<Env, WorkflowParams> { - async run(event: WorkflowEvent<WorkflowParams>, step: WorkflowStep) { - // Step 0: Check if user has reached memory limit - await step.do("check memory limit", async () => { - const existingMemories = await database( - this.env.HYPERDRIVE.connectionString - ) - .select() - .from(documents) - .where(eq(documents.userId, event.payload.userId)); - - if (existingMemories.length >= 2000) { - await database(this.env.HYPERDRIVE.connectionString) - .delete(documents) - .where(eq(documents.uuid, event.payload.uuid)); - throw new NonRetryableError( - "You have reached the maximum limit of 2000 memories" - ); - } - }); - - // Step 1: Get and format the content. - const rawContent = - event.payload.prefetched ?? - (await step.do( - "fetch content", - async () => await fetchContent(event.payload, this.env, step) - )); - - // check that the rawcontent is not too big - if (rawContent.contentToVectorize.length > 100000) { - await database(this.env.HYPERDRIVE.connectionString) - .delete(documents) - .where(eq(documents.uuid, event.payload.uuid)); - throw new NonRetryableError("The content is too big (maximum 20 pages)"); - } - - const chunked = await step.do("chunk content", async () => - chunkText(rawContent.contentToVectorize, 768) - ); - - // Step 2: Create the document in the database. - const document = await step.do("create document", async () => { - try { - // First check if document exists - const existingDoc = await database(this.env.HYPERDRIVE.connectionString) - .select() - .from(documents) - .where(eq(documents.uuid, event.payload.uuid)) - .limit(1); - - return await database(this.env.HYPERDRIVE.connectionString) - .insert(documents) - .values({ - userId: event.payload.userId, - type: event.payload.type, - uuid: event.payload.uuid, - ...(event.payload.url && { url: event.payload.url }), - title: rawContent.title, - content: rawContent.contentToSave, - description: - "description" in rawContent - ? (rawContent.description ?? "") - : (event.payload.prefetched?.description ?? undefined), - ogImage: - "image" in rawContent - ? (rawContent.image ?? "") - : (event.payload.prefetched?.ogImage ?? undefined), - raw: rawContent.contentToVectorize, - isSuccessfullyProcessed: false, - updatedAt: new Date(), - ...(event.payload.createdAt && { - createdAt: new Date(event.payload.createdAt), - }), - }) - .onConflictDoUpdate({ - target: documents.uuid, - set: { - title: rawContent.title, - content: rawContent.contentToSave, - description: - "description" in rawContent - ? (rawContent.description ?? "") - : (event.payload.prefetched?.description ?? undefined), - ogImage: - "image" in rawContent - ? (rawContent.image ?? "") - : (event.payload.prefetched?.ogImage ?? undefined), - raw: rawContent.contentToVectorize, - isSuccessfullyProcessed: false, - updatedAt: new Date(), - }, - }) - .returning(); - } catch (error) { - console.log("here's the error", error); - // Check if error is a unique constraint violation - if ( - error instanceof Error && - error.message.includes("document_url_user_id_idx") - ) { - // Document already exists for this user, stop workflow - await database(this.env.HYPERDRIVE.connectionString) - .delete(documents) - .where(eq(documents.uuid, event.payload.uuid)); - throw new NonRetryableError("Document already exists for this user"); - } - if ( - error instanceof Error && - error.message.includes("document_raw_user_idx") - ) { - await database(this.env.HYPERDRIVE.connectionString) - .delete(documents) - .where(eq(documents.uuid, event.payload.uuid)); - throw new NonRetryableError("The exact same document already exists"); - } - throw error; // Re-throw other errors - } - }); - - if (!document || document.length === 0) { - throw new Error( - "Failed to create/update document - no document returned" - ); - } - - // Step 3: Generate embeddings - const { data: embeddings } = await this.env.AI.run( - "@cf/baai/bge-base-en-v1.5", - { - text: chunked, - } - ); - - // Step 4: Prepare chunk data - const chunkInsertData: ChunkInsert[] = await step.do( - "prepare chunk data", - async () => - chunked.map((chunk, index) => ({ - documentId: document[0].id, - textContent: chunk, - orderInDocument: index, - embeddings: embeddings[index], - })) - ); - - // Step 5: Insert chunks - if (chunkInsertData.length > 0) { - await step.do("insert chunks", async () => - database(this.env.HYPERDRIVE.connectionString).transaction( - async (trx) => { - await trx.insert(chunk).values(chunkInsertData); - } - ) - ); - } - - // step 6: add content to spaces - if (event.payload.spaces) { - await step.do("add content to spaces", async () => { - await database(this.env.HYPERDRIVE.connectionString).transaction( - async (trx) => { - // First get the space IDs from the UUIDs - const spaceIds = await trx - .select({ id: spaces.id }) - .from(spaces) - .where(inArray(spaces.uuid, event.payload.spaces ?? [])); - - if (spaceIds.length === 0) { - return; - } - - // Then insert the content-space mappings using the actual space IDs - await trx.insert(contentToSpace).values( - spaceIds.map((space) => ({ - contentId: document[0].id, - spaceId: space.id, - })) - ); - } - ); - }); - } - - // Step 7: Mark the document as successfully processed - await step.do("mark document as successfully processed", async () => { - await database(this.env.HYPERDRIVE.connectionString) - .update(documents) - .set({ - isSuccessfullyProcessed: true, - }) - .where(eq(documents.id, document[0].id)); - }); - } -} |