diff options
| author | Dhravya Shah <[email protected]> | 2024-08-06 11:20:29 -0700 |
|---|---|---|
| committer | GitHub <[email protected]> | 2024-08-06 11:20:29 -0700 |
| commit | 7fc39cd770e4b2f55c6fdae1fa02fe0a66a93f6d (patch) | |
| tree | 82e6a03099b50441c2fe9a9bf8e8ddf7afa293e5 /apps | |
| parent | Merge pull request #219 from Deepakchowdavarapu/readme-issue (diff) | |
| parent | updated kv and queues (diff) | |
| download | supermemory-7fc39cd770e4b2f55c6fdae1fa02fe0a66a93f6d.tar.xz supermemory-7fc39cd770e4b2f55c6fdae1fa02fe0a66a93f6d.zip | |
Merge pull request #193 from supermemoryai/kush/be-queue
Kush/be queue
Diffstat (limited to 'apps')
50 files changed, 1373 insertions, 844 deletions
diff --git a/apps/cf-ai-backend/bun.lockb b/apps/cf-ai-backend/bun.lockb Binary files differnew file mode 100755 index 00000000..601774bc --- /dev/null +++ b/apps/cf-ai-backend/bun.lockb diff --git a/apps/cf-ai-backend/package.json b/apps/cf-ai-backend/package.json index 2b83cc93..3fcf71e0 100644 --- a/apps/cf-ai-backend/package.json +++ b/apps/cf-ai-backend/package.json @@ -13,9 +13,11 @@ "license": "MIT", "dependencies": { "@hono/zod-validator": "^0.2.1", - "hono": "^4.5.1" + "hono": "^4.5.1", + "honox": "^0.1.23", + "vite": "^5.3.5" }, - "devDependencies": { - "@cloudflare/workers-types": "^4.20240614.0" - } + "devDependencies": { + "@cloudflare/workers-types": "^4.20240614.0" + } } diff --git a/apps/cf-ai-backend/src/db/index.ts b/apps/cf-ai-backend/src/db/index.ts new file mode 100644 index 00000000..b7a05fa5 --- /dev/null +++ b/apps/cf-ai-backend/src/db/index.ts @@ -0,0 +1,7 @@ +import { drizzle } from "drizzle-orm/d1"; +import { Env } from "../types"; + +import * as schema from "@repo/db/schema"; + +export const database = (env: Env) => + drizzle(env.DATABASE, { schema, logger: true }); diff --git a/apps/cf-ai-backend/src/errors/baseError.ts b/apps/cf-ai-backend/src/errors/baseError.ts new file mode 100644 index 00000000..0dcc2203 --- /dev/null +++ b/apps/cf-ai-backend/src/errors/baseError.ts @@ -0,0 +1,46 @@ +export class BaseHttpError extends Error { + public status: number; + public message: string; + + constructor(status: number, message: string) { + super(message); + this.status = status; + this.message = message; + Object.setPrototypeOf(this, new.target.prototype); // Restore prototype chain + } + } + + + export class BaseError extends Error { + type: string; + message: string; + source: string; + ignoreLog: boolean; + + constructor( + type: string, + message?: string, + source?: string, + ignoreLog = false + ) { + super(); + + Object.setPrototypeOf(this, new.target.prototype); + + this.type = type; + this.message = + message ?? + "An unknown error occurred. If this persists, please contact us."; + this.source = source ?? "unspecified"; + this.ignoreLog = ignoreLog; + } + + toJSON(): Record<PropertyKey, string> { + return { + type: this.type, + message: this.message, + source: this.source, + }; + } + } +
\ No newline at end of file diff --git a/apps/cf-ai-backend/src/errors/results.ts b/apps/cf-ai-backend/src/errors/results.ts new file mode 100644 index 00000000..ccce1396 --- /dev/null +++ b/apps/cf-ai-backend/src/errors/results.ts @@ -0,0 +1,31 @@ +import { BaseError } from "./baseError"; + +export type Result<T, E extends Error> = + | { ok: true; value: T } + | { ok: false; error: E }; + +export const Ok = <T>(data: T): Result<T, never> => { + return { ok: true, value: data }; +}; + +export const Err = <E extends BaseError>(error: E): Result<never, E> => { + return { ok: false, error }; +}; + +export async function wrap<T, E extends BaseError>( + p: Promise<T>, + errorFactory: (err: Error, source: string) => E, + source: string = "unspecified" + ): Promise<Result<T, E>> { + try { + return Ok(await p); + } catch (e) { + return Err(errorFactory(e as Error, source)); + } + } + +export function isErr<T, E extends Error>( + result: Result<T, E>, +): result is { ok: false; error: E } { + return !result.ok; +} diff --git a/apps/cf-ai-backend/src/helper.ts b/apps/cf-ai-backend/src/helper.ts index 54848442..70efaecd 100644 --- a/apps/cf-ai-backend/src/helper.ts +++ b/apps/cf-ai-backend/src/helper.ts @@ -9,17 +9,14 @@ import { z } from "zod"; import { seededRandom } from "./utils/seededRandom"; import { bulkInsertKv } from "./utils/kvBulkInsert"; -export async function initQuery( - c: Context<{ Bindings: Env }>, - model: string = "gpt-4o", -) { +export async function initQuery(env: Env, model: string = "gpt-4o") { const embeddings = new OpenAIEmbeddings({ - apiKey: c.env.OPENAI_API_KEY, + apiKey: env.OPENAI_API_KEY, modelName: "text-embedding-3-small", }); const store = new CloudflareVectorizeStore(embeddings, { - index: c.env.VECTORIZE_INDEX, + index: env.VECTORIZE_INDEX, }); let selectedModel: @@ -30,7 +27,7 @@ export async function initQuery( switch (model) { case "claude-3-opus": const anthropic = createAnthropic({ - apiKey: c.env.ANTHROPIC_API_KEY, + apiKey: env.ANTHROPIC_API_KEY, baseURL: "https://gateway.ai.cloudflare.com/v1/47c2b4d598af9d423c06fc9f936226d5/supermemory/anthropic", }); @@ -39,7 +36,7 @@ export async function initQuery( break; case "gemini-1.5-pro": const googleai = createGoogleGenerativeAI({ - apiKey: c.env.GOOGLE_AI_API_KEY, + apiKey: env.GOOGLE_AI_API_KEY, baseURL: "https://gateway.ai.cloudflare.com/v1/47c2b4d598af9d423c06fc9f936226d5/supermemory/google-vertex-ai", }); @@ -49,7 +46,7 @@ export async function initQuery( case "gpt-4o": default: const openai = createOpenAI({ - apiKey: c.env.OPENAI_API_KEY, + apiKey: env.OPENAI_API_KEY, baseURL: "https://gateway.ai.cloudflare.com/v1/47c2b4d598af9d423c06fc9f936226d5/supermemory/openai", compatibility: "strict", @@ -132,24 +129,25 @@ export async function batchCreateChunksAndEmbeddings({ store, body, chunks, - context, + env: env, }: { store: CloudflareVectorizeStore; body: z.infer<typeof vectorObj>; chunks: Chunks; - context: Context<{ Bindings: Env }>; + env: Env; }) { //! NOTE that we use #supermemory-web to ensure that //! If a user saves it through the extension, we don't want other users to be able to see it. // Requests from the extension should ALWAYS have a unique ID with the USERiD in it. // I cannot stress this enough, important for security. + const ourID = `${body.url}#supermemory-web`; const random = seededRandom(ourID); const uuid = random().toString(36).substring(2, 15) + random().toString(36).substring(2, 15); - const allIds = await context.env.KV.list({ prefix: uuid }); + const allIds = await env.KV.list({ prefix: uuid }); // If some chunks for that content already exist, we'll just update the metadata to include // the user. @@ -159,7 +157,7 @@ export async function batchCreateChunksAndEmbeddings({ //Search in a batch of 20 for (let i = 0; i < savedVectorIds.length; i += 20) { const batch = savedVectorIds.slice(i, i + 20); - const batchVectors = await context.env.VECTORIZE_INDEX.getByIds(batch); + const batchVectors = await env.VECTORIZE_INDEX.getByIds(batch); vectors.push(...batchVectors); } console.log( @@ -193,7 +191,7 @@ export async function batchCreateChunksAndEmbeddings({ await Promise.all( results.map((result) => { - return context.env.VECTORIZE_INDEX.upsert(result); + return env.VECTORIZE_INDEX.upsert(result); }), ); return; @@ -205,7 +203,7 @@ export async function batchCreateChunksAndEmbeddings({ const commonMetaData = { type: body.type ?? "tweet", title: body.title?.slice(0, 50) ?? "", - description: body.description ?? "", + description: body.description?.slice(0, 50) ?? "", url: body.url, [sanitizeKey(`user-${body.user}`)]: 1, }; @@ -244,8 +242,7 @@ export async function batchCreateChunksAndEmbeddings({ }); console.log("these are the doucment ids", ids); console.log("Docs added:", docs); - const { CF_KV_AUTH_TOKEN, CF_ACCOUNT_ID, KV_NAMESPACE_ID } = - context.env; + const { CF_KV_AUTH_TOKEN, CF_ACCOUNT_ID, KV_NAMESPACE_ID } = env; await bulkInsertKv( { CF_KV_AUTH_TOKEN, CF_ACCOUNT_ID, KV_NAMESPACE_ID }, { chunkIds: ids, urlid: ourID }, @@ -257,7 +254,7 @@ export async function batchCreateChunksAndEmbeddings({ const commonMetaData = { type: body.type ?? "page", title: body.title?.slice(0, 50) ?? "", - description: body.description ?? "", + description: body.description?.slice(0, 50) ?? "", url: body.url, [sanitizeKey(`user-${body.user}`)]: 1, }; @@ -267,23 +264,31 @@ export async function batchCreateChunksAndEmbeddings({ }, {}); const ids = []; - const preparedDocuments = chunks.chunks.map((chunk, i) => { + console.log("Page hit moving on to the for loop"); + for (let i = 0; i < chunks.chunks.length; i++) { + const chunk = chunks.chunks[i]; const id = `${uuid}-${i}`; ids.push(id); - return { + const document = { pageContent: chunk, metadata: { - content: chunk, ...commonMetaData, ...spaceMetadata, }, }; - }); - - const docs = await store.addDocuments(preparedDocuments, { ids: ids }); - console.log("Docs added:", docs); - const { CF_KV_AUTH_TOKEN, CF_ACCOUNT_ID, KV_NAMESPACE_ID } = - context.env; + const docs = await store.addDocuments([document], { ids: [id] }); + console.log("Docs added:", docs); + // Wait for a second after every 20 documents for open ai rate limit + console.log( + "This is the 20th thing in the list?", + (i + 1) % 20 === 0, + ); + if ((i + 1) % 20 === 0) { + await new Promise((resolve) => setTimeout(resolve, 1000)); + } + } + + const { CF_KV_AUTH_TOKEN, CF_ACCOUNT_ID, KV_NAMESPACE_ID } = env; await bulkInsertKv( { CF_KV_AUTH_TOKEN, CF_ACCOUNT_ID, KV_NAMESPACE_ID }, { chunkIds: ids, urlid: ourID }, @@ -295,7 +300,7 @@ export async function batchCreateChunksAndEmbeddings({ const commonMetaData = { title: body.title?.slice(0, 50) ?? "", type: body.type ?? "page", - description: body.description ?? "", + description: body.description?.slice(0, 50) ?? "", url: body.url, [sanitizeKey(`user-${body.user}`)]: 1, }; @@ -305,23 +310,30 @@ export async function batchCreateChunksAndEmbeddings({ }, {}); const ids = []; - const preparedDocuments = chunks.chunks.map((chunk, i) => { + for (let i = 0; i < chunks.chunks.length; i++) { + const chunk = chunks.chunks[i]; const id = `${uuid}-${i}`; ids.push(id); - return { + const document = { pageContent: chunk, metadata: { - content: chunk, ...commonMetaData, ...spaceMetadata, }, }; - }); - - const docs = await store.addDocuments(preparedDocuments, { ids: ids }); - console.log("Docs added:", docs); - const { CF_KV_AUTH_TOKEN, CF_ACCOUNT_ID, KV_NAMESPACE_ID } = - context.env; + const docs = await store.addDocuments([document], { ids: [id] }); + console.log("Docs added:", docs); + // Wait for a second after every 20 documents for open ai rate limit + console.log( + "This is the 20th thing in the list?", + (i + 1) % 20 === 0, + ); + if ((i + 1) % 20 === 0) { + await new Promise((resolve) => setTimeout(resolve, 1000)); + } + } + + const { CF_KV_AUTH_TOKEN, CF_ACCOUNT_ID, KV_NAMESPACE_ID } = env; await bulkInsertKv( { CF_KV_AUTH_TOKEN, CF_ACCOUNT_ID, KV_NAMESPACE_ID }, { chunkIds: ids, urlid: ourID }, @@ -332,7 +344,7 @@ export async function batchCreateChunksAndEmbeddings({ const commonMetaData = { type: body.type ?? "image", title: body.title, - description: body.description ?? "", + description: body.description?.slice(0, 50) ?? "", url: body.url, [sanitizeKey(`user-${body.user}`)]: 1, }; @@ -342,26 +354,34 @@ export async function batchCreateChunksAndEmbeddings({ }, {}); const ids = []; - const preparedDocuments = chunks.chunks.map((chunk, i) => { + for (let i = 0; i < chunks.chunks.length; i++) { + const chunk = chunks.chunks[i]; const id = `${uuid}-${i}`; ids.push(id); - return { + const document = { pageContent: chunk, metadata: { ...commonMetaData, ...spaceMetadata, }, }; - }); + const docs = await store.addDocuments([document], { ids: [id] }); + console.log("Docs added:", docs); + // Wait for a second after every 20 documents for open ai rate limit + console.log("This is the 20th thing in the list?", (i + 1) % 20 === 0); + if ((i + 1) % 20 === 0) { + console.log("-----------waiting atm"); + await new Promise((resolve) => setTimeout(resolve, 1000)); + } + } - const docs = await store.addDocuments(preparedDocuments, { ids: ids }); - console.log("Docs added:", docs); - const { CF_KV_AUTH_TOKEN, CF_ACCOUNT_ID, KV_NAMESPACE_ID } = context.env; + const { CF_KV_AUTH_TOKEN, CF_ACCOUNT_ID, KV_NAMESPACE_ID } = env; await bulkInsertKv( { CF_KV_AUTH_TOKEN, CF_ACCOUNT_ID, KV_NAMESPACE_ID }, { chunkIds: ids, urlid: ourID }, ); } } + return; } diff --git a/apps/cf-ai-backend/src/index.ts b/apps/cf-ai-backend/src/index.ts index 70d282d9..1f391359 100644 --- a/apps/cf-ai-backend/src/index.ts +++ b/apps/cf-ai-backend/src/index.ts @@ -1,4 +1,4 @@ -import { z } from "zod"; +import { boolean, z } from "zod"; import { Hono } from "hono"; import { CoreMessage, generateText, streamText, tool } from "ai"; import { @@ -9,6 +9,7 @@ import { PageOrNoteChunks, TweetChunks, vectorObj, + vectorBody, } from "./types"; import { batchCreateChunksAndEmbeddings, @@ -20,21 +21,24 @@ import { logger } from "hono/logger"; import { poweredBy } from "hono/powered-by"; import { bearerAuth } from "hono/bearer-auth"; import { zValidator } from "@hono/zod-validator"; -import chunkText from "./utils/chonker"; +import chunkText from "./queueConsumer/chunkers/chonker"; import { systemPrompt, template } from "./prompts/prompt1"; import { swaggerUI } from "@hono/swagger-ui"; -import { chunkThread } from "./utils/chunkTweet"; -import { chunkNote, chunkPage } from "./utils/chunkPageOrNotes"; +import { database } from "./db"; +import { storedContent } from "@repo/db/schema"; +import { sql, and, eq } from "drizzle-orm"; +import { LIMITS } from "@repo/shared-types"; +import { typeDecider } from "./queueConsumer/utils/typeDecider"; +// import { chunkThread } from "./utils/chunkTweet"; +import { + chunkNote, + chunkPage, +} from "./queueConsumer/chunkers/chunkPageOrNotes"; +import { queue } from "./queueConsumer"; +import { isErr } from "./errors/results"; const app = new Hono<{ Bindings: Env }>(); -app.get( - "/ui", - swaggerUI({ - url: "/doc", - }), -); - // ------- MIDDLEWARES ------- app.use("*", poweredBy()); app.use("*", timing()); @@ -68,40 +72,78 @@ app.get("/api/health", (c) => { return c.json({ status: "ok" }); }); -app.post("/api/add", zValidator("json", vectorObj), async (c) => { +app.post("/api/add", zValidator("json", vectorBody), async (c) => { try { const body = c.req.valid("json"); + //This is something I don't like + // console.log("api/add hit!!!!"); + //Have to do limit on this also duplicate check here + const db = database(c.env); + const typeResult = typeDecider(body.url); + + const saveToDbUrl = + (body.url.split("#supermemory-user-")[0] ?? body.url) + // Why does this have to be a split from #supermemory-user? + "#supermemory-user-" + + body.user; + + console.log( + "---------------------------------------------------------------------------------------------------------------------------------------------", + saveToDbUrl, + ); + const alreadyExist = await db + .select() + .from(storedContent) + .where(eq(storedContent.baseUrl, saveToDbUrl)); + console.log( + "------------------------------------------------", + JSON.stringify(alreadyExist), + ); - const { store } = await initQuery(c); + if (alreadyExist.length > 0) { + console.log( + "------------------------------------------------------------------------------------------------I exist------------------------", + ); + return c.json({ status: "error", message: "the content already exists" }); + } - console.log(body.spaces); - let chunks: TweetChunks | PageOrNoteChunks; - // remove everything in <raw> tags - // const newPageContent = body.pageContent?.replace(/<raw>.*?<\/raw>/g, ""); + if (isErr(typeResult)) { + throw typeResult.error; + } + // limiting in the backend + const type = typeResult.value; + const countResult = await db + .select({ + count: sql<number>`count(*)`.mapWith(Number), + }) + .from(storedContent) + .where( + and(eq(storedContent.userId, body.user), eq(storedContent.type, type)), + ); - switch (body.type) { - case "tweet": - chunks = chunkThread(body.pageContent); - break; + const currentCount = countResult[0]?.count || 0; + const totalLimit = LIMITS[type as keyof typeof LIMITS]; + const remainingLimit = totalLimit - currentCount; + const items = 1; + const isWithinLimit = items <= remainingLimit; - case "page": - chunks = chunkPage(body.pageContent); - break; + // unique contraint check - case "note": - chunks = chunkNote(body.pageContent); - break; + if (isWithinLimit) { + const spaceNumbers = body.spaces.map((s: string) => Number(s)); + await c.env.EMBEDCHUNKS_QUEUE.send({ + content: body.url, + user: body.user, + space: spaceNumbers, + type: type, + }); + } else { + return c.json({ + status: "error", + message: + "You have exceed the current limit for this type of document, please try removing something form memories ", + }); } - console.log("Chunks are here:", chunks); - - await batchCreateChunksAndEmbeddings({ - store, - body, - chunks: chunks, - context: c, - }); - return c.json({ status: "ok" }); } catch (error) { console.error("Error processing request:", error); @@ -134,7 +176,7 @@ app.post( async (c) => { const body = c.req.valid("form"); - const { store } = await initQuery(c); + const { store } = await initQuery(c.env); if (!(body.images || body["images[]"])) { return c.json({ status: "error", message: "No images found" }, 400); @@ -182,7 +224,7 @@ app.post( title: "Image content from the web", }, chunks: chunks, - context: c, + env: c.env, }); return c.json({ status: "ok" }); @@ -200,7 +242,7 @@ app.get( async (c) => { const query = c.req.valid("query"); - const { model } = await initQuery(c); + const { model } = await initQuery(c.env); const response = await streamText({ model, prompt: query.query }); const r = response.toTextStreamResponse(); @@ -218,7 +260,7 @@ app.get( [`user-${user}`]: 1, }; - const { store } = await initQuery(c); + const { store } = await initQuery(c.env); const queryAsVector = await store.embeddings.embedQuery(query); const resp = await c.env.VECTORIZE_INDEX.query(queryAsVector, { @@ -270,7 +312,7 @@ app.post( const { query, user } = c.req.valid("query"); const { chatHistory } = c.req.valid("json"); - const { store, model } = await initQuery(c); + const { store, model } = await initQuery(c.env); let task: "add" | "chat" = "chat"; let thingToAdd: "page" | "image" | "text" | undefined = undefined; @@ -332,7 +374,7 @@ app.post( title: `${addString.slice(0, 30)}... (Added from chatbot)`, }, chunks: vectorContent, - context: c, + env: c.env, }); return c.json({ @@ -445,7 +487,7 @@ app.post( const spaces = query.spaces?.split(",") ?? [undefined]; // Get the AI model maker and vector store - const { model, store } = await initQuery(c, query.model); + const { model, store } = await initQuery(c.env, query.model); if (!body.sources) { const filter: VectorizeVectorMetadataFilter = { @@ -588,6 +630,8 @@ app.post( } } + //Serach mem0 + const preparedContext = body.sources.normalizedData.map( ({ metadata, score, normalizedScore }) => ({ context: `Website title: ${metadata!.title}\nDescription: ${metadata!.description}\nURL: ${metadata!.url}\nContent: ${metadata!.text}`, @@ -598,7 +642,7 @@ app.post( const initialMessages: CoreMessage[] = [ { role: "user", content: systemPrompt }, - { role: "assistant", content: "Hello, how can I help?" }, + { role: "assistant", content: "Hello, how can I help?" }, // prase and add memory json here ]; const prompt = template({ @@ -634,7 +678,7 @@ app.delete( async (c) => { const { websiteUrl, user } = c.req.valid("query"); - const { store } = await initQuery(c); + const { store } = await initQuery(c.env); await deleteDocument({ url: websiteUrl, user, c, store }); @@ -654,7 +698,7 @@ app.get( ), async (c) => { const { context, request } = c.req.valid("query"); - const { model } = await initQuery(c); + const { model } = await initQuery(c.env); const response = await streamText({ model, @@ -707,4 +751,7 @@ app.get("/howFuckedAreWe", async (c) => { return c.json({ fuckedPercent }); }); -export default app; +export default { + fetch: app.fetch, + queue, +}; diff --git a/apps/cf-ai-backend/src/utils/chonker.ts b/apps/cf-ai-backend/src/queueConsumer/chunkers/chonker.ts index 18788dab..18788dab 100644 --- a/apps/cf-ai-backend/src/utils/chonker.ts +++ b/apps/cf-ai-backend/src/queueConsumer/chunkers/chonker.ts diff --git a/apps/cf-ai-backend/src/utils/chunkPageOrNotes.ts b/apps/cf-ai-backend/src/queueConsumer/chunkers/chunkPageOrNotes.ts index f04ed0c5..0da01c3f 100644 --- a/apps/cf-ai-backend/src/utils/chunkPageOrNotes.ts +++ b/apps/cf-ai-backend/src/queueConsumer/chunkers/chunkPageOrNotes.ts @@ -1,5 +1,5 @@ import chunkText from "./chonker"; -import { PageOrNoteChunks } from "../types"; +import { PageOrNoteChunks } from "../../types"; export function chunkPage(pageContent: string): PageOrNoteChunks { const chunks = chunkText(pageContent, 1536); diff --git a/apps/cf-ai-backend/src/queueConsumer/chunkers/chunkTweet.ts b/apps/cf-ai-backend/src/queueConsumer/chunkers/chunkTweet.ts index ae1b18c6..46a56410 100644 --- a/apps/cf-ai-backend/src/queueConsumer/chunkers/chunkTweet.ts +++ b/apps/cf-ai-backend/src/queueConsumer/chunkers/chunkTweet.ts @@ -22,10 +22,18 @@ export interface ThreadTweetData { } export function chunkThread(threadText: string): TweetChunks { - const thread = JSON.parse(threadText); - if (typeof thread == "string") { - console.log("DA WORKER FAILED DO SOMEHTING FIX DA WROKER"); + let thread = threadText; + + try { + thread = JSON.parse(threadText); + } catch (e) { + console.log("error: thread is not json.", e); + } + + if (typeof threadText == "string") { + console.log("DA WORKER FAILED DO SOMEHTING FIX DA WROKER", thread); const rawTweet = getRawTweet(thread); + console.log(rawTweet); const parsedTweet: any = JSON.parse(rawTweet); const chunkedTweet = chunkText(parsedTweet.text, 1536); @@ -48,8 +56,8 @@ export function chunkThread(threadText: string): TweetChunks { return { type: "tweet", chunks }; } else { - console.log(JSON.stringify(thread)); - const chunkedTweets = thread.map((tweet: Tweet) => { + console.log("thread in else statement", JSON.stringify(thread)); + const chunkedTweets = (thread as any).map((tweet: Tweet) => { const chunkedTweet = chunkText(tweet.text, 1536); const metadata = { diff --git a/apps/cf-ai-backend/src/queueConsumer/helpers/processNotes.ts b/apps/cf-ai-backend/src/queueConsumer/helpers/processNotes.ts new file mode 100644 index 00000000..466690cc --- /dev/null +++ b/apps/cf-ai-backend/src/queueConsumer/helpers/processNotes.ts @@ -0,0 +1,36 @@ +import { Result, Ok, Err } from "../../errors/results"; +import { BaseError } from "../../errors/baseError"; +import { Metadata } from "../utils/get-metadata"; + +class ProcessNotesError extends BaseError { + constructor(message?: string, source?: string) { + super("[Note Processing Error]", message, source); + } +} + +type ProcessNoteResult = { + noteContent: { noteId: number; noteContent: string }; + metadata: Metadata; +}; + +export function processNote( + content: string, +): Result<ProcessNoteResult, ProcessNotesError> { + try { + const pageContent = content; + const noteId = new Date().getTime(); + + const metadata = { + baseUrl: `https://supermemory.ai/note/${noteId}`, + description: `Note created at ${new Date().toLocaleString()}`, + image: "https://supermemory.ai/logo.png", + title: `${pageContent.slice(0, 20)} ${pageContent.length > 20 ? "..." : ""}`, + }; + + const noteContent = { noteId: noteId, noteContent: pageContent }; + return Ok({ noteContent, metadata }); + } catch (e) { + console.error("[Note Processing Error]", e); + return Err(new ProcessNotesError((e as Error).message, "processNote")); + } +} diff --git a/apps/cf-ai-backend/src/queueConsumer/helpers/processPage.ts b/apps/cf-ai-backend/src/queueConsumer/helpers/processPage.ts new file mode 100644 index 00000000..9a50d701 --- /dev/null +++ b/apps/cf-ai-backend/src/queueConsumer/helpers/processPage.ts @@ -0,0 +1,43 @@ +import { Result, Ok, Err, isErr } from "../../errors/results"; +import { BaseError } from "../../errors/baseError"; +import { getMetaData, Metadata } from "../utils/get-metadata"; + +class ProcessPageError extends BaseError { + constructor(message?: string, source?: string) { + super("[Page Proceessing Error]", message, source); + } +} + +type PageProcessResult = { pageContent: string; metadata: Metadata }; + +export async function processPage(input: { + url: string; + securityKey: string; +}): Promise<Result<PageProcessResult, ProcessPageError>> { + try { + const response = await fetch("https://md.dhr.wtf/?url=" + input.url, { + headers: { + Authorization: "Bearer " + input.securityKey, + }, + }); + const pageContent = await response.text(); + if (!pageContent) { + return Err( + new ProcessPageError( + "Failed to get response form markdowner", + "processPage", + ), + ); + } + const metadataResult = await getMetaData(input.url); + if (isErr(metadataResult)) { + throw metadataResult.error; + } + const metadata = metadataResult.value; + console.log("[this is the metadata]", metadata); + return Ok({ pageContent, metadata }); + } catch (e) { + console.error("[Page Processing Error]", e); + return Err(new ProcessPageError((e as Error).message, "processPage")); + } +} diff --git a/apps/cf-ai-backend/src/queueConsumer/helpers/processTweet.ts b/apps/cf-ai-backend/src/queueConsumer/helpers/processTweet.ts new file mode 100644 index 00000000..8d83f2dc --- /dev/null +++ b/apps/cf-ai-backend/src/queueConsumer/helpers/processTweet.ts @@ -0,0 +1,88 @@ +import { Tweet } from "react-tweet/api"; +import { Result, Ok, Err, isErr } from "../../errors/results"; +import { BaseError } from "../../errors/baseError"; +import { getMetaData, Metadata } from "../utils/get-metadata"; +import { tweetToMd } from "@repo/shared-types/utils"; // can I do this? +import { Env } from "../../types"; + +class ProcessTweetError extends BaseError { + constructor(message?: string, source?: string) { + super("[Tweet Proceessing Error]", message, source); + } +} + +type GetTweetResult = Tweet; + +export const getTweetData = async ( + tweetID: string, +): Promise<Result<GetTweetResult, ProcessTweetError>> => { + try { + console.log("is fetch defined here?"); + const url = `https://cdn.syndication.twimg.com/tweet-result?id=${tweetID}&lang=en&features=tfw_timeline_list%3A%3Btfw_follower_count_sunset%3Atrue%3Btfw_tweet_edit_backend%3Aon%3Btfw_refsrc_session%3Aon%3Btfw_fosnr_soft_interventions_enabled%3Aon%3Btfw_show_birdwatch_pivots_enabled%3Aon%3Btfw_show_business_verified_badge%3Aon%3Btfw_duplicate_scribes_to_settings%3Aon%3Btfw_use_profile_image_shape_enabled%3Aon%3Btfw_show_blue_verified_badge%3Aon%3Btfw_legacy_timeline_sunset%3Atrue%3Btfw_show_gov_verified_badge%3Aon%3Btfw_show_business_affiliate_badge%3Aon%3Btfw_tweet_edit_frontend%3Aon&token=4c2mmul6mnh`; + + const resp = await fetch(url, { + headers: { + "User-Agent": + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3", + Accept: "application/json", + "Accept-Language": "en-US,en;q=0.5", + "Accept-Encoding": "gzip, deflate, br", + Connection: "keep-alive", + "Upgrade-Insecure-Requests": "1", + "Cache-Control": "max-age=0", + TE: "Trailers", + }, + }); + console.log(resp.status); + + const data = (await resp.json()) as Tweet; + + return Ok(data); + } catch (e) { + console.error("[Tweet Proceessing Error]", e); + return Err(new ProcessTweetError(e, "getTweetData")); + } +}; + +export const getThreadData = async (input: { + tweetUrl: string; + env: Env; +}): Promise<Result<any, ProcessTweetError>> => { + try { + // const threadRequest = await fetch(input.cf_thread_endpoint, { + // method: "POST", + // headers: { + // "Content-Type": "application/json", + // Authorization: input.authKey, + // }, + // body: JSON.stringify({ url: input.tweetUrl }), + // }); + // if (threadRequest.status !== 200) { + // console.log(await threadRequest.text()); + // console.log(input.tweetUrl); + // return Err( + // new ProcessTweetError( + // `Failed to fetch the thread: ${input.tweetUrl}, Reason: ${threadRequest.statusText}`, + // "getThreadData", + // ), + // ); + // } + //@ts-ignore + const thread = await input.env.THREAD.processTweets(input.tweetUrl); + console.log("[thread response]", thread); + + if (!thread.length) { + console.log("Thread is an empty array"); + return Err( + new ProcessTweetError( + "[THREAD FETCHING SERVICE] Got no content form thread worker", + "getThreadData", + ), + ); + } + return Ok(thread); + } catch (e) { + console.error("[Thread Processing Error]", e); + return Err(new ProcessTweetError((e as Error).message, "getThreadData")); + } +}; diff --git a/apps/cf-ai-backend/src/queueConsumer/index.ts b/apps/cf-ai-backend/src/queueConsumer/index.ts new file mode 100644 index 00000000..393f1fbf --- /dev/null +++ b/apps/cf-ai-backend/src/queueConsumer/index.ts @@ -0,0 +1,372 @@ +import { Env, PageOrNoteChunks, TweetChunks, vectorObj } from "../types"; +import { typeDecider } from "./utils/typeDecider"; +import { isErr, wrap } from "../errors/results"; +import { processNote } from "./helpers/processNotes"; +import { processPage } from "./helpers/processPage"; +import { getThreadData, getTweetData } from "./helpers/processTweet"; +import { tweetToMd } from "@repo/shared-types/utils"; +import { chunkNote, chunkPage } from "./chunkers/chunkPageOrNotes"; +import { chunkThread } from "./chunkers/chunkTweet"; +import { batchCreateChunksAndEmbeddings, initQuery } from "../helper"; +import { z } from "zod"; +import { Metadata } from "./utils/get-metadata"; +import { BaseError } from "../errors/baseError"; +import { database } from "../db"; +import { + storedContent, + space, + contentToSpace, + users, + jobs, + Job, +} from "@repo/db/schema"; +import { and, eq, inArray, sql } from "drizzle-orm"; + +class VectorInsertError extends BaseError { + constructor(message?: string, source?: string) { + super("[Vector Insert Error]", message, source); + } +} +const vectorErrorFactory = (err: Error) => new VectorInsertError(err.message); + +class D1InsertError extends BaseError { + constructor(message?: string, source?: string) { + super("[D1 Insert Error]", message, source); + } +} + +const d1ErrorFactory = (err: Error, source: string) => + new D1InsertError(err.message, source); + +const calculateExponentialBackoff = ( + attempts: number, + baseDelaySeconds: number, +) => { + return baseDelaySeconds ** attempts; +}; + +const BASE_DELAY_SECONDS = 5; +export async function queue( + batch: MessageBatch<{ + content: string; + space: Array<number>; + user: string; + type: string; + }>, + env: Env, +): Promise<void> { + const db = database(env); + console.log(env.CF_ACCOUNT_ID, env.CF_KV_AUTH_TOKEN); + for (let message of batch.messages) { + console.log(env.CF_ACCOUNT_ID, env.CF_KV_AUTH_TOKEN); + console.log("is thie even running?", message.body); + const body = message.body; + + const type = body.type; + const userExists = await wrap( + db.select().from(users).where(eq(users.id, body.user)).limit(1), + d1ErrorFactory, + "Error when trying to verify user", + ); + + if (isErr(userExists)) { + throw userExists.error; + } + + //check if this is a retry job.. by checking if the combination of the userId and the url already exists on the queue + let jobId; + const existingJob = await wrap( + db + .select() + .from(jobs) + .where( + and( + eq(jobs.userId, userExists.value[0].id), + eq(jobs.url, body.content), + ), + ) + .limit(1), + d1ErrorFactory, + "Error when checking for existing job", + ); + + if (isErr(existingJob)) { + throw existingJob.error; + } + + if (existingJob.value.length > 0) { + jobId = existingJob.value[0].id; + await wrap( + db + .update(jobs) + .set({ + attempts: existingJob.value[0].attempts + 1, + updatedAt: new Date(), + status: "Processing", + }) + .where(eq(jobs.id, jobId)), + d1ErrorFactory, + "Error when updating job attempts", + ); + } else { + const job = await wrap( + db + .insert(jobs) + .values({ + userId: userExists.value[0].id as string, + url: body.content, + status: "Processing", + attempts: 1, + createdAt: new Date(), + updatedAt: new Date(), + }) + .returning({ jobId: jobs.id }), + d1ErrorFactory, + "Error When inserting into jobs table", + ); + if (isErr(job)) { + throw job.error; + } + jobId = job.value[0].jobId; + } + + let pageContent: string; + let vectorData: string; + let metadata: Metadata; + let storeToSpaces = body.space; + let chunks: TweetChunks | PageOrNoteChunks; + let noteId = 0; + switch (type) { + case "note": { + console.log("note hit"); + const note = processNote(body.content); + if (isErr(note)) { + throw note.error; + } + pageContent = note.value.noteContent.noteContent; + noteId = note.value.noteContent.noteId; + metadata = note.value.metadata; + vectorData = pageContent; + chunks = chunkNote(pageContent); + break; + } + case "page": { + console.log("page hit"); + const page = await processPage({ + url: body.content, + securityKey: env.MD_SEC_KEY, + }); + if (isErr(page)) { + console.log("there is a page error here"); + throw page.error; + } + pageContent = page.value.pageContent; + metadata = page.value.metadata; + vectorData = pageContent; + chunks = chunkPage(pageContent); + break; + } + + case "tweet": { + console.log("tweet hit"); + const tweet = await getTweetData(body.content.split("/").pop()); + console.log(env.THREAD_CF_WORKER, env.THREAD_CF_AUTH); + const thread = await getThreadData({ + tweetUrl: body.content, + env: env, + }); + console.log("[This is the thread]", thread); + if (isErr(tweet)) { + throw tweet.error; + } + pageContent = tweetToMd(tweet.value); + console.log(pageContent); + metadata = { + baseUrl: body.content, + description: tweet.value.text.slice(0, 200), + image: tweet.value.user.profile_image_url_https, + title: `Tweet by ${tweet.value.user.name}`, + }; + if (isErr(thread)) { + console.log("Thread worker is down!"); + vectorData = JSON.stringify(pageContent); + console.error(thread.error); + } else { + console.log("thread worker is fine"); + vectorData = thread.value; + } + chunks = chunkThread(vectorData); + break; + } + } + + //add to mem0, abstract + + // const mem0Response = fetch('https://api.mem0.ai/v1/memories/', { + // method: 'POST', + // headers: { + // 'Content-Type': 'application/json', + // Authorization: `Token ${process.env.MEM0_API_KEY}`, + // }, + // body: JSON.stringify({ + // messages: [ + // { + // role: 'user', + // content: query, + // }, + // ], + // user_id: user?.user?.email, + // }), + // }); + + // see what's up with the storedToSpaces in this block + const { store } = await initQuery(env); + + type body = z.infer<typeof vectorObj>; + + const Chunkbody: body = { + pageContent: pageContent, + spaces: storeToSpaces.map((spaceId) => spaceId.toString()), + user: body.user, + type: type, + url: metadata.baseUrl, + description: metadata.description, + title: metadata.description, + }; + + try { + const vectorResult = await wrap( + batchCreateChunksAndEmbeddings({ + store: store, + body: Chunkbody, + chunks: chunks, + env: env, + }), + vectorErrorFactory, + "Error when Inserting into vector database", + ); + + if (isErr(vectorResult)) { + await db + .update(jobs) + .set({ error: vectorResult.error.message, status: "error" }) + .where(eq(jobs.id, jobId)); + message.retry({ + delaySeconds: calculateExponentialBackoff( + message.attempts, + BASE_DELAY_SECONDS, + ), + }); + throw vectorResult.error; + } + + const saveToDbUrl = + (metadata.baseUrl.split("#supermemory-user-")[0] ?? metadata.baseUrl) + + "#supermemory-user-" + + body.user; + let contentId: number; + + const insertResponse = await wrap( + db + .insert(storedContent) + .values({ + content: pageContent as string, + title: metadata.title, + description: metadata.description, + url: saveToDbUrl, + baseUrl: saveToDbUrl, + image: metadata.image, + savedAt: new Date(), + userId: body.user, + type: type, + noteId: noteId, + }) + .returning({ id: storedContent.id }), + d1ErrorFactory, + "Error when inserting into storedContent", + ); + + if (isErr(insertResponse)) { + await db + .update(jobs) + .set({ error: insertResponse.error.message, status: "error" }) + .where(eq(jobs.id, jobId)); + message.retry({ + delaySeconds: calculateExponentialBackoff( + message.attempts, + BASE_DELAY_SECONDS, + ), + }); + throw insertResponse.error; + } + console.log(JSON.stringify(insertResponse)); + contentId = insertResponse[0]?.id; + console.log("this is the content Id", contentId); + if (storeToSpaces.length > 0) { + // Adding the many-to-many relationship between content and spaces + const spaceData = await wrap( + db + .select() + .from(space) + .where( + and(inArray(space.id, storeToSpaces), eq(space.user, body.user)), + ) + .all(), + d1ErrorFactory, + "Error when getting data from spaces", + ); + + if (isErr(spaceData)) { + throw spaceData.error; + } + try { + await Promise.all( + spaceData.value.map(async (s) => { + try { + await db + .insert(contentToSpace) + .values({ contentId: contentId, spaceId: s.id }); + + await db.update(space).set({ numItems: s.numItems + 1 }); + } catch (e) { + console.error(`Error updating space ${s.id}:`, e); + throw e; + } + }), + ); + } catch (e) { + console.error("Error in updateSpacesWithContent:", e); + throw new Error(`Failed to update spaces: ${e.message}`); + } + } + } catch (e) { + console.error("Error in simulated transaction", e.message); + + message.retry({ + delaySeconds: calculateExponentialBackoff( + message.attempts, + BASE_DELAY_SECONDS, + ), + }); + throw new D1InsertError( + "Error when inserting into d1", + "D1 stuff after the vectorize", + ); + } + + // After the d1 and vectories suceeds then finally update the jobs table to indicate that the job has completed + + await db + .update(jobs) + .set({ status: "Processed" }) + .where(eq(jobs.id, jobId)); + + return; + } +} + +/* +To do: +Figure out rate limits!! + +*/ diff --git a/apps/cf-ai-backend/src/queueConsumer/utils/get-metadata.ts b/apps/cf-ai-backend/src/queueConsumer/utils/get-metadata.ts new file mode 100644 index 00000000..95916506 --- /dev/null +++ b/apps/cf-ai-backend/src/queueConsumer/utils/get-metadata.ts @@ -0,0 +1,57 @@ +import * as cheerio from "cheerio"; +import { Result, Ok, Err } from "../../errors/results"; +import { BaseError } from "../../errors/baseError"; + +class GetMetadataError extends BaseError { + constructor(message?: string, source?: string) { + super("[Fetch Metadata Error]", message, source); + } +} +export type Metadata = { + title: string; + description: string; + image: string; + baseUrl: string; +}; +// TODO: THIS SHOULD PROBABLY ALSO FETCH THE OG-IMAGE +export async function getMetaData( + url: string, +): Promise<Result<Metadata, GetMetadataError>> { + try { + const response = await fetch(url); + const html = await response.text(); + + const $ = cheerio.load(html); + + // Extract the base URL + const baseUrl = url; + + // Extract title + const title = $("title").text().trim(); + + const description = $("meta[name=description]").attr("content") ?? ""; + + const _favicon = + $("link[rel=icon]").attr("href") ?? "https://supermemory.dhr.wtf/web.svg"; + + let favicon = + _favicon.trim().length > 0 + ? _favicon.trim() + : "https://supermemory.dhr.wtf/web.svg"; + if (favicon.startsWith("/")) { + favicon = baseUrl + favicon; + } else if (favicon.startsWith("./")) { + favicon = baseUrl + favicon.slice(1); + } + + return Ok({ + title, + description, + image: favicon, + baseUrl, + }); + } catch (e) { + console.error("[Metadata Fetch Error]", e); + return Err(new GetMetadataError((e as Error).message, "getMetaData")); + } +} diff --git a/apps/cf-ai-backend/src/queueConsumer/utils/typeDecider.ts b/apps/cf-ai-backend/src/queueConsumer/utils/typeDecider.ts new file mode 100644 index 00000000..037ab40c --- /dev/null +++ b/apps/cf-ai-backend/src/queueConsumer/utils/typeDecider.ts @@ -0,0 +1,34 @@ +import { Result, Ok, Err } from "../../errors/results"; +import { BaseError } from "../../errors/baseError"; + +export type contentType = "page" | "tweet" | "note"; + +class GetTypeError extends BaseError { + constructor(message?: string, source?: string) { + super("[Decide Type Error]", message, source); + } +} +export const typeDecider = ( + content: string, +): Result<contentType, GetTypeError> => { + try { + // if the content is a URL, then it's a page. if its a URL with https://x.com/user/status/123, then it's a tweet. else, it's a note. + // do strict checking with regex + if ( + content.match(/https?:\/\/(x\.com|twitter\.com)\/[\w]+\/[\w]+\/[\d]+/) + ) { + return Ok("tweet"); + } else if ( + content.match( + /^(https?:\/\/)?(www\.)?[a-z0-9]+([-.]{1}[a-z0-9]+)*\.[a-z]{2,5}(\/.*)?$/i, + ) + ) { + return Ok("page"); + } else { + return Ok("note"); + } + } catch (e) { + console.error("[Decide Type Error]", e); + return Err(new GetTypeError((e as Error).message, "typeDecider")); + } +}; diff --git a/apps/cf-ai-backend/src/types.ts b/apps/cf-ai-backend/src/types.ts index 5ef81f20..e4f13f1b 100644 --- a/apps/cf-ai-backend/src/types.ts +++ b/apps/cf-ai-backend/src/types.ts @@ -1,6 +1,6 @@ import { sourcesZod } from "@repo/shared-types"; import { z } from "zod"; -import { ThreadTweetData } from "./utils/chunkTweet"; +import { ThreadTweetData } from "./queueConsumer/chunkers/chunkTweet"; export type Env = { VECTORIZE_INDEX: VectorizeIndex; @@ -11,13 +11,26 @@ export type Env = { CF_KV_AUTH_TOKEN: string; KV_NAMESPACE_ID: string; CF_ACCOUNT_ID: string; + DATABASE: D1Database; MY_QUEUE: Queue<TweetData[]>; KV: KVNamespace; + EMBEDCHUNKS_QUEUE: Queue<JobData>; MYBROWSER: unknown; ANTHROPIC_API_KEY: string; + THREAD_CF_AUTH: string; + THREAD: { processTweets: () => Promise<Array<any>> }; + THREAD_CF_WORKER: string; NODE_ENV: string; + MD_SEC_KEY: string; }; +export interface JobData { + content: string; + space: Array<number>; + user: string; + type: string +} + export interface TweetData { tweetText: string; postUrl: string; @@ -80,3 +93,8 @@ export const vectorObj = z.object({ user: z.string(), type: z.string().optional().default("page"), }); +export const vectorBody = z.object({ + spaces: z.array(z.string()).optional(), + url: z.string(), + user: z.string(), +}); diff --git a/apps/cf-ai-backend/src/utils/chunkTweet.ts b/apps/cf-ai-backend/src/utils/chunkTweet.ts deleted file mode 100644 index 0d0bc896..00000000 --- a/apps/cf-ai-backend/src/utils/chunkTweet.ts +++ /dev/null @@ -1,75 +0,0 @@ -import { TweetChunks } from "../types"; -import chunkText from "./chonker"; -import { getRawTweet } from "@repo/shared-types/utils"; - -interface Tweet { - id: string; - text: string; - links: Array<string>; - images: Array<string>; - videos: Array<string>; -} -interface Metadata { - tweetId: string; - tweetLinks: any[]; - tweetVids: any[]; - tweetImages: any[]; -} - -export interface ThreadTweetData { - chunkedTweet: string[]; - metadata: Metadata; -} - -export function chunkThread(threadText: string): TweetChunks { - let thread = threadText; - - try { - thread = JSON.parse(threadText); - } catch (e) { - console.log("error: thread is not json.", e); - } - - if (typeof threadText == "string") { - console.log("DA WORKER FAILED DO SOMEHTING FIX DA WROKER", thread); - const rawTweet = getRawTweet(thread); - console.log(rawTweet); - const parsedTweet: any = JSON.parse(rawTweet); - - const chunkedTweet = chunkText(parsedTweet.text, 1536); - const metadata: Metadata = { - tweetId: parsedTweet.id_str, - tweetLinks: parsedTweet.entities?.urls.map( - (url: any) => url.expanded_url, - ), - tweetVids: - parsedTweet.extended_entities?.media - .filter((media: any) => media.type === "video") - .map((media: any) => media.video_info!.variants[0].url) || [], - tweetImages: - parsedTweet.extended_entities?.media - .filter((media: any) => media.type === "photo") - .map((media: any) => media.media_url_https!) || [], - }; - - const chunks = [{ chunkedTweet: chunkedTweet, metadata }]; - - return { type: "tweet", chunks }; - } else { - console.log("thread in else statement", JSON.stringify(thread)); - const chunkedTweets = (thread as any).map((tweet: Tweet) => { - const chunkedTweet = chunkText(tweet.text, 1536); - - const metadata = { - tweetId: tweet.id, - tweetLinks: tweet.links, - tweetVids: tweet.videos, - tweetImages: tweet.images, - }; - - return { chunkedTweet, metadata }; - }); - - return { type: "tweet", chunks: chunkedTweets }; - } -} diff --git a/apps/cf-ai-backend/wrangler.toml b/apps/cf-ai-backend/wrangler.toml index ea93fd63..0d7ede62 100644 --- a/apps/cf-ai-backend/wrangler.toml +++ b/apps/cf-ai-backend/wrangler.toml @@ -3,6 +3,13 @@ main = "src/index.ts" compatibility_date = "2024-02-23" node_compat = true +# tail_consumers = [{service = "new-cf-ai-backend-tail"}] + +[[services]] +binding = "THREAD" +service = "tweet-thread" +entrypoint = "ThreadWorker" + # [env.preview] [[vectorize]] binding = "VECTORIZE_INDEX" @@ -29,3 +36,19 @@ preview_id = "c58b6202814f4224acea97627d0c18aa" [placement] mode = "smart" + +[[queues.producers]] + queue = "prod-embedchunks-queue" + binding ="EMBEDCHUNKS_QUEUE" + +[[queues.consumers]] + queue = "prod-embedchunks-queue" + max_batch_size = 100 + max_retries = 3 + dead_letter_queue = "prod-embedchunks-dlq" + + +[[d1_databases]] +binding = "DATABASE" +database_name = "prod-d1-supermemory" +database_id = "f527a727-c472-41d4-8eaf-3d7ba0f2f395"
\ No newline at end of file diff --git a/apps/extension/content/content.tsx b/apps/extension/content/content.tsx index e97c06e6..1a80774c 100644 --- a/apps/extension/content/content.tsx +++ b/apps/extension/content/content.tsx @@ -4,9 +4,6 @@ import("./base.css"); setTimeout(initial, 1000); -const TAILWIND_URL = - "https://cdn.jsdelivr.net/npm/tailwindcss@^2.0/dist/tailwind.min.css"; - const appendTailwindStyleData = (shadowRoot: ShadowRoot) => { const styleSheet = document.createElement("style"); @@ -20,14 +17,6 @@ const appendTailwindStyleData = (shadowRoot: ShadowRoot) => { }); }; -const appendTailwindStyleLink = (shadowRoot: ShadowRoot) => { - // Import Tailwind CSS and inject it into the shadow DOM - const styleSheet = document.createElement("link"); - styleSheet.rel = "stylesheet"; - styleSheet.href = TAILWIND_URL; - shadowRoot.appendChild(styleSheet); -}; - function initial() { // Create a new div element to host the shadow root. // Styles for this div is in `content/content.css` diff --git a/apps/web/app/(auth)/onboarding/page.tsx b/apps/web/app/(auth)/onboarding/page.tsx index 93e07e73..9a6ac481 100644 --- a/apps/web/app/(auth)/onboarding/page.tsx +++ b/apps/web/app/(auth)/onboarding/page.tsx @@ -294,7 +294,7 @@ function StepThree({ currStep }: { currStep: number }) { }); if (cont.success) { - toast.success("Memory created", { + toast.success("Memory queued", { richColors: true, }); } else { diff --git a/apps/web/app/(dash)/(memories)/content.tsx b/apps/web/app/(dash)/(memories)/content.tsx index 179b4ef2..6e2659cb 100644 --- a/apps/web/app/(dash)/(memories)/content.tsx +++ b/apps/web/app/(dash)/(memories)/content.tsx @@ -1,6 +1,6 @@ "use client"; -import { Content, StoredSpace } from "@/server/db/schema"; +import { Content, StoredSpace } from "@repo/db/schema"; import { MemoriesIcon, NextIcon, SearchIcon, UrlIcon } from "@repo/ui/icons"; import { ArrowLeftIcon, diff --git a/apps/web/app/(dash)/(memories)/space/[spaceid]/page.tsx b/apps/web/app/(dash)/(memories)/space/[spaceid]/page.tsx index ed1ea1cc..8ad9d9cc 100644 --- a/apps/web/app/(dash)/(memories)/space/[spaceid]/page.tsx +++ b/apps/web/app/(dash)/(memories)/space/[spaceid]/page.tsx @@ -3,7 +3,7 @@ import { redirect } from "next/navigation"; import MemoriesPage from "../../content"; import { db } from "@/server/db"; import { and, eq } from "drizzle-orm"; -import { spacesAccess } from "@/server/db/schema"; +import { spacesAccess } from "@repo/db/schema"; import { auth } from "@/server/auth"; async function Page({ params: { spaceid } }: { params: { spaceid: number } }) { diff --git a/apps/web/app/(dash)/dialogContentContainer.tsx b/apps/web/app/(dash)/dialogContentContainer.tsx index 4e8d81ef..1a11ac6d 100644 --- a/apps/web/app/(dash)/dialogContentContainer.tsx +++ b/apps/web/app/(dash)/dialogContentContainer.tsx @@ -1,4 +1,4 @@ -import { StoredSpace } from "@/server/db/schema"; +import { StoredSpace } from "@repo/db/schema"; import { useEffect, useMemo, useState } from "react"; import { createMemory, createSpace } from "../actions/doers"; import ComboboxWithCreate from "@repo/ui/shadcn/combobox"; @@ -76,7 +76,7 @@ export function DialogContentContainer({ setSelectedSpaces([]); if (cont.success) { - toast.success("Memory created", { + toast.success("Memory queued", { richColors: true, }); } else { diff --git a/apps/web/app/(dash)/menu.tsx b/apps/web/app/(dash)/menu.tsx index 7eac5a56..1c0ce1ee 100644 --- a/apps/web/app/(dash)/menu.tsx +++ b/apps/web/app/(dash)/menu.tsx @@ -28,7 +28,7 @@ import { getSpaces } from "../actions/fetchers"; import { HomeIcon } from "@heroicons/react/24/solid"; import { createMemory, createSpace } from "../actions/doers"; import ComboboxWithCreate from "@repo/ui/shadcn/combobox"; -import { StoredSpace } from "@/server/db/schema"; +import { StoredSpace } from "@repo/db/schema"; import useMeasure from "react-use-measure"; import { useKeyPress } from "@/lib/useKeyPress"; @@ -121,9 +121,14 @@ function Menu() { setContent(""); setSelectedSpaces([]); if (cont.success) { + toast.success("Memory queued", { + richColors: true, + }); + } else { + toast.error(`Memory creation failed: ${cont.error}`); + throw new Error(`Memory creation failed: ${cont.error}`); return cont; } - throw new Error(`Memory creation failed: ${cont.error}`); }; return ( @@ -275,10 +280,7 @@ function Menu() { ]); setSelectedSpaces((prev) => [...prev, creationTask.data!]); } else { - toast.error( - "Space creation failed: " + creationTask.error ?? - "Unknown error", - ); + toast.error("Space creation failed: " + creationTask.error); } }} placeholder="Select or create a new space." diff --git a/apps/web/app/actions/doers.ts b/apps/web/app/actions/doers.ts index 9750a705..c11d5f0a 100644 --- a/apps/web/app/actions/doers.ts +++ b/apps/web/app/actions/doers.ts @@ -11,13 +11,13 @@ import { spacesAccess, storedContent, users, -} from "../../server/db/schema"; +} from "@repo/db/schema"; import { ServerActionReturnType } from "./types"; import { auth } from "../../server/auth"; import { Tweet } from "react-tweet/api"; -import { getMetaData } from "@/lib/get-metadata"; +// import { getMetaData } from "@/lib/get-metadata"; import { and, eq, inArray, sql } from "drizzle-orm"; -import { LIMITS } from "@/lib/constants"; +import { LIMITS } from "@repo/shared-types"; import { ChatHistory } from "@repo/shared-types"; import { decipher } from "@/server/encrypt"; import { redirect } from "next/navigation"; @@ -104,25 +104,6 @@ const typeDecider = (content: string): "page" | "tweet" | "note" => { } }; -export const limit = async ( - userId: string, - type = "page", - items: number = 1, -) => { - const countResult = await db - .select({ - count: sql<number>`count(*)`.mapWith(Number), - }) - .from(storedContent) - .where(and(eq(storedContent.userId, userId), eq(storedContent.type, type))); - - const currentCount = countResult[0]?.count || 0; - const totalLimit = LIMITS[type as keyof typeof LIMITS]; - const remainingLimit = totalLimit - currentCount; - - return items <= remainingLimit; -}; - export const addUserToSpace = async (userEmail: string, spaceId: number) => { const data = await auth(); @@ -208,122 +189,15 @@ export const createMemory = async (input: { return { error: "Not authenticated", success: false }; } - const type = typeDecider(input.content); - - let pageContent = input.content; - let metadata: Awaited<ReturnType<typeof getMetaData>>; - let vectorData: string; - - if (!(await limit(data.user.id, type))) { - return { - success: false, - data: 0, - error: `You have exceeded the limit of ${LIMITS[type as keyof typeof LIMITS]} ${type}s.`, - }; - } - - let noteId = 0; - - if (type === "page") { - const response = await fetch("https://md.dhr.wtf/?url=" + input.content, { - headers: { - Authorization: "Bearer " + process.env.BACKEND_SECURITY_KEY, - }, - }); - pageContent = await response.text(); - vectorData = pageContent; - try { - metadata = await getMetaData(input.content); - } catch (e) { - return { - success: false, - error: "Failed to fetch metadata for the page. Please try again later.", - }; - } - } else if (type === "tweet") { - //Request the worker for the entire thread - - let thread: string; - let errorOccurred: boolean = false; - - try { - const cf_thread_endpoint = process.env.THREAD_CF_WORKER; - const authKey = process.env.THREAD_CF_AUTH; - const threadRequest = await fetch(cf_thread_endpoint, { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: authKey, - }, - body: JSON.stringify({ url: input.content }), - }); - - if (threadRequest.status !== 200) { - throw new Error( - `Failed to fetch the thread: ${input.content}, Reason: ${threadRequest.statusText}`, - ); - } - - thread = await threadRequest.text(); - if (thread.trim().length === 2) { - console.log("Thread is an empty array"); - throw new Error( - "[THREAD FETCHING SERVICE] Got no content form thread worker", - ); - } - } catch (e) { - console.log("[THREAD FETCHING SERVICE] Failed to fetch the thread", e); - errorOccurred = true; - } - - const tweet = await getTweetData(input.content.split("/").pop() as string); - - pageContent = tweetToMd(tweet); - console.log("THis ishte page content!!", pageContent); - //@ts-ignore - vectorData = errorOccurred ? JSON.stringify(pageContent) : thread; - metadata = { - baseUrl: input.content, - description: tweet.text.slice(0, 200), - image: tweet.user.profile_image_url_https, - title: `Tweet by ${tweet.user.name}`, - }; - } else if (type === "note") { - pageContent = input.content; - vectorData = pageContent; - noteId = new Date().getTime(); - metadata = { - baseUrl: `https://supermemory.ai/note/${noteId}`, - description: `Note created at ${new Date().toLocaleString()}`, - image: "https://supermemory.ai/logo.png", - title: `${pageContent.slice(0, 20)} ${pageContent.length > 20 ? "..." : ""}`, - }; - } else { - return { - success: false, - data: 0, - error: "Invalid type", - }; - } - - let storeToSpaces = input.spaces; - - if (!storeToSpaces) { - storeToSpaces = []; - } - - const vectorSaveResponse = await fetch( + // make the backend reqeust for the queue here + const vectorSaveResponses = await fetch( `${process.env.BACKEND_BASE_URL}/api/add`, { method: "POST", body: JSON.stringify({ - pageContent: vectorData, - title: metadata.title, - description: metadata.description, - url: metadata.baseUrl, - spaces: storeToSpaces.map((spaceId) => spaceId.toString()), + url: input.content, + spaces: input.spaces, user: data.user.id, - type, }), headers: { "Content-Type": "application/json", @@ -331,126 +205,262 @@ export const createMemory = async (input: { }, }, ); - - if (!vectorSaveResponse.ok) { - const errorData = await vectorSaveResponse.text(); - console.error(errorData); - return { - success: false, - data: 0, - error: `Failed to save to vector store. Backend returned error: ${errorData}`, - }; - } - - let contentId: number; - - const response = (await vectorSaveResponse.json()) as { + const response = (await vectorSaveResponses.json()) as { status: string; - chunkedInput: string; message?: string; }; - try { - if (response.status !== "ok") { - if (response.status === "error") { - return { - success: false, - data: 0, - error: response.message, - }; - } else { - return { - success: false, - data: 0, - error: `Failed to save to vector store. Backend returned error: ${response.message}`, - }; - } - } - } catch (e) { + if (response.status !== "ok") { return { success: false, data: 0, - error: `Failed to save to vector store. Backend returned error: ${e}`, + error: response.message, }; } - const saveToDbUrl = - (metadata.baseUrl.split("#supermemory-user-")[0] ?? metadata.baseUrl) + - "#supermemory-user-" + - data.user.id; - - // Insert into database - try { - const insertResponse = await db - .insert(storedContent) - .values({ - content: pageContent, - title: metadata.title, - description: metadata.description, - url: saveToDbUrl, - baseUrl: saveToDbUrl, - image: metadata.image, - savedAt: new Date(), - userId: data.user.id, - type, - noteId, - }) - .returning({ id: storedContent.id }); - revalidatePath("/memories"); - revalidatePath("/home"); - - if (!insertResponse[0]?.id) { - return { - success: false, - data: 0, - error: "Something went wrong while saving the document to the database", - }; - } - - contentId = insertResponse[0]?.id; - } catch (e) { - const error = e as Error; - console.log("Error: ", error.message); - - if ( - error.message.includes( - "D1_ERROR: UNIQUE constraint failed: storedContent.baseUrl", - ) - ) { - return { - success: false, - data: 0, - error: "Content already exists", - }; - } - - return { - success: false, - data: 0, - error: "Failed to save to database with error: " + error.message, - }; - } - - if (storeToSpaces.length > 0) { - // Adding the many-to-many relationship between content and spaces - const spaceData = await db - .select() - .from(space) - .where( - and(inArray(space.id, storeToSpaces), eq(space.user, data.user.id)), - ) - .all(); - - await Promise.all( - spaceData.map(async (s) => { - await db - .insert(contentToSpace) - .values({ contentId: contentId, spaceId: s.id }); - - await db.update(space).set({ numItems: s.numItems + 1 }); - }), - ); - } + // const type = typeDecider(input.content); + + // let pageContent = input.content; + // let metadata: Awaited<ReturnType<typeof getMetaData>>; + // let vectorData: string; + + // if (!(await limit(data.user.id, type))) { + // return { + // success: false, + // data: 0, + // error: `You have exceeded the limit of ${LIMITS[type as keyof typeof LIMITS]} ${type}s.`, + // }; + // } --> How would this fit in the backend??? + + // let noteId = 0; + + // if (type === "page") { + // const response = await fetch("https://md.dhr.wtf/?url=" + input.content, { + // headers: { + // Authorization: "Bearer " + process.env.BACKEND_SECURITY_KEY, + // }, + // }); + // pageContent = await response.text(); + // vectorData = pageContent; + // try { + // metadata = await getMetaData(input.content); + // } catch (e) { + // return { + // success: false, + // error: "Failed to fetch metadata for the page. Please try again later.", + // }; + // } + // } else if (type === "tweet") { + // //Request the worker for the entire thread + + // let thread: string; + // let errorOccurred: boolean = false; + + // try { + // const cf_thread_endpoint = process.env.THREAD_CF_WORKER; + // const authKey = process.env.THREAD_CF_AUTH; + // const threadRequest = await fetch(cf_thread_endpoint, { + // method: "POST", + // headers: { + // "Content-Type": "application/json", + // Authorization: authKey, + // }, + // body: JSON.stringify({ url: input.content }), + // }); + + // if (threadRequest.status !== 200) { + // throw new Error( + // `Failed to fetch the thread: ${input.content}, Reason: ${threadRequest.statusText}`, + // ); + // } + + // thread = await threadRequest.text(); + // if (thread.trim().length === 2) { + // console.log("Thread is an empty array"); + // throw new Error( + // "[THREAD FETCHING SERVICE] Got no content form thread worker", + // ); + // } + // } catch (e) { + // console.log("[THREAD FETCHING SERVICE] Failed to fetch the thread", e); + // errorOccurred = true; + // } + + // const tweet = await getTweetData(input.content.split("/").pop() as string); + + // pageContent = tweetToMd(tweet); + // console.log("THis ishte page content!!", pageContent); + // //@ts-ignore + // vectorData = errorOccurred ? JSON.stringify(pageContent) : thread; + // metadata = { + // baseUrl: input.content, + // description: tweet.text.slice(0, 200), + // image: tweet.user.profile_image_url_https, + // title: `Tweet by ${tweet.user.name}`, + // }; + // } else if (type === "note") { + // pageContent = input.content; + // vectorData = pageContent; + // noteId = new Date().getTime(); + // metadata = { + // baseUrl: `https://supermemory.ai/note/${noteId}`, + // description: `Note created at ${new Date().toLocaleString()}`, + // image: "https://supermemory.ai/logo.png", + // title: `${pageContent.slice(0, 20)} ${pageContent.length > 20 ? "..." : ""}`, + // }; + // } else { + // return { + // success: false, + // data: 0, + // error: "Invalid type", + // }; + // } + + // let storeToSpaces = input.spaces; + + // if (!storeToSpaces) { + // storeToSpaces = []; + // } + + // const vectorSaveResponse = await fetch( + // `${process.env.BACKEND_BASE_URL}/api/add`, + // { + // method: "POST", + // body: JSON.stringify({ + // pageContent: vectorData, + // title: metadata.title, + // description: metadata.description, + // url: metadata.baseUrl, + // spaces: storeToSpaces.map((spaceId) => spaceId.toString()), + // user: data.user.id, + // type, + // }), + // headers: { + // "Content-Type": "application/json", + // Authorization: "Bearer " + process.env.BACKEND_SECURITY_KEY, + // }, + // }, + // ); + + // if (!vectorSaveResponse.ok) { + // const errorData = await vectorSaveResponse.text(); + // console.error(errorData); + // return { + // success: false, + // data: 0, + // error: `Failed to save to vector store. Backend returned error: ${errorData}`, + // }; + // } + + // let contentId: number; + + // const response = (await vectorSaveResponse.json()) as { + // status: string; + // chunkedInput: string; + // message?: string; + // }; + + // try { + // if (response.status !== "ok") { + // if (response.status === "error") { + // return { + // success: false, + // data: 0, + // error: response.message, + // }; + // } else { + // return { + // success: false, + // data: 0, + // error: `Failed to save to vector store. Backend returned error: ${response.message}`, + // }; + // } + // } + // } catch (e) { + // return { + // success: false, + // data: 0, + // error: `Failed to save to vector store. Backend returned error: ${e}`, + // }; + // } + + // const saveToDbUrl = + // (metadata.baseUrl.split("#supermemory-user-")[0] ?? metadata.baseUrl) + + // "#supermemory-user-" + + // data.user.id; + + // // Insert into database + // try { + // const insertResponse = await db + // .insert(storedContent) + // .values({ + // content: pageContent, + // title: metadata.title, + // description: metadata.description, + // url: saveToDbUrl, + // baseUrl: saveToDbUrl, + // image: metadata.image, + // savedAt: new Date(), + // userId: data.user.id, + // type, + // noteId, + // }) + // .returning({ id: storedContent.id }); + // revalidatePath("/memories"); + // revalidatePath("/home"); + + // if (!insertResponse[0]?.id) { + // return { + // success: false, + // data: 0, + // error: "Something went wrong while saving the document to the database", + // }; + // } + + // contentId = insertResponse[0]?.id; + // } catch (e) { + // const error = e as Error; + // console.log("Error: ", error.message); + + // if ( + // error.message.includes( + // "D1_ERROR: UNIQUE constraint failed: storedContent.baseUrl", + // ) + // ) { + // return { + // success: false, + // data: 0, + // error: "Content already exists", + // }; + // } + + // return { + // success: false, + // data: 0, + // error: "Failed to save to database with error: " + error.message, + // }; + // } + + // if (storeToSpaces.length > 0) { + // // Adding the many-to-many relationship between content and spaces + // const spaceData = await db + // .select() + // .from(space) + // .where( + // and(inArray(space.id, storeToSpaces), eq(space.user, data.user.id)), + // ) + // .all(); + + // await Promise.all( + // spaceData.map(async (s) => { + // await db + // .insert(contentToSpace) + // .values({ contentId: contentId, spaceId: s.id }); + + // await db.update(space).set({ numItems: s.numItems + 1 }); + // }), + // ); + // } return { success: true, @@ -828,9 +838,9 @@ export async function getQuerySuggestions() { }; } - const fullQuery = content - .map((c) => `${c.title} \n\n${c.content}`) - .join(" "); + const fullQuery = ( + content?.map((c) => `${c.title} \n\n${c.content}`) ?? [] + ).join(" "); const suggestionsCall = (await env.AI.run( // @ts-ignore diff --git a/apps/web/app/actions/fetchers.ts b/apps/web/app/actions/fetchers.ts index 5f72089a..f00feb3c 100644 --- a/apps/web/app/actions/fetchers.ts +++ b/apps/web/app/actions/fetchers.ts @@ -15,7 +15,7 @@ import { StoredSpace, User, users, -} from "../../server/db/schema"; +} from "@repo/db/schema"; import { ServerActionReturnType } from "./types"; import { auth } from "../../server/auth"; import { ChatHistory, SourceZod } from "@repo/shared-types"; diff --git a/apps/web/app/api/chat/history/route.ts b/apps/web/app/api/chat/history/route.ts index 98b66064..197b8ee6 100644 --- a/apps/web/app/api/chat/history/route.ts +++ b/apps/web/app/api/chat/history/route.ts @@ -2,7 +2,7 @@ import { NextRequest } from "next/server"; import { ensureAuth } from "../../ensureAuth"; import { db } from "@/server/db"; import { eq } from "drizzle-orm"; -import { chatThreads } from "@/server/db/schema"; +import { chatThreads } from "@repo/db/schema"; export const runtime = "edge"; diff --git a/apps/web/app/api/chat/route.ts b/apps/web/app/api/chat/route.ts index a14c96df..78878e40 100644 --- a/apps/web/app/api/chat/route.ts +++ b/apps/web/app/api/chat/route.ts @@ -8,7 +8,7 @@ import { import { ensureAuth } from "../ensureAuth"; import { z } from "zod"; import { db } from "@/server/db"; -import { chatHistory as chatHistoryDb, chatThreads } from "@/server/db/schema"; +import { chatHistory as chatHistoryDb, chatThreads } from "@repo/db/schema"; import { and, eq, gt, sql } from "drizzle-orm"; import { join } from "path"; diff --git a/apps/web/app/api/ensureAuth.ts b/apps/web/app/api/ensureAuth.ts index 1fcd2914..92a5e3e8 100644 --- a/apps/web/app/api/ensureAuth.ts +++ b/apps/web/app/api/ensureAuth.ts @@ -1,6 +1,6 @@ import { NextRequest } from "next/server"; import { db } from "../../server/db"; -import { accounts, sessions, users } from "../../server/db/schema"; +import { accounts, sessions, users } from "@repo/db/schema"; import { eq } from "drizzle-orm"; export async function ensureAuth(req: NextRequest) { diff --git a/apps/web/app/api/getCount/route.ts b/apps/web/app/api/getCount/route.ts index f91b7b94..4fd77efd 100644 --- a/apps/web/app/api/getCount/route.ts +++ b/apps/web/app/api/getCount/route.ts @@ -1,6 +1,6 @@ import { db } from "@/server/db"; import { and, eq, ne, sql } from "drizzle-orm"; -import { sessions, storedContent, users } from "@/server/db/schema"; +import { sessions, storedContent, users } from "@repo/db/schema"; import { type NextRequest, NextResponse } from "next/server"; import { ensureAuth } from "../ensureAuth"; diff --git a/apps/web/app/api/me/route.ts b/apps/web/app/api/me/route.ts index ab408f3e..25aa27bc 100644 --- a/apps/web/app/api/me/route.ts +++ b/apps/web/app/api/me/route.ts @@ -1,6 +1,6 @@ import { db } from "@/server/db"; import { eq } from "drizzle-orm"; -import { sessions, users } from "@/server/db/schema"; +import { sessions, users } from "@repo/db/schema"; import { type NextRequest, NextResponse } from "next/server"; export const runtime = "edge"; diff --git a/apps/web/app/api/memories/route.ts b/apps/web/app/api/memories/route.ts index acb43b5d..0084524e 100644 --- a/apps/web/app/api/memories/route.ts +++ b/apps/web/app/api/memories/route.ts @@ -6,7 +6,7 @@ import { contentToSpace, storedContent, users, -} from "@/server/db/schema"; +} from "@repo/db/schema"; import { ensureAuth } from "../ensureAuth"; export const runtime = "edge"; diff --git a/apps/web/app/api/spaces/route.ts b/apps/web/app/api/spaces/route.ts index e85e07ed..27ff0dfb 100644 --- a/apps/web/app/api/spaces/route.ts +++ b/apps/web/app/api/spaces/route.ts @@ -1,5 +1,5 @@ import { db } from "@/server/db"; -import { space } from "@/server/db/schema"; +import { space } from "@repo/db/schema"; import { eq } from "drizzle-orm"; import { NextRequest, NextResponse } from "next/server"; import { ensureAuth } from "../ensureAuth"; diff --git a/apps/web/app/api/store/friend/route.ts b/apps/web/app/api/store/friend/route.ts deleted file mode 100644 index 554b1cee..00000000 --- a/apps/web/app/api/store/friend/route.ts +++ /dev/null @@ -1,44 +0,0 @@ -import { type NextRequest } from "next/server"; -import { createMemoryFromAPI } from "../helper"; - -type FriendData = { - id: string; - created_at: string; - transcript: string; - structured: { - title: string; - overview: string; - action_items: [ - { - description: string; - }, - ]; - }; -}; - -export async function POST(req: NextRequest) { - const body: FriendData = await req.json(); - - const userId = new URL(req.url).searchParams.get("uid"); - - if (!userId) { - return new Response( - JSON.stringify({ status: 400, body: "Missing user ID" }), - ); - } - - await createMemoryFromAPI({ - data: { - title: "Friend: " + body.structured.title, - description: body.structured.overview, - pageContent: - body.transcript + "\n\n" + JSON.stringify(body.structured.action_items), - spaces: [], - type: "note", - url: "https://basedhardware.com", - }, - userId: userId, - }); - - return new Response(JSON.stringify({ status: 200, body: "success" })); -} diff --git a/apps/web/app/api/store/helper.ts b/apps/web/app/api/store/helper.ts index a2c04dc1..db13ca91 100644 --- a/apps/web/app/api/store/helper.ts +++ b/apps/web/app/api/store/helper.ts @@ -1,22 +1,22 @@ import { z } from "zod"; import { db } from "@/server/db"; -import { contentToSpace, space, storedContent } from "@/server/db/schema"; +import { contentToSpace, space, storedContent } from "@repo/db/schema"; import { and, eq, inArray } from "drizzle-orm"; -import { LIMITS } from "@/lib/constants"; -import { limit } from "@/app/actions/doers"; +// import { LIMITS } from "@repo/shared-types"; +// import { limit } from "@/app/actions/doers"; import { type AddFromAPIType } from "@repo/shared-types"; export const createMemoryFromAPI = async (input: { data: AddFromAPIType; userId: string; }) => { - if (!(await limit(input.userId, input.data.type))) { - return { - success: false, - data: 0, - error: `You have exceeded the limit of ${LIMITS[input.data.type as keyof typeof LIMITS]} ${input.data.type}s.`, - }; - } + // if (!(await limit(input.userId, input.data.type))) { + // return { + // success: false, + // data: 0, + // error: `You have exceeded the limit of ${LIMITS[input.data.type as keyof typeof LIMITS]} ${input.data.type}s.`, + // }; + // } const vectorSaveResponse = await fetch( `${process.env.BACKEND_BASE_URL}/api/add`, diff --git a/apps/web/app/api/telegram/route.ts b/apps/web/app/api/telegram/route.ts index 06499c7d..c629e409 100644 --- a/apps/web/app/api/telegram/route.ts +++ b/apps/web/app/api/telegram/route.ts @@ -1,5 +1,5 @@ import { db } from "@/server/db"; -import { storedContent, users } from "@/server/db/schema"; +import { storedContent, users } from "@repo/db/schema"; import { cipher } from "@/server/encrypt"; import { eq } from "drizzle-orm"; import { Bot, webhookCallback } from "grammy"; diff --git a/apps/web/drizzle.config.ts b/apps/web/drizzle.config.ts index 58116123..5df2ca29 100644 --- a/apps/web/drizzle.config.ts +++ b/apps/web/drizzle.config.ts @@ -1,7 +1,7 @@ import { type Config } from "drizzle-kit"; export default { - schema: "./server/db/schema.ts", + schema: "../../packages/db/schema.ts", dialect: "sqlite", driver: "d1", dbCredentials: { diff --git a/apps/web/lib/constants.ts b/apps/web/lib/constants.ts index 241a6a1d..73f9a83d 100644 --- a/apps/web/lib/constants.ts +++ b/apps/web/lib/constants.ts @@ -1,9 +1,3 @@ -export const LIMITS = { - page: 100, - tweet: 1000, - note: 1000, -}; - export const codeLanguageSubset = [ "python", "javascript", diff --git a/apps/web/lib/get-metadata.ts b/apps/web/lib/get-metadata.ts deleted file mode 100644 index c81397ff..00000000 --- a/apps/web/lib/get-metadata.ts +++ /dev/null @@ -1,40 +0,0 @@ -"use server"; -import * as cheerio from "cheerio"; - -// TODO: THIS SHOULD PROBABLY ALSO FETCH THE OG-IMAGE -export async function getMetaData(url: string) { - const response = await fetch(url); - const html = await response.text(); - - const $ = cheerio.load(html); - - // Extract the base URL - const baseUrl = url; - - // Extract title - const title = $("title").text().trim(); - - const description = $("meta[name=description]").attr("content") ?? ""; - - const _favicon = - $("link[rel=icon]").attr("href") ?? "https://supermemory.dhr.wtf/web.svg"; - - let favicon = - _favicon.trim().length > 0 - ? _favicon.trim() - : "https://supermemory.dhr.wtf/web.svg"; - if (favicon.startsWith("/")) { - favicon = baseUrl + favicon; - } else if (favicon.startsWith("./")) { - favicon = baseUrl + favicon.slice(1); - } - - // Prepare the metadata object - const metadata = { - title, - description, - image: favicon, - baseUrl, - }; - return metadata; -} diff --git a/apps/web/migrations/0000_steep_moira_mactaggert.sql b/apps/web/migrations/0000_fixed_pandemic.sql index 5813639d..09b5431a 100644 --- a/apps/web/migrations/0000_steep_moira_mactaggert.sql +++ b/apps/web/migrations/0000_fixed_pandemic.sql @@ -43,7 +43,7 @@ CREATE TABLE `chatHistory` ( `answerParts` text, `answerSources` text, `answerJustification` text, - `createdAt` integer DEFAULT '"2024-07-25T22:31:50.848Z"' NOT NULL, + `createdAt` integer DEFAULT '"2024-07-31T07:35:53.819Z"' NOT NULL, FOREIGN KEY (`threadId`) REFERENCES `chatThread`(`id`) ON UPDATE no action ON DELETE cascade ); --> statement-breakpoint @@ -62,6 +62,19 @@ CREATE TABLE `contentToSpace` ( FOREIGN KEY (`spaceId`) REFERENCES `space`(`id`) ON UPDATE no action ON DELETE cascade ); --> statement-breakpoint +CREATE TABLE `jobs` ( + `id` integer PRIMARY KEY AUTOINCREMENT NOT NULL, + `userId` text NOT NULL, + `url` text NOT NULL, + `status` text NOT NULL, + `attempts` integer DEFAULT 0 NOT NULL, + `lastAttemptAt` integer, + `error` blob, + `createdAt` integer NOT NULL, + `updatedAt` integer NOT NULL, + FOREIGN KEY (`userId`) REFERENCES `user`(`id`) ON UPDATE no action ON DELETE cascade +); +--> statement-breakpoint CREATE TABLE `session` ( `sessionToken` text PRIMARY KEY NOT NULL, `userId` text NOT NULL, @@ -122,6 +135,10 @@ CREATE UNIQUE INDEX `authenticator_credentialID_unique` ON `authenticator` (`cre CREATE INDEX `canvas_user_userId` ON `canvas` (`userId`);--> statement-breakpoint CREATE INDEX `chatHistory_thread_idx` ON `chatHistory` (`threadId`);--> statement-breakpoint CREATE INDEX `chatThread_user_idx` ON `chatThread` (`userId`);--> statement-breakpoint +CREATE INDEX `jobs_userId_idx` ON `jobs` (`userId`);--> statement-breakpoint +CREATE INDEX `jobs_status_idx` ON `jobs` (`status`);--> statement-breakpoint +CREATE INDEX `jobs_createdAt_idx` ON `jobs` (`createdAt`);--> statement-breakpoint +CREATE INDEX `jobs_url_idx` ON `jobs` (`url`);--> statement-breakpoint CREATE UNIQUE INDEX `space_name_unique` ON `space` (`name`);--> statement-breakpoint CREATE INDEX `spaces_name_idx` ON `space` (`name`);--> statement-breakpoint CREATE INDEX `spaces_user_idx` ON `space` (`user`);--> statement-breakpoint diff --git a/apps/web/migrations/0001_Adding_jobs_table.sql b/apps/web/migrations/0001_Adding_jobs_table.sql new file mode 100644 index 00000000..7a687f72 --- /dev/null +++ b/apps/web/migrations/0001_Adding_jobs_table.sql @@ -0,0 +1,19 @@ +-- Migration number: 0001 2024-08-05T18:05:16.793Z +CREATE TABLE `jobs` ( + `id` integer PRIMARY KEY AUTOINCREMENT NOT NULL, + `userId` text NOT NULL, + `url` text NOT NULL, + `status` text NOT NULL, + `attempts` integer DEFAULT 0 NOT NULL, + `lastAttemptAt` integer, + `error` blob, + `createdAt` integer NOT NULL, + `updatedAt` integer NOT NULL, + FOREIGN KEY (`userId`) REFERENCES `user`(`id`) ON UPDATE no action ON DELETE cascade +); + + +CREATE INDEX `jobs_userId_idx` ON `jobs` (`userId`);--> statement-breakpoint +CREATE INDEX `jobs_status_idx` ON `jobs` (`status`);--> statement-breakpoint +CREATE INDEX `jobs_createdAt_idx` ON `jobs` (`createdAt`);--> statement-breakpoint +CREATE INDEX `jobs_url_idx` ON `jobs` (`url`);--> statement-breakpoint
\ No newline at end of file diff --git a/apps/web/migrations/meta/0000_snapshot.json b/apps/web/migrations/meta/0000_snapshot.json index a7689010..3bb8617f 100644 --- a/apps/web/migrations/meta/0000_snapshot.json +++ b/apps/web/migrations/meta/0000_snapshot.json @@ -1,7 +1,7 @@ { "version": "6", "dialect": "sqlite", - "id": "8705302a-eae7-4fbf-9ce8-8ae23df228a2", + "id": "3fbdb153-2764-4b09-ac22-05c3a131ec35", "prevId": "00000000-0000-0000-0000-000000000000", "tables": { "account": { @@ -305,7 +305,7 @@ "primaryKey": false, "notNull": true, "autoincrement": false, - "default": "'\"2024-07-25T22:31:50.848Z\"'" + "default": "'\"2024-07-31T07:35:53.819Z\"'" } }, "indexes": { @@ -422,6 +422,110 @@ }, "uniqueConstraints": {} }, + "jobs": { + "name": "jobs", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "userId": { + "name": "userId", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "url": { + "name": "url", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "status": { + "name": "status", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "attempts": { + "name": "attempts", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": 0 + }, + "lastAttemptAt": { + "name": "lastAttemptAt", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "error": { + "name": "error", + "type": "blob", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "createdAt": { + "name": "createdAt", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "updatedAt": { + "name": "updatedAt", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": { + "jobs_userId_idx": { + "name": "jobs_userId_idx", + "columns": ["userId"], + "isUnique": false + }, + "jobs_status_idx": { + "name": "jobs_status_idx", + "columns": ["status"], + "isUnique": false + }, + "jobs_createdAt_idx": { + "name": "jobs_createdAt_idx", + "columns": ["createdAt"], + "isUnique": false + }, + "jobs_url_idx": { + "name": "jobs_url_idx", + "columns": ["url"], + "isUnique": false + } + }, + "foreignKeys": { + "jobs_userId_user_id_fk": { + "name": "jobs_userId_user_id_fk", + "tableFrom": "jobs", + "tableTo": "user", + "columnsFrom": ["userId"], + "columnsTo": ["id"], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {} + }, "session": { "name": "session", "columns": { diff --git a/apps/web/migrations/meta/_journal.json b/apps/web/migrations/meta/_journal.json index d79e2607..59ab4ea6 100644 --- a/apps/web/migrations/meta/_journal.json +++ b/apps/web/migrations/meta/_journal.json @@ -5,8 +5,8 @@ { "idx": 0, "version": "6", - "when": 1721946710900, - "tag": "0000_steep_moira_mactaggert", + "when": 1722411353835, + "tag": "0000_fixed_pandemic", "breakpoints": true } ] diff --git a/apps/web/next.config.mjs b/apps/web/next.config.mjs index c0001fa5..307d5bdc 100644 --- a/apps/web/next.config.mjs +++ b/apps/web/next.config.mjs @@ -1,6 +1,5 @@ import MillionLint from "@million/lint"; import { setupDevPlatform } from "@cloudflare/next-on-pages/next-dev"; -import { withSentryConfig } from "@sentry/nextjs"; /** @type {import('next').NextConfig} */ const baseNextConfig = { @@ -9,6 +8,9 @@ const baseNextConfig = { env: { TELEGRAM_BOT_TOKEN: process.env.TELEGRAM_BOT_TOKEN, }, + eslint: { + disableDuringBuilds: true, + }, }; let selectedCofig = baseNextConfig; @@ -21,41 +23,6 @@ if (process.env.NODE_ENV === "development") { export default selectedCofig; -//! Disabled sentry for now because of unreasonably large bundle size -// export default withSentryConfig(selectedCofig, { -// // For all available options, see: -// // https://github.com/getsentry/sentry-webpack-plugin#options - -// org: "none-h00", -// project: "javascript-nextjs", -// // Only print logs for uploading source maps in CI -// silent: !process.env.CI, - -// // For all available options, see: -// // https://docs.sentry.io/platforms/javascript/guides/nextjs/manual-setup/ - -// // Upload a larger set of source maps for prettier stack traces (increases build time) -// widenClientFileUpload: true, - -// // Route browser requests to Sentry through a Next.js rewrite to circumvent ad-blockers. -// // This can increase your server load as well as your hosting bill. -// // Note: Check that the configured route will not match with your Next.js middleware, otherwise reporting of client- -// // side errors will fail. -// tunnelRoute: "/monitoring", - -// // Hides source maps from generated client bundles -// hideSourceMaps: true, - -// // Automatically tree-shake Sentry logger statements to reduce bundle size -// disableLogger: true, - -// // Enables automatic instrumentation of Vercel Cron Monitors. (Does not yet work with App Router route handlers.) -// // See the following for more information: -// // https://docs.sentry.io/product/crons/ -// // https://vercel.com/docs/cron-jobs -// automaticVercelMonitors: true, -// }); - // we only need to use the utility during development so we can check NODE_ENV // (note: this check is recommended but completely optional) if (process.env.NODE_ENV === "development") { diff --git a/apps/web/package.json b/apps/web/package.json index 5773fe39..d3bf1f48 100644 --- a/apps/web/package.json +++ b/apps/web/package.json @@ -46,7 +46,6 @@ "@types/node": "^20.11.24", "@types/react": "^18.2.61", "@types/react-dom": "^18.2.19", - "drizzle-kit": "0.21.2", "eslint": "^8.57.0", "postcss": "^8.4.38", "typescript": "^5.3.3", diff --git a/apps/web/server/auth.ts b/apps/web/server/auth.ts index 20e42e9a..645989fa 100644 --- a/apps/web/server/auth.ts +++ b/apps/web/server/auth.ts @@ -2,7 +2,7 @@ import NextAuth, { NextAuthResult } from "next-auth"; import Google from "next-auth/providers/google"; import { DrizzleAdapter } from "@auth/drizzle-adapter"; import { db } from "./db"; -import { accounts, sessions, users, verificationTokens } from "./db/schema"; +import { accounts, sessions, users, verificationTokens } from "@repo/db/schema"; export const { handlers: { GET, POST }, diff --git a/apps/web/server/db/index.ts b/apps/web/server/db/index.ts index a9ec9106..52f3e350 100644 --- a/apps/web/server/db/index.ts +++ b/apps/web/server/db/index.ts @@ -1,6 +1,6 @@ import { drizzle } from "drizzle-orm/d1"; -import * as schema from "./schema"; +import * as schema from "@repo/db/schema"; export const db = drizzle(process.env.DATABASE, { schema, diff --git a/apps/web/server/db/schema.ts b/apps/web/server/db/schema.ts deleted file mode 100644 index 32b80719..00000000 --- a/apps/web/server/db/schema.ts +++ /dev/null @@ -1,244 +0,0 @@ -import { create } from "domain"; -import { relations, sql } from "drizzle-orm"; -import { - index, - int, - primaryKey, - sqliteTableCreator, - text, - integer, -} from "drizzle-orm/sqlite-core"; -import type { AdapterAccountType } from "next-auth/adapters"; - -export const createTable = sqliteTableCreator((name) => `${name}`); - -export const users = createTable( - "user", - { - id: text("id") - .primaryKey() - .$defaultFn(() => crypto.randomUUID()), - name: text("name"), - email: text("email").notNull(), - emailVerified: integer("emailVerified", { mode: "timestamp_ms" }), - image: text("image"), - telegramId: text("telegramId"), - hasOnboarded: integer("hasOnboarded", { mode: "boolean" }).default(false), - }, - (user) => ({ - emailIdx: index("users_email_idx").on(user.email), - telegramIdx: index("users_telegram_idx").on(user.telegramId), - idIdx: index("users_id_idx").on(user.id), - }), -); - -export type User = typeof users.$inferSelect; - -export const accounts = createTable( - "account", - { - userId: text("userId") - .notNull() - .references(() => users.id, { onDelete: "cascade" }), - type: text("type").$type<AdapterAccountType>().notNull(), - provider: text("provider").notNull(), - providerAccountId: text("providerAccountId").notNull(), - refresh_token: text("refresh_token"), - access_token: text("access_token"), - expires_at: integer("expires_at"), - token_type: text("token_type"), - scope: text("scope"), - id_token: text("id_token"), - session_state: text("session_state"), - }, - (account) => ({ - compoundKey: primaryKey({ - columns: [account.provider, account.providerAccountId], - }), - }), -); - -export const sessions = createTable("session", { - sessionToken: text("sessionToken").primaryKey(), - userId: text("userId") - .notNull() - .references(() => users.id, { onDelete: "cascade" }), - expires: integer("expires", { mode: "timestamp_ms" }).notNull(), -}); - -export const verificationTokens = createTable( - "verificationToken", - { - identifier: text("identifier").notNull(), - token: text("token").notNull(), - expires: integer("expires", { mode: "timestamp_ms" }).notNull(), - }, - (verificationToken) => ({ - compositePk: primaryKey({ - columns: [verificationToken.identifier, verificationToken.token], - }), - }), -); - -export const authenticators = createTable( - "authenticator", - { - credentialID: text("credentialID").notNull().unique(), - userId: text("userId") - .notNull() - .references(() => users.id, { onDelete: "cascade" }), - providerAccountId: text("providerAccountId").notNull(), - credentialPublicKey: text("credentialPublicKey").notNull(), - counter: integer("counter").notNull(), - credentialDeviceType: text("credentialDeviceType").notNull(), - credentialBackedUp: integer("credentialBackedUp", { - mode: "boolean", - }).notNull(), - transports: text("transports"), - }, - (authenticator) => ({ - compositePK: primaryKey({ - columns: [authenticator.userId, authenticator.credentialID], - }), - }), -); - -export const storedContent = createTable( - "storedContent", - { - id: integer("id").notNull().primaryKey({ autoIncrement: true }), - content: text("content").notNull(), - title: text("title", { length: 255 }), - description: text("description", { length: 255 }), - url: text("url").notNull(), - savedAt: int("savedAt", { mode: "timestamp" }).notNull(), - baseUrl: text("baseUrl", { length: 255 }).unique(), - ogImage: text("ogImage", { length: 255 }), - type: text("type").default("page"), - image: text("image", { length: 255 }), - userId: text("user").references(() => users.id, { - onDelete: "cascade", - }), - noteId: integer("noteId"), - }, - (sc) => ({ - urlIdx: index("storedContent_url_idx").on(sc.url), - savedAtIdx: index("storedContent_savedAt_idx").on(sc.savedAt), - titleInx: index("storedContent_title_idx").on(sc.title), - userIdx: index("storedContent_user_idx").on(sc.userId), - }), -); - -export type Content = typeof storedContent.$inferSelect; - -export const contentToSpace = createTable( - "contentToSpace", - { - contentId: integer("contentId") - .notNull() - .references(() => storedContent.id, { onDelete: "cascade" }), - spaceId: integer("spaceId") - .notNull() - .references(() => space.id, { onDelete: "cascade" }), - }, - (cts) => ({ - compoundKey: primaryKey({ columns: [cts.contentId, cts.spaceId] }), - }), -); - -export const space = createTable( - "space", - { - id: integer("id").notNull().primaryKey({ autoIncrement: true }), - name: text("name").notNull().unique().default("none"), - user: text("user", { length: 255 }).references(() => users.id, { - onDelete: "cascade", - }), - createdAt: int("createdAt", { mode: "timestamp" }).notNull(), - numItems: integer("numItems").notNull().default(0), - }, - (space) => ({ - nameIdx: index("spaces_name_idx").on(space.name), - userIdx: index("spaces_user_idx").on(space.user), - }), -); - -export const spacesAccess = createTable( - "spacesAccess", - { - spaceId: integer("spaceId") - .notNull() - .references(() => space.id, { onDelete: "cascade" }), - userEmail: text("userEmail").notNull(), - }, - (spaceAccess) => ({ - compoundKey: primaryKey({ - columns: [spaceAccess.spaceId, spaceAccess.userEmail], - }), - }), -); - -export type StoredContent = Omit<typeof storedContent.$inferSelect, "user">; -export type StoredSpace = typeof space.$inferSelect; -export type ChachedSpaceContent = StoredContent & { - space: number; -}; - -export const chatThreads = createTable( - "chatThread", - { - id: text("id") - .notNull() - .primaryKey() - .$defaultFn(() => crypto.randomUUID()), - firstMessage: text("firstMessage").notNull(), - userId: text("userId") - .notNull() - .references(() => users.id, { onDelete: "cascade" }), - }, - (thread) => ({ - userIdx: index("chatThread_user_idx").on(thread.userId), - }), -); - -export const chatHistory = createTable( - "chatHistory", - { - id: integer("id").notNull().primaryKey({ autoIncrement: true }), - threadId: text("threadId") - .notNull() - .references(() => chatThreads.id, { onDelete: "cascade" }), - question: text("question").notNull(), - answer: text("answerParts"), // Single answer part as string - answerSources: text("answerSources"), // JSON stringified array of objects - answerJustification: text("answerJustification"), - createdAt: int("createdAt", { mode: "timestamp" }) - .notNull() - .default(new Date()), - }, - (history) => ({ - threadIdx: index("chatHistory_thread_idx").on(history.threadId), - }), -); - -export const canvas = createTable( - "canvas", - { - id: text("id") - .notNull() - .primaryKey() - .$defaultFn(() => crypto.randomUUID()), - title: text("title").default("Untitled").notNull(), - description: text("description").default("Untitled").notNull(), - imageUrl: text("url").default("").notNull(), - userId: text("userId") - .notNull() - .references(() => users.id, { onDelete: "cascade" }), - }, - (canvas) => ({ - userIdx: index("canvas_user_userId").on(canvas.userId), - }), -); - -export type ChatThread = typeof chatThreads.$inferSelect; -export type ChatHistory = typeof chatHistory.$inferSelect; diff --git a/apps/web/wrangler.toml b/apps/web/wrangler.toml index 7f3fa047..a6232450 100644 --- a/apps/web/wrangler.toml +++ b/apps/web/wrangler.toml @@ -29,7 +29,6 @@ binding = "DATABASE" database_name = "dev-d1-anycontext" database_id = "fc562605-157a-4f60-b439-2a24ffed5b4c" - [[env.production.d1_databases]] binding = "DATABASE" database_name = "prod-d1-supermemory" |