4 files changed, 53 insertions, 19 deletions
diff --git a/apps/cf-ai-backend/src/helper.ts b/apps/cf-ai-backend/src/helper.ts
index 3a15ac4d..2a68879a 100644
--- a/apps/cf-ai-backend/src/helper.ts
+++ b/apps/cf-ai-backend/src/helper.ts
@@ -203,7 +203,7 @@ export async function batchCreateChunksAndEmbeddings({
 			{
 				const commonMetaData = {
 					type: body.type ?? "tweet",
-					title: body.title,
+					title: body.title?.slice(0, 50) ?? "",
 					description: body.description ?? "",
 					url: body.url,
 					[sanitizeKey(`user-${body.user}`)]: 1,
@@ -225,6 +225,7 @@ export async function batchCreateChunksAndEmbeddings({
 							return {
 								pageContent: chunk,
 								metadata: {
+									content: chunk,
 									links: tweetLinks,
 									videos: tweetVids,
 									tweetId: tweetId,
@@ -254,7 +255,7 @@ export async function batchCreateChunksAndEmbeddings({
 			{
 				const commonMetaData = {
 					type: body.type ?? "page",
-					title: body.title,
+					title: body.title?.slice(0, 50) ?? "",
 					description: body.description ?? "",
 					url: body.url,
 					[sanitizeKey(`user-${body.user}`)]: 1,
@@ -271,6 +272,7 @@ export async function batchCreateChunksAndEmbeddings({
 					return {
 						pageContent: chunk,
 						metadata: {
+							content: chunk,
 							...commonMetaData,
 							...spaceMetadata,
 						},
@@ -290,6 +292,7 @@ export async function batchCreateChunksAndEmbeddings({
 		case "note":
 			{
 				const commonMetaData = {
+					title: body.title?.slice(0, 50) ?? "",
 					type: body.type ?? "page",
 					description: body.description ?? "",
 					url: body.url,
@@ -307,6 +310,7 @@ export async function batchCreateChunksAndEmbeddings({
 					return {
 						pageContent: chunk,
 						metadata: {
+							content: chunk,
 							...commonMetaData,
 							...spaceMetadata,
 						},
diff --git a/apps/cf-ai-backend/src/index.ts b/apps/cf-ai-backend/src/index.ts
index a3ac1380..1a118327 100644
--- a/apps/cf-ai-backend/src/index.ts
+++ b/apps/cf-ai-backend/src/index.ts
@@ -77,19 +77,19 @@ app.post("/api/add", zValidator("json", vectorObj), async (c) => {
 		console.log(body.spaces);
 		let chunks: TweetChunks | PageOrNoteChunks;
 		// remove everything in <raw> tags
-		const newPageContent = body.pageContent?.replace(/<raw>.*?<\/raw>/g, "");
+		// const newPageContent = body.pageContent?.replace(/<raw>.*?<\/raw>/g, "");
 
 		switch (body.type) {
 			case "tweet":
-				chunks = chunkThread(newPageContent);
+				chunks = chunkThread(body.pageContent);
 				break;
 
 			case "page":
-				chunks = chunkPage(newPageContent);
+				chunks = chunkPage(body.pageContent);
 				break;
 
 			case "note":
-				chunks = chunkNote(newPageContent);
+				chunks = chunkNote(body.pageContent);
 				break;
 		}
 
diff --git a/apps/cf-ai-backend/src/utils/chunkTweet.ts b/apps/cf-ai-backend/src/utils/chunkTweet.ts
index 224c6c05..78f0f261 100644
--- a/apps/cf-ai-backend/src/utils/chunkTweet.ts
+++ b/apps/cf-ai-backend/src/utils/chunkTweet.ts
@@ -1,5 +1,6 @@
 import { TweetChunks } from "../types";
 import chunkText from "./chonker";
+import { getRawTweet } from "@repo/shared-types/utils";
 
 interface Tweet {
 	id: string;
@@ -22,19 +23,43 @@ export interface ThreadTweetData {
 
 export function chunkThread(threadText: string): TweetChunks {
 	const thread = JSON.parse(threadText);
+	if (typeof thread == "string") {
+		console.log("DA WORKER FAILED DO SOMEHTING FIX DA WROKER");
+		const rawTweet = getRawTweet(thread);
+		const parsedTweet: any = JSON.parse(rawTweet);
 
-	const chunkedTweets = thread.map((tweet: Tweet) => {
-		const chunkedTweet = chunkText(tweet.text, 1536);
-
-		const metadata = {
-			tweetId: tweet.id,
-			tweetLinks: tweet.links,
-			tweetVids: tweet.videos,
-			tweetImages: tweet.images,
+		const chunkedTweet = chunkText(parsedTweet.text, 1536);
+		const metadata: Metadata = {
+			tweetId: parsedTweet.id_str,
+			tweetLinks: parsedTweet.entities.urls.map((url: any) => url.expanded_url),
+			tweetVids:
+				parsedTweet.extended_entities?.media
+					.filter((media: any) => media.type === "video")
+					.map((media: any) => media.video_info!.variants[0].url) || [],
+			tweetImages:
+				parsedTweet.extended_entities?.media
+					.filter((media: any) => media.type === "photo")
+					.map((media: any) => media.media_url_https!) || [],
 		};
 
-		return { chunkedTweet, metadata };
-	});
+		const chunks = [{ chunkedTweet: chunkedTweet, metadata }];
+
+		return { type: "tweet", chunks };
+	} else {
+		console.log(JSON.stringify(thread));
+		const chunkedTweets = thread.map((tweet: Tweet) => {
+			const chunkedTweet = chunkText(tweet.text, 1536);
+
+			const metadata = {
+				tweetId: tweet.id,
+				tweetLinks: tweet.links,
+				tweetVids: tweet.videos,
+				tweetImages: tweet.images,
+			};
+
+			return { chunkedTweet, metadata };
+		});
 
-	return { type: "tweet", chunks: chunkedTweets };
+		return { type: "tweet", chunks: chunkedTweets };
+	}
 }
diff --git a/apps/web/app/actions/doers.ts b/apps/web/app/actions/doers.ts
index da2bfb5f..eaaaafbd 100644
--- a/apps/web/app/actions/doers.ts
+++ b/apps/web/app/actions/doers.ts
@@ -236,7 +236,6 @@ export const createMemory = async (input: {
 		try {
 			const cf_thread_endpoint = process.env.THREAD_CF_WORKER;
 			const authKey = process.env.THREAD_CF_AUTH;
-
 			const threadRequest = await fetch(cf_thread_endpoint, {
 				method: "POST",
 				headers: {
@@ -253,6 +252,12 @@ export const createMemory = async (input: {
 			}
 
 			thread = await threadRequest.text();
+			if (thread.trim().length === 2) {
+				console.log("Thread is an empty array");
+				throw new Error(
+					"[THREAD FETCHING SERVICE] Got no content form thread worker",
+				);
+			}
 		} catch (e) {
 			console.log("[THREAD FETCHING SERVICE] Failed to fetch the thread", e);
 			errorOccurred = true;
@@ -263,7 +268,7 @@ export const createMemory = async (input: {
 		pageContent = tweetToMd(tweet);
 		console.log("THis ishte page content!!", pageContent);
 		//@ts-ignore
-		vectorData = errorOccurred ? pageContent : thread;
+		vectorData = errorOccurred ? JSON.stringify(pageContent) : thread;
 		metadata = {
 			baseUrl: input.content,
 			description: tweet.text.slice(0, 200),