about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- apps/cf-ai-backend/src/helper.ts 8
-rw-r--r-- apps/cf-ai-backend/src/index.ts 8
-rw-r--r-- apps/cf-ai-backend/src/utils/chunkTweet.ts 47
-rw-r--r-- apps/web/app/actions/doers.ts 9
4 files changed, 53 insertions, 19 deletions
diff --git a/apps/cf-ai-backend/src/helper.ts b/apps/cf-ai-backend/src/helper.ts
index 3a15ac4d..2a68879a 100644
--- a/apps/cf-ai-backend/src/helper.ts
+++ b/apps/cf-ai-backend/src/helper.ts
@@ -203,7 +203,7 @@ export async function batchCreateChunksAndEmbeddings({
{
const commonMetaData = {
type: body.type ?? "tweet",
- title: body.title,
+ title: body.title?.slice(0, 50) ?? "",
description: body.description ?? "",
url: body.url,
[sanitizeKey(`user-${body.user}`)]: 1,
@@ -225,6 +225,7 @@ export async function batchCreateChunksAndEmbeddings({
return {
pageContent: chunk,
metadata: {
+ content: chunk,
links: tweetLinks,
videos: tweetVids,
tweetId: tweetId,
@@ -254,7 +255,7 @@ export async function batchCreateChunksAndEmbeddings({
{
const commonMetaData = {
type: body.type ?? "page",
- title: body.title,
+ title: body.title?.slice(0, 50) ?? "",
description: body.description ?? "",
url: body.url,
[sanitizeKey(`user-${body.user}`)]: 1,
@@ -271,6 +272,7 @@ export async function batchCreateChunksAndEmbeddings({
return {
pageContent: chunk,
metadata: {
+ content: chunk,
...commonMetaData,
...spaceMetadata,
},
@@ -290,6 +292,7 @@ export async function batchCreateChunksAndEmbeddings({
case "note":
{
const commonMetaData = {
+ title: body.title?.slice(0, 50) ?? "",
type: body.type ?? "page",
description: body.description ?? "",
url: body.url,
@@ -307,6 +310,7 @@ export async function batchCreateChunksAndEmbeddings({
return {
pageContent: chunk,
metadata: {
+ content: chunk,
...commonMetaData,
...spaceMetadata,
},
diff --git a/apps/cf-ai-backend/src/index.ts b/apps/cf-ai-backend/src/index.ts
index a3ac1380..1a118327 100644
--- a/apps/cf-ai-backend/src/index.ts
+++ b/apps/cf-ai-backend/src/index.ts
@@ -77,19 +77,19 @@ app.post("/api/add", zValidator("json", vectorObj), async (c) => {
console.log(body.spaces);
let chunks: TweetChunks | PageOrNoteChunks;
// remove everything in <raw> tags
- const newPageContent = body.pageContent?.replace(/<raw>.*?<\/raw>/g, "");
+ // const newPageContent = body.pageContent?.replace(/<raw>.*?<\/raw>/g, "");
switch (body.type) {
case "tweet":
- chunks = chunkThread(newPageContent);
+ chunks = chunkThread(body.pageContent);
break;
case "page":
- chunks = chunkPage(newPageContent);
+ chunks = chunkPage(body.pageContent);
break;
case "note":
- chunks = chunkNote(newPageContent);
+ chunks = chunkNote(body.pageContent);
break;
}
diff --git a/apps/cf-ai-backend/src/utils/chunkTweet.ts b/apps/cf-ai-backend/src/utils/chunkTweet.ts
index 224c6c05..78f0f261 100644
--- a/apps/cf-ai-backend/src/utils/chunkTweet.ts
+++ b/apps/cf-ai-backend/src/utils/chunkTweet.ts
@@ -1,5 +1,6 @@
import { TweetChunks } from "../types";
import chunkText from "./chonker";
+import { getRawTweet } from "@repo/shared-types/utils";
interface Tweet {
id: string;
@@ -22,19 +23,43 @@ export interface ThreadTweetData {
export function chunkThread(threadText: string): TweetChunks {
const thread = JSON.parse(threadText);
+ if (typeof thread == "string") {
+ console.log("DA WORKER FAILED DO SOMEHTING FIX DA WROKER");
+ const rawTweet = getRawTweet(thread);
+ const parsedTweet: any = JSON.parse(rawTweet);
- const chunkedTweets = thread.map((tweet: Tweet) => {
- const chunkedTweet = chunkText(tweet.text, 1536);
-
- const metadata = {
- tweetId: tweet.id,
- tweetLinks: tweet.links,
- tweetVids: tweet.videos,
- tweetImages: tweet.images,
+ const chunkedTweet = chunkText(parsedTweet.text, 1536);
+ const metadata: Metadata = {
+ tweetId: parsedTweet.id_str,
+ tweetLinks: parsedTweet.entities.urls.map((url: any) => url.expanded_url),
+ tweetVids:
+ parsedTweet.extended_entities?.media
+ .filter((media: any) => media.type === "video")
+ .map((media: any) => media.video_info!.variants[0].url) || [],
+ tweetImages:
+ parsedTweet.extended_entities?.media
+ .filter((media: any) => media.type === "photo")
+ .map((media: any) => media.media_url_https!) || [],
};
- return { chunkedTweet, metadata };
- });
+ const chunks = [{ chunkedTweet: chunkedTweet, metadata }];
+
+ return { type: "tweet", chunks };
+ } else {
+ console.log(JSON.stringify(thread));
+ const chunkedTweets = thread.map((tweet: Tweet) => {
+ const chunkedTweet = chunkText(tweet.text, 1536);
+
+ const metadata = {
+ tweetId: tweet.id,
+ tweetLinks: tweet.links,
+ tweetVids: tweet.videos,
+ tweetImages: tweet.images,
+ };
+
+ return { chunkedTweet, metadata };
+ });
- return { type: "tweet", chunks: chunkedTweets };
+ return { type: "tweet", chunks: chunkedTweets };
+ }
}
diff --git a/apps/web/app/actions/doers.ts b/apps/web/app/actions/doers.ts
index da2bfb5f..eaaaafbd 100644
--- a/apps/web/app/actions/doers.ts
+++ b/apps/web/app/actions/doers.ts
@@ -236,7 +236,6 @@ export const createMemory = async (input: {
try {
const cf_thread_endpoint = process.env.THREAD_CF_WORKER;
const authKey = process.env.THREAD_CF_AUTH;
-
const threadRequest = await fetch(cf_thread_endpoint, {
method: "POST",
headers: {
@@ -253,6 +252,12 @@ export const createMemory = async (input: {
}
thread = await threadRequest.text();
+ if (thread.trim().length === 2) {
+ console.log("Thread is an empty array");
+ throw new Error(
+ "[THREAD FETCHING SERVICE] Got no content form thread worker",
+ );
+ }
} catch (e) {
console.log("[THREAD FETCHING SERVICE] Failed to fetch the thread", e);
errorOccurred = true;
@@ -263,7 +268,7 @@ export const createMemory = async (input: {
pageContent = tweetToMd(tweet);
console.log("THis ishte page content!!", pageContent);
//@ts-ignore
- vectorData = errorOccurred ? pageContent : thread;
+ vectorData = errorOccurred ? JSON.stringify(pageContent) : thread;
metadata = {
baseUrl: input.content,
description: tweet.text.slice(0, 200),