diff options
Diffstat (limited to 'apps/cf-ai-backend/src/utils')
| -rw-r--r-- | apps/cf-ai-backend/src/utils/chunkTweet.ts | 47 |
1 files changed, 36 insertions, 11 deletions
diff --git a/apps/cf-ai-backend/src/utils/chunkTweet.ts b/apps/cf-ai-backend/src/utils/chunkTweet.ts
index 224c6c05..78f0f261 100644
--- a/apps/cf-ai-backend/src/utils/chunkTweet.ts
+++ b/apps/cf-ai-backend/src/utils/chunkTweet.ts
@@ -1,5 +1,6 @@
 import { TweetChunks } from "../types";
 import chunkText from "./chonker";
+import { getRawTweet } from "@repo/shared-types/utils";
 
 interface Tweet {
   id: string;
@@ -22,19 +23,43 @@ export interface ThreadTweetData {
 
 export function chunkThread(threadText: string): TweetChunks {
   const thread = JSON.parse(threadText);
+  if (typeof thread == "string") {
+    console.log("DA WORKER FAILED DO SOMEHTING FIX DA WROKER");
+    const rawTweet = getRawTweet(thread);
+    const parsedTweet: any = JSON.parse(rawTweet);
 
-  const chunkedTweets = thread.map((tweet: Tweet) => {
-    const chunkedTweet = chunkText(tweet.text, 1536);
-
-    const metadata = {
-      tweetId: tweet.id,
-      tweetLinks: tweet.links,
-      tweetVids: tweet.videos,
-      tweetImages: tweet.images,
+    const chunkedTweet = chunkText(parsedTweet.text, 1536);
+    const metadata: Metadata = {
+      tweetId: parsedTweet.id_str,
+      tweetLinks: parsedTweet.entities.urls.map((url: any) => url.expanded_url),
+      tweetVids:
+        parsedTweet.extended_entities?.media
+          .filter((media: any) => media.type === "video")
+          .map((media: any) => media.video_info!.variants[0].url) || [],
+      tweetImages:
+        parsedTweet.extended_entities?.media
+          .filter((media: any) => media.type === "photo")
+          .map((media: any) => media.media_url_https!) || [],
     };
 
-    return { chunkedTweet, metadata };
-  });
+    const chunks = [{ chunkedTweet: chunkedTweet, metadata }];
+
+    return { type: "tweet", chunks };
+  } else {
+    console.log(JSON.stringify(thread));
+    const chunkedTweets = thread.map((tweet: Tweet) => {
+      const chunkedTweet = chunkText(tweet.text, 1536);
+
+      const metadata = {
+        tweetId: tweet.id,
+        tweetLinks: tweet.links,
+        tweetVids: tweet.videos,
+        tweetImages: tweet.images,
+      };
+
+      return { chunkedTweet, metadata };
+    });
 
-  return { type: "tweet", chunks: chunkedTweets };
+    return { type: "tweet", chunks: chunkedTweets };
+  }
 }