aboutsummaryrefslogtreecommitdiff
path: root/apps/cf-ai-backend/src/queueConsumer
diff options
context:
space:
mode:
author Dhravya Shah <[email protected]> 2024-08-05 18:38:00 -0700
committer Dhravya Shah <[email protected]> 2024-08-05 18:38:00 -0700
commit e6526826715fd2bc8fcb05145cbfd9c0cdc02b95 (patch)
tree ad1cf74dd0964529e9f59de879ca4744d2b9c880 /apps/cf-ai-backend/src/queueConsumer
parent Merge branch 'main' of github.com:supermemoryai/supermemory (diff)
download supermemory-e6526826715fd2bc8fcb05145cbfd9c0cdc02b95.tar.xz
supermemory-e6526826715fd2bc8fcb05145cbfd9c0cdc02b95.zip
fix: entities urls might not be there
Diffstat (limited to 'apps/cf-ai-backend/src/queueConsumer')
-rw-r--r-- apps/cf-ai-backend/src/queueConsumer/chunkers/chunkTweet.ts 67
1 files changed, 67 insertions, 0 deletions
diff --git a/apps/cf-ai-backend/src/queueConsumer/chunkers/chunkTweet.ts b/apps/cf-ai-backend/src/queueConsumer/chunkers/chunkTweet.ts
new file mode 100644
index 00000000..ae1b18c6
--- /dev/null
+++ b/apps/cf-ai-backend/src/queueConsumer/chunkers/chunkTweet.ts
@@ -0,0 +1,67 @@
+import { TweetChunks } from "../../types";
+import chunkText from "./chonker";
+import { getRawTweet } from "@repo/shared-types/utils";
+
+/**
+ * One tweet of an already-processed thread, i.e. an element of the array
+ * form of the payload that `chunkThread` accepts.
+ */
+interface Tweet {
+  /** Copied into `Metadata.tweetId`. */
+  id: string;
+  /** Body of the tweet; this is what gets passed to `chunkText`. */
+  text: string;
+  /** Copied into `Metadata.tweetLinks`. */
+  links: string[];
+  /** Copied into `Metadata.tweetImages`. */
+  images: string[];
+  /** Copied into `Metadata.tweetVids`. */
+  videos: string[];
+}
+/**
+ * Per-tweet metadata that `chunkThread` attaches to each group of chunks.
+ *
+ * The list fields are deliberately loose (`any`) because in the raw-tweet
+ * path they are filled from an untyped, JSON-parsed object.
+ */
+interface Metadata {
+  /** Tweet identifier (`id_str` of a raw tweet, or `Tweet.id`). */
+  tweetId: string;
+  /** Links found in the tweet. */
+  tweetLinks: Array<any>;
+  /** Video URLs found in the tweet. */
+  tweetVids: Array<any>;
+  /** Image URLs found in the tweet. */
+  tweetImages: Array<any>;
+}
+
+/**
+ * The chunked text of a single tweet together with that tweet's metadata.
+ */
+export interface ThreadTweetData {
+  /** Text pieces produced by `chunkText` for this tweet. */
+  chunkedTweet: Array<string>;
+  /** Identifier, links and media URLs of the tweet the chunks came from. */
+  metadata: Metadata;
+}
+
+/**
+ * Splits a serialized tweet or thread into text chunks plus per-tweet
+ * metadata.
+ *
+ * `threadText` is JSON-parsed and then handled in one of two ways:
+ * - If it decodes to a string, that string is passed to `getRawTweet`, the
+ *   result is JSON-parsed as a raw tweet object, and a single entry is built
+ *   from its `text`, `id_str`, `entities` and `extended_entities` fields.
+ * - Otherwise it must decode to an array of tweets; each one is chunked
+ *   individually and its `links` / `videos` / `images` are carried over.
+ *
+ * In the raw-tweet path every metadata list is always an array: missing
+ * `entities`, `entities.urls`, `extended_entities` or
+ * `extended_entities.media` yield `[]` instead of `undefined` or a crash,
+ * and videos without a usable first variant are skipped.
+ *
+ * @param threadText JSON text of either a string or an array of tweets.
+ * @returns A `"tweet"` result with one `{ chunkedTweet, metadata }` entry per
+ *   tweet.
+ * @throws SyntaxError if `threadText` (or the extracted raw tweet) is not
+ *   valid JSON.
+ * @throws TypeError if the decoded payload is neither a string nor an array.
+ */
+export function chunkThread(threadText: string): TweetChunks {
+  const thread = JSON.parse(threadText);
+
+  if (typeof thread === "string") {
+    console.log("DA WORKER FAILED DO SOMEHTING FIX DA WROKER");
+    const rawTweet = getRawTweet(thread);
+    const parsedTweet: any = JSON.parse(rawTweet);
+
+    const chunkedTweet = chunkText(parsedTweet.text, 1536);
+
+    // Both containers are optional on a raw tweet, and so are the lists
+    // inside them; normalise to empty arrays before iterating.
+    const urls: any[] = parsedTweet.entities?.urls ?? [];
+    const media: any[] = parsedTweet.extended_entities?.media ?? [];
+
+    const metadata: Metadata = {
+      tweetId: parsedTweet.id_str,
+      tweetLinks: urls.map((url: any) => url.expanded_url),
+      tweetVids: media
+        .filter((item: any) => item.type === "video")
+        // Only the first variant is used; tolerate videos that have none.
+        .map((item: any) => item.video_info?.variants?.[0]?.url)
+        .filter((url: any) => url != null),
+      tweetImages: media
+        .filter((item: any) => item.type === "photo")
+        .map((item: any) => item.media_url_https),
+    };
+
+    return { type: "tweet", chunks: [{ chunkedTweet, metadata }] };
+  }
+
+  console.log(JSON.stringify(thread));
+
+  if (!Array.isArray(thread)) {
+    // Same error type the unguarded `.map` call would raise, but with a
+    // message that says what was actually wrong with the payload.
+    throw new TypeError(
+      "chunkThread expected a JSON string or a JSON array of tweets",
+    );
+  }
+
+  const chunkedTweets = thread.map((tweet: Tweet) => {
+    const chunkedTweet = chunkText(tweet.text, 1536);
+
+    const metadata = {
+      tweetId: tweet.id,
+      tweetLinks: tweet.links,
+      tweetVids: tweet.videos,
+      tweetImages: tweet.images,
+    };
+
+    return { chunkedTweet, metadata };
+  });
+
+  return { type: "tweet", chunks: chunkedTweets };
+}