aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDhravya Shah <[email protected]>2024-08-05 18:38:00 -0700
committerDhravya Shah <[email protected]>2024-08-05 18:38:00 -0700
commite6526826715fd2bc8fcb05145cbfd9c0cdc02b95 (patch)
treead1cf74dd0964529e9f59de879ca4744d2b9c880
parentMerge branch 'main' of github.com:supermemoryai/supermemory (diff)
downloadsupermemory-e6526826715fd2bc8fcb05145cbfd9c0cdc02b95.tar.xz
supermemory-e6526826715fd2bc8fcb05145cbfd9c0cdc02b95.zip
fix: entities urls might not be there
-rw-r--r--apps/cf-ai-backend/src/queueConsumer/chunkers/chunkTweet.ts67
-rw-r--r--apps/extension/helpers.ts2
2 files changed, 68 insertions, 1 deletions
diff --git a/apps/cf-ai-backend/src/queueConsumer/chunkers/chunkTweet.ts b/apps/cf-ai-backend/src/queueConsumer/chunkers/chunkTweet.ts
new file mode 100644
index 00000000..ae1b18c6
--- /dev/null
+++ b/apps/cf-ai-backend/src/queueConsumer/chunkers/chunkTweet.ts
@@ -0,0 +1,67 @@
+import { TweetChunks } from "../../types";
+import chunkText from "./chonker";
+import { getRawTweet } from "@repo/shared-types/utils";
+
+/**
+ * One tweet as it appears in an already-structured thread payload.
+ * chunkThread reads these fields from each element when the parsed
+ * input is not a string.
+ */
+interface Tweet {
+  /** Copied into Metadata.tweetId. */
+  id: string;
+  /** Tweet body; this is the value handed to chunkText. */
+  text: string;
+  /** Copied into Metadata.tweetLinks. */
+  links: string[];
+  /** Copied into Metadata.tweetImages. */
+  images: string[];
+  /** Copied into Metadata.tweetVids. */
+  videos: string[];
+}
+/**
+ * Per-tweet metadata that chunkThread attaches next to the chunked text.
+ * Element types are intentionally left loose (`any`), as in the original
+ * declaration.
+ */
+interface Metadata {
+  /** Tweet identifier (`id_str` of a raw tweet, or `id` of a thread entry). */
+  tweetId: string;
+  /** Links associated with the tweet. */
+  tweetLinks: Array<any>;
+  /** Video entries associated with the tweet. */
+  tweetVids: Array<any>;
+  /** Image entries associated with the tweet. */
+  tweetImages: Array<any>;
+}
+
+/**
+ * Pairing of one tweet's chunked text with the metadata describing that tweet.
+ */
+export interface ThreadTweetData {
+  /** Pieces of the tweet text (chunkThread fills this from chunkText). */
+  chunkedTweet: Array<string>;
+  /** Identifier, links, videos and images of the tweet the chunks came from. */
+  metadata: Metadata;
+}
+
+/**
+ * Turns a JSON-serialized tweet payload into chunked text plus per-tweet
+ * metadata.
+ *
+ * Two payload shapes are handled:
+ *  - a JSON string: it is passed to getRawTweet, the result is parsed as JSON
+ *    and treated as a single raw tweet object (`text`, `id_str`, `entities`,
+ *    `extended_entities`);
+ *  - a JSON array: every element is treated as a `Tweet`.
+ *
+ * @param threadText JSON text holding either of the shapes above.
+ * @returns An object with `type: "tweet"` and one chunk entry per tweet.
+ * @throws SyntaxError when `threadText` (or the getRawTweet result) is not
+ *   valid JSON.
+ * @throws TypeError when the parsed payload is neither a string nor an array.
+ */
+export function chunkThread(threadText: string): TweetChunks {
+  const thread = JSON.parse(threadText);
+
+  if (typeof thread === "string") {
+    console.log("DA WORKER FAILED DO SOMEHTING FIX DA WROKER");
+    const rawTweet = getRawTweet(thread);
+    const parsedTweet: any = JSON.parse(rawTweet);
+
+    // NOTE(review): 1536 is the size limit handed to chunkText; its unit is
+    // not visible from here - confirm against ./chonker.
+    const chunkedTweet = chunkText(parsedTweet.text, 1536);
+
+    // `entities`, `entities.urls`, `extended_entities` and its `media` list
+    // may each be absent, so every level is guarded and falls back to [].
+    const urls: any[] = parsedTweet.entities?.urls ?? [];
+    const media: any[] = parsedTweet.extended_entities?.media ?? [];
+
+    const metadata: Metadata = {
+      tweetId: parsedTweet.id_str,
+      tweetLinks: urls.map((url: any) => url.expanded_url),
+      // Take the first variant of each video; videos that carry no variant
+      // URL are skipped instead of throwing.
+      tweetVids: media
+        .filter((item: any) => item.type === "video")
+        .map((item: any) => item.video_info?.variants?.[0]?.url)
+        .filter((url: any) => url != null),
+      tweetImages: media
+        .filter((item: any) => item.type === "photo")
+        .map((item: any) => item.media_url_https),
+    };
+
+    const chunks = [{ chunkedTweet, metadata }];
+
+    return { type: "tweet", chunks };
+  }
+
+  console.log(JSON.stringify(thread));
+
+  if (!Array.isArray(thread)) {
+    throw new TypeError(
+      "chunkThread expected a JSON array of tweets or a JSON string",
+    );
+  }
+
+  const chunkedTweets = thread.map((tweet: Tweet) => {
+    const chunkedTweet = chunkText(tweet.text, 1536);
+
+    const metadata: Metadata = {
+      tweetId: tweet.id,
+      tweetLinks: tweet.links,
+      tweetVids: tweet.videos,
+      tweetImages: tweet.images,
+    };
+
+    return { chunkedTweet, metadata };
+  });
+
+  return { type: "tweet", chunks: chunkedTweets };
+}
diff --git a/apps/extension/helpers.ts b/apps/extension/helpers.ts
index 9e95f963..029de5c7 100644
--- a/apps/extension/helpers.ts
+++ b/apps/extension/helpers.ts
@@ -43,7 +43,7 @@ export function transformTweetData(input: any): Tweet | null {
display_text_range: tweet.legacy.display_text_range,
entities: {
hashtags: tweet.legacy.entities.hashtags,
- urls: tweet.legacy.entities.urls,
+ urls: tweet.legacy.entities?.urls,
user_mentions: tweet.legacy.entities.user_mentions,
symbols: tweet.legacy.entities.symbols,
},