diff options
| author | Dhravya Shah <[email protected]> | 2024-08-05 18:38:00 -0700 |
|---|---|---|
| committer | Dhravya Shah <[email protected]> | 2024-08-05 18:38:00 -0700 |
| commit | e6526826715fd2bc8fcb05145cbfd9c0cdc02b95 (patch) | |
| tree | ad1cf74dd0964529e9f59de879ca4744d2b9c880 | |
| parent | Merge branch 'main' of github.com:supermemoryai/supermemory (diff) | |
| download | supermemory-e6526826715fd2bc8fcb05145cbfd9c0cdc02b95.tar.xz supermemory-e6526826715fd2bc8fcb05145cbfd9c0cdc02b95.zip | |
fix: entities urls might not be there
| -rw-r--r-- | apps/cf-ai-backend/src/queueConsumer/chunkers/chunkTweet.ts | 67 | ||||
| -rw-r--r-- | apps/extension/helpers.ts | 2 |
2 files changed, 68 insertions, 1 deletions
```diff
diff --git a/apps/cf-ai-backend/src/queueConsumer/chunkers/chunkTweet.ts b/apps/cf-ai-backend/src/queueConsumer/chunkers/chunkTweet.ts
new file mode 100644
index 00000000..ae1b18c6
--- /dev/null
+++ b/apps/cf-ai-backend/src/queueConsumer/chunkers/chunkTweet.ts
@@ -0,0 +1,67 @@
+import { TweetChunks } from "../../types";
+import chunkText from "./chonker";
+import { getRawTweet } from "@repo/shared-types/utils";
+
+interface Tweet {
+  id: string;
+  text: string;
+  links: Array<string>;
+  images: Array<string>;
+  videos: Array<string>;
+}
+interface Metadata {
+  tweetId: string;
+  tweetLinks: any[];
+  tweetVids: any[];
+  tweetImages: any[];
+}
+
+export interface ThreadTweetData {
+  chunkedTweet: string[];
+  metadata: Metadata;
+}
+
+export function chunkThread(threadText: string): TweetChunks {
+  const thread = JSON.parse(threadText);
+  if (typeof thread == "string") {
+    console.log("DA WORKER FAILED DO SOMEHTING FIX DA WROKER");
+    const rawTweet = getRawTweet(thread);
+    const parsedTweet: any = JSON.parse(rawTweet);
+
+    const chunkedTweet = chunkText(parsedTweet.text, 1536);
+    const metadata: Metadata = {
+      tweetId: parsedTweet.id_str,
+      tweetLinks: parsedTweet.entities?.urls.map(
+        (url: any) => url.expanded_url,
+      ),
+      tweetVids:
+        parsedTweet.extended_entities?.media
+          .filter((media: any) => media.type === "video")
+          .map((media: any) => media.video_info!.variants[0].url) || [],
+      tweetImages:
+        parsedTweet.extended_entities?.media
+          .filter((media: any) => media.type === "photo")
+          .map((media: any) => media.media_url_https!) || [],
+    };
+
+    const chunks = [{ chunkedTweet: chunkedTweet, metadata }];
+
+    return { type: "tweet", chunks };
+  } else {
+    console.log(JSON.stringify(thread));
+    const chunkedTweets = thread.map((tweet: Tweet) => {
+      const chunkedTweet = chunkText(tweet.text, 1536);
+
+      const metadata = {
+        tweetId: tweet.id,
+        tweetLinks: tweet.links,
+        tweetVids: tweet.videos,
+        tweetImages: tweet.images,
+      };
+
+      return { chunkedTweet, metadata };
+    });
+
+    return { type: "tweet", chunks: chunkedTweets };
+  }
+}
diff --git a/apps/extension/helpers.ts b/apps/extension/helpers.ts
index 9e95f963..029de5c7 100644
--- a/apps/extension/helpers.ts
+++ b/apps/extension/helpers.ts
@@ -43,7 +43,7 @@ export function transformTweetData(input: any): Tweet | null {
       display_text_range: tweet.legacy.display_text_range,
       entities: {
         hashtags: tweet.legacy.entities.hashtags,
-        urls: tweet.legacy.entities.urls,
+        urls: tweet.legacy.entities?.urls,
         user_mentions: tweet.legacy.entities.user_mentions,
         symbols: tweet.legacy.entities.symbols,
       },
```