diff options
| author | Mahesh Sanikommmu <[email protected]> | 2025-08-16 18:50:10 -0700 |
|---|---|---|
| committer | Mahesh Sanikommmu <[email protected]> | 2025-08-16 18:50:10 -0700 |
| commit | 39003aff23d64ff1d96074d71521f6023c9bec01 (patch) | |
| tree | 3f870c04b3dce315bba1b21aa2da158494e71774 /apps/backend/src/utils/tweetsToThreads.ts | |
| parent | Merge pull request #355 from supermemoryai/archive (diff) | |
| download | supermemory-39003aff23d64ff1d96074d71521f6023c9bec01.tar.xz supermemory-39003aff23d64ff1d96074d71521f6023c9bec01.zip | |
New Version of Supermemory Consumer App
Diffstat (limited to 'apps/backend/src/utils/tweetsToThreads.ts')
| -rw-r--r-- | apps/backend/src/utils/tweetsToThreads.ts | 108 |
1 file changed, 0 insertions, 108 deletions
diff --git a/apps/backend/src/utils/tweetsToThreads.ts b/apps/backend/src/utils/tweetsToThreads.ts deleted file mode 100644 index 85f69b87..00000000 --- a/apps/backend/src/utils/tweetsToThreads.ts +++ /dev/null @@ -1,108 +0,0 @@ -import * as cheerio from "cheerio"; -import { BaseError } from "../errors/baseError"; -import { Ok, Result } from "../errors/results"; - -interface Tweet { - id: string; - text: string; - links: Array<string>; - images: Array<string>; - videos: Array<string>; -} - -class ProcessTweetsError extends BaseError { - constructor(message?: string, source?: string) { - super("[Thread Proceessing Error]", message, source); - } -} - -type TweetProcessResult = Array<Tweet>; - -// there won't be a need for url caching right? -export async function unrollTweets( - url: string -): Promise<Result<TweetProcessResult, ProcessTweetsError>> { - const tweetId = url.split("/").pop(); - const response = await fetch(`https://unrollnow.com/status/${tweetId}`, { - headers: { - "User-Agent": - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", - "Cache-Control": "max-age=3600", - }, - }); - - if (!response.ok) { - const error = await response.text(); - console.error(error); - throw new Error(`HTTP error! 
status: ${response.status} - ${error}`); - } - - const html = await response.text(); - const $ = cheerio.load(html); - const tweets: Array<Tweet> = []; - - const urlRegex = /(https?:\/\/\S+)/g; - const paragraphs = $(".mainarticle p").toArray(); - - const processedTweets = await Promise.all( - paragraphs.map(async (element, i) => { - const $tweet = $(element); - let tweetText = $tweet.text().trim(); - if (tweetText.length < 1) { - return null; - } - - if (i === paragraphs.length - 1 && tweetText.toLowerCase() === "yes") { - return null; - } - - const shortUrls = tweetText.match(urlRegex) || []; - console.log("SHORT_URLS_LEN", shortUrls.length); - console.log("SHORT_URLS", shortUrls); - - const expandedUrls = await Promise.all(shortUrls.map(expandShortUrl)); - - tweetText = tweetText.replace(urlRegex, "").trim().replace(/\s+/g, " "); - - const images = $tweet - .nextUntil("p") - .find("img.tweetimg") - .map((i, img) => $(img).attr("src")) - .get(); - - const videos = $tweet - .nextUntil("p") - .find("video > source") - .map((i, vid) => $(vid).attr("src")) - .get(); - - return { - id: `${tweetId}_${i}`, - text: tweetText, - links: expandedUrls, - images: images, - videos: videos, - }; - }) - ); - - tweets.push( - ...processedTweets.filter((tweet): tweet is Tweet => tweet !== null) - ); - - return Ok(tweets); -} - -async function expandShortUrl(shortUrl: string): Promise<string> { - try { - const response = await fetch(shortUrl, { - method: "HEAD", - redirect: "follow", - }); - const expandedUrl = response.url; - return expandedUrl; - } catch (error) { - console.error(`Failed to expand URL: ${shortUrl}`, error); - return shortUrl; - } -} |