aboutsummaryrefslogtreecommitdiff
path: root/apps/backend/src/utils/tweetsToThreads.ts
blob: 85f69b870658883cd18af6cb7a8d6bd13cd2243c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import * as cheerio from "cheerio";
import { BaseError } from "../errors/baseError";
import { Ok, Result } from "../errors/results";

/** One entry of an unrolled thread, as assembled by `unrollTweets`. */
interface Tweet {
  /** Built as `<tweet id>_<paragraph index>` by `unrollTweets`. */
  id: string;
  /** Paragraph text with URLs removed and runs of whitespace collapsed. */
  text: string;
  /** URLs that appeared in the text, after `expandShortUrl` was applied to each. */
  links: string[];
  /** `src` values of the `img.tweetimg` elements collected for this paragraph. */
  images: string[];
  /** `src` values of the `video > source` elements collected for this paragraph. */
  videos: string[];
}

/**
 * Error type declared for failures while processing a thread.
 *
 * Forwards a fixed label, followed by the optional `message` and `source`,
 * to the `BaseError` constructor.
 */
class ProcessTweetsError extends BaseError {
  /**
   * @param message Optional description, passed through to `BaseError`.
   * @param source Optional origin of the failure, passed through to `BaseError`.
   */
  constructor(message?: string, source?: string) {
    // Label spelling fixed (was "Proceessing").
    super("[Thread Processing Error]", message, source);
  }
}

/** Success payload of `unrollTweets`: the thread's tweets as a list. */
type TweetProcessResult = Tweet[];

// Open question (from the original author): is URL-level caching needed here?
// Nothing is cached at the moment.

/**
 * Extracts the tweet id from a tweet URL (or returns a bare id unchanged).
 *
 * Query strings and fragments are ignored. A numeric id following `/status/`
 * (or `/statuses/`) wins, so suffixes like `/photo/1` do not replace it;
 * otherwise the last non-empty path segment is used, which also tolerates
 * trailing slashes.
 *
 * @returns The id, or an empty string when the input has no usable segment.
 */
function extractTweetId(url: string): string {
  // "?s=20&t=..." and "#..." are sharing decorations, not part of the id.
  const path = url.trim().split(/[?#]/)[0] ?? "";

  const statusId = /\/status(?:es)?\/(\d+)/.exec(path)?.[1];
  if (statusId) {
    return statusId;
  }

  const segments = path.split("/").filter((segment) => segment.length > 0);
  return segments[segments.length - 1] ?? "";
}

/**
 * Fetches the unrollnow.com page for a tweet and converts each non-empty
 * `.mainarticle p` paragraph into a `Tweet`.
 *
 * For every paragraph the URLs in its text are expanded with
 * `expandShortUrl` and stripped from the text; images and videos are read
 * from the sibling elements that follow the paragraph up to the next `<p>`.
 *
 * @param url Tweet URL (or bare tweet id) identifying the thread.
 * @returns `Ok` wrapping the collected tweets. This function never produces
 *   an error `Result` itself; failures surface as thrown errors.
 * @throws Error when no tweet id can be extracted from `url`, or when
 *   unrollnow.com answers with a non-OK status. Rejections from `fetch`
 *   for the page request propagate unchanged.
 */
export async function unrollTweets(
  url: string
): Promise<Result<TweetProcessResult, ProcessTweetsError>> {
  const tweetId = extractTweetId(url);
  if (tweetId.length === 0) {
    throw new Error(`Could not extract a tweet id from URL: ${url}`);
  }

  const response = await fetch(
    // Encode so an unexpected id cannot alter the request path.
    `https://unrollnow.com/status/${encodeURIComponent(tweetId)}`,
    {
      headers: {
        "User-Agent":
          "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Cache-Control": "max-age=3600",
      },
    }
  );

  if (!response.ok) {
    const error = await response.text();
    console.error(error);
    throw new Error(`HTTP error! status: ${response.status} - ${error}`);
  }

  const html = await response.text();
  const $ = cheerio.load(html);

  const urlRegex = /(https?:\/\/\S+)/g;
  const paragraphs = $(".mainarticle p").toArray();

  const processedTweets = await Promise.all(
    paragraphs.map(async (element, i): Promise<Tweet | null> => {
      const $tweet = $(element);
      let tweetText = $tweet.text().trim();
      if (tweetText.length < 1) {
        return null;
      }

      // A final paragraph reading exactly "yes" is dropped.
      // NOTE(review): presumably page boilerplate rather than thread content; confirm.
      if (i === paragraphs.length - 1 && tweetText.toLowerCase() === "yes") {
        return null;
      }

      const shortUrls = tweetText.match(urlRegex) || [];
      console.log("SHORT_URLS_LEN", shortUrls.length);
      console.log("SHORT_URLS", shortUrls);

      const expandedUrls = await Promise.all(
        shortUrls.map((shortUrl) => expandShortUrl(shortUrl))
      );

      tweetText = tweetText.replace(urlRegex, "").trim().replace(/\s+/g, " ");

      // Media lives in the siblings between this paragraph and the next one.
      const attachments = $tweet.nextUntil("p");

      const images = attachments
        .find("img.tweetimg")
        .map((_, img) => $(img).attr("src"))
        .get();

      const videos = attachments
        .find("video > source")
        .map((_, vid) => $(vid).attr("src"))
        .get();

      return {
        // The index is the paragraph position, so skipped paragraphs leave gaps.
        id: `${tweetId}_${i}`,
        text: tweetText,
        links: expandedUrls,
        images,
        videos,
      };
    })
  );

  const tweets: Array<Tweet> = processedTweets.filter(
    (tweet): tweet is Tweet => tweet !== null
  );

  return Ok(tweets);
}

/**
 * Resolves a (possibly shortened) URL to the address it redirects to.
 *
 * Issues a `HEAD` request that follows redirects and reports the response's
 * final URL. If the request throws, the failure is logged and the input URL
 * is returned unchanged.
 *
 * @param shortUrl URL to resolve.
 * @returns The response's URL, or `shortUrl` when the request fails.
 */
async function expandShortUrl(shortUrl: string): Promise<string> {
  // Start from the input so a failed lookup falls through to it.
  let resolved = shortUrl;

  try {
    const { url } = await fetch(shortUrl, {
      redirect: "follow",
      method: "HEAD",
    });
    resolved = url;
  } catch (err) {
    console.error(`Failed to expand URL: ${shortUrl}`, err);
  }

  return resolved;
}