1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
|
import { TweetChunks } from "../../types";
import chunkText from "./chonker";
import { getRawTweet } from "@repo/shared-types/utils";
interface Tweet {
id: string;
text: string;
links: Array<string>;
images: Array<string>;
videos: Array<string>;
}
interface Metadata {
tweetId: string;
tweetLinks: any[];
tweetVids: any[];
tweetImages: any[];
}
export interface ThreadTweetData {
chunkedTweet: string[];
metadata: Metadata;
}
export function chunkThread(threadText: string): TweetChunks {
let thread = threadText;
try {
thread = JSON.parse(threadText);
} catch (e) {
console.log("error: thread is not json.", e);
}
if (typeof threadText == "string") {
console.log("DA WORKER FAILED DO SOMEHTING FIX DA WROKER", thread);
const rawTweet = getRawTweet(thread);
const parsedTweet: any = JSON.parse(rawTweet);
const chunkedTweet = chunkText(parsedTweet.text, 1536);
const metadata: Metadata = {
tweetId: parsedTweet.id_str,
tweetLinks: parsedTweet.entities?.urls.map(
(url: any) => url.expanded_url,
),
tweetVids:
parsedTweet.extended_entities?.media
.filter((media: any) => media.type === "video")
.map((media: any) => media.video_info!.variants[0].url) || [],
tweetImages:
parsedTweet.extended_entities?.media
.filter((media: any) => media.type === "photo")
.map((media: any) => media.media_url_https!) || [],
};
const chunks = [{ chunkedTweet: chunkedTweet, metadata }];
return { type: "tweet", chunks };
} else {
const chunkedTweets = (thread as any).map((tweet: Tweet) => {
const chunkedTweet = chunkText(tweet.text, 1536);
const metadata = {
tweetId: tweet.id,
tweetLinks: tweet.links,
tweetVids: tweet.videos,
tweetImages: tweet.images,
};
return { chunkedTweet, metadata };
});
return { type: "tweet", chunks: chunkedTweets };
}
}
|