1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
|
import { JSDOM } from "jsdom";
import { mapNodeList } from "./util";
/**
 * Reads the text of the first descendant of `ele` that matches `selector`
 * and strips every run of whitespace from it (not just the ends).
 *
 * @param ele - Element to search inside.
 * @param selector - CSS selector of the descendant whose text is wanted.
 * @returns The whitespace-free text, or an empty string when nothing matches
 *   or the matched node has no text.
 */
function selectTrimTextContent(ele: Element, selector: string): string {
  const match = ele.querySelector(selector);
  if (!match) {
    return "";
  }

  const rawText = match.textContent;
  if (!rawText) {
    return "";
  }

  return rawText.replace(/\s+/g, "");
}
/**
 * Collects the raw schedule data carried by a single thumbnail element.
 *
 * @param thumb - Thumbnail element to read.
 * @returns An object with:
 *   - `time`: whitespace-free text of the `.datetime` descendant,
 *   - `name`: whitespace-free text of the `.name` descendant,
 *   - `images`: `src` of every `<img>` inside the thumbnail whose source
 *     starts with `https://yt3.ggpht.com`, in the order `mapNodeList` yields them.
 */
function dataFromAThumbnail(thumb: Element) {
  const time = selectTrimTextContent(thumb, ".datetime");
  const name = selectTrimTextContent(thumb, ".name");

  const imgElements = thumb.querySelectorAll("img");
  // mapNodeList comes from ./util; presumably it maps a NodeList into an array.
  const sources = mapNodeList(imgElements, (img) => img.src);
  // Keep only images served from the yt3.ggpht.com host prefix.
  const images = sources.filter((src) => src.startsWith("https://yt3.ggpht.com"));

  return { time, name, images };
}
/** One live entry as scraped from a thumbnail, before guest images are resolved to names. */
interface LiveBlock {
/** Start time, built from the current year, the row's date header and the thumbnail's time with a +09:00 offset. */
time: Date,
/** Whitespace-free text of the thumbnail's `.name` element. */
streamer: string,
/** Sources of the thumbnail's images that start with `https://yt3.ggpht.com`; the first is treated as the streamer's icon, the rest as guests. */
images: string[],
}
/**
 * Parses the schedule page into a flat list of live blocks.
 *
 * Rows matching `#all > .container > .row` are walked in document order.
 * A row containing a `.holodule` element is a date header: its `M/D` date
 * becomes the current date for the rows that follow. Every other row
 * contributes one live block per `a.thumbnail` it contains.
 *
 * @param html - Raw HTML of the schedule page.
 * @returns One `LiveBlock` per thumbnail, in document order.
 * @throws Error when a date header contains no `M/D` date.
 */
function parseToLiveBlocks(html: string | Buffer): LiveBlock[] {
  const { window } = new JSDOM(html);
  try {
    // NOTE(review): the page carries no year, so the current local year is
    // assumed; entries that straddle New Year may get the wrong year — confirm.
    const year = new Date().getFullYear().toString();
    const rows = window.document.querySelectorAll('#all > .container > .row');
    let date = '';
    const lives: LiveBlock[] = [];

    rows.forEach((row) => {
      const dateDiv = row.querySelector('.holodule');
      if (dateDiv) {
        const headerText = dateDiv.textContent?.replace(/\s+/g, "") ?? "";
        const matched = /(\d+)\/(\d+)/.exec(headerText);
        if (!matched) {
          throw new Error(
            `Cannot find a month/day date in schedule header: "${headerText}"`
          );
        }
        const [, month = "", day = ""] = matched;
        // Zero-pad so the timestamp below is a valid ISO 8601 string even for
        // headers such as "1/5".
        date = `${month.padStart(2, "0")}-${day.padStart(2, "0")}`;
        return;
      }

      row.querySelectorAll("a.thumbnail").forEach((thumbnail) => {
        const { time, name, images } = dataFromAThumbnail(thumbnail);
        lives.push({
          images,
          // Schedule times are interpreted as Japan time (UTC+9).
          time: new Date(`${year}-${date}T${time}:00+09:00`),
          streamer: name,
        });
      });
    });

    return lives;
  } finally {
    // Only plain strings and Dates were extracted, so the DOM can be released.
    window.close();
  }
}
/** Dictionary keyed by streamer name whose values are icon image sources. */
type StreamerImageDict = Record<string, string>
/** Dictionary keyed by icon image source whose values are streamer names (the output of `reverseDict`). */
type ImageStreamerDict = Record<string, string>
/**
 * Builds an updated streamer → icon dictionary from the given live blocks.
 *
 * The first image of each block is taken as that streamer's icon and
 * overrides any entry carried over from `oldDict`. Blocks without any image
 * are skipped, so a previously stored icon is never replaced by `undefined`.
 *
 * @param liveBlocks - Live blocks to read streamer names and icons from.
 * @param oldDict - Previously known dictionary; it is copied, not mutated.
 * @returns A new dictionary containing the old entries plus the updates.
 */
function nextStreamerImageDict(liveBlocks: LiveBlock[], oldDict: StreamerImageDict) {
  const dict = { ...oldDict };
  for (const { images, streamer } of liveBlocks) {
    const [icon] = images;
    if (icon !== undefined) {
      dict[streamer] = icon;
    }
  }
  return dict;
}
/**
 * Inverts a streamer → image dictionary into an image → streamer dictionary.
 *
 * When several streamers share the same image, the one enumerated last wins.
 *
 * @param dict - Dictionary keyed by streamer name.
 * @returns A new dictionary keyed by image source.
 */
function reverseDict(dict: StreamerImageDict): ImageStreamerDict {
  const byImage: ImageStreamerDict = {};
  for (const [streamerName, imageSrc] of Object.entries(dict)) {
    byImage[imageSrc] = streamerName;
  }
  return byImage;
}
/** One live in the parsed schedule, with guest images resolved to streamer names. */
interface LiveInfo {
// Start time of the live.
time: Date
// Name of the streamer the live belongs to.
streamer: string
// Names looked up for the images after the first one; images with no known name are dropped.
guests: string[]
}
/** Return value of `parseScheduleHtml`. */
interface ParseResult {
// Lives parsed from the schedule page.
lives: LiveInfo[]
// Dictionary returned alongside the lives; declared as streamer name -> icon image source.
dict: StreamerImageDict
}
/**
 * Parses the hololive schedule page into a list of lives and an updated
 * streamer → icon dictionary.
 *
 * Guests are identified by icon: every image after the first one in a live
 * is looked up in the dictionary, so passing the dictionary returned by a
 * previous call lets guests be recognised even when they have no live of
 * their own on the current page.
 *
 * @param html - HTML of https://schedule.hololive.tv. Fetch it with the Japan timezone (GMT+9).
 * @param storedDict - Previously stored object of the form `{ vtuberName: iconImageSrc }`.
 * @returns The live schedule and the updated `{ vtuberName: iconImageSrc }`
 *   dictionary, suitable for storing and passing back as `storedDict`.
 */
function parseScheduleHtml(
  html: string | Buffer,
  storedDict: StreamerImageDict = {}
): ParseResult {
  const liveBlocks = parseToLiveBlocks(html);
  const streamerImageDict = nextStreamerImageDict(liveBlocks, storedDict);
  // Guests are looked up by image, so an image → streamer view is needed.
  const imageStreamerDict = reverseDict(streamerImageDict);

  const lives = liveBlocks.map(({ streamer, images, time }) => {
    // The first image is the streamer's own icon; the rest are guests.
    // slice (not splice) keeps the live block's image list intact.
    const guests = images
      .slice(1)
      .map((src) => imageStreamerDict[src])
      .filter((guest): guest is string => Boolean(guest));
    return { time, streamer, guests };
  });

  // Return the streamer → image dictionary (not the reversed lookup table) so
  // it matches the documented shape of `storedDict`.
  return { lives, dict: streamerImageDict };
}
export default parseScheduleHtml;
|