1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
|
import { JSDOM } from 'jsdom'
import { mapNodeList } from './util'
/**
 * Reads the text of the first descendant of `ele` matching `selector` and
 * strips every whitespace character from it (not only leading/trailing).
 *
 * @param ele - Element to search within.
 * @param selector - CSS selector of the descendant to read.
 * @returns The whitespace-free text, or `''` when nothing matches or the
 * matched element has no text.
 */
function selectTrimTextContent(ele: Element, selector: string): string {
  const rawText = ele.querySelector(selector)?.textContent
  if (!rawText) {
    return ''
  }
  return rawText.replace(/\s+/g, '')
}
/**
 * Collects the raw fields of one schedule thumbnail.
 *
 * @param thumb - Thumbnail element to read from.
 * @returns `time` (whitespace-free text of `.datetime`), `name`
 * (whitespace-free text of `.name`) and `images` (the `src` of every `<img>`
 * inside the thumbnail that starts with `https://yt3.ggpht.com`).
 */
function dataFromAThumbnail(thumb: Element) {
  // Only images served from yt3.ggpht.com are kept; any other <img> is dropped.
  const isGgphtImage = (src: string) => src.startsWith('https://yt3.ggpht.com')
  return {
    time: selectTrimTextContent(thumb, '.datetime'),
    name: selectTrimTextContent(thumb, '.name'),
    images: mapNodeList(thumb.querySelectorAll('img'), (img) => img.src).filter(isGgphtImage),
  }
}
/** One live as scraped from a schedule thumbnail, before guests are resolved to names. */
interface LiveBlock {
  /** Date built from the schedule's date header and the thumbnail's `.datetime` text. */
  time: Date;
  /** Whitespace-free text of the thumbnail's `.name` element. */
  streamer: string;
  /**
   * Image URLs of the thumbnail that start with `https://yt3.ggpht.com`.
   * The first one is used as the streamer's own image; the rest are looked up as guests.
   */
  images: string[];
}
/**
 * Builds the absolute start time of a live from a schedule date and a time of day,
 * both interpreted in Japan time (+09:00).
 *
 * The schedule page carries no year, so the year of `now` is tried first. If
 * that lands more than half a year away from `now`, the neighbouring year is
 * used instead, which keeps rows on the other side of New Year in the right year.
 *
 * @param monthDay - Zero-padded date in `MM-DD` form.
 * @param time - Time of day in `H:MM` or `HH:MM` form.
 * @param now - Reference instant used to pick the year; defaults to the current time.
 * @returns The resolved date; an Invalid Date when the inputs do not form a valid timestamp.
 */
function resolveLiveTime(monthDay: string, time: string, now: Date = new Date()): Date {
  const halfYearMs = 183 * 24 * 60 * 60 * 1000
  // ISO 8601 needs a two-digit hour; pad a single-digit one.
  const paddedTime = time.replace(/^(\d):/, '0$1:')
  const atYear = (year: number): Date =>
    new Date(`${year}-${monthDay}T${paddedTime}:00+09:00`)

  const year = now.getFullYear()
  const candidate = atYear(year)
  // With an Invalid Date the offset is NaN, both comparisons are false and the
  // Invalid Date is returned unchanged.
  const offset = candidate.getTime() - now.getTime()
  if (offset > halfYearMs) {
    return atYear(year - 1)
  }
  if (offset < -halfYearMs) {
    return atYear(year + 1)
  }
  return candidate
}

/**
 * Parses the schedule page HTML into one `LiveBlock` per `a.thumbnail`.
 *
 * Rows matching `#all > .container > .row` are walked in document order. A row
 * containing a `.holodule` element sets the current date; every other row
 * contributes its thumbnails, stamped with that date.
 *
 * @param html - HTML of the schedule page.
 * @returns The lives in document order.
 * @throws Error when a `.holodule` date header contains no `M/D` date.
 */
function parseToLiveBlocks(html: string | Buffer): LiveBlock[] {
  const { window } = new JSDOM(html)
  const { document } = window
  // One reference instant for the whole page so every row resolves its year consistently.
  const now = new Date()
  const rows = document.querySelectorAll('#all > .container > .row')
  let date = ''
  const lives: LiveBlock[] = []
  rows.forEach(row => {
    const dateDiv = row.querySelector('.holodule')
    if (dateDiv) {
      const headerText = dateDiv.textContent?.replace(/\s+/g, '') ?? ''
      const matched = headerText.match(/(\d+)\/(\d+)/)
      if (!matched) {
        throw new Error(`Cannot find a month/day in schedule date header: "${headerText}"`)
      }
      const [, month = '', day = ''] = matched
      // Zero-pad so the value is always a valid ISO 8601 `MM-DD` fragment.
      date = `${month.padStart(2, '0')}-${day.padStart(2, '0')}`
    } else {
      row.querySelectorAll('a.thumbnail').forEach(thumbnail => {
        const { time, name, images } = dataFromAThumbnail(thumbnail)
        lives.push({
          images,
          time: resolveLiveTime(date, time, now),
          streamer: name,
        })
      })
    }
  })
  return lives
}
/** Lookup from a streamer's name to the URL of that streamer's image. */
type StreamerImageDict = { [streamer: string]: string }
/** Inverse lookup from an image URL to the name of the streamer it belongs to. */
type ImageStreamerDict = { [image: string]: string }
/**
 * Returns a copy of `oldDict` in which every streamer appearing in `liveBlocks`
 * is mapped to the first image of their live block.
 *
 * Blocks without any image are skipped so that a previously stored image is
 * not overwritten with `undefined`.
 *
 * @param liveBlocks - Lives scraped from the schedule page.
 * @param oldDict - Previously known streamer → image entries; not mutated.
 * @returns A new dictionary containing the old entries plus the updates.
 */
function nextStreamerImageDict(liveBlocks: LiveBlock[], oldDict: StreamerImageDict) {
  const dict = { ...oldDict }
  for (const { images, streamer } of liveBlocks) {
    const [ownImage] = images
    // An empty `images` array yields undefined here; keep the old entry in that case.
    if (ownImage) {
      dict[streamer] = ownImage
    }
  }
  return dict
}
/**
 * Inverts a streamer → image dictionary into an image → streamer dictionary.
 *
 * When several streamers share the same image, the one visited last by
 * `Object.entries` wins.
 *
 * @param dict - Dictionary to invert; not mutated.
 * @returns A new dictionary keyed by image.
 */
function reverseDict(dict: StreamerImageDict): ImageStreamerDict {
  const inverted: ImageStreamerDict = {}
  for (const [name, image] of Object.entries(dict)) {
    inverted[image] = name
  }
  return inverted
}
/** One live of the schedule with its guests resolved to streamer names. */
interface LiveInfo {
  /** Time of the live. */
  time: Date;
  /** Name of the streamer hosting the live. */
  streamer: string;
  /** Names of the guests that could be resolved from the live's additional images. */
  guests: string[];
}
/** Result of parsing the schedule page. */
interface ParseResult {
  /** Every live found on the page. */
  lives: LiveInfo[];
  /** Dictionary returned alongside the lives, declared as `StreamerImageDict`. */
  dict: StreamerImageDict;
}
/**
 * Parses the hololive schedule page into a list of lives with guest names resolved.
 *
 * The first image of each live is taken as the streamer's own image and is
 * merged into the dictionary. Every further image is looked up in the updated
 * dictionary to find the guest it belongs to; images that match no known
 * streamer are ignored.
 *
 * @param html - HTML of https://schedule.hololive.tv, fetched with the Japan timezone (GMT+9).
 * @param storedDict - Previously stored `{ vtuberName: iconImageSrc }` object; not mutated.
 * @returns The lives schedule and the updated `{ vtuberName: iconImageSrc }` dict,
 * in the same shape as `storedDict` so it can be stored and passed back on the next call.
 */
function parseScheduleHtml(
  html: string | Buffer,
  storedDict: StreamerImageDict = {},
): ParseResult {
  const liveBlocks = parseToLiveBlocks(html)
  const streamerImageDict = nextStreamerImageDict(liveBlocks, storedDict)
  // The image → streamer lookup is only needed locally to resolve guests.
  const imageStreamerDict = reverseDict(streamerImageDict)
  const lives = liveBlocks.map(({ streamer, images, time }) => {
    // slice (not splice) so the live block's image list is left intact.
    const guests = images
      .slice(1)
      .map((image) => imageStreamerDict[image])
      .filter((guest): guest is string => Boolean(guest))
    return {
      time,
      streamer,
      guests,
    }
  })
  // Return the streamer → image dict, not the reversed one used for lookups.
  return { lives, dict: streamerImageDict }
}
// Default export: the schedule HTML parser defined above.
export default parseScheduleHtml
|