aboutsummaryrefslogtreecommitdiff
path: root/src/parseScheduleHtml.ts
blob: 56c5f2abad31b0ae700e2b3d137737a1b0a15e20 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import { JSDOM } from 'jsdom'
import { mapNodeList } from './util'

/**
 * Reads the text of the first descendant of `ele` matching `selector` and
 * strips every run of whitespace from it (not only leading/trailing).
 *
 * @param ele - Element to search within.
 * @param selector - CSS selector for the descendant to read.
 * @returns The whitespace-free text, or `''` when nothing matches or the
 *   matched node has no text.
 */
function selectTrimTextContent(ele: Element, selector: string): string {
  const matched = ele.querySelector(selector)
  const rawText = matched?.textContent

  // Covers: no match, null textContent, and empty textContent.
  if (!rawText) {
    return ''
  }

  return rawText.replace(/\s+/g, '')
}

/**
 * Collects the raw pieces of one schedule thumbnail.
 *
 * @param thumb - The thumbnail element to read.
 * @returns `time` and `name` as the whitespace-stripped text of the
 *   `.datetime` and `.name` descendants, and `images` as the `src` of every
 *   `img` descendant whose URL starts with `https://yt3.ggpht.com`
 *   (presumably the channel icons; other images are discarded).
 */
function dataFromAThumbnail(thumb: Element) {
  const imgElements = thumb.querySelectorAll('img')
  const allSources = mapNodeList(imgElements, (img) => img.src)
  const iconSources = allSources.filter((src) => src.startsWith('https://yt3.ggpht.com'))

  return {
    time: selectTrimTextContent(thumb, '.datetime'),
    name: selectTrimTextContent(thumb, '.name'),
    images: iconSources,
  }
}

/**
 * One live entry as scraped from a single `a.thumbnail` of the schedule page,
 * before image sources are resolved to guest names.
 */
interface LiveBlock {
  /**
   * Start time, built from the most recent date header and the thumbnail's
   * time text, interpreted with a +09:00 offset.
   */
  time: Date,
  /** Whitespace-stripped text of the thumbnail's `.name` element. */
  streamer: string,
  /**
   * `img` sources in the thumbnail that start with `https://yt3.ggpht.com`.
   * The first entry is recorded as the streamer's own image; later entries are
   * looked up as guests.
   */
  images: string[],
}

/**
 * Parses the schedule page HTML into one LiveBlock per `a.thumbnail`.
 *
 * Rows matching `#all > .container > .row` are walked in document order. A row
 * containing a `.holodule` element is a date header; its `M/D` text applies to
 * the thumbnails of every following row until the next header.
 *
 * The page carries no year. Each entry is therefore given the year (previous,
 * current or next, relative to the current year at UTC+9) that places it
 * closest to the present moment, so rows on the far side of New Year are not
 * dated twelve months off.
 *
 * @param html - Raw HTML of the schedule page.
 * @returns Live blocks in document order. `time` is an Invalid Date when the
 *   thumbnail's time text is not parseable or no date header preceded it.
 * @throws Error when a date header contains no `M/D` text.
 */
function parseToLiveBlocks(html: string | Buffer): LiveBlock[] {
  const { window } = new JSDOM(html)

  try {
    const { document } = window
    const now = Date.now()
    // Year as seen at UTC+9, independent of the host machine's timezone.
    const jstYear = new Date(now + 9 * 60 * 60 * 1000).getUTCFullYear()

    // Starts from the current year and only switches to a neighbouring year
    // when that is strictly closer to now; an unparseable input stays Invalid.
    const resolveTime = (monthDay: string, time: string): Date => {
      let best = new Date(`${jstYear}-${monthDay}T${time}:00+09:00`)
      for (const year of [jstYear - 1, jstYear + 1]) {
        const candidate = new Date(`${year}-${monthDay}T${time}:00+09:00`)
        const candidateGap = Math.abs(candidate.getTime() - now)
        const bestGap = Math.abs(best.getTime() - now)
        if (candidateGap < bestGap) {
          best = candidate
        }
      }
      return best
    }

    const rows = document.querySelectorAll('#all > .container > .row')

    // `MM-DD` of the most recent date header; empty until the first one.
    let date = ''

    const lives: LiveBlock[] = []

    rows.forEach(row => {
      const dateDiv = row.querySelector('.holodule')
      if (dateDiv) {
        const headerText = dateDiv.textContent?.replace(/\s+/g, '') || ''
        const matched = headerText.match(/(\d+)\/(\d+)/)
        if (!matched) {
          throw new Error(`Cannot find a month/day in schedule date header: "${headerText}"`)
        }
        const [, month = '', day = ''] = matched
        // Zero-pad so the ISO date-time string below stays valid for `1/5`.
        date = `${month.padStart(2, '0')}-${day.padStart(2, '0')}`
      } else {
        const allThumbnail = row.querySelectorAll('a.thumbnail')
        allThumbnail.forEach(thumbnail => {
          const { time, name, images } = dataFromAThumbnail(thumbnail)
          lives.push({
            images,
            time: resolveTime(date, time),
            streamer: name,
          })
        })
      }
    })

    return lives
  } finally {
    // Release the resources held by the jsdom window; only plain data escapes.
    window.close()
  }
}

/** Maps a streamer name to that streamer's image src. */
type StreamerImageDict = Record<string, string>
/** Maps an image src back to the streamer name it belongs to. */
type ImageStreamerDict = Record<string, string>

/**
 * Builds an updated streamer-to-image dictionary from the parsed live blocks.
 *
 * The first image of each block is recorded as its streamer's image, replacing
 * any entry carried over from `oldDict`. A block without images leaves the
 * existing entry untouched instead of overwriting it with `undefined`.
 *
 * @param liveBlocks - Parsed live blocks, applied in order (later blocks win).
 * @param oldDict - Previously known dictionary; it is copied, never mutated.
 * @returns A new dictionary containing the old entries plus the updates.
 */
function nextStreamerImageDict(liveBlocks: LiveBlock[], oldDict: StreamerImageDict): StreamerImageDict {
  const dict: StreamerImageDict = { ...oldDict }

  for (const { images, streamer } of liveBlocks) {
    const [ownImage] = images
    // No usable image in this block: keep whatever was known before.
    if (ownImage) {
      dict[streamer] = ownImage
    }
  }

  return dict
}

/**
 * Inverts a streamer-to-image dictionary into an image-to-streamer one.
 *
 * @param dict - Dictionary keyed by streamer name.
 * @returns A new dictionary keyed by image src. When several streamers share
 *   an image, the one iterated last wins.
 */
function reverseDict(dict: StreamerImageDict): ImageStreamerDict {
  const byImage: ImageStreamerDict = {}

  for (const [name, imageSrc] of Object.entries(dict)) {
    byImage[imageSrc] = name
  }

  return byImage
}

/** One live entry as returned to callers, with guest images resolved to names. */
interface LiveInfo {
  /** Start time of the live. */
  time: Date
  /** Name of the streamer taken from the thumbnail. */
  streamer: string
  /**
   * Streamer names resolved from the thumbnail's images after the first one;
   * images that map to no known streamer are left out.
   */
  guests: string[]
}

/** Return value of `parseScheduleHtml`. */
interface ParseResult {
  /** One entry per thumbnail found in the schedule HTML. */
  lives: LiveInfo[]
  /** Dictionary returned alongside the lives; declared as streamer name to image src. */
  dict: StreamerImageDict
}

/**
 * Parses the hololive schedule page into a list of lives with guest names.
 *
 * The first image of every thumbnail is treated as the streamer's own; the
 * remaining images are resolved to guest names through the streamer/image
 * dictionary, which is seeded from `storedDict` and refreshed with the images
 * seen in this page. Images that belong to no known streamer are dropped.
 *
 * @param html - Html of https://schedule.hololive.tv. Get with Japan timezone (GMT+9)
 * @param storedDict - An object stored { vtuberName: iconImageSrc }
 * @returns - Lives schedule and the updated { vtuberName: iconImageSrc } dict,
 *   in the same shape as `storedDict` so it can be stored and passed back in
 *   on the next call.
 */
function parseScheduleHtml(
  html: string | Buffer,
  storedDict: StreamerImageDict = {},
): ParseResult {
  const liveBlocks = parseToLiveBlocks(html)
  const streamerImageDict = nextStreamerImageDict(liveBlocks, storedDict)
  // Reverse lookup used only internally to turn guest images into names.
  const imageStreamerDict = reverseDict(streamerImageDict)

  const lives = liveBlocks.map(({ streamer, images, time }) => {
    // `slice`, not `splice`: the block's image list must stay intact.
    const guests = images
      .slice(1)
      .map((image) => imageStreamerDict[image])
      .filter((guest): guest is string => Boolean(guest))

    return {
      time,
      streamer,
      guests,
    }
  })

  // Return the streamer -> image dict (not the reversed one) so the result
  // matches ParseResult and round-trips as `storedDict`.
  return { lives, dict: streamerImageDict }
}

// The module's default export is the schedule parser entry point.
export default parseScheduleHtml