From df2b1e4012dc50b56141bf9012d8e367a3d08fdf Mon Sep 17 00:00:00 2001
From: wabilin
Date: Fri, 31 Jul 2020 20:15:19 +0900
Subject: get datas without date

---
 src/example.ts           | 20 ++++++++++++++++++++
 src/getScheduleHtml.ts   | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/parseScheduleHtml.ts | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/util.ts              | 12 ++++++++++++
 4 files changed, 140 insertions(+)
 create mode 100644 src/example.ts
 create mode 100644 src/getScheduleHtml.ts
 create mode 100644 src/parseScheduleHtml.ts
 create mode 100644 src/util.ts

(limited to 'src')

diff --git a/src/example.ts b/src/example.ts
new file mode 100644
index 0000000..394b220
--- /dev/null
+++ b/src/example.ts
@@ -0,0 +1,20 @@
+import getScheduleHtml from './getScheduleHtml'
+import parse from './parseScheduleHtml'
+
+/**
+ * Example entry point: downloads the schedule page, runs it through the
+ * parser and prints whatever the parser returns.
+ */
+async function main(): Promise<void> {
+  const html = await getScheduleHtml()
+  const parsed = parse(html)
+
+  // console.log(html.split('\n'))
+  console.log(parsed)
+}
+
+main().catch((e: unknown) => {
+  console.error(e)
+  // Make the process exit non-zero so a failure is visible to the caller.
+  process.exitCode = 1
+})
diff --git a/src/getScheduleHtml.ts b/src/getScheduleHtml.ts
new file mode 100644
index 0000000..66df92e
--- /dev/null
+++ b/src/getScheduleHtml.ts
@@ -0,0 +1,54 @@
+import https from 'https'
+
+// Request options for the schedule page. A `timezone=Asia/Tokyo` cookie is
+// sent along with the request.
+const OPTIONS = {
+  hostname: 'schedule.hololive.tv',
+  port: 443,
+  path: '/',
+  method: 'GET',
+  headers: {
+    Cookie: 'timezone=Asia/Tokyo'
+  }
+}
+
+/**
+ * Requests the schedule page over HTTPS and resolves with the response body
+ * decoded as UTF-8.
+ *
+ * The promise rejects when the request or the response stream emits an
+ * error, or when the server answers with a non-2xx status code.
+ */
+function getScheduleHtml(): Promise<string> {
+  const chunks: Uint8Array[] = []
+
+  return new Promise<string>((resolve, reject) => {
+    const req = https.request(OPTIONS, res => {
+      const { statusCode } = res
+      if (statusCode === undefined || statusCode < 200 || statusCode >= 300) {
+        // Discard the body so the socket is freed, then fail loudly instead
+        // of handing an error page to the parser.
+        res.resume()
+        reject(new Error(`Unexpected status code: ${String(statusCode)}`))
+        return
+      }
+
+      res.on('data', (chunk: Uint8Array) => {
+        chunks.push(chunk)
+      })
+
+      res.on('end', () => {
+        const html = Buffer.concat(chunks).toString('utf-8')
+        resolve(html)
+      })
+
+      res.on('error', reject)
+    })
+
+    req.on('error', reject)
+
+    req.end()
+  })
+}
+
+export default getScheduleHtml
diff --git a/src/parseScheduleHtml.ts b/src/parseScheduleHtml.ts
new file mode 100644
index 0000000..8177eee
--- /dev/null
+++ b/src/parseScheduleHtml.ts
@@ -0,0 +1,54 @@
+import { JSDOM } from "jsdom";
+import { mapNodeList } from "./util";
+
+/**
+ * Returns the text content of the first descendant of `ele` matching
+ * `selector` with every whitespace run removed (inner whitespace included).
+ * Yields "" when nothing matches or the match has no text content.
+ */
+function selectTrimTextContent(ele: Element, selector: string): string {
+  return ele.querySelector(selector)?.textContent?.replace(/\s+/g, "") ?? "";
+}
+
+/**
+ * Extracts the data of one thumbnail element: the whitespace-stripped text
+ * of its `.datetime` and `.name` descendants, plus the `src` of every
+ * contained `<img>` whose URL starts with "https://yt3.ggpht.com".
+ */
+function dataFromAThumbnail(thumb: Element) {
+  const time = selectTrimTextContent(thumb, ".datetime");
+  const name = selectTrimTextContent(thumb, ".name");
+
+  const images = mapNodeList(
+    thumb.querySelectorAll("img"),
+    (img) => img.src
+  ).filter((src) => src.startsWith("https://yt3.ggpht.com"));
+
+  return {
+    time,
+    name,
+    images,
+  };
+}
+
+/**
+ * Parses the schedule page and collects the data of every `a.thumbnail`
+ * element found in it.
+ *
+ * NOTE(review): the collected data is currently only logged and the function
+ * returns an empty string; this looks like a work-in-progress placeholder.
+ * The behavior is kept as-is so existing callers are unaffected.
+ */
+function parseScheduleHtml(html: string | Buffer) {
+  const { window } = new JSDOM(html);
+  const { document } = window;
+
+  const allThumbnail = document.querySelectorAll("a.thumbnail");
+  const data = mapNodeList(allThumbnail, dataFromAThumbnail);
+
+  console.log(data);
+
+  return "";
+}
+
+export default parseScheduleHtml;
diff --git a/src/util.ts b/src/util.ts
new file mode 100644
index 0000000..fbe87bb
--- /dev/null
+++ b/src/util.ts
@@ -0,0 +1,12 @@
+/**
+ * Maps every node of `list` through `mapper` and returns the results as a
+ * plain array, in list order.
+ */
+export function mapNodeList<E extends Node, T>(
+  list: NodeListOf<E>,
+  mapper: (ele: E) => T
+): T[] {
+  // Wrap `mapper` so it receives only the node, not the index that
+  // `Array.from` passes as a second argument.
+  return Array.from(list, (node) => mapper(node));
+}
-- 
cgit v1.2.3