about | summary | refs | log | tree | commit | diff
path: root/apps/proxy/src/rawkuma.js
diff options
context:
space:
mode:
Diffstat (limited to 'apps/proxy/src/rawkuma.js')
-rw-r--r-- apps/proxy/src/rawkuma.js 299
1 files changed, 299 insertions, 0 deletions
diff --git a/apps/proxy/src/rawkuma.js b/apps/proxy/src/rawkuma.js
new file mode 100644
index 00000000..d993a916
--- /dev/null
+++ b/apps/proxy/src/rawkuma.js
@@ -0,0 +1,299 @@
// Origin that every Rawkuma request in this module is sent to.
const RAWKUMA_ORIGIN = "https://rawkuma.net";

// Fallbacks used when the matching environment override is missing or invalid.
const DEFAULT_CACHE_TTL_MS = 1_800_000; // 30 minutes
const DEFAULT_CONCURRENCY = 4;
const DEFAULT_USER_AGENT =
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:139.0) Gecko/20100101 Firefox/139.0";

// A search result is accepted only when its score reaches MIN_MATCH_SCORE and
// beats the runner-up by at least MIN_MATCH_MARGIN.
const MIN_MATCH_SCORE = 0.75;
const MIN_MATCH_MARGIN = 0.1;

// title -> { chapter, expiresAt } for lookups that already completed.
const rawkumaCache = new Map();
// title -> pending promise, so concurrent lookups for one title share a request.
const rawkumaInFlight = new Map();
+
/**
 * Resolves the cache lifetime from `env.RAWKUMA_CACHE_TTL_MS`.
 *
 * @param {object} env - Environment bindings; the value is parsed as a base-10 integer.
 * @returns {number} The parsed value when it is a positive integer, otherwise
 *   DEFAULT_CACHE_TTL_MS.
 */
const cacheTtlMs = (env) => {
  const parsed = Number.parseInt(env.RAWKUMA_CACHE_TTL_MS || "", 10);
  const isUsable = Number.isFinite(parsed) && parsed > 0;

  if (isUsable) return parsed;

  return DEFAULT_CACHE_TTL_MS;
};
+
/**
 * Resolves the number of parallel lookups from `env.RAWKUMA_CONCURRENCY`.
 *
 * @param {object} env - Environment bindings; the value is parsed as a base-10 integer.
 * @returns {number} The parsed value when it is a positive integer, otherwise
 *   DEFAULT_CONCURRENCY.
 */
const concurrencyLimit = (env) => {
  const parsed = Number.parseInt(env.RAWKUMA_CONCURRENCY || "", 10);
  const isUsable = Number.isFinite(parsed) && parsed > 0;

  if (isUsable) return parsed;

  return DEFAULT_CONCURRENCY;
};
+
/**
 * Looks up a cached chapter count, evicting the entry if it has expired.
 *
 * @param {string} title - Cache key.
 * @returns {*} The cached chapter value, or `undefined` when nothing usable is cached.
 */
const getCachedChapterCount = (title) => {
  const hit = rawkumaCache.get(title);

  if (hit) {
    const { chapter, expiresAt } = hit;

    if (Date.now() >= expiresAt) {
      // Stale entry: drop it so the caller fetches a fresh value.
      rawkumaCache.delete(title);
    } else {
      return chapter;
    }
  }

  return undefined;
};
+
/**
 * Stores a chapter count under `title` with an expiry derived from `cacheTtlMs(env)`.
 * A `null` chapter is never stored, so such lookups are attempted again next time.
 *
 * @param {object} env - Environment bindings passed to `cacheTtlMs`.
 * @param {string} title - Cache key.
 * @param {*} chapter - Value to cache; `null` is ignored.
 * @returns {void}
 */
const setCachedChapterCount = (env, title, chapter) => {
  if (chapter !== null) {
    const expiresAt = Date.now() + cacheTtlMs(env);

    rawkumaCache.set(title, { chapter, expiresAt });
  }
};
+
/**
 * Fetches `url` and resolves with the response body as text.
 *
 * Headers are built in layers: the caller's `requestHeaders`, then
 * `init.headers`, then a fixed set (Accept, Accept-Encoding, Origin, Referer)
 * that always wins. A User-Agent is added only when none is present, and
 * Content-Length is always removed.
 *
 * @param {HeadersInit} requestHeaders - Base headers to start from.
 * @param {string} url - Absolute URL to request.
 * @param {RequestInit} [init={}] - Extra fetch options; its headers override the base ones.
 * @returns {Promise<string>} The response body; the status code is not inspected.
 */
const fetchText = async (requestHeaders, url, init = {}) => {
  const merged = new Headers(requestHeaders);
  const { origin } = new URL(url);

  new Headers(init.headers).forEach((value, name) => {
    merged.set(name, value);
  });

  const forced = [
    ["Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"],
    ["Accept-Encoding", "identity"],
    ["Origin", origin],
    ["Referer", `${origin}/`],
  ];

  for (const [name, value] of forced) merged.set(name, value);

  if (!merged.has("User-Agent")) merged.set("User-Agent", DEFAULT_USER_AGENT);

  merged.delete("Content-Length");

  const response = await fetch(url, { ...init, headers: merged });

  return response.text();
};
+
/**
 * Extracts the `search_nonce` value from markup in which a
 * `name="search_nonce"` attribute is followed by a `value="..."` attribute.
 *
 * @param {string} text - Markup to scan.
 * @returns {string|null} The nonce, or `null` when no such field is found.
 */
const parseNonce = (text) => {
  const found = text.match(
    /name=['"]search_nonce['"]\s+value=['"]([^'"]+)['"]/i,
  );

  if (found === null) return null;

  return found[1];
};
+
/**
 * Decodes the HTML entities for `&`, `"` and `'` in a single pass.
 *
 * Named (`&amp;`, `&quot;`, `&apos;`), decimal (`&#038;`, `&#39;`) and
 * hexadecimal (`&#x27;`) spellings are recognised. Every other entity is left
 * untouched. Decoding in one pass means an escaped entity such as
 * `&amp;quot;` becomes the text `&quot;` rather than being decoded twice.
 *
 * @param {string} value - Text that may contain HTML entities.
 * @returns {string} The text with the supported entities replaced.
 */
const decodeHtml = (value) => {
  const namedEntities = { amp: "&", quot: '"', apos: "'" };
  // Code points of the only characters this decoder is allowed to produce.
  const decodableCodePoints = new Set([0x26, 0x22, 0x27]);

  return value.replace(
    /&(?:(amp|quot|apos)|#(\d+)|#x([0-9a-f]+));/gi,
    (entity, named, decimal, hexadecimal) => {
      if (named !== undefined) return namedEntities[named.toLowerCase()];

      const codePoint =
        decimal !== undefined
          ? Number.parseInt(decimal, 10)
          : Number.parseInt(hexadecimal, 16);

      return decodableCodePoints.has(codePoint)
        ? String.fromCodePoint(codePoint)
        : entity;
    },
  );
};
+
/**
 * Reduces a title to a canonical form for comparison: NFKC-normalised,
 * lower-cased, `&` spelled out as "and", and every run of characters that are
 * not letters or numbers collapsed to a single space.
 *
 * Normalisation runs before lower-casing because NFKC can expand a character
 * into capital letters (for example `№` becomes `No`), which would otherwise
 * survive in the result.
 *
 * @param {*} value - Title to normalise; falsy values are treated as "".
 * @returns {string} The canonical title, possibly empty.
 */
const normalizeTitle = (value) =>
  String(value || "")
    .normalize("NFKC")
    .toLowerCase()
    .replace(/&/g, " and ")
    .replace(/[^\p{L}\p{N}]+/gu, " ")
    .replace(/\s+/g, " ")
    .trim();

/**
 * Splits a title into its normalised words, dropping one-character tokens.
 *
 * @param {*} value - Title to tokenise.
 * @returns {string[]} Tokens longer than one character, in title order.
 */
const tokenizeTitle = (value) =>
  normalizeTitle(value)
    .split(" ")
    .filter((token) => token.length > 1);

/**
 * Scores how closely two titles match, from 0 (unrelated) to 1 (identical
 * after normalisation).
 *
 * - Either title empty after normalisation: 0.
 * - Equal normalised titles: 1.
 * - One normalised title contained in the other: 0.92.
 * - Otherwise: distinct shared tokens divided by the larger distinct-token
 *   count. Tokens are compared as sets so that a repeated word cannot inflate
 *   the score and the result does not depend on argument order.
 *
 * @param {*} left - First title.
 * @param {*} right - Second title.
 * @returns {number} Similarity score in the range [0, 1].
 */
const compareTitles = (left, right) => {
  const normalizedLeft = normalizeTitle(left);
  const normalizedRight = normalizeTitle(right);

  if (!normalizedLeft || !normalizedRight) return 0;
  if (normalizedLeft === normalizedRight) return 1;
  if (
    normalizedLeft.includes(normalizedRight) ||
    normalizedRight.includes(normalizedLeft)
  ) {
    return 0.92;
  }

  const leftTokens = new Set(tokenizeTitle(left));
  const rightTokens = new Set(tokenizeTitle(right));

  if (!leftTokens.size || !rightTokens.size) return 0;

  let sharedTokenCount = 0;

  for (const token of leftTokens) {
    if (rightTokens.has(token)) sharedTokenCount += 1;
  }

  return sharedTokenCount / Math.max(leftTokens.size, rightTokens.size);
};
+
/**
 * Lists the distinct titles of an entry that are worth searching for, in the
 * order native, English, romaji.
 *
 * Each title is trimmed first. Missing values, blank strings and the literal
 * string "null" are skipped (presumably "null" comes from a missing title
 * being serialised upstream; confirm against the caller).
 *
 * @param {{nativeTitle?: *, englishTitle?: *, romajiTitle?: *}} entry - Manga entry.
 * @returns {string[]} Unique, trimmed, non-empty titles.
 */
const titleCandidates = (entry) => {
  const unique = new Set();

  for (const raw of [
    entry.nativeTitle,
    entry.englishTitle,
    entry.romajiTitle,
  ]) {
    if (!raw) continue;

    const title = String(raw).trim();

    if (title && title !== "null") unique.add(title);
  }

  return [...unique];
};
+
/**
 * Extracts manga links from search-result markup.
 *
 * A result is an anchor whose href points at `https://rawkuma.net/manga/...`,
 * paired with the first `<h3>` that follows it. Entities are decoded in both
 * the URL and the heading, and any tags inside the heading are stripped.
 *
 * @param {string} text - Search-result HTML.
 * @returns {{url: string, title: string}[]} Results in document order.
 */
const parseSearchResults = (text) => {
  const resultPattern =
    /<a[^>]+href=["'](https:\/\/rawkuma\.net\/manga\/[^"']+)["'][^>]*>[\s\S]*?<h3[^>]*>([\s\S]*?)<\/h3>/gi;
  const results = [];

  for (const [, href, heading] of text.matchAll(resultPattern)) {
    const url = decodeHtml(href).trim();
    const title = decodeHtml(heading)
      .replace(/<[^>]+>/g, "")
      .trim();

    results.push({ url, title });
  }

  return results;
};
+
/**
 * Chooses the search result that most confidently matches an entry.
 *
 * Each result is scored as the highest `compareTitles` value over all of the
 * entry's title candidates. The top result is returned only if it scores at
 * least MIN_MATCH_SCORE and leads the runner-up by at least MIN_MATCH_MARGIN;
 * on a tie the earliest result is the top one.
 *
 * @param {{url: string, title: string}[]} results - Parsed search results.
 * @param {object} entry - Manga entry passed to `titleCandidates`.
 * @returns {object|null} The winning result with an added `score`, or `null`
 *   when there is no sufficiently clear match.
 */
const pickBestSearchResult = (results, entry) => {
  const candidates = titleCandidates(entry);
  const scored = results.map((result) => ({
    ...result,
    score: Math.max(
      0,
      ...candidates.map((candidate) => compareTitles(candidate, result.title)),
    ),
  }));

  let best = null;
  let runnerUpScore = 0;

  for (const current of scored) {
    if (best === null || current.score > best.score) {
      // The previous leader, if any, becomes the runner-up.
      runnerUpScore = best === null ? 0 : best.score || 0;
      best = current;
    } else if (current.score > runnerUpScore) {
      runnerUpScore = current.score;
    }
  }

  if (best === null) return null;

  if (
    best.score < MIN_MATCH_SCORE ||
    best.score - runnerUpScore < MIN_MATCH_MARGIN
  ) {
    return null;
  }

  return best;
};
+
/**
 * Collects chapter numbers from chapter-list markup, highest first.
 *
 * Numbers are read from `data-chapter-number` attributes and from anchors
 * whose href contains `/chapter-` and whose text is "Chapter <number>".
 * Duplicates are kept.
 *
 * @param {string} text - Chapter-list HTML.
 * @returns {number[]} Finite chapter numbers sorted in descending order.
 */
const parseChapterNumbers = (text) => {
  const patterns = [
    /data-chapter-number=["'](\d+(?:\.\d+)?)["']/gi,
    /<a[^>]+href=["'][^"']*\/chapter-[^"']*["'][^>]*>\s*Chapter\s+(\d+(?:\.\d+)?)\s*<\/a>/gi,
  ];
  const numbers = [];

  for (const pattern of patterns) {
    for (const [, digits] of text.matchAll(pattern)) {
      const value = Number.parseFloat(digits);

      if (Number.isFinite(value)) numbers.push(value);
    }
  }

  numbers.sort((left, right) => right - left);

  return numbers;
};
+
/**
 * Finds the `hx-get` URL on the `<div id="chapter-list">` element.
 *
 * @param {string} text - Manga page HTML.
 * @returns {string|null} The entity-decoded, trimmed URL, or `null` when the
 *   element is absent or the URL is empty.
 */
const parseChapterListUrl = (text) => {
  const found = text.match(
    /<div[^>]+id=["']chapter-list["'][^>]+hx-get=["']([^"']+)["']/i,
  );
  const rawUrl = found?.[1] || "";
  const url = decodeHtml(rawUrl).trim();

  return url || null;
};
+
/**
 * Looks up the highest chapter number Rawkuma lists for an entry, bypassing
 * the cache.
 *
 * Steps: fetch a search nonce, then for each title candidate run a search,
 * pick a confident match, load the manga page, follow its chapter-list URL if
 * it has one, and parse the chapter numbers. The first candidate that yields
 * at least one chapter number wins.
 *
 * @param {HeadersInit} requestHeaders - Base headers forwarded to every request.
 * @param {object} entry - Manga entry supplying the title candidates.
 * @returns {Promise<number|null>} The highest chapter number, or `null` when
 *   no nonce, match or chapter could be found.
 */
const fetchRawkumaChapterCountUncached = async (requestHeaders, entry) => {
  const nonceText = await fetchText(
    requestHeaders,
    `${RAWKUMA_ORIGIN}/wp-admin/admin-ajax.php?type=search_form&action=get_nonce`,
  );
  const nonce = parseNonce(nonceText);

  if (!nonce) return null;

  for (const candidate of titleCandidates(entry)) {
    const searchText = await fetchText(
      requestHeaders,
      `${RAWKUMA_ORIGIN}/wp-admin/admin-ajax.php?nonce=${encodeURIComponent(
        nonce,
      )}&action=search`,
      {
        method: "POST",
        headers: {
          "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        },
        body: new URLSearchParams({
          query: candidate,
        }),
      },
    );
    const bestMatch = pickBestSearchResult(
      parseSearchResults(searchText),
      entry,
    );

    if (!bestMatch) continue;

    const mangaText = await fetchText(requestHeaders, bestMatch.url);
    const chapterListUrl = parseChapterListUrl(mangaText);
    // The hx-get value may be a relative reference. Resolving it against the
    // manga page URL keeps absolute URLs as they are and stops a relative one
    // from throwing inside fetchText and aborting the whole lookup.
    const chapterListText = chapterListUrl
      ? await fetchText(
          requestHeaders,
          new URL(chapterListUrl, bestMatch.url).href,
        )
      : mangaText;
    const chapters = parseChapterNumbers(chapterListText);

    if (!chapters.length) continue;

    // parseChapterNumbers sorts descending, so the first item is the highest.
    return chapters[0] ?? null;
  }

  return null;
};
+
/**
 * Returns the chapter count for an entry, using the cache and sharing one
 * request between concurrent callers asking for the same title.
 *
 * The trimmed native title is the cache key; entries without one resolve to
 * `null` immediately. A failed lookup resolves to `null` instead of rejecting.
 *
 * @param {object} env - Environment bindings used for the cache lifetime.
 * @param {HeadersInit} requestHeaders - Base headers forwarded to Rawkuma.
 * @param {object} entry - Manga entry to look up.
 * @returns {Promise<number|null>} The chapter count, or `null` when unavailable.
 */
const fetchRawkumaChapterCount = async (env, requestHeaders, entry) => {
  const cacheKey = entry.nativeTitle?.trim();

  if (!cacheKey) return null;

  const cached = getCachedChapterCount(cacheKey);

  if (cached !== undefined) return cached;

  const pending = rawkumaInFlight.get(cacheKey);

  if (pending) return pending;

  const lookup = (async () => {
    try {
      let chapter;

      try {
        chapter = await fetchRawkumaChapterCountUncached(requestHeaders, entry);
      } catch {
        // Best effort: any failure is reported to the caller as "no result".
        chapter = null;
      }

      setCachedChapterCount(env, cacheKey, chapter);

      return chapter;
    } finally {
      rawkumaInFlight.delete(cacheKey);
    }
  })();

  rawkumaInFlight.set(cacheKey, lookup);

  return lookup;
};
+
/**
 * Looks up chapter counts for many manga entries with bounded parallelism.
 *
 * Up to `concurrencyLimit(env)` workers pull entries from a shared queue
 * until it is drained.
 *
 * @param {object} env - Environment bindings (concurrency and cache settings).
 * @param {HeadersInit} requestHeaders - Base headers forwarded to Rawkuma.
 * @param {Iterable<object>} manga - Entries to look up; each result is keyed by
 *   `String(entry.anilistId)`.
 * @returns {Promise<Object<string, {chapter: (number|null)}>>} Results by id.
 */
export const fetchRawkumaChapterCounts = async (env, requestHeaders, manga) => {
  const results = {};
  const queue = [...manga];
  const poolSize = Math.min(concurrencyLimit(env), queue.length);

  if (poolSize > 0) {
    let cursor = 0;

    // Each worker claims the next unprocessed index until none remain.
    const runWorker = async () => {
      while (cursor < queue.length) {
        const entry = queue[cursor];
        cursor += 1;

        const chapter = await fetchRawkumaChapterCount(
          env,
          requestHeaders,
          entry,
        );

        results[String(entry.anilistId)] = { chapter };
      }
    };

    await Promise.all(Array.from({ length: poolSize }, () => runWorker()));
  }

  return results;
};