From 4c072c665191619472472cc23c9e2e322611afce Mon Sep 17 00:00:00 2001 From: Fuwn Date: Sun, 29 Mar 2026 05:07:06 +0000 Subject: fix(proxy): improve native chapter source routing --- apps/proxy/src/index.js | 19 +++++- apps/proxy/src/rawkuma.js | 148 +++++++++++++++++++++++++++++++--------------- 2 files changed, 117 insertions(+), 50 deletions(-) (limited to 'apps') diff --git a/apps/proxy/src/index.js b/apps/proxy/src/index.js index 26121f10..592899c0 100644 --- a/apps/proxy/src/index.js +++ b/apps/proxy/src/index.js @@ -267,10 +267,16 @@ const handleMangaChapterCounts = async (request, env, ctx) => { .filter((row) => isRecentFailure(row, bootstrapRetryMinutes(env))) .map((row) => row.anilist_id), ); - const rowsNeedingBackfill = manga.filter((entry) => { + const rowsMissingFromIndex = manga.filter((entry) => { const row = existingRowsById.get(entry.anilistId); if (!row) return !recentFailures.has(entry.anilistId); + return false; + }); + const rowsNeedingVolumeBackfill = manga.filter((entry) => { + const row = existingRowsById.get(entry.anilistId); + + if (!row) return false; return ( entry.progress > 0 && @@ -278,12 +284,19 @@ const handleMangaChapterCounts = async (request, env, ctx) => { !recentFailures.has(entry.anilistId) ); }); - const pendingRows = rowsNeedingBackfill.filter((entry) => + const rowsNeedingBackfill = [ + ...rowsMissingFromIndex, + ...rowsNeedingVolumeBackfill, + ]; + const pendingRows = rowsMissingFromIndex.filter((entry) => bootstrapInFlight.has(entry.anilistId), ); const queueableRows = rowsNeedingBackfill.filter( (entry) => !bootstrapInFlight.has(entry.anilistId), ); + const queueablePendingRows = rowsMissingFromIndex.filter( + (entry) => !bootstrapInFlight.has(entry.anilistId), + ); if (queueableRows.length) ctx.waitUntil( @@ -311,7 +324,7 @@ const handleMangaChapterCounts = async (request, env, ctx) => { ); const pending = [ ...new Set( - [...pendingRows, ...queueableRows].map((entry) => entry.anilistId), + [...pendingRows, ...queueablePendingRows].map((entry) => entry.anilistId), ), ]; diff --git a/apps/proxy/src/rawkuma.js b/apps/proxy/src/rawkuma.js index d993a916..89f20870 100644 --- a/apps/proxy/src/rawkuma.js +++ b/apps/proxy/src/rawkuma.js @@ -1,13 +1,14 @@ +const KLMANGA_ORIGIN = "https://klmanga.mom"; const RAWKUMA_ORIGIN = "https://rawkuma.net"; const DEFAULT_CACHE_TTL_MS = 30 * 60 * 1000; -const DEFAULT_CONCURRENCY = 4; +const DEFAULT_CONCURRENCY = 8; const DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:139.0) Gecko/20100101 Firefox/139.0"; const MIN_MATCH_SCORE = 0.75; const MIN_MATCH_MARGIN = 0.1; -const rawkumaCache = new Map(); -const rawkumaInFlight = new Map(); +const nativeChapterCache = new Map(); +const nativeChapterInFlight = new Map(); const cacheTtlMs = (env) => { const milliseconds = Number.parseInt(env.RAWKUMA_CACHE_TTL_MS || "", 10); @@ -26,12 +27,12 @@ const concurrencyLimit = (env) => { }; const getCachedChapterCount = (title) => { - const cached = rawkumaCache.get(title); + const cached = nativeChapterCache.get(title); if (!cached) return undefined; if (Date.now() >= cached.expiresAt) { - rawkumaCache.delete(title); + nativeChapterCache.delete(title); return undefined; } @@ -42,7 +43,7 @@ const getCachedChapterCount = (title) => { const setCachedChapterCount = (env, title, chapter) => { if (chapter === null) return; - rawkumaCache.set(title, { + nativeChapterCache.set(title, { chapter, expiresAt: Date.now() + cacheTtlMs(env), }); @@ -68,17 +69,15 @@ const fetchText = async (requestHeaders, url, init = {}) => { return await (await fetch(url, { ...init, headers })).text(); }; -const parseNonce = (text) => - text.match(/name=['"]search_nonce['"]\s+value=['"]([^'"]+)['"]/i)?.[1] || - null; - const decodeHtml = (value) => value .replaceAll("&", "&") .replaceAll("&", "&") .replaceAll(""", '"') .replaceAll("'", "'") - .replaceAll("'", "'"); + .replaceAll("'", "'") + .replaceAll("<", "<") + .replaceAll(">", ">"); const normalizeTitle = (value) => String(value || "") @@ -114,37 +113,19 @@ const compareTitles = (left, right) => { const overlappingTokenCount = leftTokens.filter((token) => rightTokens.includes(token), ).length; - const overlapScore = - overlappingTokenCount / Math.max(leftTokens.length, rightTokens.length); - return overlapScore; + return ( + overlappingTokenCount / Math.max(leftTokens.length, rightTokens.length) + ); }; const titleCandidates = (entry) => - [ - entry.nativeTitle, - entry.englishTitle, - entry.romajiTitle, - entry.nativeTitle === "null" ? null : entry.nativeTitle, - entry.englishTitle === "null" ? null : entry.englishTitle, - entry.romajiTitle === "null" ? null : entry.romajiTitle, - ] + [entry.nativeTitle, entry.englishTitle, entry.romajiTitle] .filter(Boolean) .map((title) => String(title).trim()) + .filter((title) => title && title !== "null") .filter((title, index, array) => array.indexOf(title) === index); -const parseSearchResults = (text) => - [ - ...text.matchAll( - /]+href=["'](https:\/\/rawkuma\.net\/manga\/[^"']+)["'][^>]*>[\s\S]*?]*>([\s\S]*?)<\/h3>/gi, - ), - ].map((match) => ({ - url: decodeHtml(match[1]).trim(), - title: decodeHtml(match[2]) - .replace(/<[^>]+>/g, "") - .trim(), - })); - const pickBestSearchResult = (results, entry) => { const candidates = titleCandidates(entry); let best = null; @@ -174,7 +155,23 @@ const pickBestSearchResult = (results, entry) => { return best; }; -const parseChapterNumbers = (text) => +const parseRawkumaNonce = (text) => + text.match(/name=['"]search_nonce['"]\s+value=['"]([^'"]+)['"]/i)?.[1] || + null; + +const parseRawkumaSearchResults = (text) => + [ + ...text.matchAll( + /]+href=["'](https:\/\/rawkuma\.net\/manga\/[^"']+)["'][^>]*>[\s\S]*?]*>([\s\S]*?)<\/h3>/gi, + ), + ].map((match) => ({ + url: decodeHtml(match[1]).trim(), + title: decodeHtml(match[2]) + .replace(/<[^>]+>/g, "") + .trim(), + })); + +const parseRawkumaChapterNumbers = (text) => [ ...text.matchAll(/data-chapter-number=["'](\d+(?:\.\d+)?)["']/gi), ...text.matchAll( @@ -185,19 +182,19 @@ const parseChapterNumbers = (text) => .filter((value) => Number.isFinite(value)) .sort((left, right) => right - left); -const parseChapterListUrl = (text) => +const parseRawkumaChapterListUrl = (text) => decodeHtml( text.match( /]+id=["']chapter-list["'][^>]+hx-get=["']([^"']+)["']/i, )?.[1] || "", ).trim() || null; -const fetchRawkumaChapterCountUncached = async (requestHeaders, entry) => { +const fetchRawkumaChapterCount = async (requestHeaders, entry) => { const nonceText = await fetchText( requestHeaders, `${RAWKUMA_ORIGIN}/wp-admin/admin-ajax.php?type=search_form&action=get_nonce`, ); - const nonce = parseNonce(nonceText); + const nonce = parseRawkumaNonce(nonceText); if (!nonce) return null; @@ -218,18 +215,63 @@ const fetchRawkumaChapterCountUncached = async (requestHeaders, entry) => { }, ); const bestMatch = pickBestSearchResult( - parseSearchResults(searchText), + parseRawkumaSearchResults(searchText), entry, ); if (!bestMatch) continue; const mangaText = await fetchText(requestHeaders, bestMatch.url); - const chapterListUrl = parseChapterListUrl(mangaText); + const chapterListUrl = parseRawkumaChapterListUrl(mangaText); const chapterListText = chapterListUrl ? await fetchText(requestHeaders, chapterListUrl) : mangaText; - const chapters = parseChapterNumbers(chapterListText); + const chapters = parseRawkumaChapterNumbers(chapterListText); + + if (!chapters.length) continue; + + return chapters[0] ?? null; + } + + return null; +}; + +const parseKlmangaSearchResults = (text) => + [ + ...text.matchAll( + /]+href=["'](https:\/\/klmanga\.mom\/manga-raw\/[^"']+)["'][^>]*>([\s\S]*?)<\/a>/gi, + ), + ] + .map((match) => ({ + url: decodeHtml(match[1]).trim(), + title: decodeHtml(match[2]) + .replace(/<[^>]+>/g, "") + .replace(/\(Raw\s*-\s*Free\)/gi, "") + .trim(), + })) + .filter((result) => result.title.length > 0); + +const parseKlmangaChapterNumbers = (text) => + [...text.matchAll(/第(\d+(?:\.\d+)?)話/gu)] + .map((match) => Number.parseFloat(match[1])) + .filter((value) => Number.isFinite(value)) + .sort((left, right) => right - left); + +const fetchKlmangaChapterCount = async (requestHeaders, entry) => { + for (const candidate of titleCandidates(entry)) { + const searchText = await fetchText( + requestHeaders, + `${KLMANGA_ORIGIN}/?s=${encodeURIComponent(candidate)}`, + ); + const bestMatch = pickBestSearchResult( + parseKlmangaSearchResults(searchText), + entry, + ); + + if (!bestMatch) continue; + + const mangaText = await fetchText(requestHeaders, bestMatch.url); + const chapters = parseKlmangaChapterNumbers(mangaText); if (!chapters.length) continue; @@ -239,7 +281,19 @@ const fetchRawkumaChapterCountUncached = async (requestHeaders, entry) => { return null; }; -const fetchRawkumaChapterCount = async (env, requestHeaders, entry) => { +const fetchNativeChapterCountUncached = async (requestHeaders, entry) => { + const providers = [fetchKlmangaChapterCount, fetchRawkumaChapterCount]; + + for (const provider of providers) { + const chapter = await provider(requestHeaders, entry).catch(() => null); + + if (chapter !== null) return chapter; + } + + return null; +}; + +const fetchNativeChapterCount = async (env, requestHeaders, entry) => { const normalizedTitle = entry.nativeTitle?.trim(); if (!normalizedTitle) return null; @@ -248,11 +302,11 @@ const fetchRawkumaChapterCount = async (env, requestHeaders, entry) => { if (cachedChapter !== undefined) return cachedChapter; - const existing = rawkumaInFlight.get(normalizedTitle); + const existing = nativeChapterInFlight.get(normalizedTitle); if (existing) return existing; - const promise = fetchRawkumaChapterCountUncached(requestHeaders, entry) + const promise = fetchNativeChapterCountUncached(requestHeaders, entry) .catch(() => null) .then((chapter) => { setCachedChapterCount(env, normalizedTitle, chapter); @@ -260,10 +314,10 @@ const fetchRawkumaChapterCount = async (env, requestHeaders, entry) => { return chapter; }) .finally(() => { - rawkumaInFlight.delete(normalizedTitle); + nativeChapterInFlight.delete(normalizedTitle); }); - rawkumaInFlight.set(normalizedTitle, promise); + nativeChapterInFlight.set(normalizedTitle, promise); return promise; }; @@ -284,7 +338,7 @@ export const fetchRawkumaChapterCounts = async (env, requestHeaders, manga) => { nextIndex += 1; const entry = entries[currentIndex]; - const chapter = await fetchRawkumaChapterCount( + const chapter = await fetchNativeChapterCount( env, requestHeaders, entry, -- cgit v1.2.3