diff options
| author | Fuwn <[email protected]> | 2026-03-27 10:44:12 +0000 |
|---|---|---|
| committer | Fuwn <[email protected]> | 2026-03-27 10:44:12 +0000 |
| commit | 12bfed974c77a85c8fb520682aae9cfeecbdf5b8 (patch) | |
| tree | d7cff80d7caa09f2a3d773577b61b5af539a554b /apps/proxy | |
| parent | style(ci): format and tidy proxy files (diff) | |
| download | due.moe-12bfed974c77a85c8fb520682aae9cfeecbdf5b8.tar.xz due.moe-12bfed974c77a85c8fb520682aae9cfeecbdf5b8.zip | |
fix(proxy): improve native manga chapter counts
Diffstat (limited to 'apps/proxy')
| -rw-r--r-- | apps/proxy/src/index.js | 17 | ||||
| -rw-r--r-- | apps/proxy/src/rawkuma.js | 299 |
2 files changed, 316 insertions, 0 deletions
/**
 * apps/proxy/src/rawkuma.js
 *
 * Best-effort lookup of the latest chapter number of a manga on rawkuma.net.
 *
 * Flow for one entry: fetch a search nonce, search for each candidate title
 * in turn, pick an unambiguous best match, load the manga page, follow the
 * chapter-list fragment URL if the page advertises one, and return the
 * highest chapter number found. Successful results are cached in memory per
 * native title, and concurrent lookups of the same title share one request
 * chain.
 *
 * Wiring added to apps/proxy/src/index.js by the same change:
 *
 *   import { fetchRawkumaChapterCounts } from "./rawkuma.js";
 *
 *   const handleMangaNativeChapterCounts = async (request, env) => {
 *     const manga = await parseMangaPayload(request);
 *
 *     if (!manga.length) return jsonResponse(request, { data: {} });
 *
 *     return jsonResponse(request, {
 *       data: await fetchRawkumaChapterCounts(env, request.headers, manga),
 *     });
 *   };
 *
 *   // inside the default export's request handler, next to the other
 *   // "/manga/..." routes:
 *   if (
 *     url.pathname === "/manga/native-chapter-counts" &&
 *     request.method === "POST"
 *   )
 *     return handleMangaNativeChapterCounts(request, env);
 */

const RAWKUMA_ORIGIN = "https://rawkuma.net";
const DEFAULT_CACHE_TTL_MS = 30 * 60 * 1000;
const DEFAULT_CONCURRENCY = 4;
const DEFAULT_USER_AGENT =
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:139.0) Gecko/20100101 Firefox/139.0";

// A match must score at least MIN_MATCH_SCORE and beat the runner-up by at
// least MIN_MATCH_MARGIN; otherwise the search is treated as ambiguous.
const MIN_MATCH_SCORE = 0.75;
const MIN_MATCH_MARGIN = 0.1;

// Inbound headers that must never be replayed to the third-party site:
// caller credentials, and framing headers that describe the inbound body
// rather than the outbound one.
const UNFORWARDED_REQUEST_HEADERS = Object.freeze([
  "authorization",
  "cookie",
  "host",
  "content-length",
  "content-type",
]);

// A Map (not a plain object) so names such as "constructor" cannot resolve
// to inherited properties.
const NAMED_HTML_ENTITIES = new Map([
  ["amp", "&"],
  ["quot", '"'],
  ["apos", "'"],
  ["lt", "<"],
  ["gt", ">"],
]);

/** title -> { chapter, expiresAt } for successful lookups. */
const rawkumaCache = new Map();
/** title -> pending lookup promise, so concurrent callers share one chain. */
const rawkumaInFlight = new Map();

/**
 * Reads a positive integer setting from the environment.
 *
 * @param {unknown} rawValue - Raw environment value (usually a string).
 * @param {number} fallback - Value used when rawValue is missing or invalid.
 * @returns {number} The parsed positive integer, or fallback.
 */
const positiveIntegerOr = (rawValue, fallback) => {
  const parsed = Number.parseInt(String(rawValue ?? ""), 10);

  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
};

/**
 * @param {object} [env] - Worker environment; reads RAWKUMA_CACHE_TTL_MS.
 * @returns {number} Cache lifetime in milliseconds.
 */
const cacheTtlMs = (env) =>
  positiveIntegerOr(env?.RAWKUMA_CACHE_TTL_MS, DEFAULT_CACHE_TTL_MS);

/**
 * @param {object} [env] - Worker environment; reads RAWKUMA_CONCURRENCY.
 * @returns {number} Maximum number of entries looked up at the same time.
 */
const concurrencyLimit = (env) =>
  positiveIntegerOr(env?.RAWKUMA_CONCURRENCY, DEFAULT_CONCURRENCY);

/**
 * @param {string} title - Cache key (trimmed native title).
 * @returns {number | undefined} Cached chapter, or undefined on a miss or
 *   after expiry (expired entries are evicted on read).
 */
const getCachedChapterCount = (title) => {
  const cached = rawkumaCache.get(title);

  if (!cached) return undefined;

  if (Date.now() >= cached.expiresAt) {
    rawkumaCache.delete(title);

    return undefined;
  }

  return cached.chapter;
};

/**
 * Stores a successful lookup. Failed lookups (null) are deliberately not
 * cached so that the next request retries them.
 *
 * @param {object} [env] - Worker environment (for the TTL).
 * @param {string} title - Cache key.
 * @param {number | null} chapter - Lookup result.
 */
const setCachedChapterCount = (env, title, chapter) => {
  if (chapter === null) return;

  rawkumaCache.set(title, {
    chapter,
    expiresAt: Date.now() + cacheTtlMs(env),
  });
};

/**
 * Fetches a URL and returns the response body as text, presenting the
 * request as a same-site browser navigation.
 *
 * Inbound headers are used as a base (so the caller's User-Agent and
 * language preferences carry over), minus UNFORWARDED_REQUEST_HEADERS;
 * init.headers are then layered on top.
 *
 * @param {HeadersInit | null | undefined} requestHeaders - Inbound headers.
 * @param {string} url - Absolute URL to fetch.
 * @param {RequestInit} [init] - Extra fetch options (method, body, headers).
 * @returns {Promise<string>} Response body; the HTTP status is not checked.
 * @throws {TypeError} If url is not an absolute URL, or the fetch fails.
 */
const fetchText = async (requestHeaders, url, init = {}) => {
  const targetUrl = new URL(url);
  const headers = new Headers(requestHeaders ?? undefined);

  for (const name of UNFORWARDED_REQUEST_HEADERS) headers.delete(name);

  for (const [name, value] of new Headers(init.headers ?? undefined))
    headers.set(name, value);

  headers.set(
    "Accept",
    "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
  );
  // Ask for an uncompressed body so the text can be parsed directly.
  headers.set("Accept-Encoding", "identity");
  headers.set("Origin", targetUrl.origin);
  headers.set("Referer", `${targetUrl.origin}/`);
  if (!headers.has("User-Agent")) headers.set("User-Agent", DEFAULT_USER_AGENT);
  // The runtime computes the length of the outbound body itself.
  headers.delete("Content-Length");

  const response = await fetch(targetUrl.href, { ...init, headers });

  return response.text();
};

/**
 * @param {string} text - Markup returned by the nonce endpoint.
 * @returns {string | null} Value of the `search_nonce` input, if present.
 */
const parseNonce = (text) =>
  text.match(/name=['"]search_nonce['"]\s+value=['"]([^'"]+)['"]/i)?.[1] ||
  null;

/**
 * Decodes the common named HTML entities plus decimal and hexadecimal
 * numeric references in a single pass, so already-decoded output is never
 * decoded again ("&amp;quot;" becomes "&quot;", not '"'). Unknown or
 * out-of-range entities are left untouched.
 *
 * @param {unknown} value - Text that may contain HTML entities.
 * @returns {string} Decoded text.
 */
const decodeHtml = (value) =>
  String(value ?? "").replace(
    /&(?:#x([0-9a-f]+)|#(\d+)|([a-z][a-z0-9]*));/gi,
    (entity, hexadecimal, decimal, name) => {
      if (name !== undefined)
        return NAMED_HTML_ENTITIES.get(name.toLowerCase()) ?? entity;

      const codePoint =
        hexadecimal !== undefined
          ? Number.parseInt(hexadecimal, 16)
          : Number.parseInt(decimal, 10);

      return codePoint > 0 && codePoint <= 0x10ffff
        ? String.fromCodePoint(codePoint)
        : entity;
    },
  );

/**
 * Reduces a title to a comparable form: compatibility-normalised,
 * lower-cased, "&" spelled out, punctuation collapsed to single spaces.
 *
 * @param {unknown} value - Raw title.
 * @returns {string} Normalised title ("" for empty input).
 */
const normalizeTitle = (value) =>
  String(value || "")
    // Normalise before lower-casing: some compatibility characters expand
    // to upper-case letters.
    .normalize("NFKC")
    .toLowerCase()
    .replace(/&/g, " and ")
    .replace(/[^\p{L}\p{N}]+/gu, " ")
    .replace(/\s+/g, " ")
    .trim();

/**
 * @param {unknown} value - Raw title.
 * @returns {string[]} Normalised words longer than one character.
 */
const tokenizeTitle = (value) =>
  normalizeTitle(value)
    .split(" ")
    .filter((token) => token.length > 1);

/**
 * Scores how similar two titles are.
 *
 * @param {unknown} left - First title.
 * @param {unknown} right - Second title.
 * @returns {number} 1 for equal normalised titles, 0.92 when one contains
 *   the other, otherwise the share of unique tokens the two have in common
 *   (0 when either side is empty).
 */
const compareTitles = (left, right) => {
  const normalizedLeft = normalizeTitle(left);
  const normalizedRight = normalizeTitle(right);

  if (!normalizedLeft || !normalizedRight) return 0;
  if (normalizedLeft === normalizedRight) return 1;
  if (
    normalizedLeft.includes(normalizedRight) ||
    normalizedRight.includes(normalizedLeft)
  )
    return 0.92;

  // Unique tokens on both sides keep the score symmetric.
  const leftTokens = new Set(tokenizeTitle(normalizedLeft));
  const rightTokens = new Set(tokenizeTitle(normalizedRight));

  if (leftTokens.size === 0 || rightTokens.size === 0) return 0;

  let sharedTokenCount = 0;

  for (const token of leftTokens)
    if (rightTokens.has(token)) sharedTokenCount += 1;

  return sharedTokenCount / Math.max(leftTokens.size, rightTokens.size);
};

/**
 * Lists the titles worth searching for, in priority order (native, English,
 * romaji), trimmed and de-duplicated. Missing titles, blank titles and the
 * literal string "null" are skipped.
 *
 * @param {{ nativeTitle?: unknown, englishTitle?: unknown, romajiTitle?: unknown } | null | undefined} entry
 * @returns {string[]} Candidate titles; may be empty.
 */
const titleCandidates = (entry) => [
  ...new Set(
    [entry?.nativeTitle, entry?.englishTitle, entry?.romajiTitle]
      .filter(Boolean)
      .map((title) => String(title).trim())
      .filter((title) => title !== "" && title !== "null"),
  ),
];

/**
 * Extracts (url, title) pairs from search-result markup: every rawkuma
 * manga link that is followed by an <h3> heading.
 *
 * @param {string} text - Search response markup.
 * @returns {{ url: string, title: string }[]} Results in document order.
 */
const parseSearchResults = (text) =>
  [
    ...text.matchAll(
      /<a[^>]+href=["'](https:\/\/rawkuma\.net\/manga\/[^"']+)["'][^>]*>[\s\S]*?<h3[^>]*>([\s\S]*?)<\/h3>/gi,
    ),
  ].map((match) => ({
    url: decodeHtml(match[1]).trim(),
    // Strip tags before decoding so a decoded "<" is never taken for markup.
    title: decodeHtml(match[2].replace(/<[^>]+>/g, "")).trim(),
  }));

/**
 * Picks the search result that best matches any of the entry's titles.
 *
 * @param {{ url: string, title: string }[]} results - Parsed search results.
 * @param {object} entry - Manga entry (see titleCandidates).
 * @returns {{ url: string, title: string, score: number } | null} The best
 *   result, or null when nothing scores high enough or the best result does
 *   not clearly beat the runner-up.
 */
const pickBestSearchResult = (results, entry) => {
  const candidates = titleCandidates(entry);
  let best = null;
  let runnerUpScore = 0;

  for (const result of results) {
    const score = Math.max(
      0,
      ...candidates.map((candidate) => compareTitles(candidate, result.title)),
    );

    if (best === null || score > best.score) {
      runnerUpScore = best?.score ?? 0;
      best = { ...result, score };
    } else if (score > runnerUpScore) {
      runnerUpScore = score;
    }
  }

  if (best === null || best.score < MIN_MATCH_SCORE) return null;
  if (best.score - runnerUpScore < MIN_MATCH_MARGIN) return null;

  return best;
};

/**
 * Collects chapter numbers from `data-chapter-number` attributes and from
 * "Chapter N" links.
 *
 * @param {string} text - Manga page or chapter-list markup.
 * @returns {number[]} Chapter numbers, highest first (duplicates kept).
 */
const parseChapterNumbers = (text) =>
  [
    ...text.matchAll(/data-chapter-number=["'](\d+(?:\.\d+)?)["']/gi),
    ...text.matchAll(
      /<a[^>]+href=["'][^"']*\/chapter-[^"']*["'][^>]*>\s*Chapter\s+(\d+(?:\.\d+)?)\s*<\/a>/gi,
    ),
  ]
    .map((match) => Number.parseFloat(match[1]))
    .filter((value) => Number.isFinite(value))
    .sort((left, right) => right - left);

/**
 * @param {string} text - Manga page markup.
 * @returns {string | null} The `hx-get` URL of the `#chapter-list` element
 *   (entity-decoded, possibly relative), or null when absent.
 */
const parseChapterListUrl = (text) =>
  decodeHtml(
    text.match(
      /<div[^>]+id=["']chapter-list["'][^>]+hx-get=["']([^"']+)["']/i,
    )?.[1] || "",
  ).trim() || null;

/**
 * Builds an admin-ajax endpoint URL with the given query parameters.
 *
 * @param {Record<string, string>} parameters - Query parameters, in order.
 * @returns {string} Absolute URL.
 */
const adminAjaxUrl = (parameters) => {
  const url = new URL("/wp-admin/admin-ajax.php", RAWKUMA_ORIGIN);

  for (const [name, value] of Object.entries(parameters))
    url.searchParams.set(name, value);

  return url.href;
};

/**
 * Performs the full remote lookup for one entry, bypassing the cache.
 *
 * @param {HeadersInit | null | undefined} requestHeaders - Inbound headers.
 * @param {object} entry - Manga entry (see titleCandidates).
 * @returns {Promise<number | null>} Highest chapter number found, or null
 *   when no nonce, no confident match or no chapters could be found.
 * @throws If any request fails at the network level.
 */
const fetchRawkumaChapterCountUncached = async (requestHeaders, entry) => {
  const nonceText = await fetchText(
    requestHeaders,
    adminAjaxUrl({ type: "search_form", action: "get_nonce" }),
  );
  const nonce = parseNonce(nonceText);

  if (!nonce) return null;

  // Candidates are tried one after another on purpose: the first title that
  // yields chapters wins and the remaining searches are skipped.
  for (const candidate of titleCandidates(entry)) {
    const searchText = await fetchText(
      requestHeaders,
      adminAjaxUrl({ nonce, action: "search" }),
      {
        method: "POST",
        headers: {
          "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        },
        body: new URLSearchParams({
          query: candidate,
        }),
      },
    );
    const bestMatch = pickBestSearchResult(
      parseSearchResults(searchText),
      entry,
    );

    if (!bestMatch) continue;

    const mangaText = await fetchText(requestHeaders, bestMatch.url);
    const chapterListUrl = parseChapterListUrl(mangaText);
    // The fragment URL may be site-relative; resolve it against the page it
    // came from instead of letting URL parsing throw.
    const chapterListText = chapterListUrl
      ? await fetchText(
          requestHeaders,
          new URL(chapterListUrl, bestMatch.url).href,
        )
      : mangaText;
    const chapters = parseChapterNumbers(chapterListText);

    if (!chapters.length) continue;

    return chapters[0] ?? null;
  }

  return null;
};

/**
 * Looks up one entry through the cache, sharing the request chain with any
 * lookup of the same native title that is already running.
 *
 * @param {object} [env] - Worker environment (cache TTL).
 * @param {HeadersInit | null | undefined} requestHeaders - Inbound headers.
 * @param {object} entry - Manga entry; a string `nativeTitle` is required.
 * @returns {Promise<number | null>} Latest chapter, or null when the entry
 *   has no native title or the lookup failed. Never rejects.
 */
const fetchRawkumaChapterCount = async (env, requestHeaders, entry) => {
  const nativeTitle = entry?.nativeTitle;
  const cacheKey = typeof nativeTitle === "string" ? nativeTitle.trim() : "";

  if (!cacheKey) return null;

  const cachedChapter = getCachedChapterCount(cacheKey);

  if (cachedChapter !== undefined) return cachedChapter;

  const existing = rawkumaInFlight.get(cacheKey);

  if (existing) return existing;

  const pending = (async () => {
    try {
      const chapter = await fetchRawkumaChapterCountUncached(
        requestHeaders,
        entry,
      );

      setCachedChapterCount(env, cacheKey, chapter);

      return chapter;
    } catch (error) {
      // Deliberately best-effort: one unreachable title must not fail the
      // whole batch, but the failure should still be visible in the logs.
      console.warn(`rawkuma: chapter lookup failed for "${cacheKey}"`, error);

      return null;
    } finally {
      rawkumaInFlight.delete(cacheKey);
    }
  })();

  // The async function above has only run up to its first await, so its
  // `finally` cannot have executed before this registration.
  rawkumaInFlight.set(cacheKey, pending);

  return pending;
};

/**
 * Looks up the latest rawkuma chapter for every manga entry, running at
 * most RAWKUMA_CONCURRENCY (default 4) lookups at a time.
 *
 * @param {object} [env] - Worker environment; reads RAWKUMA_CONCURRENCY and
 *   RAWKUMA_CACHE_TTL_MS.
 * @param {HeadersInit | null | undefined} requestHeaders - Headers of the
 *   inbound request, used as the base for outbound requests.
 * @param {Iterable<{ anilistId: unknown, nativeTitle?: unknown, englishTitle?: unknown, romajiTitle?: unknown }> | null | undefined} manga
 * @returns {Promise<Record<string, { chapter: number | null }>>} Results
 *   keyed by AniList id. Entries without an id are skipped.
 */
export const fetchRawkumaChapterCounts = async (env, requestHeaders, manga) => {
  const results = {};
  const entries = Array.from(manga ?? []);
  const workerCount = Math.min(concurrencyLimit(env), entries.length);

  if (workerCount === 0) return results;

  let nextIndex = 0;

  // Each worker pulls the next unclaimed entry until the queue is empty.
  // Claiming an index is synchronous, so no entry is processed twice.
  const drainQueue = async () => {
    while (nextIndex < entries.length) {
      const entry = entries[nextIndex];

      nextIndex += 1;

      if (entry?.anilistId == null) continue;

      const chapter = await fetchRawkumaChapterCount(
        env,
        requestHeaders,
        entry,
      );

      results[String(entry.anilistId)] = { chapter };
    }
  };

  await Promise.all(Array.from({ length: workerCount }, () => drainQueue()));

  return results;
};