fix(proxy): improve native manga chapter counts

author: Fuwn <[email protected]> 2026-03-27 10:44:12 +0000
committer: Fuwn <[email protected]> 2026-03-27 10:44:12 +0000
commit: 12bfed974c77a85c8fb520682aae9cfeecbdf5b8 (patch)
tree: d7cff80d7caa09f2a3d773577b61b5af539a554b /apps/proxy
parent: style(ci): format and tidy proxy files (diff)
download: due.moe-12bfed974c77a85c8fb520682aae9cfeecbdf5b8.tar.xz
due.moe-12bfed974c77a85c8fb520682aae9cfeecbdf5b8.zip
2 files changed, 316 insertions, 0 deletions
diff --git a/apps/proxy/src/index.js b/apps/proxy/src/index.js
index 80d87b1b..4f18ba44 100644
--- a/apps/proxy/src/index.js
+++ b/apps/proxy/src/index.js
@@ -1,4 +1,5 @@
 import { bootstrapManga, syncMangadexIndex } from "./mangadex.js";
+import { fetchRawkumaChapterCounts } from "./rawkuma.js";
 import {
 	deleteMangadexFailureRows,
 	getMangadexFailureRowsByAniListIds,
@@ -271,6 +272,16 @@ const handleMangaChapterCounts = async (request, env, ctx) => {
 	});
 };
 
+const handleMangaNativeChapterCounts = async (request, env) => { // Handles POST /manga/native-chapter-counts.
+	const manga = await parseMangaPayload(request);
+	// Nothing to look up: respond with an empty map without contacting Rawkuma.
+	if (!manga.length) return jsonResponse(request, { data: {} });
+	// `data` is whatever fetchRawkumaChapterCounts resolves to for these entries.
+	return jsonResponse(request, {
+		data: await fetchRawkumaChapterCounts(env, request.headers, manga),
+	});
+};
+
 const isAuthorisedSyncRequest = (request, env) => {
 	const token = env.MANGADEX_SYNC_TOKEN;
 
@@ -305,6 +316,12 @@ export default {
 			if (url.pathname === "/manga/chapter-counts" && request.method === "POST")
 				return handleMangaChapterCounts(request, env, ctx);
 
+			if (
+				url.pathname === "/manga/native-chapter-counts" &&
+				request.method === "POST"
+			)
+				return handleMangaNativeChapterCounts(request, env);
+
 			if (url.pathname === "/manga/sync" && request.method === "POST")
 				return handleMangaSync(request, env);
 
diff --git a/apps/proxy/src/rawkuma.js b/apps/proxy/src/rawkuma.js
new file mode 100644
index 00000000..d993a916
--- /dev/null
+++ b/apps/proxy/src/rawkuma.js
@@ -0,0 +1,299 @@
+const RAWKUMA_ORIGIN = "https://rawkuma.net"; // Origin used to build the nonce and search endpoint URLs.
+const DEFAULT_CACHE_TTL_MS = 30 * 60 * 1000; // 30 minutes; fallback returned by cacheTtlMs.
+const DEFAULT_CONCURRENCY = 4; // Fallback returned by concurrencyLimit.
+const DEFAULT_USER_AGENT = // Used by fetchText only when the forwarded headers carry no User-Agent.
+	"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:139.0) Gecko/20100101 Firefox/139.0";
+const MIN_MATCH_SCORE = 0.75; // pickBestSearchResult rejects a best score below this.
+const MIN_MATCH_MARGIN = 0.1; // pickBestSearchResult rejects a lead over the runner-up smaller than this.
+
+const rawkumaCache = new Map(); // trimmed native title -> { chapter, expiresAt }
+const rawkumaInFlight = new Map(); // trimmed native title -> pending lookup promise
+
+const cacheTtlMs = (env) => {
+	// Cache lifetime in ms: env.RAWKUMA_CACHE_TTL_MS when it parses to a
+	// positive integer, otherwise DEFAULT_CACHE_TTL_MS.
+	const parsed = Number.parseInt(env.RAWKUMA_CACHE_TTL_MS || "", 10);
+	const isUsable = Number.isFinite(parsed) && parsed > 0;
+	return isUsable ? parsed : DEFAULT_CACHE_TTL_MS;
+};
+
+const concurrencyLimit = (env) => {
+	// Worker-count cap: env.RAWKUMA_CONCURRENCY when it parses to a positive
+	// integer, otherwise DEFAULT_CONCURRENCY.
+	const parsed = Number.parseInt(env.RAWKUMA_CONCURRENCY || "", 10);
+	const isUsable = Number.isFinite(parsed) && parsed > 0;
+	return isUsable ? parsed : DEFAULT_CONCURRENCY;
+};
+
+const getCachedChapterCount = (title) => {
+	// Returns the cached chapter for `title`, or undefined when there is no
+	// entry or the entry has expired. Expired entries are evicted on read.
+	const entry = rawkumaCache.get(title);
+
+	if (!entry) return undefined;
+
+	const isExpired = Date.now() >= entry.expiresAt;
+
+	if (isExpired) rawkumaCache.delete(title);
+
+	return isExpired ? undefined : entry.chapter;
+};
+
+const setCachedChapterCount = (env, title, chapter) => {
+	// A null chapter is not stored, so a later call looks the title up again.
+	if (chapter !== null) {
+		const expiresAt = Date.now() + cacheTtlMs(env);
+
+		rawkumaCache.set(title, { chapter, expiresAt });
+	}
+};
+
+const fetchText = async (requestHeaders, url, init = {}) => {
+	// Fetches `url` with browser-like headers; resolves to the body as text.
+	const headers = new Headers(requestHeaders);
+	const targetUrl = new URL(url);
+	// The caller's credentials must never be forwarded to the third-party site.
+	for (const name of ["Authorization", "Cookie"]) headers.delete(name);
+	for (const [key, value] of new Headers(init.headers)) headers.set(key, value);
+	headers.set(
+		"Accept",
+		"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+	);
+	headers.set("Accept-Encoding", "identity");
+	headers.set("Origin", targetUrl.origin);
+	headers.set("Referer", `${targetUrl.origin}/`);
+	if (!headers.has("User-Agent")) headers.set("User-Agent", DEFAULT_USER_AGENT);
+	headers.delete("Content-Length");
+
+	return await (await fetch(url, { ...init, headers })).text();
+};
+
+const parseNonce = (text) =>
+	// Value of the first `search_nonce` input found in the markup, or null.
+	/name=['"]search_nonce['"]\s+value=['"]([^'"]+)['"]/i.exec(text)?.[1] || null;
+
+const decodeHtml = (value) =>
+	// Single pass, so text that has just been decoded is never decoded again:
+	// chained replaceAll calls turned "&amp;quot;" into '"' instead of "&quot;".
+	value.replace(/&(amp|#038|quot|#039|apos);/g, (_match, name) => {
+		if (name === "amp" || name === "#038") return "&";
+		return name === "quot" ? '"' : "'";
+	});
+
+const normalizeTitle = (value) => // Comparison key: NFKC, lower case, "&" -> "and", other non-alphanumerics -> one space.
+	String(value || "")
+		.normalize("NFKC") // Must run before toLowerCase: NFKC can emit capitals (e.g. "℡" -> "TEL").
+		.toLowerCase()
+		.replace(/&/g, " and ")
+		.replace(/[^\p{L}\p{N}]+/gu, " ")
+		.replace(/\s+/g, " ")
+		.trim();
+
+const tokenizeTitle = (value) => {
+	const words = normalizeTitle(value).split(" "); // Normalised title split on single spaces.
+	return words.filter((word) => word.length >= 2); // Empty and one-character words are dropped.
+};
+
+const compareTitles = (left, right) => { // Similarity score between two titles, from 0 to 1.
+	const normalizedLeft = normalizeTitle(left);
+	const normalizedRight = normalizeTitle(right);
+	// Empty side -> 0, identical keys -> 1, one key containing the other -> fixed 0.92.
+	if (!normalizedLeft || !normalizedRight) return 0;
+	if (normalizedLeft === normalizedRight) return 1;
+	if (
+		normalizedLeft.includes(normalizedRight) ||
+		normalizedRight.includes(normalizedLeft)
+	)
+		return 0.92;
+	// Neither key contains the other: fall back to token overlap.
+	const leftTokens = tokenizeTitle(left);
+	const rightTokens = tokenizeTitle(right);
+
+	if (!leftTokens.length || !rightTokens.length) return 0;
+	// Number of left tokens also present on the right, divided by the longer token list's length.
+	const overlappingTokenCount = leftTokens.filter((token) =>
+		rightTokens.includes(token),
+	).length;
+	const overlapScore =
+		overlappingTokenCount / Math.max(leftTokens.length, rightTokens.length);
+
+	return overlapScore;
+};
+
+const titleCandidates = (entry) =>
+	// Distinct search titles for `entry`, in priority order: native, English,
+	// romaji. Missing, blank and literal "null" titles are dropped.
+	//
+	// The raw titles used to be listed next to "null"-filtered copies of
+	// themselves, which made the "null" checks dead code and let "null" be
+	// sent as a search query. Filtering after the trim also catches " null ".
+	[entry.nativeTitle, entry.englishTitle, entry.romajiTitle]
+		.filter(Boolean)
+		.map((title) => String(title).trim())
+		.filter((title) => title !== "" && title !== "null")
+		.filter((title, index, titles) => titles.indexOf(title) === index);
+
+const parseSearchResults = (text) => // Extracts { url, title } pairs from search-result HTML.
+	[
+		...text.matchAll(
+			/<a[^>]+href=["'](https:\/\/rawkuma\.net\/manga\/[^"']+)["'][^>]*>[\s\S]*?<h3[^>]*>([\s\S]*?)<\/h3>/gi, // a rawkuma.net/manga/ link, then the next <h3> body
+		),
+	].map((match) => ({
+		url: decodeHtml(match[1]).trim(),
+		title: decodeHtml(match[2])
+			.replace(/<[^>]+>/g, "") // strip tags nested inside the heading
+			.trim(),
+	}));
+
+const pickBestSearchResult = (results, entry) => { // Best-scoring result (with its score), or null if no confident match.
+	const candidates = titleCandidates(entry);
+	let best = null;
+	let secondBestScore = 0;
+	// A result's score is its highest compareTitles score over all candidate titles.
+	for (const result of results) {
+		const score = candidates.reduce(
+			(maximumScore, candidate) =>
+				Math.max(maximumScore, compareTitles(candidate, result.title)),
+			0,
+		);
+		// A new best demotes the previous best's score to runner-up.
+		if (!best || score > best.score) {
+			secondBestScore = best?.score || 0;
+			best = { ...result, score };
+
+			continue;
+		}
+
+		if (score > secondBestScore) secondBestScore = score;
+	}
+	// Reject when there were no results, the best score is too low, or the runner-up is too close.
+	if (!best) return null;
+	if (best.score < MIN_MATCH_SCORE) return null;
+	if (best.score - secondBestScore < MIN_MATCH_MARGIN) return null;
+
+	return best;
+};
+
+const parseChapterNumbers = (text) => // All chapter numbers found in `text`, largest first.
+	[
+		...text.matchAll(/data-chapter-number=["'](\d+(?:\.\d+)?)["']/gi), // data-chapter-number attributes
+		...text.matchAll(
+			/<a[^>]+href=["'][^"']*\/chapter-[^"']*["'][^>]*>\s*Chapter\s+(\d+(?:\.\d+)?)\s*<\/a>/gi, // "Chapter N" links to a /chapter- URL
+		),
+	]
+		.map((match) => Number.parseFloat(match[1]))
+		.filter((value) => Number.isFinite(value))
+		.sort((left, right) => right - left);
+
+const parseChapterListUrl = (text) => // Decoded hx-get value of the <div id="chapter-list">, or null.
+	decodeHtml(
+		text.match(
+			/<div[^>]+id=["']chapter-list["'][^>]+hx-get=["']([^"']+)["']/i,
+		)?.[1] || "",
+	).trim() || null;
+
+const fetchRawkumaChapterCountUncached = async (requestHeaders, entry) => { // Resolves to the highest chapter number found, or null.
+	const nonceText = await fetchText(
+		requestHeaders,
+		`${RAWKUMA_ORIGIN}/wp-admin/admin-ajax.php?type=search_form&action=get_nonce`,
+	);
+	const nonce = parseNonce(nonceText);
+	// No nonce could be parsed, so the search URL below cannot be built.
+	if (!nonce) return null;
+	// Try each candidate title in turn until one yields a confident match that has chapters.
+	for (const candidate of titleCandidates(entry)) {
+		const searchText = await fetchText(
+			requestHeaders,
+			`${RAWKUMA_ORIGIN}/wp-admin/admin-ajax.php?nonce=${encodeURIComponent(
+				nonce,
+			)}&action=search`,
+			{
+				method: "POST",
+				headers: {
+					"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
+				},
+				body: new URLSearchParams({
+					query: candidate,
+				}),
+			},
+		);
+		const bestMatch = pickBestSearchResult(
+			parseSearchResults(searchText),
+			entry,
+		);
+
+		if (!bestMatch) continue;
+		// Chapters are read from the page's chapter-list hx-get URL when present, else from the page itself.
+		const mangaText = await fetchText(requestHeaders, bestMatch.url);
+		const chapterListUrl = parseChapterListUrl(mangaText);
+		const chapterListText = chapterListUrl
+			? await fetchText(requestHeaders, chapterListUrl)
+			: mangaText;
+		const chapters = parseChapterNumbers(chapterListText);
+
+		if (!chapters.length) continue;
+		// parseChapterNumbers sorts descending, so the first element is the highest.
+		return chapters[0] ?? null;
+	}
+
+	return null;
+};
+
+const fetchRawkumaChapterCount = async (env, requestHeaders, entry) => { // Cached, de-duplicated lookup for one entry.
+	const normalizedTitle = entry.nativeTitle?.trim();
+	// The trimmed native title is the cache key; entries without one resolve to null.
+	if (!normalizedTitle) return null;
+
+	const cachedChapter = getCachedChapterCount(normalizedTitle);
+
+	if (cachedChapter !== undefined) return cachedChapter;
+	// Share the pending promise when the same title is already being fetched.
+	const existing = rawkumaInFlight.get(normalizedTitle);
+
+	if (existing) return existing;
+	// Any failure becomes null; setCachedChapterCount skips null, so failures are not cached.
+	const promise = fetchRawkumaChapterCountUncached(requestHeaders, entry)
+		.catch(() => null)
+		.then((chapter) => {
+			setCachedChapterCount(env, normalizedTitle, chapter);
+
+			return chapter;
+		})
+		.finally(() => {
+			rawkumaInFlight.delete(normalizedTitle);
+		});
+
+	rawkumaInFlight.set(normalizedTitle, promise);
+
+	return promise;
+};
+
+export const fetchRawkumaChapterCounts = async (env, requestHeaders, manga) => { // -> { [anilistId]: { chapter } }
+	const results = {};
+	const entries = [...manga];
+	const workerCount = Math.min(concurrencyLimit(env), entries.length);
+	// Zero workers means `manga` was empty (concurrencyLimit is always positive).
+	if (!workerCount) return results;
+	// Workers share `nextIndex`; it is read and incremented with no await in between, so no entry is claimed twice.
+	let nextIndex = 0;
+
+	await Promise.all(
+		Array.from({ length: workerCount }, async () => {
+			while (nextIndex < entries.length) {
+				const currentIndex = nextIndex;
+				nextIndex += 1;
+
+				const entry = entries[currentIndex];
+				const chapter = await fetchRawkumaChapterCount(
+					env,
+					requestHeaders,
+					entry,
+				);
+
+				results[String(entry.anilistId)] = { chapter };
+			}
+		}),
+	);
+
+	return results;
+};
author	Fuwn <[email protected]>	2026-03-27 10:44:12 +0000
committer	Fuwn <[email protected]>	2026-03-27 10:44:12 +0000
commit	12bfed974c77a85c8fb520682aae9cfeecbdf5b8 (patch)
tree	d7cff80d7caa09f2a3d773577b61b5af539a554b /apps/proxy
parent	style(ci): format and tidy proxy files (diff)
download	due.moe-12bfed974c77a85c8fb520682aae9cfeecbdf5b8.tar.xz due.moe-12bfed974c77a85c8fb520682aae9cfeecbdf5b8.zip