aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--apps/proxy/src/index.js17
-rw-r--r--apps/proxy/src/rawkuma.js299
-rw-r--r--src/lib/Data/Manga/raw.ts82
-rw-r--r--src/lib/Media/Manga/chapters.ts57
4 files changed, 417 insertions, 38 deletions
diff --git a/apps/proxy/src/index.js b/apps/proxy/src/index.js
index 80d87b1b..4f18ba44 100644
--- a/apps/proxy/src/index.js
+++ b/apps/proxy/src/index.js
@@ -1,4 +1,5 @@
import { bootstrapManga, syncMangadexIndex } from "./mangadex.js";
+import { fetchRawkumaChapterCounts } from "./rawkuma.js";
import {
deleteMangadexFailureRows,
getMangadexFailureRowsByAniListIds,
@@ -271,6 +272,16 @@ const handleMangaChapterCounts = async (request, env, ctx) => {
});
};
+/**
+ * Handler for the native chapter count route: parses the manga payload from
+ * the request and answers with the Rawkuma chapter counts for those entries.
+ * An empty payload short-circuits to `{ data: {} }` without any lookup.
+ */
+const handleMangaNativeChapterCounts = async (request, env) => {
+  const entries = await parseMangaPayload(request);
+
+  if (!entries.length) return jsonResponse(request, { data: {} });
+
+  const counts = await fetchRawkumaChapterCounts(env, request.headers, entries);
+
+  return jsonResponse(request, { data: counts });
+};
+
const isAuthorisedSyncRequest = (request, env) => {
const token = env.MANGADEX_SYNC_TOKEN;
@@ -305,6 +316,12 @@ export default {
if (url.pathname === "/manga/chapter-counts" && request.method === "POST")
return handleMangaChapterCounts(request, env, ctx);
+ if (
+ url.pathname === "/manga/native-chapter-counts" &&
+ request.method === "POST"
+ )
+ return handleMangaNativeChapterCounts(request, env);
+
if (url.pathname === "/manga/sync" && request.method === "POST")
return handleMangaSync(request, env);
diff --git a/apps/proxy/src/rawkuma.js b/apps/proxy/src/rawkuma.js
new file mode 100644
index 00000000..d993a916
--- /dev/null
+++ b/apps/proxy/src/rawkuma.js
@@ -0,0 +1,299 @@
+// Origin every Rawkuma URL in this module is built from.
+const RAWKUMA_ORIGIN = "https://rawkuma.net";
+// Cache lifetime (30 minutes) used when RAWKUMA_CACHE_TTL_MS is unset or invalid.
+const DEFAULT_CACHE_TTL_MS = 30 * 60 * 1000;
+// Worker count used when RAWKUMA_CONCURRENCY is unset or invalid.
+const DEFAULT_CONCURRENCY = 4;
+// User-Agent applied to outgoing requests that do not already carry one.
+const DEFAULT_USER_AGENT =
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:139.0) Gecko/20100101 Firefox/139.0";
+// Lowest title similarity score at which a search result is accepted.
+const MIN_MATCH_SCORE = 0.75;
+// Lead the best search result must have over the runner-up to be accepted.
+const MIN_MATCH_MARGIN = 0.1;
+
+// In-memory cache: title -> { chapter, expiresAt }.
+const rawkumaCache = new Map();
+// Lookups in progress: title -> pending promise, shared by concurrent callers.
+const rawkumaInFlight = new Map();
+
+/**
+ * Reads the cache lifetime in milliseconds from `env.RAWKUMA_CACHE_TTL_MS`.
+ * Falls back to DEFAULT_CACHE_TTL_MS when the value is absent, does not parse
+ * as an integer, or is not strictly positive.
+ */
+const cacheTtlMs = (env) => {
+  const parsed = Number.parseInt(env.RAWKUMA_CACHE_TTL_MS || "", 10);
+
+  if (!Number.isFinite(parsed) || !(parsed > 0)) return DEFAULT_CACHE_TTL_MS;
+
+  return parsed;
+};
+
+/**
+ * Reads the worker count from `env.RAWKUMA_CONCURRENCY`. Falls back to
+ * DEFAULT_CONCURRENCY when the value is absent, does not parse as an integer,
+ * or is not strictly positive.
+ */
+const concurrencyLimit = (env) => {
+  const parsed = Number.parseInt(env.RAWKUMA_CONCURRENCY || "", 10);
+
+  if (!Number.isFinite(parsed) || !(parsed > 0)) return DEFAULT_CONCURRENCY;
+
+  return parsed;
+};
+
+/**
+ * Looks up a cached chapter count by title. Returns `undefined` on a miss;
+ * an entry whose expiry time has been reached is evicted and also reported
+ * as a miss.
+ */
+const getCachedChapterCount = (title) => {
+  const entry = rawkumaCache.get(title);
+
+  if (!entry) return undefined;
+
+  const expired = Date.now() >= entry.expiresAt;
+
+  if (expired) rawkumaCache.delete(title);
+
+  return expired ? undefined : entry.chapter;
+};
+
+/**
+ * Stores a chapter count under `title` with an expiry derived from
+ * `cacheTtlMs(env)`. A `null` chapter is never stored, so failed lookups are
+ * retried on the next request.
+ */
+const setCachedChapterCount = (env, title, chapter) => {
+  if (chapter !== null) {
+    const expiresAt = Date.now() + cacheTtlMs(env);
+
+    rawkumaCache.set(title, { chapter, expiresAt });
+  }
+};
+
+// Inbound client headers that must not be relayed to the upstream site:
+// credentials, the proxy's own Host, and the Content-Type of the client's
+// request body (which does not describe the body sent upstream).
+const RAWKUMA_BLOCKED_REQUEST_HEADERS = [
+  "Authorization",
+  "Cookie",
+  "Host",
+  "Content-Type",
+  "Proxy-Authorization",
+];
+
+/**
+ * Fetches `url` and resolves with the response body as text.
+ *
+ * The outgoing headers start from the caller's request headers, minus the
+ * blocked set above, then take any `init.headers`, and finally get fixed
+ * Accept / Accept-Encoding / Origin / Referer values. A default User-Agent is
+ * added only when none is present. The HTTP status is not inspected; error
+ * pages are returned as text like any other body.
+ *
+ * @param requestHeaders Headers of the inbound request (anything `new Headers` accepts).
+ * @param url Absolute URL to fetch; a relative URL makes `new URL` throw.
+ * @param init Optional fetch init; its headers override forwarded ones.
+ * @returns The response body text.
+ */
+const fetchText = async (requestHeaders, url, init = {}) => {
+  const headers = new Headers(requestHeaders);
+  const targetUrl = new URL(url);
+
+  for (const name of RAWKUMA_BLOCKED_REQUEST_HEADERS) headers.delete(name);
+
+  // Applied after the scrub so an explicit per-call Content-Type survives.
+  for (const [key, value] of new Headers(init.headers).entries())
+    headers.set(key, value);
+
+  headers.set(
+    "Accept",
+    "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+  );
+  headers.set("Accept-Encoding", "identity");
+  headers.set("Origin", targetUrl.origin);
+  headers.set("Referer", `${targetUrl.origin}/`);
+  if (!headers.has("User-Agent")) headers.set("User-Agent", DEFAULT_USER_AGENT);
+  // The runtime computes the length for whatever body is actually sent.
+  headers.delete("Content-Length");
+
+  const response = await fetch(url, { ...init, headers });
+
+  return response.text();
+};
+
+/**
+ * Extracts the value of the `search_nonce` input from an HTML fragment.
+ * The `name` attribute must be followed by whitespace and then `value`.
+ * Returns `null` when no such input is found.
+ */
+const parseNonce = (text) => {
+  const found = text.match(
+    /name=['"]search_nonce['"]\s+value=['"]([^'"]+)['"]/i,
+  );
+
+  if (!found) return null;
+
+  return found[1] || null;
+};
+
+/**
+ * Decodes the handful of HTML entities that appear in scraped titles and
+ * URLs: `&quot;`, `&#039;` / `&#39;`, and `&#038;` / `&amp;`.
+ *
+ * The ampersand forms are decoded last so that already-escaped text such as
+ * `&amp;quot;` becomes the literal `&quot;` instead of being decoded twice.
+ *
+ * @param value Text containing HTML entities.
+ * @returns The text with those entities replaced by their characters.
+ */
+const decodeHtml = (value) =>
+  value
+    .replaceAll("&quot;", '"')
+    .replaceAll("&#039;", "'")
+    .replaceAll("&#39;", "'")
+    .replaceAll("&#038;", "&")
+    .replaceAll("&amp;", "&");
+
+/**
+ * Produces a comparison key for a title: lower-cased, NFKC-normalised, `&`
+ * spelled out as "and", every run of non-letter/non-digit characters turned
+ * into a single space, and the result trimmed. Falsy input yields "".
+ */
+const normalizeTitle = (value) => {
+  const folded = String(value || "")
+    .toLowerCase()
+    .normalize("NFKC");
+  const spelledOut = folded.replace(/&/g, " and ");
+  const wordsOnly = spelledOut.replace(/[^\p{L}\p{N}]+/gu, " ");
+
+  return wordsOnly.replace(/\s+/g, " ").trim();
+};
+
+/**
+ * Splits a normalised title on spaces and keeps only tokens longer than one
+ * character.
+ */
+const tokenizeTitle = (value) => {
+  const tokens = [];
+
+  for (const word of normalizeTitle(value).split(" ")) {
+    if (word.length > 1) tokens.push(word);
+  }
+
+  return tokens;
+};
+
+/**
+ * Scores how alike two titles are, from 0 to 1.
+ *
+ * - 0 when either title normalises to an empty string
+ * - 1 when the normalised titles are equal
+ * - 0.92 when one normalised title contains the other
+ * - otherwise the number of left tokens also present on the right, divided
+ *   by the larger of the two token counts (0 when either side has no tokens)
+ */
+const compareTitles = (left, right) => {
+  const leftKey = normalizeTitle(left);
+  const rightKey = normalizeTitle(right);
+
+  if (!leftKey || !rightKey) return 0;
+  if (leftKey === rightKey) return 1;
+  if (leftKey.includes(rightKey) || rightKey.includes(leftKey)) return 0.92;
+
+  const leftWords = tokenizeTitle(left);
+  const rightWords = tokenizeTitle(right);
+
+  if (!leftWords.length || !rightWords.length) return 0;
+
+  let shared = 0;
+
+  for (const word of leftWords) {
+    if (rightWords.includes(word)) shared += 1;
+  }
+
+  return shared / Math.max(leftWords.length, rightWords.length);
+};
+
+/**
+ * Lists the distinct, usable titles of an entry in search order: native,
+ * English, romaji.
+ *
+ * Missing values, titles that are blank after trimming, and the literal
+ * string "null" are dropped so they are never sent as a search query or used
+ * for title matching.
+ *
+ * @param entry Object with optional `nativeTitle`, `englishTitle`, `romajiTitle`.
+ * @returns Trimmed, de-duplicated titles in their original order.
+ */
+const titleCandidates = (entry) => {
+  const titles = [entry.nativeTitle, entry.englishTitle, entry.romajiTitle]
+    .filter(Boolean)
+    .map((title) => String(title).trim())
+    .filter((title) => title !== "" && title !== "null");
+
+  // A Set keeps first-insertion order, so the search priority is preserved.
+  return [...new Set(titles)];
+};
+
+/**
+ * Pulls `{ url, title }` pairs out of search result HTML. Each pair is an
+ * anchor pointing at a rawkuma.net manga page together with the text of the
+ * next `<h3>`; entities are decoded and nested tags are stripped from titles.
+ */
+const parseSearchResults = (text) => {
+  const pattern =
+    /<a[^>]+href=["'](https:\/\/rawkuma\.net\/manga\/[^"']+)["'][^>]*>[\s\S]*?<h3[^>]*>([\s\S]*?)<\/h3>/gi;
+  const results = [];
+
+  for (const [, href, heading] of text.matchAll(pattern)) {
+    const title = decodeHtml(heading)
+      .replace(/<[^>]+>/g, "")
+      .trim();
+
+    results.push({ url: decodeHtml(href).trim(), title });
+  }
+
+  return results;
+};
+
+/**
+ * Chooses the search result whose title best matches any of the entry's
+ * candidate titles.
+ *
+ * Returns the result (with its `score` attached) only when the best score
+ * reaches MIN_MATCH_SCORE and leads the runner-up by at least
+ * MIN_MATCH_MARGIN; otherwise returns `null`. Two results that tie for the
+ * best score therefore yield `null`.
+ */
+const pickBestSearchResult = (results, entry) => {
+ const candidates = titleCandidates(entry);
+ let best = null;
+ let secondBestScore = 0;
+
+ for (const result of results) {
+ // A result's score is its best similarity against any candidate title.
+ const score = candidates.reduce(
+ (maximumScore, candidate) =>
+ Math.max(maximumScore, compareTitles(candidate, result.title)),
+ 0,
+ );
+
+ if (!best || score > best.score) {
+ // The previous leader becomes the runner-up before being replaced.
+ secondBestScore = best?.score || 0;
+ best = { ...result, score };
+
+ continue;
+ }
+
+ // Not a new leader (this includes ties with it): may still be the runner-up.
+ if (score > secondBestScore) secondBestScore = score;
+ }
+
+ if (!best) return null;
+ if (best.score < MIN_MATCH_SCORE) return null;
+ if (best.score - secondBestScore < MIN_MATCH_MARGIN) return null;
+
+ return best;
+};
+
+/**
+ * Collects chapter numbers from chapter list HTML, highest first. Numbers
+ * are read from `data-chapter-number` attributes and from anchors whose href
+ * contains `/chapter-` and whose text is "Chapter <number>".
+ */
+const parseChapterNumbers = (text) => {
+  const attributeMatches = text.matchAll(
+    /data-chapter-number=["'](\d+(?:\.\d+)?)["']/gi,
+  );
+  const anchorMatches = text.matchAll(
+    /<a[^>]+href=["'][^"']*\/chapter-[^"']*["'][^>]*>\s*Chapter\s+(\d+(?:\.\d+)?)\s*<\/a>/gi,
+  );
+  const numbers = [];
+
+  for (const match of [...attributeMatches, ...anchorMatches]) {
+    const parsed = Number.parseFloat(match[1]);
+
+    if (Number.isFinite(parsed)) numbers.push(parsed);
+  }
+
+  numbers.sort((left, right) => right - left);
+
+  return numbers;
+};
+
+/**
+ * Reads the `hx-get` URL from the `<div id="chapter-list">` element of a
+ * manga page. Returns the entity-decoded, trimmed URL, or `null` when the
+ * element is missing or the URL is empty.
+ */
+const parseChapterListUrl = (text) => {
+  const found = text.match(
+    /<div[^>]+id=["']chapter-list["'][^>]+hx-get=["']([^"']+)["']/i,
+  );
+  const rawUrl = found?.[1] || "";
+
+  return decodeHtml(rawUrl).trim() || null;
+};
+
+// Resolves a scraped link against the Rawkuma origin. Returns the absolute
+// URL, or null when the link is empty, unparsable, or points at another origin.
+const resolveRawkumaUrl = (href) => {
+  if (!href) return null;
+
+  try {
+    const resolved = new URL(href, RAWKUMA_ORIGIN);
+
+    return resolved.origin === RAWKUMA_ORIGIN ? resolved.href : null;
+  } catch {
+    return null;
+  }
+};
+
+/**
+ * Looks up the highest chapter number for an entry on Rawkuma, bypassing the
+ * cache.
+ *
+ * Flow: fetch a search nonce, then for each candidate title run a search,
+ * pick a confidently matching result, load its manga page (and the separate
+ * chapter list when the page references one) and parse the chapter numbers.
+ * The first candidate that yields any chapter wins.
+ *
+ * @param requestHeaders Headers of the inbound request, passed to fetchText.
+ * @param entry Manga entry carrying the titles to search for.
+ * @returns The highest chapter number found, or `null` when the nonce is
+ *   missing or no candidate produces a match with chapters.
+ */
+const fetchRawkumaChapterCountUncached = async (requestHeaders, entry) => {
+  const nonceText = await fetchText(
+    requestHeaders,
+    `${RAWKUMA_ORIGIN}/wp-admin/admin-ajax.php?type=search_form&action=get_nonce`,
+  );
+  const nonce = parseNonce(nonceText);
+
+  if (!nonce) return null;
+
+  for (const candidate of titleCandidates(entry)) {
+    const searchText = await fetchText(
+      requestHeaders,
+      `${RAWKUMA_ORIGIN}/wp-admin/admin-ajax.php?nonce=${encodeURIComponent(
+        nonce,
+      )}&action=search`,
+      {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
+        },
+        body: new URLSearchParams({
+          query: candidate,
+        }),
+      },
+    );
+    const bestMatch = pickBestSearchResult(
+      parseSearchResults(searchText),
+      entry,
+    );
+
+    if (!bestMatch) continue;
+
+    const mangaText = await fetchText(requestHeaders, bestMatch.url);
+    // The hx-get target comes from scraped HTML: it may be relative, and it
+    // must not send this worker (and the forwarded headers) to another host.
+    // When it cannot be used, fall back to parsing the manga page itself.
+    const chapterListUrl = resolveRawkumaUrl(parseChapterListUrl(mangaText));
+    const chapterListText = chapterListUrl
+      ? await fetchText(requestHeaders, chapterListUrl)
+      : mangaText;
+    const chapters = parseChapterNumbers(chapterListText);
+
+    if (!chapters.length) continue;
+
+    // parseChapterNumbers sorts descending, so the first item is the highest.
+    return chapters[0];
+  }
+
+  return null;
+};
+
+/**
+ * Cached, de-duplicated chapter count lookup for a single entry.
+ *
+ * The trimmed native title is the cache and in-flight key; entries without
+ * one resolve to `null` without any request. Concurrent calls for the same
+ * title share one pending promise. Lookup failures are converted to `null`,
+ * and `null` results are not cached (see setCachedChapterCount).
+ */
+const fetchRawkumaChapterCount = async (env, requestHeaders, entry) => {
+ const normalizedTitle = entry.nativeTitle?.trim();
+
+ if (!normalizedTitle) return null;
+
+ const cachedChapter = getCachedChapterCount(normalizedTitle);
+
+ if (cachedChapter !== undefined) return cachedChapter;
+
+ const existing = rawkumaInFlight.get(normalizedTitle);
+
+ if (existing) return existing;
+
+ const promise = fetchRawkumaChapterCountUncached(requestHeaders, entry)
+ // Any failure during the lookup is reported as "no chapter count".
+ .catch(() => null)
+ .then((chapter) => {
+ setCachedChapterCount(env, normalizedTitle, chapter);
+
+ return chapter;
+ })
+ // Always release the in-flight slot once the lookup has settled.
+ .finally(() => {
+ rawkumaInFlight.delete(normalizedTitle);
+ });
+
+ rawkumaInFlight.set(normalizedTitle, promise);
+
+ return promise;
+};
+
+/**
+ * Resolves chapter counts for a list of manga entries using a bounded pool
+ * of workers.
+ *
+ * @param env Environment bindings; supplies the concurrency and cache settings.
+ * @param requestHeaders Headers of the inbound request, forwarded to lookups.
+ * @param manga Iterable of entries, each with an `anilistId` and titles.
+ * @returns Object keyed by `String(anilistId)` with `{ chapter }` values,
+ *   where `chapter` is a number or `null`.
+ */
+export const fetchRawkumaChapterCounts = async (env, requestHeaders, manga) => {
+ const results = {};
+ const entries = [...manga];
+ // Never start more workers than there are entries.
+ const workerCount = Math.min(concurrencyLimit(env), entries.length);
+
+ if (!workerCount) return results;
+
+ // Shared cursor: each worker claims the next unprocessed index.
+ let nextIndex = 0;
+
+ await Promise.all(
+ Array.from({ length: workerCount }, async () => {
+ while (nextIndex < entries.length) {
+ // Claim the index before awaiting so no two workers take the same entry.
+ const currentIndex = nextIndex;
+ nextIndex += 1;
+
+ const entry = entries[currentIndex];
+ const chapter = await fetchRawkumaChapterCount(
+ env,
+ requestHeaders,
+ entry,
+ );
+
+ results[String(entry.anilistId)] = { chapter };
+ }
+ }),
+ );
+
+ return results;
+};
diff --git a/src/lib/Data/Manga/raw.ts b/src/lib/Data/Manga/raw.ts
index 3663c737..64ed3de4 100644
--- a/src/lib/Data/Manga/raw.ts
+++ b/src/lib/Data/Manga/raw.ts
@@ -6,41 +6,61 @@ interface Chapter {
chapterDate: string;
}
+const RAWKUMA_ORIGIN = "https://rawkuma.net";
+
+/**
+ * Fetches `url` through the proxy helper and parses the response body as an
+ * HTML document.
+ */
+const fetchDocument = async (url: string, init?: RequestInit) => {
+  const parser = new DOMParser();
+  const response = await fetch(proxy(url, true), init);
+  const markup = await response.text();
+
+  return parser.parseFromString(markup, "text/html");
+};
+
+/**
+ * Extracts the number following the word "Chapter" (case-insensitive) from a
+ * piece of text. Returns `undefined` for empty input or when no such number
+ * is present.
+ */
+const parseChapterNumber = (text: string | null | undefined) => {
+  const captured = text ? text.match(/Chapter\s+(\d+(?:\.\d+)?)/i) : null;
+
+  if (!captured) return undefined;
+
+  return Number.parseFloat(captured[1]);
+};
+
+/**
+ * Looks up the highest chapter number for a manga on Rawkuma by native title.
+ *
+ * Fetches a search nonce, posts the title to the search endpoint, follows the
+ * first manga link in `#searchResults`, and parses "Chapter <n>" from every
+ * anchor on that page whose href contains `/chapter-`.
+ *
+ * @param nativeTitle Title sent as the search query.
+ * @returns The highest chapter number found, or `undefined` when the nonce,
+ *   a search result, or any parsable chapter link is missing.
+ */
export const getChapterCount = async (
nativeTitle: string,
): Promise<number | undefined> => {
- const html = new DOMParser().parseFromString(
- await (
- await fetch(
- proxy(`https://rawkuma.com/?s=${encodeURIComponent(nativeTitle)}`),
- )
- ).text(),
- "text/html",
+ const nonceDocument = await fetchDocument(
+ `${RAWKUMA_ORIGIN}/wp-admin/admin-ajax.php?type=search_form&action=get_nonce`,
+ );
+ const nonce = nonceDocument
+ .querySelector("input[name='search_nonce']")
+ ?.getAttribute("value");
+
+ if (!nonce) return undefined;
+
+ const searchDocument = await fetchDocument(
+ `${RAWKUMA_ORIGIN}/wp-admin/admin-ajax.php?nonce=${encodeURIComponent(
+ nonce,
+ )}&action=search`,
+ {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
+ },
+ body: new URLSearchParams({
+ query: nativeTitle,
+ }),
+ },
);
- const listContent = html.querySelector(".listupd");
-
- if (
- listContent &&
- listContent.textContent &&
- listContent.textContent.includes("Not Found")
- ) {
- return undefined;
- }
-
- const chapterCount = html.querySelector(".epxs");
-
- if (
- chapterCount &&
- chapterCount.textContent &&
- chapterCount.textContent.includes("Chapter")
- ) {
- return Number.parseInt(
- chapterCount.textContent.replace("Chapter", "").trim(),
- 10,
- );
- }
-
- return undefined;
+ // The first manga link is taken as-is; its title is not compared to the query.
+ const mangaUrl = searchDocument
+ .querySelector("#searchResults a[href*='/manga/']")
+ ?.getAttribute("href");
+
+ if (!mangaUrl) return undefined;
+
+ const mangaDocument = await fetchDocument(mangaUrl);
+ // Sorted descending so the first element is the highest chapter number.
+ const chapters = [...mangaDocument.querySelectorAll("a[href*='/chapter-']")]
+ .map((anchor) => parseChapterNumber(anchor.textContent))
+ .filter((value): value is number => value !== undefined)
+ .sort((left, right) => right - left);
+
+ return chapters[0];
};
export const getChaptersFromText = (text: string) => {
diff --git a/src/lib/Media/Manga/chapters.ts b/src/lib/Media/Manga/chapters.ts
index 473a3ed4..04b147b0 100644
--- a/src/lib/Media/Manga/chapters.ts
+++ b/src/lib/Media/Manga/chapters.ts
@@ -1,6 +1,5 @@
import { env } from "$env/dynamic/public";
import { type Media, recentMediaActivities } from "$lib/Data/AniList/media";
-import { getChapterCount } from "$lib/Data/Manga/raw";
import { proxyRoute } from "$lib/Utility/proxy";
import settings from "$stores/settings";
import type { UserIdentity } from "../../Data/AniList/identity";
@@ -17,6 +16,14 @@ interface MangaDexChapterCountsResponse {
retryAfterMs?: number;
}
+/** Chapter count reported for one manga; `null` when none was found. */
+interface NativeChapterCount {
+ chapter: number | null;
+}
+
+/** Body returned by the `/manga/native-chapter-counts` proxy route. */
+interface NativeChapterCountsResponse {
+ data?: Record<string, NativeChapterCount>;
+}
+
const chapterMemoryCache = new Map<number, number | null>();
const MAX_PENDING_RETRIES = 2;
const DEFAULT_PENDING_RETRY_MS = 750;
@@ -182,6 +189,36 @@ const fetchMangaChapterCounts = async (manga: Media[]) => {
return { data, rateLimited: rateLimited && !successfulResponse };
};
+/**
+ * Requests native chapter counts from the proxy for the given manga, in
+ * sequential chunks of 100 entries.
+ *
+ * The lookup is best-effort: a chunk whose request fails, returns a non-OK
+ * status, or carries a body that is not valid JSON is skipped, and the
+ * remaining chunks are still processed.
+ *
+ * @param manga Media entries to resolve.
+ * @returns Counts merged across all successful chunks, keyed as the proxy
+ *   returned them.
+ */
+const fetchNativeChapterCounts = async (manga: Media[]) => {
+  const data: Record<string, NativeChapterCount> = {};
+  const chunkSize = 100;
+
+  for (let index = 0; index < manga.length; index += chunkSize) {
+    const chunk = manga.slice(index, index + chunkSize);
+
+    try {
+      const response = await fetch(proxyRoute("/manga/native-chapter-counts"), {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+        },
+        body: JSON.stringify({
+          manga: chunk.map((entry) => ({
+            anilistId: entry.id,
+            nativeTitle: entry.title.native,
+            englishTitle: entry.title.english,
+            romajiTitle: entry.title.romaji,
+          })),
+        }),
+      });
+
+      if (!response.ok) continue;
+
+      // Parsing sits inside the try so a malformed body only drops this chunk.
+      const payload = (await response.json()) as
+        | NativeChapterCountsResponse
+        | null;
+
+      Object.assign(data, payload?.data ?? {});
+    } catch {
+      // Network failure or malformed JSON: skip this chunk.
+    }
+  }
+
+  return data;
+};
+
export const hydrateChapterCounts = async (
identity: UserIdentity,
manga: Media[],
@@ -191,6 +228,7 @@ export const hydrateChapterCounts = async (
(entry, index, array) =>
array.findIndex((candidate) => candidate.id === entry.id) === index,
);
+ const nativeCountManga: Media[] = [];
const unresolvedManga: Media[] = [];
for (const entry of uniqueManga) {
@@ -203,12 +241,7 @@ export const hydrateChapterCounts = async (
}
if (settings.get().calculatePreferNativeChapterCount) {
- const nativeCount = (await getChapterCount(entry.title.native)) || 0;
-
- await writeCachedChapterCount(
- entry.id,
- nativeCount === 0 ? null : nativeCount,
- );
+ nativeCountManga.push(entry);
continue;
}
@@ -216,6 +249,16 @@ export const hydrateChapterCounts = async (
unresolvedManga.push(entry);
}
+ if (nativeCountManga.length) {
+ const nativeCounts = await fetchNativeChapterCounts(nativeCountManga);
+
+ for (const entry of nativeCountManga) {
+ const nativeCount = nativeCounts[String(entry.id)]?.chapter ?? null;
+
+ await writeCachedChapterCount(entry.id, nativeCount);
+ }
+ }
+
if (!unresolvedManga.length) return { rateLimited: false };
const { data, rateLimited } = await fetchMangaChapterCounts(unresolvedManga);