aboutsummaryrefslogtreecommitdiff
path: root/src/lib/Media/Anime/Airing/Subtitled/match.ts
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/Media/Anime/Airing/Subtitled/match.ts')
-rw-r--r--src/lib/Media/Anime/Airing/Subtitled/match.ts652
1 files changed, 0 insertions, 652 deletions
diff --git a/src/lib/Media/Anime/Airing/Subtitled/match.ts b/src/lib/Media/Anime/Airing/Subtitled/match.ts
deleted file mode 100644
index a89ac8ac..00000000
--- a/src/lib/Media/Anime/Airing/Subtitled/match.ts
+++ /dev/null
@@ -1,652 +0,0 @@
-import stringSimilarity from "string-similarity";
-import { get } from "svelte/store";
-import excludeMatch from "$lib/Data/Static/matchExclude.json";
-import type { SubsPlease } from "$lib/Media/Anime/Airing/Subtitled/subsPlease";
-import settings from "$stores/settings";
-import type { Media } from "../../../../Data/AniList/media";
-import { season } from "../../season";
-
-export interface Time {
- title: string;
- time: string;
- day: string;
-}
-
-interface IndexedTime {
- time: Time;
- normalizedTitle: string;
- tokens: string[];
-}
-
-interface DayScheduleIndex {
- entries: IndexedTime[];
- exactTitleIndex: Map<string, number[]>;
- tokenIndex: Map<string, number[]>;
-}
-
-interface ScheduleIndex {
- byDay: Map<string, DayScheduleIndex>;
- version: string;
-}
-
-const secondsUntil = (targetTime: string, targetDay: string) => {
- const now = new Date();
- const [targetHour, targetMinute] = targetTime.split(":").map(Number);
- let dayDifference =
- [
- "Sunday",
- "Monday",
- "Tuesday",
- "Wednesday",
- "Thursday",
- "Friday",
- "Saturday",
- ].indexOf(targetDay) - now.getDay();
-
- if (dayDifference < 0) dayDifference += 7;
-
- const targetDate = new Date(now);
-
- targetDate.setDate(now.getDate() + dayDifference);
- targetDate.setHours(targetHour, targetMinute, 0, 0);
-
- const secondsDifference = (Number(targetDate) - Number(now)) / 1000;
-
- return secondsDifference > 0
- ? secondsDifference
- : secondsDifference + 7 * 24 * 60 * 60;
-};
-
-const preprocessTitle = (title: string): string => {
- return title
- .toLowerCase()
- .replace(/\b(season|s|part|cour)(\d+)\b/g, " $2 ")
- .replace(/\b(season|s|part|cour)\b/g, " ")
- .replace(/[^a-z0-9\s]/gi, "")
- .trim()
- .split(/\s+/)
- .join(" ");
-};
-
-const NON_DISTINCTIVE_TOKENS = new Set([
- "a",
- "and",
- "de",
- "e",
- "for",
- "ga",
- "in",
- "na",
- "ni",
- "no",
- "o",
- "of",
- "on",
- "the",
- "to",
- "wa",
- "wo",
-]);
-
-const isMeaningfulToken = (token: string): boolean =>
- /^\d+$/.test(token) ||
- (token.length >= 3 && !NON_DISTINCTIVE_TOKENS.has(token));
-
-const MIN_MATCH_SCORE = 0.3;
-const MIN_TOKEN_OVERLAP = 2;
-const MIN_MATCH_MARGIN = 0.08;
-const FALLBACK_MIN_SCORE = 0.82;
-const FALLBACK_MIN_MARGIN = 0.08;
-const MAX_MATCH_CACHE_ENTRIES = 10_000;
-const MAX_INJECT_CACHE_ENTRIES = 10_000;
-const STALE_AIRING_GRACE_SECONDS = 5 * 60;
-const MAX_EPISODE_SHIFT_WINDOW_SECONDS = 8 * 24 * 60 * 60;
-
-interface SimilarityAnalysis {
- score: number;
- tokenOverlap: number;
- numericTokenOverlap: number;
-}
-
-const calculateWeightedSimilarity = (
- title1: string,
- title2: string,
-): SimilarityAnalysis => {
- const tokens1 = title1.split(" ").filter(isMeaningfulToken);
- const tokens2 = title2.split(" ").filter(isMeaningfulToken);
-
- if (tokens1.length === 0 || tokens2.length === 0)
- return {
- score: 0,
- tokenOverlap: 0,
- numericTokenOverlap: 0,
- };
-
- const set2 = new Set(tokens2);
- let score = 0;
- let tokenOverlap = 0;
- let numericTokenOverlap = 0;
- const numericTokens1 = tokens1.filter((token) => /^\d+$/.test(token));
- const numericTokens2 = tokens2.filter((token) => /^\d+$/.test(token));
-
- tokens1.forEach((token) => {
- if (set2.has(token)) {
- tokenOverlap += 1;
-
- if (/^\d+$/.test(token)) numericTokenOverlap += 1;
-
- score += /^\d+$/.test(token) ? 2 : 1;
- }
- });
-
- let finalScore =
- (score / ((Math.max(tokens1.length, tokens2.length) || 1) * 2)) * 0.7 +
- stringSimilarity.compareTwoStrings(title1, title2) * 0.3;
-
- if (
- numericTokens1.length > 0 &&
- numericTokens2.length > 0 &&
- numericTokenOverlap === 0
- )
- finalScore *= 0.5;
-
- return {
- score: finalScore,
- tokenOverlap,
- numericTokenOverlap,
- };
-};
-
-const indexPush = (
- index: Map<string, number[]>,
- key: string,
- entryIndex: number,
-) => {
- const existing = index.get(key);
-
- if (existing) existing.push(entryIndex);
- else index.set(key, [entryIndex]);
-};
-
-const scheduleIndexCache = new WeakMap<SubsPlease, ScheduleIndex>();
-const closestMatchCache = new Map<string, Time | null>();
-const injectAiringTimeCache = new Map<string, Media>();
-
-const hashString = (input: string): string => {
- let hash = 2166136261;
-
- for (let index = 0; index < input.length; index += 1) {
- hash ^= input.charCodeAt(index);
- hash = Math.imul(hash, 16777619);
- }
-
- return (hash >>> 0).toString(36);
-};
-
-const setBoundedCacheValue = <T>(
- cache: Map<string, T>,
- key: string,
- value: T,
- maxEntries: number,
-) => {
- if (cache.size >= maxEntries) cache.clear();
-
- cache.set(key, value);
-};
-
-const animeTitleFingerprint = (anime: Media) =>
- [anime.title.romaji, anime.title.english, ...anime.synonyms]
- .filter(Boolean)
- .map(preprocessTitle)
- .join("|");
-
-const localTimeZone = () =>
- Intl.DateTimeFormat().resolvedOptions().timeZone || "local";
-
-const airingDayOf = (airingAt: number | undefined) =>
- new Date((airingAt || 0) * 1000).toLocaleString("en-US", { weekday: "long" });
-
-const fallbackClosestMatch = (
- dayIndex: DayScheduleIndex,
- searchTitles: string[],
-): Time | null => {
- let bestMatch: Time | null = null;
- let bestScore = 0;
- let secondBestScore = 0;
-
- for (const searchTitle of searchTitles) {
- if (searchTitle.includes("OVA") || searchTitle.includes("Special"))
- continue;
-
- const normalizedSearchTitle = preprocessTitle(searchTitle);
-
- for (const candidateEntry of dayIndex.entries) {
- const score = stringSimilarity.compareTwoStrings(
- normalizedSearchTitle,
- candidateEntry.normalizedTitle,
- );
-
- if (score > bestScore) {
- secondBestScore = bestScore;
- bestScore = score;
- bestMatch = candidateEntry.time;
- } else if (score > secondBestScore) {
- secondBestScore = score;
- }
- }
- }
-
- if (bestScore < FALLBACK_MIN_SCORE) return null;
- if (bestScore - secondBestScore < FALLBACK_MIN_MARGIN) return null;
-
- return bestMatch;
-};
-
-const buildScheduleIndex = (subsPlease: SubsPlease): ScheduleIndex => {
- const byDay = new Map<string, DayScheduleIndex>();
- const versionParts: string[] = [];
-
- for (const [day, value] of Object.entries(subsPlease.schedule)) {
- const flattenedValue = Array.isArray(value) ? value.flat() : [];
-
- versionParts.push(day);
-
- const dayIndex: DayScheduleIndex = {
- entries: [],
- exactTitleIndex: new Map<string, number[]>(),
- tokenIndex: new Map<string, number[]>(),
- };
-
- for (const scheduleTime of flattenedValue) {
- const time = {
- title: scheduleTime.title,
- time: scheduleTime.time,
- day,
- };
-
- versionParts.push(`${day}\u001f${time.title}\u001f${time.time}`);
-
- const normalizedTitle = preprocessTitle(time.title);
- const tokens = normalizedTitle.split(" ").filter(isMeaningfulToken);
- const entryIndex = dayIndex.entries.length;
-
- dayIndex.entries.push({
- time,
- normalizedTitle,
- tokens,
- });
- indexPush(dayIndex.exactTitleIndex, normalizedTitle, entryIndex);
-
- for (const token of tokens)
- indexPush(dayIndex.tokenIndex, token, entryIndex);
- }
-
- byDay.set(day, dayIndex);
- }
-
- return {
- byDay,
- version: hashString(`${subsPlease.tz}\u001e${versionParts.join("\u001d")}`),
- };
-};
-
-export const findClosestMatch = (
- scheduleIndex: ScheduleIndex,
- anime: Media,
-): Time | null => {
- if (excludeMatch.includes(anime.id)) {
- setBoundedCacheValue(
- closestMatchCache,
- `${anime.id}:excluded`,
- null,
- MAX_MATCH_CACHE_ENTRIES,
- );
-
- return null;
- }
-
- const airingDay = airingDayOf(anime.nextAiringEpisode?.airingAt);
- const cacheKey = `${anime.id}:${anime.nextAiringEpisode?.airingAt || 0}:${animeTitleFingerprint(
- anime,
- )}:${airingDay}:${localTimeZone()}:${scheduleIndex.version}`;
- const cached = closestMatchCache.get(cacheKey);
-
- if (cached !== undefined) return cached;
- const dayIndex = scheduleIndex.byDay.get(airingDay);
-
- if (!dayIndex || dayIndex.entries.length === 0) {
- setBoundedCacheValue(
- closestMatchCache,
- cacheKey,
- null,
- MAX_MATCH_CACHE_ENTRIES,
- );
-
- return null;
- }
-
- let bestMatch: Time | null = null;
- let bestScore = 0;
- let secondBestScore = 0;
- let bestTokenOverlap = 0;
- let bestNumericTokenOverlap = 0;
- const searchTitles = [
- anime.title.romaji,
- anime.title.english,
- ...anime.synonyms,
- ].filter(Boolean);
-
- for (const searchTitle of searchTitles) {
- if (searchTitle.includes("OVA") || searchTitle.includes("Special"))
- continue;
-
- const normalizedSearchTitle = preprocessTitle(searchTitle);
- const exactMatchIndexes = dayIndex.exactTitleIndex.get(
- normalizedSearchTitle,
- );
-
- if (exactMatchIndexes && exactMatchIndexes.length > 0) {
- const exactMatch = dayIndex.entries[exactMatchIndexes[0]];
-
- if (exactMatch) {
- setBoundedCacheValue(
- closestMatchCache,
- cacheKey,
- exactMatch.time,
- MAX_MATCH_CACHE_ENTRIES,
- );
-
- return exactMatch.time;
- }
- }
-
- const searchTokens = normalizedSearchTitle
- .split(" ")
- .filter(isMeaningfulToken);
- const candidateIndexSet = new Set<number>();
-
- for (const token of searchTokens) {
- for (const candidateIndex of dayIndex.tokenIndex.get(token) || [])
- candidateIndexSet.add(candidateIndex);
- }
-
- const candidateIndexes =
- candidateIndexSet.size > 0
- ? [...candidateIndexSet]
- : dayIndex.entries.map((_, entryIndex) => entryIndex);
-
- for (const candidateIndex of candidateIndexes) {
- const candidateEntry = dayIndex.entries[candidateIndex];
- const similarity = calculateWeightedSimilarity(
- normalizedSearchTitle,
- candidateEntry.normalizedTitle,
- );
-
- if (similarity.score > bestScore) {
- secondBestScore = bestScore;
- bestScore = similarity.score;
- bestTokenOverlap = similarity.tokenOverlap;
- bestNumericTokenOverlap = similarity.numericTokenOverlap;
- bestMatch = candidateEntry.time;
- } else if (similarity.score > secondBestScore) {
- secondBestScore = similarity.score;
- }
- }
- }
-
- if (bestScore < MIN_MATCH_SCORE) {
- const fallbackMatch = fallbackClosestMatch(dayIndex, searchTitles);
-
- setBoundedCacheValue(
- closestMatchCache,
- cacheKey,
- fallbackMatch,
- MAX_MATCH_CACHE_ENTRIES,
- );
-
- return fallbackMatch;
- }
-
- if (bestScore - secondBestScore < MIN_MATCH_MARGIN) {
- const fallbackMatch = fallbackClosestMatch(dayIndex, searchTitles);
-
- setBoundedCacheValue(
- closestMatchCache,
- cacheKey,
- fallbackMatch,
- MAX_MATCH_CACHE_ENTRIES,
- );
-
- return fallbackMatch;
- }
-
- if (bestNumericTokenOverlap === 0 && bestTokenOverlap < MIN_TOKEN_OVERLAP) {
- const fallbackMatch = fallbackClosestMatch(dayIndex, searchTitles);
-
- setBoundedCacheValue(
- closestMatchCache,
- cacheKey,
- fallbackMatch,
- MAX_MATCH_CACHE_ENTRIES,
- );
-
- return fallbackMatch;
- }
-
- setBoundedCacheValue(
- closestMatchCache,
- cacheKey,
- bestMatch,
- MAX_MATCH_CACHE_ENTRIES,
- );
-
- return bestMatch;
-};
-
-const normalizeTitle = (title: string | null) =>
- (title || "")
- .toLowerCase()
- .replace(/\b(s|season|part|cour)\s*\d+/g, "")
- .replace(/[\W_]+/g, " ")
- .trim();
-
-const findClosestMediaCache = new Map<string, Media | null>();
-
-export const findClosestMedia = (media: Media[], matchFor: string) => {
- if (!matchFor) return null;
-
- const cached = findClosestMediaCache.get(matchFor);
-
- if (cached !== undefined) return cached;
-
- const normalisedMatchFor = normalizeTitle(matchFor);
- const matchForWords = normalisedMatchFor.split(" ");
- let bestFitMedia: Media | null = null;
- let bestDistance = -Infinity;
-
- for (const m of media) {
- const titles = [m.title.romaji, m.title.english, ...m.synonyms].filter(
- Boolean,
- );
-
- if (
- titles.some(
- (title) =>
- title.toLowerCase().includes("special") ||
- title.toLowerCase().includes("ova"),
- )
- )
- continue;
-
- const normalisedTitles = titles.map(normalizeTitle);
-
- for (const normalisedTitle of normalisedTitles) {
- const distance = stringSimilarity.compareTwoStrings(
- normalisedMatchFor,
- normalisedTitle,
- );
-
- if (distance <= bestDistance) continue;
-
- const wordMatch =
- matchForWords.every((word) =>
- normalisedTitles.some((t) => t.includes(word)),
- ) || normalisedTitles.some((t) => t.includes(normalisedMatchFor));
-
- if (wordMatch) {
- bestDistance = distance;
- bestFitMedia = m;
-
- if (distance === 1) break;
- }
- }
-
- if (bestDistance === 1) break;
- }
-
- findClosestMediaCache.set(matchFor, bestFitMedia);
-
- return bestFitMedia as Media | null;
-};
-
-export const clearClosestMediaCache = () => findClosestMediaCache.clear();
-
-const getScheduleIndex = (subsPlease: SubsPlease): ScheduleIndex => {
- const cached = scheduleIndexCache.get(subsPlease);
-
- if (cached) return cached;
-
- const built = buildScheduleIndex(subsPlease);
-
- scheduleIndexCache.set(subsPlease, built);
-
- return built;
-};
-
-const buildInjectAiringTimeCacheKey = (
- anime: Media,
- scheduleVersion: string,
- displayNativeCountdown: boolean,
-) =>
- [
- anime.id,
- anime.status,
- anime.mediaListEntry?.status || "",
- anime.mediaListEntry?.progress || 0,
- anime.mediaListEntry?.updatedAt || 0,
- anime.nextAiringEpisode?.episode || 0,
- anime.nextAiringEpisode?.airingAt || 0,
- displayNativeCountdown ? 1 : 0,
- scheduleVersion,
- animeTitleFingerprint(anime),
- ].join(":");
-
-const cloneInjectedMedia = (media: Media): Media =>
- ({
- ...media,
- mediaListEntry: media.mediaListEntry
- ? {
- ...media.mediaListEntry,
- startedAt: { ...media.mediaListEntry.startedAt },
- completedAt: { ...media.mediaListEntry.completedAt },
- customLists: { ...media.mediaListEntry.customLists },
- }
- : undefined,
- nextAiringEpisode: media.nextAiringEpisode
- ? { ...media.nextAiringEpisode }
- : undefined,
- }) as Media;
-
-export const injectAiringTime = (
- anime: Media,
- subsPlease: SubsPlease | null,
-) => {
- if (season() !== anime.season) return anime;
-
- const displayNativeCountdown = get(settings).displayNativeCountdown;
- const scheduleVersion = subsPlease
- ? getScheduleIndex(subsPlease).version
- : "native-only";
- const cacheKey = buildInjectAiringTimeCacheKey(
- anime,
- scheduleVersion,
- displayNativeCountdown,
- );
- const cached = injectAiringTimeCache.get(cacheKey);
-
- if (cached) return cloneInjectedMedia(cached);
-
- const airingAt = anime.nextAiringEpisode?.airingAt;
- const now = new Date();
- // const nativeUntilAiring = airingAt
- // ? Math.round((airingAt - Date.now() / 1000) * 100) / 100
- // : undefined;
- const nativeTime = new Date(airingAt ? airingAt * 1000 : 0);
- let untilAiring: number | undefined;
- let time = new Date(airingAt ? airingAt * 1000 : 0);
- let nextEpisode = anime.nextAiringEpisode?.episode || 0;
- let nativeEpisode = nextEpisode;
-
- if (!(displayNativeCountdown || !subsPlease)) {
- const scheduleIndex = getScheduleIndex(subsPlease);
-
- if ((anime.nextAiringEpisode?.episode || 0) > 1) {
- const foundTime: Time | null = findClosestMatch(scheduleIndex, anime);
-
- if (foundTime) {
- untilAiring = secondsUntil(
- (foundTime as Time).time,
- (foundTime as Time).day,
- );
- time = new Date(Date.now() + untilAiring * 1000);
- }
- }
- }
-
- const SEVEN_DAYS = 7 * 24 * 60 * 60 * 1000;
-
- const nowEpochSeconds = Date.now() / 1000;
- const nativeAheadSeconds =
- nativeTime.getTime() / 1000 - time.getTime() / 1000;
-
- if (
- nativeAheadSeconds > 0 &&
- nativeAheadSeconds <= MAX_EPISODE_SHIFT_WINDOW_SECONDS &&
- nativeTime.getTime() / 1000 > nowEpochSeconds + STALE_AIRING_GRACE_SECONDS
- ) {
- nextEpisode -= 1;
- nativeEpisode = nextEpisode;
- }
-
- if (nativeTime.getTime() - now.getTime() > SEVEN_DAYS) {
- const beforeTime = time;
-
- time = nativeTime;
-
- time.setHours(beforeTime.getHours());
- time.setMinutes(beforeTime.getMinutes());
- }
-
- const injected = {
- ...anime,
- nextAiringEpisode: {
- episode: nextEpisode,
- airingAt: time.getTime() / 1000,
- nativeAiringAt: nativeTime.getTime() / 1000,
- nativeEpisode,
- },
- } as Media;
-
- const cachedValue = cloneInjectedMedia(injected);
-
- setBoundedCacheValue(
- injectAiringTimeCache,
- cacheKey,
- cachedValue,
- MAX_INJECT_CACHE_ENTRIES,
- );
-
- return cloneInjectedMedia(cachedValue);
-};
-
-export const clearInjectAiringTimeCache = () => injectAiringTimeCache.clear();