diff options
Diffstat (limited to 'src/lib/Media/Anime/Airing/Subtitled/match.ts')
| -rw-r--r-- | src/lib/Media/Anime/Airing/Subtitled/match.ts | 652 |
1 files changed, 0 insertions, 652 deletions
/**
 * Matching between AniList media and the SubsPlease release schedule
 * (originally `src/lib/Media/Anime/Airing/Subtitled/match.ts`).
 *
 * The module exposes:
 *  - `findClosestMatch`  – schedule entry that best matches a media item,
 *  - `findClosestMedia`  – media item that best matches a free-form title,
 *  - `injectAiringTime`  – media copy whose `nextAiringEpisode` reflects the
 *                          matched schedule time,
 *  - cache-clearing helpers for the memoised lookups.
 */
import stringSimilarity from "string-similarity";
import { get } from "svelte/store";
import excludeMatch from "$lib/Data/Static/matchExclude.json";
import type { SubsPlease } from "$lib/Media/Anime/Airing/Subtitled/subsPlease";
import settings from "$stores/settings";
import type { Media } from "../../../../Data/AniList/media";
import { season } from "../../season";

/** A single schedule slot: release title, `HH:MM` time string and weekday name. */
export interface Time {
  title: string;
  time: string;
  day: string;
}

/** A schedule slot together with its pre-computed normalised title and tokens. */
interface IndexedTime {
  time: Time;
  normalizedTitle: string;
  tokens: string[];
}

/** Per-weekday lookup structures; the index maps hold positions into `entries`. */
interface DayScheduleIndex {
  entries: IndexedTime[];
  exactTitleIndex: Map<string, number[]>;
  tokenIndex: Map<string, number[]>;
}

/** Whole-schedule index plus a content hash used as part of cache keys. */
interface ScheduleIndex {
  byDay: Map<string, DayScheduleIndex>;
  version: string;
}

/** Weekday names in `Date.prototype.getDay()` order (0 = Sunday). */
const WEEKDAYS = [
  "Sunday",
  "Monday",
  "Tuesday",
  "Wednesday",
  "Thursday",
  "Friday",
  "Saturday",
];

const SECONDS_PER_WEEK = 7 * 24 * 60 * 60;

/**
 * Seconds from now until the next occurrence of `targetDay` at `targetTime`,
 * evaluated in the runtime's local time zone.
 *
 * @param targetTime - `"HH:MM"` string.
 * @param targetDay - English weekday name (`"Sunday"` … `"Saturday"`).
 * @returns A strictly positive number of seconds; a slot that has already
 *          passed this week rolls over to next week.
 */
const secondsUntil = (targetTime: string, targetDay: string) => {
  const now = new Date();
  const [targetHour, targetMinute] = targetTime.split(":").map(Number);
  let dayDifference = WEEKDAYS.indexOf(targetDay) - now.getDay();

  if (dayDifference < 0) dayDifference += 7;

  const targetDate = new Date(now);

  targetDate.setDate(now.getDate() + dayDifference);
  targetDate.setHours(targetHour, targetMinute, 0, 0);

  const secondsDifference = (Number(targetDate) - Number(now)) / 1000;

  return secondsDifference > 0
    ? secondsDifference
    : secondsDifference + SECONDS_PER_WEEK;
};

/**
 * Normalises a title for schedule matching: lower-cases it, turns
 * `season2`/`s2`/`part2`/`cour2` into the bare number, drops the bare words
 * `season`/`s`/`part`/`cour`, strips everything except `a-z`, `0-9` and
 * whitespace, and collapses runs of whitespace to single spaces.
 */
const preprocessTitle = (title: string): string => {
  return title
    .toLowerCase()
    .replace(/\b(season|s|part|cour)(\d+)\b/g, " $2 ")
    .replace(/\b(season|s|part|cour)\b/g, " ")
    .replace(/[^a-z0-9\s]/gi, "")
    .trim()
    .split(/\s+/)
    .join(" ");
};

/** Short connective words that are ignored when tokenising titles. */
const NON_DISTINCTIVE_TOKENS = new Set([
  "a",
  "and",
  "de",
  "e",
  "for",
  "ga",
  "in",
  "na",
  "ni",
  "no",
  "o",
  "of",
  "on",
  "the",
  "to",
  "wa",
  "wo",
]);

const NUMERIC_TOKEN = /^\d+$/;

/**
 * A token counts when it is purely numeric, or when it is at least three
 * characters long and not listed in `NON_DISTINCTIVE_TOKENS`.
 */
const isMeaningfulToken = (token: string): boolean =>
  NUMERIC_TOKEN.test(token) ||
  (token.length >= 3 && !NON_DISTINCTIVE_TOKENS.has(token));

// Thresholds for the weighted (token-based) matcher.
const MIN_MATCH_SCORE = 0.3;
const MIN_TOKEN_OVERLAP = 2;
const MIN_MATCH_MARGIN = 0.08;
// Thresholds for the plain string-similarity fallback matcher.
const FALLBACK_MIN_SCORE = 0.82;
const FALLBACK_MIN_MARGIN = 0.08;
// Cache size limits; a cache is emptied entirely once it reaches its limit.
const MAX_MATCH_CACHE_ENTRIES = 10_000;
const MAX_INJECT_CACHE_ENTRIES = 10_000;
// Windows used by `injectAiringTime` when deciding to step the episode back.
const STALE_AIRING_GRACE_SECONDS = 5 * 60;
const MAX_EPISODE_SHIFT_WINDOW_SECONDS = 8 * 24 * 60 * 60;

/** Result of comparing two normalised titles. */
interface SimilarityAnalysis {
  score: number;
  tokenOverlap: number;
  numericTokenOverlap: number;
}

/**
 * Scores two already-normalised titles.
 *
 * 70% of the score comes from shared meaningful tokens (numeric tokens weigh
 * double) and 30% from `string-similarity`. When both titles carry numeric
 * tokens but share none of them, the score is halved.
 *
 * @returns Zeroed analysis when either title has no meaningful tokens.
 */
const calculateWeightedSimilarity = (
  title1: string,
  title2: string,
): SimilarityAnalysis => {
  const tokens1 = title1.split(" ").filter(isMeaningfulToken);
  const tokens2 = title2.split(" ").filter(isMeaningfulToken);

  if (tokens1.length === 0 || tokens2.length === 0)
    return {
      score: 0,
      tokenOverlap: 0,
      numericTokenOverlap: 0,
    };

  const set2 = new Set(tokens2);
  let weightedOverlap = 0;
  let tokenOverlap = 0;
  let numericTokenOverlap = 0;

  for (const token of tokens1) {
    if (!set2.has(token)) continue;

    tokenOverlap += 1;

    if (NUMERIC_TOKEN.test(token)) {
      numericTokenOverlap += 1;
      weightedOverlap += 2;
    } else {
      weightedOverlap += 1;
    }
  }

  const longestTokenCount = Math.max(tokens1.length, tokens2.length) || 1;
  let finalScore =
    (weightedOverlap / (longestTokenCount * 2)) * 0.7 +
    stringSimilarity.compareTwoStrings(title1, title2) * 0.3;

  const bothHaveNumbers =
    tokens1.some((token) => NUMERIC_TOKEN.test(token)) &&
    tokens2.some((token) => NUMERIC_TOKEN.test(token));

  if (bothHaveNumbers && numericTokenOverlap === 0) finalScore *= 0.5;

  return {
    score: finalScore,
    tokenOverlap,
    numericTokenOverlap,
  };
};

/** Appends `entryIndex` to the list stored under `key`, creating it if needed. */
const indexPush = (
  index: Map<string, number[]>,
  key: string,
  entryIndex: number,
) => {
  const existing = index.get(key);

  if (existing) existing.push(entryIndex);
  else index.set(key, [entryIndex]);
};

const scheduleIndexCache = new WeakMap<SubsPlease, ScheduleIndex>();
const closestMatchCache = new Map<string, Time | null>();
const injectAiringTimeCache = new Map<string, Media>();

/** 32-bit FNV-1a hash of `input`, rendered in base 36. */
const hashString = (input: string): string => {
  let hash = 2166136261;

  for (let index = 0; index < input.length; index += 1) {
    hash ^= input.charCodeAt(index);
    hash = Math.imul(hash, 16777619);
  }

  return (hash >>> 0).toString(36);
};

/**
 * Stores `value` under `key`. When the cache already holds `maxEntries`
 * items it is cleared first, so the limit is never exceeded.
 */
const setBoundedCacheValue = <T>(
  cache: Map<string, T>,
  key: string,
  value: T,
  maxEntries: number,
) => {
  if (cache.size >= maxEntries) cache.clear();

  cache.set(key, value);
};

/** Joins the normalised romaji/english/synonym titles into one cache-key part. */
const animeTitleFingerprint = (anime: Media) =>
  [anime.title.romaji, anime.title.english, ...anime.synonyms]
    .filter(Boolean)
    .map(preprocessTitle)
    .join("|");

/** Resolved IANA time zone of the runtime, or `"local"` when unavailable. */
const localTimeZone = () =>
  Intl.DateTimeFormat().resolvedOptions().timeZone || "local";

/** English weekday name (local time) of a Unix timestamp given in seconds. */
const airingDayOf = (airingAt: number | undefined) =>
  new Date((airingAt || 0) * 1000).toLocaleString("en-US", { weekday: "long" });

/** Best and runner-up scores over a set of distinct candidates. */
interface CandidateRanking {
  bestIndex: number;
  bestScore: number;
  secondBestScore: number;
}

/**
 * Ranks candidates keyed by schedule-entry index.
 *
 * Because every key is a distinct schedule entry, `secondBestScore` always
 * belongs to a *different* entry than `bestScore`. `bestIndex` is `-1` when
 * no candidate scores above zero.
 */
const rankCandidates = <T>(
  candidates: Map<number, T>,
  scoreOf: (candidate: T) => number,
): CandidateRanking => {
  let bestIndex = -1;
  let bestScore = 0;
  let secondBestScore = 0;

  for (const [candidateIndex, candidate] of candidates) {
    const score = scoreOf(candidate);

    if (score > bestScore) {
      secondBestScore = bestScore;
      bestScore = score;
      bestIndex = candidateIndex;
    } else if (score > secondBestScore) {
      secondBestScore = score;
    }
  }

  return { bestIndex, bestScore, secondBestScore };
};

/** True for titles containing the (case-sensitive) markers `OVA` or `Special`. */
const isSideStoryTitle = (title: string): boolean =>
  title.includes("OVA") || title.includes("Special");

/**
 * Fallback matcher based purely on `string-similarity` over every entry of
 * the day.
 *
 * Each schedule entry keeps the best score it achieved across all search
 * titles; the margin is then measured between the two best *distinct*
 * entries, so an entry that matches several synonyms does not compete with
 * itself.
 *
 * @returns The winning slot, or `null` when the best score is below
 *          `FALLBACK_MIN_SCORE` or not clearly ahead of the runner-up.
 */
const fallbackClosestMatch = (
  dayIndex: DayScheduleIndex,
  searchTitles: string[],
): Time | null => {
  const bestScoreByEntry = new Map<number, number>();

  for (const searchTitle of searchTitles) {
    if (isSideStoryTitle(searchTitle)) continue;

    const normalizedSearchTitle = preprocessTitle(searchTitle);

    dayIndex.entries.forEach((candidateEntry, entryIndex) => {
      const score = stringSimilarity.compareTwoStrings(
        normalizedSearchTitle,
        candidateEntry.normalizedTitle,
      );
      const previous = bestScoreByEntry.get(entryIndex);

      if (previous === undefined || score > previous)
        bestScoreByEntry.set(entryIndex, score);
    });
  }

  const { bestIndex, bestScore, secondBestScore } = rankCandidates(
    bestScoreByEntry,
    (score) => score,
  );

  if (bestScore < FALLBACK_MIN_SCORE) return null;
  if (bestScore - secondBestScore < FALLBACK_MIN_MARGIN) return null;

  return dayIndex.entries[bestIndex]?.time ?? null;
};

/**
 * Builds the per-day lookup structures for a schedule.
 *
 * Every value of `subsPlease.schedule` that is an array is flattened one
 * level; non-array values contribute an empty day. The `version` is a hash
 * of `subsPlease.tz` plus every day/title/time triple.
 */
const buildScheduleIndex = (subsPlease: SubsPlease): ScheduleIndex => {
  const byDay = new Map<string, DayScheduleIndex>();
  const versionParts: string[] = [];

  for (const [day, value] of Object.entries(subsPlease.schedule)) {
    const flattenedValue = Array.isArray(value) ? value.flat() : [];

    versionParts.push(day);

    const dayIndex: DayScheduleIndex = {
      entries: [],
      exactTitleIndex: new Map<string, number[]>(),
      tokenIndex: new Map<string, number[]>(),
    };

    for (const scheduleTime of flattenedValue) {
      const time = {
        title: scheduleTime.title,
        time: scheduleTime.time,
        day,
      };

      versionParts.push(`${day}\u001f${time.title}\u001f${time.time}`);

      const normalizedTitle = preprocessTitle(time.title);
      const tokens = normalizedTitle.split(" ").filter(isMeaningfulToken);
      const entryIndex = dayIndex.entries.length;

      dayIndex.entries.push({
        time,
        normalizedTitle,
        tokens,
      });
      indexPush(dayIndex.exactTitleIndex, normalizedTitle, entryIndex);

      for (const token of tokens)
        indexPush(dayIndex.tokenIndex, token, entryIndex);
    }

    byDay.set(day, dayIndex);
  }

  return {
    byDay,
    version: hashString(`${subsPlease.tz}\u001e${versionParts.join("\u001d")}`),
  };
};

/**
 * Finds the schedule slot that best matches `anime` on the weekday of its
 * next airing.
 *
 * Resolution order:
 *  1. media listed in `matchExclude.json` never match;
 *  2. a normalised title equal to a schedule title wins immediately;
 *  3. otherwise candidates sharing at least one meaningful token (or every
 *     entry of the day when none do) are scored with
 *     `calculateWeightedSimilarity`;
 *  4. when the weighted winner is too weak, too close to the runner-up, or
 *     shares too few tokens, `fallbackClosestMatch` decides instead.
 *
 * Each schedule entry keeps its best score across all search titles, so the
 * margin check compares two different entries rather than the same entry
 * reached through two synonyms.
 *
 * Results (including `null`) are memoised per media/airing/schedule version.
 */
export const findClosestMatch = (
  scheduleIndex: ScheduleIndex,
  anime: Media,
): Time | null => {
  // Excluded media are answered directly; nothing ever reads a cached entry
  // for them, so none is written.
  if (excludeMatch.includes(anime.id)) return null;

  const airingDay = airingDayOf(anime.nextAiringEpisode?.airingAt);
  const cacheKey = `${anime.id}:${anime.nextAiringEpisode?.airingAt || 0}:${animeTitleFingerprint(
    anime,
  )}:${airingDay}:${localTimeZone()}:${scheduleIndex.version}`;
  const cached = closestMatchCache.get(cacheKey);

  if (cached !== undefined) return cached;

  /** Memoises `match` under the current key and hands it back. */
  const remember = (match: Time | null): Time | null => {
    setBoundedCacheValue(
      closestMatchCache,
      cacheKey,
      match,
      MAX_MATCH_CACHE_ENTRIES,
    );

    return match;
  };

  const dayIndex = scheduleIndex.byDay.get(airingDay);

  if (!dayIndex || dayIndex.entries.length === 0) return remember(null);

  const searchTitles = [
    anime.title.romaji,
    anime.title.english,
    ...anime.synonyms,
  ].filter(Boolean);
  const bestAnalysisByEntry = new Map<number, SimilarityAnalysis>();

  for (const searchTitle of searchTitles) {
    if (isSideStoryTitle(searchTitle)) continue;

    const normalizedSearchTitle = preprocessTitle(searchTitle);
    const exactIndex = dayIndex.exactTitleIndex.get(normalizedSearchTitle)?.[0];
    const exactEntry =
      exactIndex === undefined ? undefined : dayIndex.entries[exactIndex];

    if (exactEntry) return remember(exactEntry.time);

    const candidateIndexSet = new Set<number>();

    for (const token of normalizedSearchTitle
      .split(" ")
      .filter(isMeaningfulToken)) {
      for (const candidateIndex of dayIndex.tokenIndex.get(token) || [])
        candidateIndexSet.add(candidateIndex);
    }

    // No shared token at all: score against every entry of the day.
    const candidateIndexes =
      candidateIndexSet.size > 0
        ? [...candidateIndexSet]
        : dayIndex.entries.map((_, entryIndex) => entryIndex);

    for (const candidateIndex of candidateIndexes) {
      const candidateEntry = dayIndex.entries[candidateIndex];

      if (!candidateEntry) continue;

      const similarity = calculateWeightedSimilarity(
        normalizedSearchTitle,
        candidateEntry.normalizedTitle,
      );
      const previous = bestAnalysisByEntry.get(candidateIndex);

      if (!previous || similarity.score > previous.score)
        bestAnalysisByEntry.set(candidateIndex, similarity);
    }
  }

  const { bestIndex, bestScore, secondBestScore } = rankCandidates(
    bestAnalysisByEntry,
    (analysis) => analysis.score,
  );
  const bestAnalysis = bestAnalysisByEntry.get(bestIndex);
  const bestTokenOverlap = bestAnalysis ? bestAnalysis.tokenOverlap : 0;
  const bestNumericTokenOverlap = bestAnalysis
    ? bestAnalysis.numericTokenOverlap
    : 0;

  const isConfident =
    bestScore >= MIN_MATCH_SCORE &&
    bestScore - secondBestScore >= MIN_MATCH_MARGIN &&
    (bestNumericTokenOverlap > 0 || bestTokenOverlap >= MIN_TOKEN_OVERLAP);

  if (!isConfident) return remember(fallbackClosestMatch(dayIndex, searchTitles));

  return remember(dayIndex.entries[bestIndex]?.time ?? null);
};

/**
 * Normalises a title for media lookup: lower-cases it, removes
 * `s`/`season`/`part`/`cour` followed by a number, and replaces runs of
 * non-word characters and underscores with single spaces.
 */
const normalizeTitle = (title: string | null) =>
  (title || "")
    .toLowerCase()
    .replace(/\b(s|season|part|cour)\s*\d+/g, "")
    .replace(/[\W_]+/g, " ")
    .trim();

const findClosestMediaCache = new Map<string, Media | null>();

/**
 * Finds the media item whose titles are most similar to `matchFor`.
 *
 * Media with any title containing "special" or "ova" (case-insensitive
 * substring) are skipped. A title only qualifies when every word of the
 * query occurs in one of the media's normalised titles, or the whole query
 * is a substring of one of them.
 *
 * Results are memoised by `matchFor` alone; call `clearClosestMediaCache`
 * when the media list changes.
 *
 * @returns The best-fitting media, or `null` when nothing qualifies or
 *          `matchFor` is empty.
 */
export const findClosestMedia = (media: Media[], matchFor: string) => {
  if (!matchFor) return null;

  const cached = findClosestMediaCache.get(matchFor);

  if (cached !== undefined) return cached;

  const normalisedMatchFor = normalizeTitle(matchFor);
  const matchForWords = normalisedMatchFor.split(" ");
  let bestFitMedia: Media | null = null;
  let bestDistance = -Infinity;

  for (const m of media) {
    const titles = [m.title.romaji, m.title.english, ...m.synonyms].filter(
      Boolean,
    );

    if (
      titles.some(
        (title) =>
          title.toLowerCase().includes("special") ||
          title.toLowerCase().includes("ova"),
      )
    )
      continue;

    const normalisedTitles = titles.map(normalizeTitle);

    for (const normalisedTitle of normalisedTitles) {
      const distance = stringSimilarity.compareTwoStrings(
        normalisedMatchFor,
        normalisedTitle,
      );

      if (distance <= bestDistance) continue;

      const wordMatch =
        matchForWords.every((word) =>
          normalisedTitles.some((t) => t.includes(word)),
        ) || normalisedTitles.some((t) => t.includes(normalisedMatchFor));

      if (wordMatch) {
        bestDistance = distance;
        bestFitMedia = m;

        // A perfect score cannot be beaten; stop scanning this media.
        if (distance === 1) break;
      }
    }

    if (bestDistance === 1) break;
  }

  findClosestMediaCache.set(matchFor, bestFitMedia);

  return bestFitMedia as Media | null;
};

/** Empties the memo used by `findClosestMedia`. */
export const clearClosestMediaCache = () => findClosestMediaCache.clear();

/** Returns the index for `subsPlease`, building and caching it on first use. */
const getScheduleIndex = (subsPlease: SubsPlease): ScheduleIndex => {
  const cached = scheduleIndexCache.get(subsPlease);

  if (cached) return cached;

  const built = buildScheduleIndex(subsPlease);

  scheduleIndexCache.set(subsPlease, built);

  return built;
};

/**
 * Cache key for `injectAiringTime`, covering every media field, setting and
 * schedule version that the result is derived from.
 */
const buildInjectAiringTimeCacheKey = (
  anime: Media,
  scheduleVersion: string,
  displayNativeCountdown: boolean,
) =>
  [
    anime.id,
    anime.status,
    anime.mediaListEntry?.status || "",
    anime.mediaListEntry?.progress || 0,
    anime.mediaListEntry?.updatedAt || 0,
    anime.nextAiringEpisode?.episode || 0,
    anime.nextAiringEpisode?.airingAt || 0,
    displayNativeCountdown ? 1 : 0,
    scheduleVersion,
    animeTitleFingerprint(anime),
  ].join(":");

/**
 * Copies a media object along with the nested objects that callers might
 * mutate (`mediaListEntry` and its date/custom-list fields,
 * `nextAiringEpisode`), so cached values are never shared by reference.
 */
const cloneInjectedMedia = (media: Media): Media =>
  ({
    ...media,
    mediaListEntry: media.mediaListEntry
      ? {
          ...media.mediaListEntry,
          startedAt: { ...media.mediaListEntry.startedAt },
          completedAt: { ...media.mediaListEntry.completedAt },
          customLists: { ...media.mediaListEntry.customLists },
        }
      : undefined,
    nextAiringEpisode: media.nextAiringEpisode
      ? { ...media.nextAiringEpisode }
      : undefined,
  }) as Media;

/**
 * Returns a copy of `anime` whose `nextAiringEpisode` carries the schedule
 * time (`airingAt`) next to the untouched AniList time (`nativeAiringAt`).
 *
 * Media outside the current season are returned unchanged. The schedule is
 * consulted only when a schedule is supplied, the `displayNativeCountdown`
 * setting is off, and the next episode number is greater than 1.
 *
 * @param anime - Media item to enrich.
 * @param subsPlease - Schedule to match against, or `null` for native only.
 */
export const injectAiringTime = (
  anime: Media,
  subsPlease: SubsPlease | null,
) => {
  if (season() !== anime.season) return anime;

  const displayNativeCountdown = get(settings).displayNativeCountdown;
  const scheduleIndex = subsPlease ? getScheduleIndex(subsPlease) : null;
  const cacheKey = buildInjectAiringTimeCacheKey(
    anime,
    scheduleIndex ? scheduleIndex.version : "native-only",
    displayNativeCountdown,
  );
  const cached = injectAiringTimeCache.get(cacheKey);

  if (cached) return cloneInjectedMedia(cached);

  const airingAt = anime.nextAiringEpisode?.airingAt;
  const now = new Date();
  const nativeTime = new Date(airingAt ? airingAt * 1000 : 0);
  let time = new Date(nativeTime.getTime());
  let nextEpisode = anime.nextAiringEpisode?.episode || 0;

  if (!displayNativeCountdown && scheduleIndex && nextEpisode > 1) {
    const foundTime = findClosestMatch(scheduleIndex, anime);

    if (foundTime) {
      const untilAiring = secondsUntil(foundTime.time, foundTime.day);

      time = new Date(Date.now() + untilAiring * 1000);
    }
  }

  const SEVEN_DAYS = 7 * 24 * 60 * 60 * 1000;

  const nowEpochSeconds = Date.now() / 1000;
  const nativeAheadSeconds =
    nativeTime.getTime() / 1000 - time.getTime() / 1000;

  // The native airing lies after the schedule slot (within the shift window)
  // and is still in the future: report the previous episode number.
  if (
    nativeAheadSeconds > 0 &&
    nativeAheadSeconds <= MAX_EPISODE_SHIFT_WINDOW_SECONDS &&
    nativeTime.getTime() / 1000 > nowEpochSeconds + STALE_AIRING_GRACE_SECONDS
  ) {
    nextEpisode -= 1;
  }

  if (nativeTime.getTime() - now.getTime() > SEVEN_DAYS) {
    // Native airing is more than a week out: keep its date but use the
    // schedule's hour and minute. Work on a copy so `nativeTime` (reported
    // below as `nativeAiringAt`) is not modified by the setters.
    const adjusted = new Date(nativeTime.getTime());

    adjusted.setHours(time.getHours());
    adjusted.setMinutes(time.getMinutes());

    time = adjusted;
  }

  const injected = {
    ...anime,
    nextAiringEpisode: {
      episode: nextEpisode,
      airingAt: time.getTime() / 1000,
      nativeAiringAt: nativeTime.getTime() / 1000,
      nativeEpisode: nextEpisode,
    },
  } as Media;

  const cachedValue = cloneInjectedMedia(injected);

  setBoundedCacheValue(
    injectAiringTimeCache,
    cacheKey,
    cachedValue,
    MAX_INJECT_CACHE_ENTRIES,
  );

  return cloneInjectedMedia(cachedValue);
};

/** Empties the memo used by `injectAiringTime`. */
export const clearInjectAiringTimeCache = () => injectAiringTimeCache.clear();