diff options
| field | value | date |
|---|---|---|
| author | Fuwn <[email protected]> | 2026-03-01 13:11:08 -0800 |
| committer | Fuwn <[email protected]> | 2026-03-01 13:11:08 -0800 |
| commit | 78c019540c1aa8ba0f3f061e48da58cf8c4febd4 (patch) | |
| tree | 57c20417cf1f210f66827bbc35cd3a3ee73ba11a /src/lib | |
| parent | feat(match): skip ambiguous subtitle matches via score margin (diff) | |
| download | due.moe-78c019540c1aa8ba0f3f061e48da58cf8c4febd4.tar.xz due.moe-78c019540c1aa8ba0f3f061e48da58cf8c4febd4.zip | |
perf(match): pre-index subtitle schedule by day and token
Diffstat (limited to 'src/lib')
| -rw-r--r-- | src/lib/Media/Anime/Airing/Subtitled/match.ts | 108 |
1 files changed, 84 insertions, 24 deletions
```diff
diff --git a/src/lib/Media/Anime/Airing/Subtitled/match.ts b/src/lib/Media/Anime/Airing/Subtitled/match.ts
index 661d0d23..2281aca8 100644
--- a/src/lib/Media/Anime/Airing/Subtitled/match.ts
+++ b/src/lib/Media/Anime/Airing/Subtitled/match.ts
@@ -12,6 +12,22 @@ export interface Time {
 	day: string;
 }
 
+interface IndexedTime {
+	time: Time;
+	normalizedTitle: string;
+	tokens: string[];
+}
+
+interface DayScheduleIndex {
+	entries: IndexedTime[];
+	exactTitleIndex: Map<string, number[]>;
+	tokenIndex: Map<string, number[]>;
+}
+
+interface ScheduleIndex {
+	byDay: Map<string, DayScheduleIndex>;
+}
+
 const secondsUntil = (targetTime: string, targetDay: string) => {
 	const now = new Date();
 	const [targetHour, targetMinute] = targetTime.split(':').map(Number);
@@ -108,21 +124,61 @@ const calculateWeightedSimilarity = (title1: string, title2: string): Similarity
 	};
 };
 
-export const findClosestMatch = (times: Time[], anime: Media): Time | null => {
+const indexPush = (index: Map<string, number[]>, key: string, entryIndex: number) => {
+	const existing = index.get(key);
+
+	if (existing) existing.push(entryIndex);
+	else index.set(key, [entryIndex]);
+};
+
+const buildScheduleIndex = (subsPlease: SubsPlease): ScheduleIndex => {
+	const byDay = new Map<string, DayScheduleIndex>();
+
+	for (const [day, value] of Object.entries(subsPlease.schedule)) {
+		const flattenedValue = Array.isArray(value) ? value.flat() : [];
+		const dayIndex: DayScheduleIndex = {
+			entries: [],
+			exactTitleIndex: new Map<string, number[]>(),
+			tokenIndex: new Map<string, number[]>()
+		};
+
+		for (const scheduleTime of flattenedValue) {
+			const time = {
+				title: scheduleTime.title,
+				time: scheduleTime.time,
+				day
+			};
+			const normalizedTitle = preprocessTitle(time.title);
+			const tokens = normalizedTitle.split(' ').filter(isMeaningfulToken);
+			const entryIndex = dayIndex.entries.length;
+
+			dayIndex.entries.push({
+				time,
+				normalizedTitle,
+				tokens
+			});
+			indexPush(dayIndex.exactTitleIndex, normalizedTitle, entryIndex);
+
+			for (const token of tokens) indexPush(dayIndex.tokenIndex, token, entryIndex);
+		}
+
+		byDay.set(day, dayIndex);
+	}
+
+	return { byDay };
+};
+
+export const findClosestMatch = (scheduleIndex: ScheduleIndex, anime: Media): Time | null => {
 	if (excludeMatch.includes(anime.id)) return null;
 
 	const airingDay = new Date((anime.nextAiringEpisode?.airingAt || 0) * 1000).toLocaleString(
 		'en-US',
 		{ weekday: 'long' }
 	);
-	const dayTimes = times.filter((time) => time.day === airingDay);
+	const dayIndex = scheduleIndex.byDay.get(airingDay);
 
-	if (dayTimes.length === 0) return null;
+	if (!dayIndex || dayIndex.entries.length === 0) return null;
 
-	const preprocessedTimes = dayTimes.map((time) => ({
-		time,
-		normalized: preprocessTitle(time.title)
-	}));
 	let bestMatch: Time | null = null;
 	let bestScore = 0;
 	let secondBestScore = 0;
@@ -134,16 +190,32 @@ export const findClosestMatch = (times: Time[], anime: Media): Time | null => {
 		if (searchTitle.includes('OVA') || searchTitle.includes('Special')) continue;
 
 		const normalizedSearchTitle = preprocessTitle(searchTitle);
+		const searchTokens = normalizedSearchTitle.split(' ').filter(isMeaningfulToken);
+		const candidateIndexSet = new Set<number>();
+
+		for (const token of searchTokens) {
+			for (const candidateIndex of dayIndex.tokenIndex.get(token) || [])
+				candidateIndexSet.add(candidateIndex);
+		}
+
+		const candidateIndexes =
+			candidateIndexSet.size > 0
+				? [...candidateIndexSet]
+				: dayIndex.entries.map((_, entryIndex) => entryIndex);
 
-		for (const { time, normalized } of preprocessedTimes) {
-			const similarity = calculateWeightedSimilarity(normalizedSearchTitle, normalized);
+		for (const candidateIndex of candidateIndexes) {
+			const candidateEntry = dayIndex.entries[candidateIndex];
+			const similarity = calculateWeightedSimilarity(
+				normalizedSearchTitle,
+				candidateEntry.normalizedTitle
+			);
 
 			if (similarity.score > bestScore) {
 				secondBestScore = bestScore;
 				bestScore = similarity.score;
 				bestTokenOverlap = similarity.tokenOverlap;
 				bestNumericTokenOverlap = similarity.numericTokenOverlap;
-				bestMatch = time;
+				bestMatch = candidateEntry.time;
 			} else if (similarity.score > secondBestScore) {
 				secondBestScore = similarity.score;
 			}
@@ -236,22 +308,10 @@ export const injectAiringTime = (anime: Media, subsPlease: SubsPlease | null) =>
 			// || !(nativeUntilAiring !== undefined && nativeUntilAiring < 24 * 60 * 60)
 		)
 	) {
-		const times: Time[] = [];
-
-		for (const [key, value] of Object.entries(subsPlease.schedule)) {
-			const flattenedValue = Array.isArray(value) ? value.flat() : [];
-
-			for (const time of flattenedValue) {
-				times.push({
-					title: time.title,
-					time: time.time,
-					day: key
-				});
-			}
-		}
+		const scheduleIndex = buildScheduleIndex(subsPlease);
 
 		if ((anime.nextAiringEpisode?.episode || 0) > 1) {
-			const foundTime: Time | null = findClosestMatch(times, anime);
+			const foundTime: Time | null = findClosestMatch(scheduleIndex, anime);
 
 			if (foundTime) {
 				untilAiring = secondsUntil((foundTime as Time).time, (foundTime as Time).day);
```