perf(match): pre-index subtitle schedule by day and token

author: Fuwn <[email protected]> 2026-03-01 13:11:08 -0800
committer: Fuwn <[email protected]> 2026-03-01 13:11:08 -0800
commit: 78c019540c1aa8ba0f3f061e48da58cf8c4febd4 (patch)
tree: 57c20417cf1f210f66827bbc35cd3a3ee73ba11a /src/lib
parent: feat(match): skip ambiguous subtitle matches via score margin (diff)
download: due.moe-78c019540c1aa8ba0f3f061e48da58cf8c4febd4.tar.xz due.moe-78c019540c1aa8ba0f3f061e48da58cf8c4febd4.zip
1 file changed, 84 insertions, 24 deletions
diff --git a/src/lib/Media/Anime/Airing/Subtitled/match.ts b/src/lib/Media/Anime/Airing/Subtitled/match.ts
index 661d0d23..2281aca8 100644
--- a/src/lib/Media/Anime/Airing/Subtitled/match.ts
+++ b/src/lib/Media/Anime/Airing/Subtitled/match.ts
@@ -12,6 +12,22 @@ export interface Time {
   day: string;
 }
 
+interface IndexedTime {
+  time: Time;
+  normalizedTitle: string;
+  tokens: string[];
+}
+
+interface DayScheduleIndex {
+  entries: IndexedTime[];
+  exactTitleIndex: Map<string, number[]>;
+  tokenIndex: Map<string, number[]>;
+}
+
+interface ScheduleIndex {
+  byDay: Map<string, DayScheduleIndex>;
+}
+
 const secondsUntil = (targetTime: string, targetDay: string) => {
   const now = new Date();
   const [targetHour, targetMinute] = targetTime.split(':').map(Number);
@@ -108,21 +124,61 @@ const calculateWeightedSimilarity = (title1: string, title2: string): Similarity
   };
 };
 
-export const findClosestMatch = (times: Time[], anime: Media): Time | null => {
+const indexPush = (index: Map<string, number[]>, key: string, entryIndex: number) => {
+  const existing = index.get(key);
+
+  if (existing) existing.push(entryIndex);
+  else index.set(key, [entryIndex]);
+};
+
+const buildScheduleIndex = (subsPlease: SubsPlease): ScheduleIndex => {
+  const byDay = new Map<string, DayScheduleIndex>();
+
+  for (const [day, value] of Object.entries(subsPlease.schedule)) {
+    const flattenedValue = Array.isArray(value) ? value.flat() : [];
+    const dayIndex: DayScheduleIndex = {
+      entries: [],
+      exactTitleIndex: new Map<string, number[]>(),
+      tokenIndex: new Map<string, number[]>()
+    };
+
+    for (const scheduleTime of flattenedValue) {
+      const time = {
+        title: scheduleTime.title,
+        time: scheduleTime.time,
+        day
+      };
+      const normalizedTitle = preprocessTitle(time.title);
+      const tokens = normalizedTitle.split(' ').filter(isMeaningfulToken);
+      const entryIndex = dayIndex.entries.length;
+
+      dayIndex.entries.push({
+        time,
+        normalizedTitle,
+        tokens
+      });
+      indexPush(dayIndex.exactTitleIndex, normalizedTitle, entryIndex);
+
+      for (const token of tokens) indexPush(dayIndex.tokenIndex, token, entryIndex);
+    }
+
+    byDay.set(day, dayIndex);
+  }
+
+  return { byDay };
+};
+
+export const findClosestMatch = (scheduleIndex: ScheduleIndex, anime: Media): Time | null => {
   if (excludeMatch.includes(anime.id)) return null;
 
   const airingDay = new Date((anime.nextAiringEpisode?.airingAt || 0) * 1000).toLocaleString(
     'en-US',
     { weekday: 'long' }
   );
-  const dayTimes = times.filter((time) => time.day === airingDay);
+  const dayIndex = scheduleIndex.byDay.get(airingDay);
 
-  if (dayTimes.length === 0) return null;
+  if (!dayIndex || dayIndex.entries.length === 0) return null;
 
-  const preprocessedTimes = dayTimes.map((time) => ({
-    time,
-    normalized: preprocessTitle(time.title)
-  }));
   let bestMatch: Time | null = null;
   let bestScore = 0;
   let secondBestScore = 0;
@@ -134,16 +190,32 @@ export const findClosestMatch = (times: Time[], anime: Media): Time | null => {
     if (searchTitle.includes('OVA') || searchTitle.includes('Special')) continue;
 
     const normalizedSearchTitle = preprocessTitle(searchTitle);
+    const searchTokens = normalizedSearchTitle.split(' ').filter(isMeaningfulToken);
+    const candidateIndexSet = new Set<number>();
+
+    for (const token of searchTokens) {
+      for (const candidateIndex of dayIndex.tokenIndex.get(token) || [])
+        candidateIndexSet.add(candidateIndex);
+    }
+
+    const candidateIndexes =
+      candidateIndexSet.size > 0
+        ? [...candidateIndexSet]
+        : dayIndex.entries.map((_, entryIndex) => entryIndex);
 
-    for (const { time, normalized } of preprocessedTimes) {
-      const similarity = calculateWeightedSimilarity(normalizedSearchTitle, normalized);
+    for (const candidateIndex of candidateIndexes) {
+      const candidateEntry = dayIndex.entries[candidateIndex];
+      const similarity = calculateWeightedSimilarity(
+        normalizedSearchTitle,
+        candidateEntry.normalizedTitle
+      );
 
       if (similarity.score > bestScore) {
         secondBestScore = bestScore;
         bestScore = similarity.score;
         bestTokenOverlap = similarity.tokenOverlap;
         bestNumericTokenOverlap = similarity.numericTokenOverlap;
-        bestMatch = time;
+        bestMatch = candidateEntry.time;
       } else if (similarity.score > secondBestScore) {
         secondBestScore = similarity.score;
       }
@@ -236,22 +308,10 @@ export const injectAiringTime = (anime: Media, subsPlease: SubsPlease | null) =>
       // || !(nativeUntilAiring !== undefined && nativeUntilAiring < 24 * 60 * 60)
     )
   ) {
-    const times: Time[] = [];
-
-    for (const [key, value] of Object.entries(subsPlease.schedule)) {
-      const flattenedValue = Array.isArray(value) ? value.flat() : [];
-
-      for (const time of flattenedValue) {
-        times.push({
-          title: time.title,
-          time: time.time,
-          day: key
-        });
-      }
-    }
+    const scheduleIndex = buildScheduleIndex(subsPlease);
 
     if ((anime.nextAiringEpisode?.episode || 0) > 1) {
-      const foundTime: Time | null = findClosestMatch(times, anime);
+      const foundTime: Time | null = findClosestMatch(scheduleIndex, anime);
 
       if (foundTime) {
         untilAiring = secondsUntil((foundTime as Time).time, (foundTime as Time).day);
author	Fuwn <[email protected]>	2026-03-01 13:11:08 -0800
committer	Fuwn <[email protected]>	2026-03-01 13:11:08 -0800
commit	78c019540c1aa8ba0f3f061e48da58cf8c4febd4 (patch)
tree	57c20417cf1f210f66827bbc35cd3a3ee73ba11a /src/lib
parent	feat(match): skip ambiguous subtitle matches via score margin (diff)
download	due.moe-78c019540c1aa8ba0f3f061e48da58cf8c4febd4.tar.xz due.moe-78c019540c1aa8ba0f3f061e48da58cf8c4febd4.zip