about | summary | refs | log | tree | commit | diff
path: root/src/lib
diff options
context:
space:
mode:
author: Fuwn <[email protected]> 2026-03-01 13:11:08 -0800
committer: Fuwn <[email protected]> 2026-03-01 13:11:08 -0800
commit: 78c019540c1aa8ba0f3f061e48da58cf8c4febd4 (patch)
tree: 57c20417cf1f210f66827bbc35cd3a3ee73ba11a /src/lib
parent: feat(match): skip ambiguous subtitle matches via score margin (diff)
download: due.moe-78c019540c1aa8ba0f3f061e48da58cf8c4febd4.tar.xz
due.moe-78c019540c1aa8ba0f3f061e48da58cf8c4febd4.zip
perf(match): pre-index subtitle schedule by day and token
Diffstat (limited to 'src/lib')
-rw-r--r-- src/lib/Media/Anime/Airing/Subtitled/match.ts 108
1 files changed, 84 insertions, 24 deletions
diff --git a/src/lib/Media/Anime/Airing/Subtitled/match.ts b/src/lib/Media/Anime/Airing/Subtitled/match.ts
index 661d0d23..2281aca8 100644
--- a/src/lib/Media/Anime/Airing/Subtitled/match.ts
+++ b/src/lib/Media/Anime/Airing/Subtitled/match.ts
@@ -12,6 +12,22 @@ export interface Time {
day: string;
}
+interface IndexedTime { // One schedule entry together with the title forms precomputed for matching.
+ time: Time; // The schedule slot (title, time, day) this entry was built from.
+ normalizedTitle: string; // preprocessTitle(time.title), computed once when the index is built.
+ tokens: string[]; // normalizedTitle split on ' ' and filtered with isMeaningfulToken.
+}
+
+interface DayScheduleIndex { // Lookup structures for the schedule entries of a single day.
+ entries: IndexedTime[]; // Entries in insertion order; the numbers stored in the two maps are positions in this array.
+ exactTitleIndex: Map<string, number[]>; // Normalized title -> positions of the entries carrying exactly that normalized title.
+ tokenIndex: Map<string, number[]>; // Token -> positions of entries whose token list contains it (a position repeats if the token repeats within one title).
+}
+
+interface ScheduleIndex { // Whole-schedule index, partitioned by day.
+ byDay: Map<string, DayScheduleIndex>; // Keyed by the schedule object's own day keys; findClosestMatch looks it up with the en-US long weekday name.
+}
+
const secondsUntil = (targetTime: string, targetDay: string) => {
const now = new Date();
const [targetHour, targetMinute] = targetTime.split(':').map(Number);
@@ -108,21 +124,61 @@ const calculateWeightedSimilarity = (title1: string, title2: string): Similarity
};
};
-export const findClosestMatch = (times: Time[], anime: Media): Time | null => {
+const indexPush = (index: Map<string, number[]>, key: string, entryIndex: number) => { // Append entryIndex to the list stored under key, creating the list on first use.
+ const existing = index.get(key);
+
+ if (existing) existing.push(entryIndex); // Key already present: extend its array in place.
+ else index.set(key, [entryIndex]); // First occurrence of key: start a new single-element list.
+};
+
+const buildScheduleIndex = (subsPlease: SubsPlease): ScheduleIndex => { // Build a per-day index (entries, exact-title map, token map) from subsPlease.schedule.
+ const byDay = new Map<string, DayScheduleIndex>();
+
+ for (const [day, value] of Object.entries(subsPlease.schedule)) { // One DayScheduleIndex per key of the schedule object.
+ const flattenedValue = Array.isArray(value) ? value.flat() : []; // Arrays are flattened one level; any non-array value produces a day with no entries.
+ const dayIndex: DayScheduleIndex = {
+ entries: [],
+ exactTitleIndex: new Map<string, number[]>(),
+ tokenIndex: new Map<string, number[]>()
+ };
+
+ for (const scheduleTime of flattenedValue) {
+ const time = { // Copy only title and time, and tag the entry with the day key it was listed under.
+ title: scheduleTime.title,
+ time: scheduleTime.time,
+ day
+ };
+ const normalizedTitle = preprocessTitle(time.title); // Normalize once here so lookups do not have to redo it per entry.
+ const tokens = normalizedTitle.split(' ').filter(isMeaningfulToken); // Tokens are not de-duplicated at this point.
+ const entryIndex = dayIndex.entries.length; // Position the entry will occupy after the push below.
+
+ dayIndex.entries.push({
+ time,
+ normalizedTitle,
+ tokens
+ });
+ indexPush(dayIndex.exactTitleIndex, normalizedTitle, entryIndex); // Entries sharing a normalized title end up in the same list.
+
+ for (const token of tokens) indexPush(dayIndex.tokenIndex, token, entryIndex); // A token repeated within one title records entryIndex once per occurrence.
+ }
+
+ byDay.set(day, dayIndex); // Stored even when the day has no entries.
+ }
+
+ return { byDay }; // NOTE(review): the visible call site in injectAiringTime rebuilds this index each time that branch runs; caching per SubsPlease object may be worth considering - confirm against callers.
+};
+
+export const findClosestMatch = (scheduleIndex: ScheduleIndex, anime: Media): Time | null => {
if (excludeMatch.includes(anime.id)) return null;
const airingDay = new Date((anime.nextAiringEpisode?.airingAt || 0) * 1000).toLocaleString(
'en-US',
{ weekday: 'long' }
);
- const dayTimes = times.filter((time) => time.day === airingDay);
+ const dayIndex = scheduleIndex.byDay.get(airingDay);
- if (dayTimes.length === 0) return null;
+ if (!dayIndex || dayIndex.entries.length === 0) return null;
- const preprocessedTimes = dayTimes.map((time) => ({
- time,
- normalized: preprocessTitle(time.title)
- }));
let bestMatch: Time | null = null;
let bestScore = 0;
let secondBestScore = 0;
@@ -134,16 +190,32 @@ export const findClosestMatch = (times: Time[], anime: Media): Time | null => {
if (searchTitle.includes('OVA') || searchTitle.includes('Special')) continue;
const normalizedSearchTitle = preprocessTitle(searchTitle);
+ const searchTokens = normalizedSearchTitle.split(' ').filter(isMeaningfulToken);
+ const candidateIndexSet = new Set<number>();
+
+ for (const token of searchTokens) {
+ for (const candidateIndex of dayIndex.tokenIndex.get(token) || [])
+ candidateIndexSet.add(candidateIndex);
+ }
+
+ const candidateIndexes =
+ candidateIndexSet.size > 0
+ ? [...candidateIndexSet]
+ : dayIndex.entries.map((_, entryIndex) => entryIndex);
- for (const { time, normalized } of preprocessedTimes) {
- const similarity = calculateWeightedSimilarity(normalizedSearchTitle, normalized);
+ for (const candidateIndex of candidateIndexes) {
+ const candidateEntry = dayIndex.entries[candidateIndex];
+ const similarity = calculateWeightedSimilarity(
+ normalizedSearchTitle,
+ candidateEntry.normalizedTitle
+ );
if (similarity.score > bestScore) {
secondBestScore = bestScore;
bestScore = similarity.score;
bestTokenOverlap = similarity.tokenOverlap;
bestNumericTokenOverlap = similarity.numericTokenOverlap;
- bestMatch = time;
+ bestMatch = candidateEntry.time;
} else if (similarity.score > secondBestScore) {
secondBestScore = similarity.score;
}
@@ -236,22 +308,10 @@ export const injectAiringTime = (anime: Media, subsPlease: SubsPlease | null) =>
// || !(nativeUntilAiring !== undefined && nativeUntilAiring < 24 * 60 * 60)
)
) {
- const times: Time[] = [];
-
- for (const [key, value] of Object.entries(subsPlease.schedule)) {
- const flattenedValue = Array.isArray(value) ? value.flat() : [];
-
- for (const time of flattenedValue) {
- times.push({
- title: time.title,
- time: time.time,
- day: key
- });
- }
- }
+ const scheduleIndex = buildScheduleIndex(subsPlease);
if ((anime.nextAiringEpisode?.episode || 0) > 1) {
- const foundTime: Time | null = findClosestMatch(times, anime);
+ const foundTime: Time | null = findClosestMatch(scheduleIndex, anime);
if (foundTime) {
untilAiring = secondsUntil((foundTime as Time).time, (foundTime as Time).day);