diff options
| author | Fuwn <[email protected]> | 2024-02-06 05:58:57 -0800 |
|---|---|---|
| committer | Fuwn <[email protected]> | 2024-02-06 05:58:57 -0800 |
| commit | e5713cbc2109e3775153e3092199262d2d259bfd (patch) | |
| tree | dbaf64dfe22000e411ef252f30295a90520507f2 /src/lib | |
| parent | fix(match): revert to old matcher (diff) | |
| download | due.moe-e5713cbc2109e3775153e3092199262d2d259bfd.tar.xz due.moe-e5713cbc2109e3775153e3092199262d2d259bfd.zip | |
fix(match): stronger matcher
Diffstat (limited to 'src/lib')
| -rw-r--r-- | src/lib/Media/Anime/Airing/Subtitled/match.ts | 80 |
1 file changed, 38 insertions, 42 deletions
```diff
diff --git a/src/lib/Media/Anime/Airing/Subtitled/match.ts b/src/lib/Media/Anime/Airing/Subtitled/match.ts
index b4176080..c9e73e81 100644
--- a/src/lib/Media/Anime/Airing/Subtitled/match.ts
+++ b/src/lib/Media/Anime/Airing/Subtitled/match.ts
@@ -30,60 +30,55 @@ const secondsUntil = (targetTime: string, targetDay: string) => {
 	return secondsDifference > 0 ? secondsDifference : secondsDifference + 7 * 24 * 60 * 60;
 };
 
-const normalizeTitle = (title: string | null) =>
-	(title || '')
+const preprocessTitle = (title: string): string => {
+	return title
 		.toLowerCase()
-		.replace(/\b(s|season|part|cour)\s*\d+/g, '')
-		.replace(/[\W_]+/g, ' ')
-		.trim();
+		.replace(/\b(season|s|part|cour)\b/g, ' ')
+		.replace(/[^a-z0-9\s]/gi, '')
+		.trim()
+		.split(/\s+/)
+		.join(' ');
+};
+
+const calculateWeightedSimilarity = (title1: string, title2: string): number => {
+	const tokens1 = title1.split(' ');
+	const tokens2 = title2.split(' ');
+	const set2 = new Set(tokens2);
+	let score = 0;
+
+	tokens1.forEach((token) => {
+		if (set2.has(token)) {
+			score += /^\d+$/.test(token) ? 2 : 1;
+		}
+	});
+
+	return score / (Math.max(tokens1.length, tokens2.length) * 2);
+};
 
 const findClosestMatch = (times: Time[], titles: string[]): Time | null => {
 	let bestMatch: Time | null = null;
-	let highestScore = 0;
+	let bestScore = 0;
 
-	times.forEach((time) => {
-		titles.map(normalizeTitle).forEach((title) => {
-			const similarityScore = stringSimilarity.compareTwoStrings(normalizeTitle(time.title), title);
+	titles.filter(Boolean).forEach((searchTitle) => {
+		if (searchTitle.includes('OVA') || searchTitle.includes('Special')) return;
 
-			if (similarityScore > highestScore) {
-				highestScore = similarityScore;
-				bestMatch = time;
-			}
-		});
-	});
+		const normalizedSearchTitle = preprocessTitle(searchTitle);
 
-	return bestMatch;
-};
+		times.forEach((time) => {
+			const normalizedTimeTitle = preprocessTitle(time.title);
+			const similarityScore = calculateWeightedSimilarity(
+				normalizedSearchTitle,
+				normalizedTimeTitle
+			);
 
-export const findClosestMedia = (media: Media[], matchFor: string) => {
-	if (!matchFor) return null;
-
-	let bestFitMedia: Media | null = null;
-	let smallestDistance = -Infinity;
-
-	media.forEach((m) => {
-		const titles = [m.title.romaji, m.title.english, ...m.synonyms].filter(Boolean);
-
-		titles.forEach((title) => {
-			const normalisedTitle = normalizeTitle(title);
-			const normalisedMatchFor = normalizeTitle(matchFor);
-			const distance = stringSimilarity.compareTwoStrings(normalisedMatchFor, normalisedTitle);
-
-			if (
-				distance > smallestDistance &&
-				(normalisedMatchFor
-					.split(' ')
-					.filter((word) => titles.some((title) => normalizeTitle(title).includes(word))).length >=
-					normalisedMatchFor.split(' ').length ||
-					titles.some((title) => normalizeTitle(title).includes(normalisedMatchFor)))
-			) {
-				smallestDistance = distance;
-				bestFitMedia = m;
+			if (similarityScore > bestScore) {
+				bestScore = similarityScore;
+				bestMatch = time;
 			}
 		});
 	});
 
-	return bestFitMedia as Media | null;
+	return bestMatch;
 };
 
 export const injectAiringTime = (anime: Media, subsPlease: SubsPlease | null) => {
@@ -129,6 +124,7 @@ export const injectAiringTime = (anime: Media, subsPlease: SubsPlease | null) =>
 	}
 
 	if (
+		anime.nextAiringEpisode?.nativeAiringAt &&
 		airingAt &&
 		nativeTime.getTime() !== time.getTime() &&
 		nativeTime.getTime() - time.getTime() > 24 * 60 * 60 * 1000
```