diff options
| author | Fuwn <[email protected]> | 2026-03-01 11:32:01 -0800 |
|---|---|---|
| committer | Fuwn <[email protected]> | 2026-03-01 11:32:01 -0800 |
| commit | 3cbf4df4ceb3d8923cc371ec1bb5d66177411a87 (patch) | |
| tree | dd3064b9bd76c9202166eb6f1ed6b335323801fe /src | |
| parent | chore(supabase): Move Supabase schema path (diff) | |
| download | due.moe-3cbf4df4ceb3d8923cc371ec1bb5d66177411a87.tar.xz due.moe-3cbf4df4ceb3d8923cc371ec1bb5d66177411a87.zip | |
fix(match): Harden calculateWeightedSimilarity
Diffstat (limited to 'src')
| -rw-r--r-- | src/lib/Media/Anime/Airing/Subtitled/match.ts | 32 |
1 file changed, 29 insertions, 3 deletions
```diff
diff --git a/src/lib/Media/Anime/Airing/Subtitled/match.ts b/src/lib/Media/Anime/Airing/Subtitled/match.ts
index fb494539..d6168d2c 100644
--- a/src/lib/Media/Anime/Airing/Subtitled/match.ts
+++ b/src/lib/Media/Anime/Airing/Subtitled/match.ts
@@ -42,9 +42,35 @@ const preprocessTitle = (title: string): string => {
 		.join(' ');
 };
 
+const NON_DISTINCTIVE_TOKENS = new Set([
+	'a',
+	'and',
+	'de',
+	'e',
+	'for',
+	'ga',
+	'in',
+	'na',
+	'ni',
+	'no',
+	'o',
+	'of',
+	'on',
+	'the',
+	'to',
+	'wa',
+	'wo'
+]);
+
+const isMeaningfulToken = (token: string): boolean =>
+	/^\d+$/.test(token) || (token.length >= 3 && !NON_DISTINCTIVE_TOKENS.has(token));
+
 const calculateWeightedSimilarity = (title1: string, title2: string): number => {
-	const tokens1 = title1.split(' ');
-	const tokens2 = title2.split(' ');
+	const tokens1 = title1.split(' ').filter(isMeaningfulToken);
+	const tokens2 = title2.split(' ').filter(isMeaningfulToken);
+
+	if (tokens1.length === 0 || tokens2.length === 0) return 0;
+
 	const set2 = new Set(tokens2);
 	let score = 0;
 
@@ -54,7 +80,7 @@ const calculateWeightedSimilarity = (title1: string, title2: string): number =>
 		}
 	});
 
-	return score / (Math.max(tokens1.length, tokens2.length) * 2);
+	return score / ((Math.max(tokens1.length, tokens2.length) || 1) * 2);
 };
 
 export const findClosestMatch = (times: Time[], anime: Media): Time | null => {
```