about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Fuwn <[email protected]> 2026-03-01 11:32:01 -0800
committer: Fuwn <[email protected]> 2026-03-01 11:32:01 -0800
commit: 3cbf4df4ceb3d8923cc371ec1bb5d66177411a87 (patch)
tree: dd3064b9bd76c9202166eb6f1ed6b335323801fe /src
parent: chore(supabase): Move Supabase schema path (diff)
download: due.moe-3cbf4df4ceb3d8923cc371ec1bb5d66177411a87.tar.xz
due.moe-3cbf4df4ceb3d8923cc371ec1bb5d66177411a87.zip
fix(match): Harden calculateWeightedSimilarity
Diffstat (limited to 'src')
-rw-r--r-- src/lib/Media/Anime/Airing/Subtitled/match.ts | 32
1 files changed, 29 insertions, 3 deletions
diff --git a/src/lib/Media/Anime/Airing/Subtitled/match.ts b/src/lib/Media/Anime/Airing/Subtitled/match.ts
index fb494539..d6168d2c 100644
--- a/src/lib/Media/Anime/Airing/Subtitled/match.ts
+++ b/src/lib/Media/Anime/Airing/Subtitled/match.ts
@@ -42,9 +42,35 @@ const preprocessTitle = (title: string): string => {
.join(' ');
};
+const NON_DISTINCTIVE_TOKENS = new Set([
+ 'a',
+ 'and',
+ 'de',
+ 'e',
+ 'for',
+ 'ga',
+ 'in',
+ 'na',
+ 'ni',
+ 'no',
+ 'o',
+ 'of',
+ 'on',
+ 'the',
+ 'to',
+ 'wa',
+ 'wo'
+]);
+
+const isMeaningfulToken = (token: string): boolean =>
+ /^\d+$/.test(token) || (token.length >= 3 && !NON_DISTINCTIVE_TOKENS.has(token));
+
const calculateWeightedSimilarity = (title1: string, title2: string): number => {
- const tokens1 = title1.split(' ');
- const tokens2 = title2.split(' ');
+ const tokens1 = title1.split(' ').filter(isMeaningfulToken);
+ const tokens2 = title2.split(' ').filter(isMeaningfulToken);
+
+ if (tokens1.length === 0 || tokens2.length === 0) return 0;
+
const set2 = new Set(tokens2);
let score = 0;
@@ -54,7 +80,7 @@ const calculateWeightedSimilarity = (title1: string, title2: string): number =>
}
});
- return score / (Math.max(tokens1.length, tokens2.length) * 2);
+ return score / ((Math.max(tokens1.length, tokens2.length) || 1) * 2);
};
export const findClosestMatch = (times: Time[], anime: Media): Time | null => {