From 20601f70b2156e35767c1f3e2a466ae6c939ce28 Mon Sep 17 00:00:00 2001 From: Fuwn Date: Mon, 8 Jan 2024 22:10:40 -0800 Subject: feat(match): better schedule match algorithm --- bun.lockb | Bin 146194 -> 146958 bytes package.json | 2 ++ src/lib/Media/Anime/Airing/Subtitled/match.ts | 23 +++++++++++++++++------ 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/bun.lockb b/bun.lockb index e31b637b..0612850c 100755 Binary files a/bun.lockb and b/bun.lockb differ diff --git a/package.json b/package.json index 57f8e9b4..73b159a3 100644 --- a/package.json +++ b/package.json @@ -16,6 +16,7 @@ "@sveltejs/kit": "^1.20.4", "@types/fast-levenshtein": "^0.0.4", "@types/jsdom": "^21.1.6", + "@types/string-similarity": "^4.0.2", "@typescript-eslint/eslint-plugin": "^5.45.0", "@typescript-eslint/parser": "^5.45.0", "eslint": "^8.28.0", @@ -40,6 +41,7 @@ "lz-string": "^1.5.0", "modern-screenshot": "^4.4.33", "rss-parser": "^3.13.0", + "string-similarity": "^4.0.4", "wanakana": "^5.3.1" } } diff --git a/src/lib/Media/Anime/Airing/Subtitled/match.ts b/src/lib/Media/Anime/Airing/Subtitled/match.ts index 1a0c6b2d..1bf31b6f 100644 --- a/src/lib/Media/Anime/Airing/Subtitled/match.ts +++ b/src/lib/Media/Anime/Airing/Subtitled/match.ts @@ -3,6 +3,7 @@ import type { Media } from '../../../../AniList/media'; import settings from '$stores/settings'; import type { SubsPlease } from '$lib/Media/Anime/Airing/Subtitled/subsPlease'; import levenshtein from 'fast-levenshtein'; +import stringSimilarity from 'string-similarity'; export interface Time { title: string; @@ -33,7 +34,7 @@ const secondsUntil = (targetTime: string, targetDay: string) => { const normalizeTitle = (title: string | null) => { return (title || '') .toLowerCase() - .replace(/season \d+|s\d+|\W/g, '') + .replace(/season \d+|s\d+/g, '') .replace(/\b(\d)(st|nd|rd|th)\b/g, '$1') .replace(/\b(part|pt)\b/gi, '') .trim(); @@ -67,14 +68,24 @@ export const findClosestMedia = (media: Media[], matchFor: string) => { if (!matchFor) return null; let bestFitMedia: Media | null = null; - let smallestDistance = Infinity; + let smallestDistance = -Infinity; media.forEach((m) => { - [m.title.romaji, m.title.english, ...m.synonyms].filter(Boolean).forEach((title) => { - const normalizedItemTitle = normalizeTitle(title); - const distance = levenshtein.get(normalizeTitle(matchFor), normalizedItemTitle); + const titles = [m.title.romaji, m.title.english, ...m.synonyms].filter(Boolean); + + titles.forEach((title) => { + const normalisedTitle = normalizeTitle(title); + const normalisedMatchFor = normalizeTitle(matchFor); + const distance = stringSimilarity.compareTwoStrings(normalisedMatchFor, normalisedTitle); - if (distance < smallestDistance && distance < Math.max(3, normalizedItemTitle.length * 0.4)) { + if ( + distance > smallestDistance && + (normalisedMatchFor + .split(' ') + .filter((word) => titles.some((title) => normalizeTitle(title).includes(word))).length >= + normalisedMatchFor.split(' ').length || + titles.some((title) => normalizeTitle(title).includes(normalisedMatchFor))) + ) { smallestDistance = distance; bestFitMedia = m; } -- cgit v1.2.3