From 3f6a7e1f6895054853211b90172260dc7ab6cd17 Mon Sep 17 00:00:00 2001 From: Andy Date: Wed, 8 Oct 2025 01:54:30 +0000 Subject: [PATCH] feat: add --exact-lang flag for precise language matching Add a new --exact-lang CLI flag that enables exact language code matching instead of fuzzy matching. This allows users to get specific regional variants without matching all related variants. Examples: - `-l es-419` normally matches all Spanish (es-ES, es-419, es-MX) - `-l es-419 --exact-lang` matches ONLY es-419 (Latin American Spanish) Fixes a language detection issue where specific variants like es-419 (Latin American Spanish) would match all Spanish variants instead of only the requested variant. --- unshackle/commands/dl.py | 21 +++++++++++++++++---- unshackle/core/constants.py | 1 + unshackle/core/tracks/tracks.py | 9 ++++++--- unshackle/core/utilities.py | 10 +++++++++- 4 files changed, 33 insertions(+), 8 deletions(-) diff --git a/unshackle/commands/dl.py b/unshackle/commands/dl.py index c2d8002..8b37032 100644 --- a/unshackle/commands/dl.py +++ b/unshackle/commands/dl.py @@ -180,6 +180,12 @@ class dl: help="Required subtitle languages. Downloads all subtitles only if these languages exist. Cannot be used with --s-lang.", ) @click.option("-fs", "--forced-subs", is_flag=True, default=False, help="Include forced subtitle tracks.") + @click.option( + "--exact-lang", + is_flag=True, + default=False, + help="Use exact language matching (no variants). 
With this flag, -l es-419 matches ONLY es-419, not es-ES or other variants.", + ) @click.option( "--proxy", type=str, @@ -468,6 +474,7 @@ class dl: s_lang: list[str], require_subs: list[str], forced_subs: bool, + exact_lang: bool, sub_format: Optional[Subtitle.Codec], video_only: bool, audio_only: bool, @@ -709,7 +716,9 @@ class dl: else: if language not in processed_video_lang: processed_video_lang.append(language) - title.tracks.videos = title.tracks.by_language(title.tracks.videos, processed_video_lang) + title.tracks.videos = title.tracks.by_language( + title.tracks.videos, processed_video_lang, exact_match=exact_lang + ) if not title.tracks.videos: self.log.error(f"There's no {processed_video_lang} Video Track...") sys.exit(1) @@ -792,16 +801,20 @@ class dl: f"Required languages found ({', '.join(require_subs)}), downloading all available subtitles" ) elif s_lang and "all" not in s_lang: + from unshackle.core.utilities import is_exact_match + + match_func = is_exact_match if exact_lang else is_close_match + missing_langs = [ lang_ for lang_ in s_lang - if not any(is_close_match(lang_, [sub.language]) for sub in title.tracks.subtitles) + if not any(match_func(lang_, [sub.language]) for sub in title.tracks.subtitles) ] if missing_langs: self.log.error(", ".join(missing_langs) + " not found in tracks") sys.exit(1) - title.tracks.select_subtitles(lambda x: is_close_match(x.language, s_lang)) + title.tracks.select_subtitles(lambda x: match_func(x.language, s_lang)) if not title.tracks.subtitles: self.log.error(f"There's no {s_lang} Subtitle Track...") sys.exit(1) @@ -865,7 +878,7 @@ class dl: elif "all" not in processed_lang: per_language = 1 title.tracks.audio = title.tracks.by_language( - title.tracks.audio, processed_lang, per_language=per_language + title.tracks.audio, processed_lang, per_language=per_language, exact_match=exact_lang ) if not title.tracks.audio: self.log.error(f"There's no {processed_lang} Audio Track, cannot continue...") diff --git 
a/unshackle/core/constants.py b/unshackle/core/constants.py index 609fcc3..6a14f7d 100644 --- a/unshackle/core/constants.py +++ b/unshackle/core/constants.py @@ -6,6 +6,7 @@ DOWNLOAD_LICENCE_ONLY = Event() DRM_SORT_MAP = ["ClearKey", "Widevine"] LANGUAGE_MAX_DISTANCE = 5 # this is max to be considered "same", e.g., en, en-US, en-AU +LANGUAGE_EXACT_DISTANCE = 0 # exact match only, no variants VIDEO_CODEC_MAP = {"AVC": "H.264", "HEVC": "H.265"} DYNAMIC_RANGE_MAP = {"HDR10": "HDR", "HDR10+": "HDR10P", "Dolby Vision": "DV", "HDR10 / HDR10+": "HDR10P", "HDR10 / HDR10": "HDR"} AUDIO_CODEC_MAP = {"E-AC-3": "DDP", "AC-3": "DD"} diff --git a/unshackle/core/tracks/tracks.py b/unshackle/core/tracks/tracks.py index cf691b7..eeacd47 100644 --- a/unshackle/core/tracks/tracks.py +++ b/unshackle/core/tracks/tracks.py @@ -14,7 +14,7 @@ from rich.tree import Tree from unshackle.core import binaries from unshackle.core.config import config from unshackle.core.console import console -from unshackle.core.constants import LANGUAGE_MAX_DISTANCE, AnyTrack, TrackT +from unshackle.core.constants import LANGUAGE_EXACT_DISTANCE, LANGUAGE_MAX_DISTANCE, AnyTrack, TrackT from unshackle.core.events import events from unshackle.core.tracks.attachment import Attachment from unshackle.core.tracks.audio import Audio @@ -294,11 +294,14 @@ class Tracks: self.videos = selected @staticmethod - def by_language(tracks: list[TrackT], languages: list[str], per_language: int = 0) -> list[TrackT]: + def by_language( + tracks: list[TrackT], languages: list[str], per_language: int = 0, exact_match: bool = False + ) -> list[TrackT]: + distance = LANGUAGE_EXACT_DISTANCE if exact_match else LANGUAGE_MAX_DISTANCE selected = [] for language in languages: selected.extend( - [x for x in tracks if closest_supported_match(x.language, [language], LANGUAGE_MAX_DISTANCE)][ + [x for x in tracks if closest_supported_match(str(x.language), [language], distance)][ : per_language or None ] ) diff --git 
a/unshackle/core/utilities.py b/unshackle/core/utilities.py index 784c037..9302e0d 100644 --- a/unshackle/core/utilities.py +++ b/unshackle/core/utilities.py @@ -24,7 +24,7 @@ from unidecode import unidecode from unshackle.core.cacher import Cacher from unshackle.core.config import config -from unshackle.core.constants import LANGUAGE_MAX_DISTANCE +from unshackle.core.constants import LANGUAGE_EXACT_DISTANCE, LANGUAGE_MAX_DISTANCE def rotate_log_file(log_path: Path, keep: int = 20) -> Path: @@ -114,6 +114,14 @@ def is_close_match(language: Union[str, Language], languages: Sequence[Union[str return closest_match(language, list(map(str, languages)))[1] <= LANGUAGE_MAX_DISTANCE +def is_exact_match(language: Union[str, Language], languages: Sequence[Union[str, Language, None]]) -> bool: + """Check if a language is an exact match to any of the provided languages.""" + languages = [x for x in languages if x] + if not languages: + return False + return closest_match(language, list(map(str, languages)))[1] <= LANGUAGE_EXACT_DISTANCE + + def get_boxes(data: bytes, box_type: bytes, as_bytes: bool = False) -> Box: """ Scan a byte array for a wanted MP4/ISOBMFF box, then parse and yield each find.