feat: add --exact-lang flag for precise language matching

Adds a new --exact-lang CLI flag that switches language selection from fuzzy matching to exact language-code matching. This lets users request one specific regional variant without also matching every related variant of the same language.

Examples:
- `-l es-419` normally matches all Spanish (es-ES, es-419, es-MX)
- `-l es-419 --exact-lang` matches ONLY es-419 (Latin American Spanish)

Addresses a language-selection issue where requesting a specific variant such as es-419 (Latin American Spanish) would match all Spanish variants; with --exact-lang, only the requested variant is matched.
This commit is contained in:
Andy
2025-10-08 01:54:30 +00:00
parent e9ba78cec3
commit 3f6a7e1f68
4 changed files with 33 additions and 8 deletions

View File

@@ -180,6 +180,12 @@ class dl:
help="Required subtitle languages. Downloads all subtitles only if these languages exist. Cannot be used with --s-lang.", help="Required subtitle languages. Downloads all subtitles only if these languages exist. Cannot be used with --s-lang.",
) )
@click.option("-fs", "--forced-subs", is_flag=True, default=False, help="Include forced subtitle tracks.") @click.option("-fs", "--forced-subs", is_flag=True, default=False, help="Include forced subtitle tracks.")
@click.option(
"--exact-lang",
is_flag=True,
default=False,
help="Use exact language matching (no variants). With this flag, -l es-419 matches ONLY es-419, not es-ES or other variants.",
)
@click.option( @click.option(
"--proxy", "--proxy",
type=str, type=str,
@@ -468,6 +474,7 @@ class dl:
s_lang: list[str], s_lang: list[str],
require_subs: list[str], require_subs: list[str],
forced_subs: bool, forced_subs: bool,
exact_lang: bool,
sub_format: Optional[Subtitle.Codec], sub_format: Optional[Subtitle.Codec],
video_only: bool, video_only: bool,
audio_only: bool, audio_only: bool,
@@ -709,7 +716,9 @@ class dl:
else: else:
if language not in processed_video_lang: if language not in processed_video_lang:
processed_video_lang.append(language) processed_video_lang.append(language)
title.tracks.videos = title.tracks.by_language(title.tracks.videos, processed_video_lang) title.tracks.videos = title.tracks.by_language(
title.tracks.videos, processed_video_lang, exact_match=exact_lang
)
if not title.tracks.videos: if not title.tracks.videos:
self.log.error(f"There's no {processed_video_lang} Video Track...") self.log.error(f"There's no {processed_video_lang} Video Track...")
sys.exit(1) sys.exit(1)
@@ -792,16 +801,20 @@ class dl:
f"Required languages found ({', '.join(require_subs)}), downloading all available subtitles" f"Required languages found ({', '.join(require_subs)}), downloading all available subtitles"
) )
elif s_lang and "all" not in s_lang: elif s_lang and "all" not in s_lang:
from unshackle.core.utilities import is_exact_match
match_func = is_exact_match if exact_lang else is_close_match
missing_langs = [ missing_langs = [
lang_ lang_
for lang_ in s_lang for lang_ in s_lang
if not any(is_close_match(lang_, [sub.language]) for sub in title.tracks.subtitles) if not any(match_func(lang_, [sub.language]) for sub in title.tracks.subtitles)
] ]
if missing_langs: if missing_langs:
self.log.error(", ".join(missing_langs) + " not found in tracks") self.log.error(", ".join(missing_langs) + " not found in tracks")
sys.exit(1) sys.exit(1)
title.tracks.select_subtitles(lambda x: is_close_match(x.language, s_lang)) title.tracks.select_subtitles(lambda x: match_func(x.language, s_lang))
if not title.tracks.subtitles: if not title.tracks.subtitles:
self.log.error(f"There's no {s_lang} Subtitle Track...") self.log.error(f"There's no {s_lang} Subtitle Track...")
sys.exit(1) sys.exit(1)
@@ -865,7 +878,7 @@ class dl:
elif "all" not in processed_lang: elif "all" not in processed_lang:
per_language = 1 per_language = 1
title.tracks.audio = title.tracks.by_language( title.tracks.audio = title.tracks.by_language(
title.tracks.audio, processed_lang, per_language=per_language title.tracks.audio, processed_lang, per_language=per_language, exact_match=exact_lang
) )
if not title.tracks.audio: if not title.tracks.audio:
self.log.error(f"There's no {processed_lang} Audio Track, cannot continue...") self.log.error(f"There's no {processed_lang} Audio Track, cannot continue...")

View File

@@ -6,6 +6,7 @@ DOWNLOAD_LICENCE_ONLY = Event()
DRM_SORT_MAP = ["ClearKey", "Widevine"] DRM_SORT_MAP = ["ClearKey", "Widevine"]
LANGUAGE_MAX_DISTANCE = 5 # this is max to be considered "same", e.g., en, en-US, en-AU LANGUAGE_MAX_DISTANCE = 5 # this is max to be considered "same", e.g., en, en-US, en-AU
LANGUAGE_EXACT_DISTANCE = 0 # exact match only, no variants
VIDEO_CODEC_MAP = {"AVC": "H.264", "HEVC": "H.265"} VIDEO_CODEC_MAP = {"AVC": "H.264", "HEVC": "H.265"}
DYNAMIC_RANGE_MAP = {"HDR10": "HDR", "HDR10+": "HDR10P", "Dolby Vision": "DV", "HDR10 / HDR10+": "HDR10P", "HDR10 / HDR10": "HDR"} DYNAMIC_RANGE_MAP = {"HDR10": "HDR", "HDR10+": "HDR10P", "Dolby Vision": "DV", "HDR10 / HDR10+": "HDR10P", "HDR10 / HDR10": "HDR"}
AUDIO_CODEC_MAP = {"E-AC-3": "DDP", "AC-3": "DD"} AUDIO_CODEC_MAP = {"E-AC-3": "DDP", "AC-3": "DD"}

View File

@@ -14,7 +14,7 @@ from rich.tree import Tree
from unshackle.core import binaries from unshackle.core import binaries
from unshackle.core.config import config from unshackle.core.config import config
from unshackle.core.console import console from unshackle.core.console import console
from unshackle.core.constants import LANGUAGE_MAX_DISTANCE, AnyTrack, TrackT from unshackle.core.constants import LANGUAGE_EXACT_DISTANCE, LANGUAGE_MAX_DISTANCE, AnyTrack, TrackT
from unshackle.core.events import events from unshackle.core.events import events
from unshackle.core.tracks.attachment import Attachment from unshackle.core.tracks.attachment import Attachment
from unshackle.core.tracks.audio import Audio from unshackle.core.tracks.audio import Audio
@@ -294,11 +294,14 @@ class Tracks:
self.videos = selected self.videos = selected
@staticmethod @staticmethod
def by_language(tracks: list[TrackT], languages: list[str], per_language: int = 0) -> list[TrackT]: def by_language(
tracks: list[TrackT], languages: list[str], per_language: int = 0, exact_match: bool = False
) -> list[TrackT]:
distance = LANGUAGE_EXACT_DISTANCE if exact_match else LANGUAGE_MAX_DISTANCE
selected = [] selected = []
for language in languages: for language in languages:
selected.extend( selected.extend(
[x for x in tracks if closest_supported_match(x.language, [language], LANGUAGE_MAX_DISTANCE)][ [x for x in tracks if closest_supported_match(str(x.language), [language], distance)][
: per_language or None : per_language or None
] ]
) )

View File

@@ -24,7 +24,7 @@ from unidecode import unidecode
from unshackle.core.cacher import Cacher from unshackle.core.cacher import Cacher
from unshackle.core.config import config from unshackle.core.config import config
from unshackle.core.constants import LANGUAGE_MAX_DISTANCE from unshackle.core.constants import LANGUAGE_EXACT_DISTANCE, LANGUAGE_MAX_DISTANCE
def rotate_log_file(log_path: Path, keep: int = 20) -> Path: def rotate_log_file(log_path: Path, keep: int = 20) -> Path:
@@ -114,6 +114,14 @@ def is_close_match(language: Union[str, Language], languages: Sequence[Union[str
return closest_match(language, list(map(str, languages)))[1] <= LANGUAGE_MAX_DISTANCE return closest_match(language, list(map(str, languages)))[1] <= LANGUAGE_MAX_DISTANCE
def is_exact_match(language: Union[str, Language], languages: Sequence[Union[str, Language, None]]) -> bool:
    """
    Tell whether `language` exactly matches one of the candidate `languages`.

    Falsy candidates (e.g. None or empty strings) are discarded first; if no
    candidate remains the result is False. Otherwise the remaining candidates
    are stringified and handed to `closest_match`, and the match counts as
    exact when the reported distance does not exceed LANGUAGE_EXACT_DISTANCE.
    """
    candidates = [str(candidate) for candidate in languages if candidate]
    if not candidates:
        return False
    best = closest_match(language, candidates)
    # Index 1 of the closest_match result is the distance to the best candidate.
    return best[1] <= LANGUAGE_EXACT_DISTANCE
def get_boxes(data: bytes, box_type: bytes, as_bytes: bool = False) -> Box: def get_boxes(data: bytes, box_type: bytes, as_bytes: bool = False) -> Box:
""" """
Scan a byte array for a wanted MP4/ISOBMFF box, then parse and yield each find. Scan a byte array for a wanted MP4/ISOBMFF box, then parse and yield each find.