mirror of
https://github.com/unshackle-dl/unshackle.git
synced 2025-10-23 15:11:08 +00:00
feat: add --exact-lang flag for precise language matching
Adds a new `--exact-lang` CLI flag that enables exact language-code matching instead of fuzzy matching. This allows users to select a specific regional variant without also matching all related variants.
Examples:
- `-l es-419` normally matches all Spanish variants (es-ES, es-419, es-MX).
- `-l es-419 --exact-lang` matches ONLY es-419 (Latin American Spanish).
Fixes a language-detection issue where specific variants like es-419 (Latin American Spanish) would match all Spanish variants instead of just close regional variants.
This commit is contained in:
@@ -180,6 +180,12 @@ class dl:
|
|||||||
help="Required subtitle languages. Downloads all subtitles only if these languages exist. Cannot be used with --s-lang.",
|
help="Required subtitle languages. Downloads all subtitles only if these languages exist. Cannot be used with --s-lang.",
|
||||||
)
|
)
|
||||||
@click.option("-fs", "--forced-subs", is_flag=True, default=False, help="Include forced subtitle tracks.")
|
@click.option("-fs", "--forced-subs", is_flag=True, default=False, help="Include forced subtitle tracks.")
|
||||||
|
@click.option(
|
||||||
|
"--exact-lang",
|
||||||
|
is_flag=True,
|
||||||
|
default=False,
|
||||||
|
help="Use exact language matching (no variants). With this flag, -l es-419 matches ONLY es-419, not es-ES or other variants.",
|
||||||
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"--proxy",
|
"--proxy",
|
||||||
type=str,
|
type=str,
|
||||||
@@ -468,6 +474,7 @@ class dl:
|
|||||||
s_lang: list[str],
|
s_lang: list[str],
|
||||||
require_subs: list[str],
|
require_subs: list[str],
|
||||||
forced_subs: bool,
|
forced_subs: bool,
|
||||||
|
exact_lang: bool,
|
||||||
sub_format: Optional[Subtitle.Codec],
|
sub_format: Optional[Subtitle.Codec],
|
||||||
video_only: bool,
|
video_only: bool,
|
||||||
audio_only: bool,
|
audio_only: bool,
|
||||||
@@ -709,7 +716,9 @@ class dl:
|
|||||||
else:
|
else:
|
||||||
if language not in processed_video_lang:
|
if language not in processed_video_lang:
|
||||||
processed_video_lang.append(language)
|
processed_video_lang.append(language)
|
||||||
title.tracks.videos = title.tracks.by_language(title.tracks.videos, processed_video_lang)
|
title.tracks.videos = title.tracks.by_language(
|
||||||
|
title.tracks.videos, processed_video_lang, exact_match=exact_lang
|
||||||
|
)
|
||||||
if not title.tracks.videos:
|
if not title.tracks.videos:
|
||||||
self.log.error(f"There's no {processed_video_lang} Video Track...")
|
self.log.error(f"There's no {processed_video_lang} Video Track...")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
@@ -792,16 +801,20 @@ class dl:
|
|||||||
f"Required languages found ({', '.join(require_subs)}), downloading all available subtitles"
|
f"Required languages found ({', '.join(require_subs)}), downloading all available subtitles"
|
||||||
)
|
)
|
||||||
elif s_lang and "all" not in s_lang:
|
elif s_lang and "all" not in s_lang:
|
||||||
|
from unshackle.core.utilities import is_exact_match
|
||||||
|
|
||||||
|
match_func = is_exact_match if exact_lang else is_close_match
|
||||||
|
|
||||||
missing_langs = [
|
missing_langs = [
|
||||||
lang_
|
lang_
|
||||||
for lang_ in s_lang
|
for lang_ in s_lang
|
||||||
if not any(is_close_match(lang_, [sub.language]) for sub in title.tracks.subtitles)
|
if not any(match_func(lang_, [sub.language]) for sub in title.tracks.subtitles)
|
||||||
]
|
]
|
||||||
if missing_langs:
|
if missing_langs:
|
||||||
self.log.error(", ".join(missing_langs) + " not found in tracks")
|
self.log.error(", ".join(missing_langs) + " not found in tracks")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
title.tracks.select_subtitles(lambda x: is_close_match(x.language, s_lang))
|
title.tracks.select_subtitles(lambda x: match_func(x.language, s_lang))
|
||||||
if not title.tracks.subtitles:
|
if not title.tracks.subtitles:
|
||||||
self.log.error(f"There's no {s_lang} Subtitle Track...")
|
self.log.error(f"There's no {s_lang} Subtitle Track...")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
@@ -865,7 +878,7 @@ class dl:
|
|||||||
elif "all" not in processed_lang:
|
elif "all" not in processed_lang:
|
||||||
per_language = 1
|
per_language = 1
|
||||||
title.tracks.audio = title.tracks.by_language(
|
title.tracks.audio = title.tracks.by_language(
|
||||||
title.tracks.audio, processed_lang, per_language=per_language
|
title.tracks.audio, processed_lang, per_language=per_language, exact_match=exact_lang
|
||||||
)
|
)
|
||||||
if not title.tracks.audio:
|
if not title.tracks.audio:
|
||||||
self.log.error(f"There's no {processed_lang} Audio Track, cannot continue...")
|
self.log.error(f"There's no {processed_lang} Audio Track, cannot continue...")
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ DOWNLOAD_LICENCE_ONLY = Event()
|
|||||||
|
|
||||||
DRM_SORT_MAP = ["ClearKey", "Widevine"]
|
DRM_SORT_MAP = ["ClearKey", "Widevine"]
|
||||||
LANGUAGE_MAX_DISTANCE = 5 # this is max to be considered "same", e.g., en, en-US, en-AU
|
LANGUAGE_MAX_DISTANCE = 5 # this is max to be considered "same", e.g., en, en-US, en-AU
|
||||||
|
LANGUAGE_EXACT_DISTANCE = 0 # exact match only, no variants
|
||||||
VIDEO_CODEC_MAP = {"AVC": "H.264", "HEVC": "H.265"}
|
VIDEO_CODEC_MAP = {"AVC": "H.264", "HEVC": "H.265"}
|
||||||
DYNAMIC_RANGE_MAP = {"HDR10": "HDR", "HDR10+": "HDR10P", "Dolby Vision": "DV", "HDR10 / HDR10+": "HDR10P", "HDR10 / HDR10": "HDR"}
|
DYNAMIC_RANGE_MAP = {"HDR10": "HDR", "HDR10+": "HDR10P", "Dolby Vision": "DV", "HDR10 / HDR10+": "HDR10P", "HDR10 / HDR10": "HDR"}
|
||||||
AUDIO_CODEC_MAP = {"E-AC-3": "DDP", "AC-3": "DD"}
|
AUDIO_CODEC_MAP = {"E-AC-3": "DDP", "AC-3": "DD"}
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ from rich.tree import Tree
|
|||||||
from unshackle.core import binaries
|
from unshackle.core import binaries
|
||||||
from unshackle.core.config import config
|
from unshackle.core.config import config
|
||||||
from unshackle.core.console import console
|
from unshackle.core.console import console
|
||||||
from unshackle.core.constants import LANGUAGE_MAX_DISTANCE, AnyTrack, TrackT
|
from unshackle.core.constants import LANGUAGE_EXACT_DISTANCE, LANGUAGE_MAX_DISTANCE, AnyTrack, TrackT
|
||||||
from unshackle.core.events import events
|
from unshackle.core.events import events
|
||||||
from unshackle.core.tracks.attachment import Attachment
|
from unshackle.core.tracks.attachment import Attachment
|
||||||
from unshackle.core.tracks.audio import Audio
|
from unshackle.core.tracks.audio import Audio
|
||||||
@@ -294,11 +294,14 @@ class Tracks:
|
|||||||
self.videos = selected
|
self.videos = selected
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def by_language(tracks: list[TrackT], languages: list[str], per_language: int = 0) -> list[TrackT]:
|
def by_language(
|
||||||
|
tracks: list[TrackT], languages: list[str], per_language: int = 0, exact_match: bool = False
|
||||||
|
) -> list[TrackT]:
|
||||||
|
distance = LANGUAGE_EXACT_DISTANCE if exact_match else LANGUAGE_MAX_DISTANCE
|
||||||
selected = []
|
selected = []
|
||||||
for language in languages:
|
for language in languages:
|
||||||
selected.extend(
|
selected.extend(
|
||||||
[x for x in tracks if closest_supported_match(x.language, [language], LANGUAGE_MAX_DISTANCE)][
|
[x for x in tracks if closest_supported_match(str(x.language), [language], distance)][
|
||||||
: per_language or None
|
: per_language or None
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ from unidecode import unidecode
|
|||||||
|
|
||||||
from unshackle.core.cacher import Cacher
|
from unshackle.core.cacher import Cacher
|
||||||
from unshackle.core.config import config
|
from unshackle.core.config import config
|
||||||
from unshackle.core.constants import LANGUAGE_MAX_DISTANCE
|
from unshackle.core.constants import LANGUAGE_EXACT_DISTANCE, LANGUAGE_MAX_DISTANCE
|
||||||
|
|
||||||
|
|
||||||
def rotate_log_file(log_path: Path, keep: int = 20) -> Path:
|
def rotate_log_file(log_path: Path, keep: int = 20) -> Path:
|
||||||
@@ -114,6 +114,14 @@ def is_close_match(language: Union[str, Language], languages: Sequence[Union[str
|
|||||||
return closest_match(language, list(map(str, languages)))[1] <= LANGUAGE_MAX_DISTANCE
|
return closest_match(language, list(map(str, languages)))[1] <= LANGUAGE_MAX_DISTANCE
|
||||||
|
|
||||||
|
|
||||||
|
def is_exact_match(language: Union[str, Language], languages: Sequence[Union[str, Language, None]]) -> bool:
    """
    Report whether `language` exactly matches at least one of `languages`.

    Falsy entries of `languages` (such as None) are dropped before comparing,
    and the remaining entries are converted to strings. If nothing is left to
    compare against, the result is False.

    The decision is delegated to `closest_match`: the second element of its
    result is compared against LANGUAGE_EXACT_DISTANCE.
    NOTE(review): `closest_match` is defined outside this view; presumably its
    second element is a language distance where lower means closer - confirm.
    """
    candidates = [str(candidate) for candidate in languages if candidate]
    if candidates:
        distance = closest_match(language, candidates)[1]
        return distance <= LANGUAGE_EXACT_DISTANCE
    return False
|
||||||
|
|
||||||
|
|
||||||
def get_boxes(data: bytes, box_type: bytes, as_bytes: bool = False) -> Box:
|
def get_boxes(data: bytes, box_type: bytes, as_bytes: bool = False) -> Box:
|
||||||
"""
|
"""
|
||||||
Scan a byte array for a wanted MP4/ISOBMFF box, then parse and yield each find.
|
Scan a byte array for a wanted MP4/ISOBMFF box, then parse and yield each find.
|
||||||
|
|||||||
Reference in New Issue
Block a user