feat: add --exact-lang flag for precise language matching

New --exact-lang CLI flag that enables exact language code matching instead of fuzzy matching. This allows users to get specific regional variants without matching all related variants.

Examples:
- `-l es-419` normally matches all Spanish (es-ES, es-419, es-MX)
- `-l es-419 --exact-lang` matches ONLY es-419 (Latin American Spanish)

Fixes language detection issue where specific variants like es-419 (Latin American Spanish) would match all Spanish variants instead of just close regional variants.
This commit is contained in:
Andy
2025-10-08 01:54:30 +00:00
parent e9ba78cec3
commit 3f6a7e1f68
4 changed files with 33 additions and 8 deletions

View File

@@ -6,6 +6,7 @@ DOWNLOAD_LICENCE_ONLY = Event()
DRM_SORT_MAP = ["ClearKey", "Widevine"]
LANGUAGE_MAX_DISTANCE = 5 # this is max to be considered "same", e.g., en, en-US, en-AU
LANGUAGE_EXACT_DISTANCE = 0 # exact match only, no variants
VIDEO_CODEC_MAP = {"AVC": "H.264", "HEVC": "H.265"}
DYNAMIC_RANGE_MAP = {"HDR10": "HDR", "HDR10+": "HDR10P", "Dolby Vision": "DV", "HDR10 / HDR10+": "HDR10P", "HDR10 / HDR10": "HDR"}
AUDIO_CODEC_MAP = {"E-AC-3": "DDP", "AC-3": "DD"}

View File

@@ -14,7 +14,7 @@ from rich.tree import Tree
from unshackle.core import binaries
from unshackle.core.config import config
from unshackle.core.console import console
from unshackle.core.constants import LANGUAGE_MAX_DISTANCE, AnyTrack, TrackT
from unshackle.core.constants import LANGUAGE_EXACT_DISTANCE, LANGUAGE_MAX_DISTANCE, AnyTrack, TrackT
from unshackle.core.events import events
from unshackle.core.tracks.attachment import Attachment
from unshackle.core.tracks.audio import Audio
@@ -294,11 +294,14 @@ class Tracks:
self.videos = selected
@staticmethod
def by_language(tracks: list[TrackT], languages: list[str], per_language: int = 0) -> list[TrackT]:
def by_language(
tracks: list[TrackT], languages: list[str], per_language: int = 0, exact_match: bool = False
) -> list[TrackT]:
distance = LANGUAGE_EXACT_DISTANCE if exact_match else LANGUAGE_MAX_DISTANCE
selected = []
for language in languages:
selected.extend(
[x for x in tracks if closest_supported_match(x.language, [language], LANGUAGE_MAX_DISTANCE)][
[x for x in tracks if closest_supported_match(str(x.language), [language], distance)][
: per_language or None
]
)

View File

@@ -24,7 +24,7 @@ from unidecode import unidecode
from unshackle.core.cacher import Cacher
from unshackle.core.config import config
from unshackle.core.constants import LANGUAGE_MAX_DISTANCE
from unshackle.core.constants import LANGUAGE_EXACT_DISTANCE, LANGUAGE_MAX_DISTANCE
def rotate_log_file(log_path: Path, keep: int = 20) -> Path:
@@ -114,6 +114,14 @@ def is_close_match(language: Union[str, Language], languages: Sequence[Union[str
return closest_match(language, list(map(str, languages)))[1] <= LANGUAGE_MAX_DISTANCE
def is_exact_match(language: Union[str, Language], languages: Sequence[Union[str, Language, None]]) -> bool:
"""Check if a language is an exact match to any of the provided languages."""
languages = [x for x in languages if x]
if not languages:
return False
return closest_match(language, list(map(str, languages)))[1] <= LANGUAGE_EXACT_DISTANCE
def get_boxes(data: bytes, box_type: bytes, as_bytes: bool = False) -> Box:
"""
Scan a byte array for a wanted MP4/ISOBMFF box, then parse and yield each find.