diff --git a/unshackle/commands/dl.py b/unshackle/commands/dl.py
index ac2446b..eea32b2 100644
--- a/unshackle/commands/dl.py
+++ b/unshackle/commands/dl.py
@@ -240,6 +240,8 @@ class dl:
         help="Max workers/threads to download with per-track. Default depends on the downloader.",
     )
     @click.option("--downloads", type=int, default=1, help="Amount of tracks to download concurrently.")
+    @click.option("--no-cache", "no_cache", is_flag=True, default=False, help="Bypass title cache for this download.")
+    @click.option("--reset-cache", "reset_cache", is_flag=True, default=False, help="Clear title cache before fetching.")
     @click.pass_context
     def cli(ctx: click.Context, **kwargs: Any) -> dl:
         return dl(ctx, **kwargs)
@@ -460,7 +462,7 @@ class dl:
         self.log.info("Authenticated with Service")
 
         with console.status("Fetching Title Metadata...", spinner="dots"):
-            titles = service.get_titles()
+            titles = service.get_titles_cached()
             if not titles:
                 self.log.error("No titles returned, nothing to download...")
                 sys.exit(1)
diff --git a/unshackle/core/config.py b/unshackle/core/config.py
index 5124137..d7b99e9 100644
--- a/unshackle/core/config.py
+++ b/unshackle/core/config.py
@@ -90,6 +90,10 @@ class Config:
         self.update_check_interval: int = kwargs.get("update_check_interval", 24)
         self.scene_naming: bool = kwargs.get("scene_naming", True)
 
+        self.title_cache_time: int = kwargs.get("title_cache_time", 1800)  # 30 minutes default
+        self.title_cache_max_retention: int = kwargs.get("title_cache_max_retention", 86400)  # 24 hours default
+        self.title_cache_enabled: bool = kwargs.get("title_cache_enabled", True)
+
     @classmethod
     def from_yaml(cls, path: Path) -> Config:
         if not path.exists():
diff --git a/unshackle/core/service.py b/unshackle/core/service.py
index 8e8a69d..19cf01c 100644
--- a/unshackle/core/service.py
+++ b/unshackle/core/service.py
@@ -21,6 +21,7 @@ from unshackle.core.constants import AnyTrack
 from unshackle.core.credential import Credential
 from unshackle.core.drm import DRM_T
 from unshackle.core.search_result import SearchResult
+from unshackle.core.title_cacher import TitleCacher, get_account_hash, get_region_from_proxy
 from unshackle.core.titles import Title_T, Titles_T
 from unshackle.core.tracks import Chapters, Tracks
 from unshackle.core.utilities import get_ip_info
@@ -42,6 +43,12 @@ class Service(metaclass=ABCMeta):
 
         self.session = self.get_session()
         self.cache = Cacher(self.__class__.__name__)
+        self.title_cache = TitleCacher(self.__class__.__name__)
+
+        # Store context for cache control flags and credential
+        self.ctx = ctx
+        self.credential = None  # Will be set in authenticate()
+        self.current_region = None  # Will be set based on proxy/geolocation
 
         if not ctx.parent or not ctx.parent.params.get("no_proxy"):
             if ctx.parent:
@@ -79,6 +86,15 @@ class Service(metaclass=ABCMeta):
                     ).decode()
                 }
             )
+            # Store region from proxy
+            self.current_region = get_region_from_proxy(proxy)
+        else:
+            # No proxy, try to get current region
+            try:
+                ip_info = get_ip_info(self.session)
+                self.current_region = ip_info.get("country", "").lower() if ip_info else None
+            except Exception:
+                self.current_region = None
 
     # Optional Abstract functions
     # The following functions may be implemented by the Service.
@@ -123,6 +139,9 @@ class Service(metaclass=ABCMeta):
             raise TypeError(f"Expected cookies to be a {CookieJar}, not {cookies!r}.")
         self.session.cookies.update(cookies)
 
+        # Store credential for cache key generation
+        self.credential = credential
+
     def search(self) -> Generator[SearchResult, None, None]:
         """
         Search by query for titles from the Service.
@@ -187,6 +206,52 @@ class Service(metaclass=ABCMeta):
         This can be useful to store information on each title that will be required like
         any sub-asset IDs, or such.
         """
+    def get_titles_cached(self, title_id: str = None) -> Titles_T:
+        """
+        Cached wrapper around get_titles() to reduce redundant API calls.
+
+        This method checks the cache before calling get_titles() and handles
+        fallback to cached data when API calls fail.
+
+        Args:
+            title_id: Optional title ID for cache key generation.
+                If not provided, will try to extract from service instance.
+
+        Returns:
+            Titles object (Movies, Series, or Album)
+        """
+        # Try to get title_id from service instance if not provided
+        if title_id is None:
+            # Different services store the title ID in different attributes
+            if hasattr(self, "title"):
+                title_id = self.title
+            elif hasattr(self, "title_id"):
+                title_id = self.title_id
+            else:
+                # If we can't determine title_id, just call get_titles directly
+                self.log.debug("Cannot determine title_id for caching, bypassing cache")
+                return self.get_titles()
+
+        # Get cache control flags from context
+        no_cache = False
+        reset_cache = False
+        if self.ctx and self.ctx.parent:
+            no_cache = self.ctx.parent.params.get("no_cache", False)
+            reset_cache = self.ctx.parent.params.get("reset_cache", False)
+
+        # Get account hash for cache key
+        account_hash = get_account_hash(self.credential)
+
+        # Use title cache to get titles with fallback support
+        return self.title_cache.get_cached_titles(
+            title_id=str(title_id),
+            fetch_function=self.get_titles,
+            region=self.current_region,
+            account_hash=account_hash,
+            no_cache=no_cache,
+            reset_cache=reset_cache,
+        )
+
     @abstractmethod
     def get_tracks(self, title: Title_T) -> Tracks:
         """
diff --git a/unshackle/core/title_cacher.py b/unshackle/core/title_cacher.py
new file mode 100644
index 0000000..f3346aa
--- /dev/null
+++ b/unshackle/core/title_cacher.py
@@ -0,0 +1,240 @@
+from __future__ import annotations
+
+import hashlib
+import logging
+from datetime import datetime, timedelta
+from typing import Optional
+
+from unshackle.core.cacher import Cacher
+from unshackle.core.config import config
+from unshackle.core.titles import Titles_T
+
+
+class TitleCacher:
+    """
+    Handles caching of Title objects to reduce redundant API calls.
+
+    This wrapper provides:
+    - Region-aware caching to handle geo-restricted content
+    - Automatic fallback to cached data when API calls fail
+    - Cache lifetime extension during failures
+    - Cache hit/miss statistics for debugging
+    """
+
+    def __init__(self, service_name: str):
+        self.service_name = service_name
+        self.log = logging.getLogger(f"{service_name}.TitleCache")
+        self.cacher = Cacher(service_name)
+        self.stats = {"hits": 0, "misses": 0, "fallbacks": 0}
+
+    def _generate_cache_key(
+        self, title_id: str, region: Optional[str] = None, account_hash: Optional[str] = None
+    ) -> str:
+        """
+        Generate a unique cache key for title data.
+
+        Args:
+            title_id: The title identifier
+            region: The region/proxy identifier
+            account_hash: Hash of account credentials (if applicable)
+
+        Returns:
+            A unique cache key string
+        """
+        # Hash the title_id to handle complex IDs (URLs, dots, special chars)
+        # This ensures consistent length and filesystem-safe keys
+        title_hash = hashlib.sha256(title_id.encode()).hexdigest()[:16]
+
+        # Start with base key using hash
+        key_parts = ["titles", title_hash]
+
+        # Add region if available
+        if region:
+            key_parts.append(region.lower())
+
+        # Add account hash if available
+        if account_hash:
+            key_parts.append(account_hash[:8])  # Use first 8 chars of hash
+
+        # Join with underscores
+        cache_key = "_".join(key_parts)
+
+        # Log the mapping for debugging
+        self.log.debug(f"Cache key mapping: {title_id} -> {cache_key}")
+
+        return cache_key
+
+    def get_cached_titles(
+        self,
+        title_id: str,
+        fetch_function,
+        region: Optional[str] = None,
+        account_hash: Optional[str] = None,
+        no_cache: bool = False,
+        reset_cache: bool = False,
+    ) -> Optional[Titles_T]:
+        """
+        Get titles from cache or fetch from API with fallback support.
+
+        Args:
+            title_id: The title identifier
+            fetch_function: Function to call to fetch fresh titles
+            region: The region/proxy identifier
+            account_hash: Hash of account credentials
+            no_cache: Bypass cache completely
+            reset_cache: Clear cache before fetching
+
+        Returns:
+            Titles object (Movies, Series, or Album)
+        """
+        # If caching is globally disabled or no_cache flag is set
+        if not config.title_cache_enabled or no_cache:
+            self.log.debug("Cache bypassed, fetching fresh titles")
+            return fetch_function()
+
+        # Generate cache key
+        cache_key = self._generate_cache_key(title_id, region, account_hash)
+
+        # If reset_cache flag is set, clear the cache entry
+        if reset_cache:
+            self.log.info(f"Clearing cache for {cache_key}")
+            cache_path = (config.directories.cache / self.service_name / cache_key).with_suffix(".json")
+            if cache_path.exists():
+                cache_path.unlink()
+
+        # Try to get from cache
+        cache = self.cacher.get(cache_key, version=1)
+
+        # Check if we have valid cached data
+        if cache and not cache.expired:
+            self.stats["hits"] += 1
+            self.log.debug(f"Cache hit for {title_id} (hits: {self.stats['hits']}, misses: {self.stats['misses']})")
+            return cache.data
+
+        # Cache miss or expired, try to fetch fresh data
+        self.stats["misses"] += 1
+        self.log.debug(f"Cache miss for {title_id}, fetching fresh data")
+
+        try:
+            # Attempt to fetch fresh titles
+            titles = fetch_function()
+
+            if titles:
+                # Successfully fetched, update cache
+                self.log.debug(f"Successfully fetched titles for {title_id}, updating cache")
+                cache = self.cacher.get(cache_key, version=1)
+                cache.set(titles, expiration=datetime.now() + timedelta(seconds=config.title_cache_time))
+
+            return titles
+
+        except Exception as e:
+            # API call failed, check if we have fallback cached data
+            if cache and cache.data:
+                # We have expired cached data, use it as fallback
+                current_time = datetime.now()
+                max_retention_time = cache.expiration + timedelta(
+                    seconds=config.title_cache_max_retention - config.title_cache_time
+                )
+
+                if current_time < max_retention_time:
+                    self.stats["fallbacks"] += 1
+                    self.log.warning(
+                        f"API call failed for {title_id}, using cached data as fallback "
+                        f"(fallbacks: {self.stats['fallbacks']})"
+                    )
+                    self.log.debug(f"Error was: {e}")
+
+                    # Extend cache lifetime
+                    extended_expiration = current_time + timedelta(minutes=5)
+                    if extended_expiration < max_retention_time:
+                        cache.expiration = extended_expiration
+                        cache.set(cache.data, expiration=extended_expiration)
+
+                    return cache.data
+                else:
+                    self.log.error(f"API call failed and cached data for {title_id} exceeded maximum retention time")
+
+            # Re-raise the exception if no fallback available
+            raise
+
+    def clear_all_title_cache(self):
+        """Clear all title caches for this service."""
+        cache_dir = config.directories.cache / self.service_name
+        if cache_dir.exists():
+            for cache_file in cache_dir.glob("titles_*.json"):
+                cache_file.unlink()
+                self.log.info(f"Cleared cache file: {cache_file.name}")
+
+    def get_cache_stats(self) -> dict:
+        """Get cache statistics."""
+        total = sum(self.stats.values())
+        if total > 0:
+            hit_rate = (self.stats["hits"] / total) * 100
+        else:
+            hit_rate = 0
+
+        return {
+            "hits": self.stats["hits"],
+            "misses": self.stats["misses"],
+            "fallbacks": self.stats["fallbacks"],
+            "hit_rate": f"{hit_rate:.1f}%",
+        }
+
+
+def get_region_from_proxy(proxy_url: Optional[str]) -> Optional[str]:
+    """
+    Extract region identifier from proxy URL.
+
+    Args:
+        proxy_url: The proxy URL string
+
+    Returns:
+        Region identifier or None
+    """
+    if not proxy_url:
+        return None
+
+    # Try to extract region from common proxy patterns
+    # e.g., "us123.nordvpn.com", "gb-proxy.example.com"
+    import re
+
+    # Pattern for NordVPN style
+    nord_match = re.search(r"([a-z]{2})\d+\.nordvpn", proxy_url.lower())
+    if nord_match:
+        return nord_match.group(1)
+
+    # Pattern for country code at start
+    cc_match = re.search(r"([a-z]{2})[-_]", proxy_url.lower())
+    if cc_match:
+        return cc_match.group(1)
+
+    # Pattern for country code subdomain
+    subdomain_match = re.search(r"://([a-z]{2})\.", proxy_url.lower())
+    if subdomain_match:
+        return subdomain_match.group(1)
+
+    return None
+
+
+def get_account_hash(credential) -> Optional[str]:
+    """
+    Generate a hash for account identification.
+
+    Args:
+        credential: Credential object
+
+    Returns:
+        SHA1 hash of the credential or None
+    """
+    if not credential:
+        return None
+
+    # Use existing sha1 property if available
+    if hasattr(credential, "sha1"):
+        return credential.sha1
+
+    # Otherwise generate hash from username
+    if hasattr(credential, "username"):
+        return hashlib.sha1(credential.username.encode()).hexdigest()
+
+    return None
diff --git a/unshackle/unshackle-example.yaml b/unshackle/unshackle-example.yaml
index 8c58ea6..fb9e652 100644
--- a/unshackle/unshackle-example.yaml
+++ b/unshackle/unshackle-example.yaml
@@ -15,6 +15,12 @@ update_checks: true
 # How often to check for updates, in hours (default: 24)
 update_check_interval: 24
 
+# Title caching configuration
+# Cache title metadata to reduce redundant API calls
+title_cache_enabled: true # Enable/disable title caching globally (default: true)
+title_cache_time: 1800 # Cache duration in seconds (default: 1800 = 30 minutes)
+title_cache_max_retention: 86400 # Maximum cache retention for fallback when API fails (default: 86400 = 24 hours)
+
 # Muxing configuration
 muxing:
   set_title: false