feat: Implement title caching system to reduce API calls

- Add configurable title caching with fallback support
- Cache titles for 30 minutes by default, with 24-hour fallback on API failures
- Add --no-cache and --reset-cache CLI flags for cache control
- Implement region-aware caching to handle geo-restricted content
- Use SHA256 hashing for cache keys to handle complex title IDs
- Add cache configuration variables to config system
- Document new caching options in example config

This caching system significantly reduces redundant API calls when re-running
the same download while debugging or adjusting CLI parameters, improving both
performance and reliability.
Author: Andy
Date: 2025-08-06 17:08:58 +00:00
Parent: ead05d08ac
Commit: f0493292af

5 changed files with 318 additions and 1 deletion
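
A note on the two timing windows this commit introduces: a cached entry is
served unconditionally while it is fresh (title_cache_time), and remains
eligible as a fallback when the live API call fails until
title_cache_max_retention has elapsed since it was written. A minimal sketch
of that arithmetic, using the defaults above (variable names here are
illustrative):

    from datetime import datetime, timedelta

    title_cache_time = 1800            # fresh for 30 minutes (default)
    title_cache_max_retention = 86400  # fallback window: 24 hours (default)

    cached_at = datetime.now()
    fresh_until = cached_at + timedelta(seconds=title_cache_time)
    fallback_until = cached_at + timedelta(seconds=title_cache_max_retention)
    # Before fresh_until: the cache is served without hitting the API.
    # Between fresh_until and fallback_until: the cache is served only if
    # the live get_titles() call raises.
    # After fallback_until: the entry is never served and the error propagates.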

@@ -240,6 +240,8 @@ class dl:
         help="Max workers/threads to download with per-track. Default depends on the downloader.",
     )
     @click.option("--downloads", type=int, default=1, help="Amount of tracks to download concurrently.")
+    @click.option("--no-cache", "no_cache", is_flag=True, default=False, help="Bypass title cache for this download.")
+    @click.option("--reset-cache", "reset_cache", is_flag=True, default=False, help="Clear title cache before fetching.")
     @click.pass_context
     def cli(ctx: click.Context, **kwargs: Any) -> dl:
         return dl(ctx, **kwargs)
@@ -460,7 +462,7 @@ class dl:
         self.log.info("Authenticated with Service")
         with console.status("Fetching Title Metadata...", spinner="dots"):
-            titles = service.get_titles()
+            titles = service.get_titles_cached()
 
         if not titles:
             self.log.error("No titles returned, nothing to download...")
             sys.exit(1)
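
The new flags are declared on the parent dl command, and services later read
them through click's context chain (see get_titles_cached in the Service diff
below). A minimal standalone sketch of that pattern; the group and command
names here are illustrative, not the project's actual CLI:

    import click

    @click.group()
    @click.option("--no-cache", "no_cache", is_flag=True, default=False)
    @click.pass_context
    def cli(ctx: click.Context, no_cache: bool) -> None:
        pass

    @cli.command()
    @click.pass_context
    def fetch(ctx: click.Context) -> None:
        # A subcommand's ctx.parent is the group's context; its params
        # dict holds the parsed group-level flags.
        click.echo(f"no_cache={ctx.parent.params.get('no_cache', False)}")

    if __name__ == "__main__":
        cli()  # e.g. `python sketch.py --no-cache fetch` prints no_cache=True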

@@ -90,6 +90,10 @@ class Config:
         self.update_check_interval: int = kwargs.get("update_check_interval", 24)
         self.scene_naming: bool = kwargs.get("scene_naming", True)
+        self.title_cache_time: int = kwargs.get("title_cache_time", 1800)  # 30 minutes default
+        self.title_cache_max_retention: int = kwargs.get("title_cache_max_retention", 86400)  # 24 hours default
+        self.title_cache_enabled: bool = kwargs.get("title_cache_enabled", True)
 
     @classmethod
     def from_yaml(cls, path: Path) -> Config:
         if not path.exists():

@@ -21,6 +21,7 @@ from unshackle.core.constants import AnyTrack
 from unshackle.core.credential import Credential
 from unshackle.core.drm import DRM_T
 from unshackle.core.search_result import SearchResult
+from unshackle.core.title_cacher import TitleCacher, get_account_hash, get_region_from_proxy
 from unshackle.core.titles import Title_T, Titles_T
 from unshackle.core.tracks import Chapters, Tracks
 from unshackle.core.utilities import get_ip_info
@@ -42,6 +43,12 @@ class Service(metaclass=ABCMeta):
         self.session = self.get_session()
         self.cache = Cacher(self.__class__.__name__)
+        self.title_cache = TitleCacher(self.__class__.__name__)
+
+        # Store context for cache control flags and credential
+        self.ctx = ctx
+        self.credential = None  # Will be set in authenticate()
+        self.current_region = None  # Will be set based on proxy/geolocation
 
         if not ctx.parent or not ctx.parent.params.get("no_proxy"):
             if ctx.parent:
@@ -79,6 +86,15 @@ class Service(metaclass=ABCMeta):
                             ).decode()
                         }
                     )
+                # Store region from proxy
+                self.current_region = get_region_from_proxy(proxy)
+            else:
+                # No proxy, try to get current region
+                try:
+                    ip_info = get_ip_info(self.session)
+                    self.current_region = ip_info.get("country", "").lower() if ip_info else None
+                except Exception:
+                    self.current_region = None
 
     # Optional Abstract functions
     # The following functions may be implemented by the Service.
@@ -123,6 +139,9 @@ class Service(metaclass=ABCMeta):
             raise TypeError(f"Expected cookies to be a {CookieJar}, not {cookies!r}.")
         self.session.cookies.update(cookies)
 
+        # Store credential for cache key generation
+        self.credential = credential
+
     def search(self) -> Generator[SearchResult, None, None]:
         """
         Search by query for titles from the Service.
@@ -187,6 +206,52 @@ class Service(metaclass=ABCMeta):
         This can be useful to store information on each title that will be required like any sub-asset IDs, or such.
         """
 
+    def get_titles_cached(self, title_id: str = None) -> Titles_T:
+        """
+        Cached wrapper around get_titles() to reduce redundant API calls.
+
+        This method checks the cache before calling get_titles() and handles
+        fallback to cached data when API calls fail.
+
+        Args:
+            title_id: Optional title ID for cache key generation.
+                If not provided, will try to extract from service instance.
+
+        Returns:
+            Titles object (Movies, Series, or Album)
+        """
+        # Try to get title_id from service instance if not provided
+        if title_id is None:
+            # Different services store the title ID in different attributes
+            if hasattr(self, "title"):
+                title_id = self.title
+            elif hasattr(self, "title_id"):
+                title_id = self.title_id
+            else:
+                # If we can't determine title_id, just call get_titles directly
+                self.log.debug("Cannot determine title_id for caching, bypassing cache")
+                return self.get_titles()
+
+        # Get cache control flags from context
+        no_cache = False
+        reset_cache = False
+        if self.ctx and self.ctx.parent:
+            no_cache = self.ctx.parent.params.get("no_cache", False)
+            reset_cache = self.ctx.parent.params.get("reset_cache", False)
+
+        # Get account hash for cache key
+        account_hash = get_account_hash(self.credential)
+
+        # Use title cache to get titles with fallback support
+        return self.title_cache.get_cached_titles(
+            title_id=str(title_id),
+            fetch_function=self.get_titles,
+            region=self.current_region,
+            account_hash=account_hash,
+            no_cache=no_cache,
+            reset_cache=reset_cache,
+        )
+
     @abstractmethod
     def get_tracks(self, title: Title_T) -> Tracks:
         """

@@ -0,0 +1,240 @@
+from __future__ import annotations
+
+import hashlib
+import logging
+from datetime import datetime, timedelta
+from typing import Optional
+
+from unshackle.core.cacher import Cacher
+from unshackle.core.config import config
+from unshackle.core.titles import Titles_T
+
+
+class TitleCacher:
+    """
+    Handles caching of Title objects to reduce redundant API calls.
+
+    This wrapper provides:
+    - Region-aware caching to handle geo-restricted content
+    - Automatic fallback to cached data when API calls fail
+    - Cache lifetime extension during failures
+    - Cache hit/miss statistics for debugging
+    """
+
+    def __init__(self, service_name: str):
+        self.service_name = service_name
+        self.log = logging.getLogger(f"{service_name}.TitleCache")
+        self.cacher = Cacher(service_name)
+        self.stats = {"hits": 0, "misses": 0, "fallbacks": 0}
+
+    def _generate_cache_key(
+        self, title_id: str, region: Optional[str] = None, account_hash: Optional[str] = None
+    ) -> str:
+        """
+        Generate a unique cache key for title data.
+
+        Args:
+            title_id: The title identifier
+            region: The region/proxy identifier
+            account_hash: Hash of account credentials (if applicable)
+
+        Returns:
+            A unique cache key string
+        """
+        # Hash the title_id to handle complex IDs (URLs, dots, special chars)
+        # This ensures consistent length and filesystem-safe keys
+        title_hash = hashlib.sha256(title_id.encode()).hexdigest()[:16]
+
+        # Start with base key using hash
+        key_parts = ["titles", title_hash]
+
+        # Add region if available
+        if region:
+            key_parts.append(region.lower())
+
+        # Add account hash if available
+        if account_hash:
+            key_parts.append(account_hash[:8])  # Use first 8 chars of hash
+
+        # Join with underscores
+        cache_key = "_".join(key_parts)
+
+        # Log the mapping for debugging
+        self.log.debug(f"Cache key mapping: {title_id} -> {cache_key}")
+
+        return cache_key
+
+    def get_cached_titles(
+        self,
+        title_id: str,
+        fetch_function,
+        region: Optional[str] = None,
+        account_hash: Optional[str] = None,
+        no_cache: bool = False,
+        reset_cache: bool = False,
+    ) -> Optional[Titles_T]:
+        """
+        Get titles from cache or fetch from API with fallback support.
+
+        Args:
+            title_id: The title identifier
+            fetch_function: Function to call to fetch fresh titles
+            region: The region/proxy identifier
+            account_hash: Hash of account credentials
+            no_cache: Bypass cache completely
+            reset_cache: Clear cache before fetching
+
+        Returns:
+            Titles object (Movies, Series, or Album)
+        """
+        # If caching is globally disabled or no_cache flag is set
+        if not config.title_cache_enabled or no_cache:
+            self.log.debug("Cache bypassed, fetching fresh titles")
+            return fetch_function()
+
+        # Generate cache key
+        cache_key = self._generate_cache_key(title_id, region, account_hash)
+
+        # If reset_cache flag is set, clear the cache entry
+        if reset_cache:
+            self.log.info(f"Clearing cache for {cache_key}")
+            cache_path = (config.directories.cache / self.service_name / cache_key).with_suffix(".json")
+            if cache_path.exists():
+                cache_path.unlink()
+
+        # Try to get from cache
+        cache = self.cacher.get(cache_key, version=1)
+
+        # Check if we have valid cached data
+        if cache and not cache.expired:
+            self.stats["hits"] += 1
+            self.log.debug(f"Cache hit for {title_id} (hits: {self.stats['hits']}, misses: {self.stats['misses']})")
+            return cache.data
+
+        # Cache miss or expired, try to fetch fresh data
+        self.stats["misses"] += 1
+        self.log.debug(f"Cache miss for {title_id}, fetching fresh data")
+
+        try:
+            # Attempt to fetch fresh titles
+            titles = fetch_function()
+            if titles:
+                # Successfully fetched, update cache
+                self.log.debug(f"Successfully fetched titles for {title_id}, updating cache")
+                cache = self.cacher.get(cache_key, version=1)
+                cache.set(titles, expiration=datetime.now() + timedelta(seconds=config.title_cache_time))
+            return titles
+        except Exception as e:
+            # API call failed, check if we have fallback cached data
+            if cache and cache.data:
+                # We have expired cached data, use it as fallback
+                current_time = datetime.now()
+                max_retention_time = cache.expiration + timedelta(
+                    seconds=config.title_cache_max_retention - config.title_cache_time
+                )
+                if current_time < max_retention_time:
+                    self.stats["fallbacks"] += 1
+                    self.log.warning(
+                        f"API call failed for {title_id}, using cached data as fallback "
+                        f"(fallbacks: {self.stats['fallbacks']})"
+                    )
+                    self.log.debug(f"Error was: {e}")
+                    # Extend cache lifetime
+                    extended_expiration = current_time + timedelta(minutes=5)
+                    if extended_expiration < max_retention_time:
+                        cache.expiration = extended_expiration
+                        cache.set(cache.data, expiration=extended_expiration)
+                    return cache.data
+                else:
+                    self.log.error(f"API call failed and cached data for {title_id} exceeded maximum retention time")
+            # Re-raise the exception if no fallback available
+            raise
+
+    def clear_all_title_cache(self):
+        """Clear all title caches for this service."""
+        cache_dir = config.directories.cache / self.service_name
+        if cache_dir.exists():
+            for cache_file in cache_dir.glob("titles_*.json"):
+                cache_file.unlink()
+                self.log.info(f"Cleared cache file: {cache_file.name}")
+
+    def get_cache_stats(self) -> dict:
+        """Get cache statistics."""
+        total = sum(self.stats.values())
+        if total > 0:
+            hit_rate = (self.stats["hits"] / total) * 100
+        else:
+            hit_rate = 0
+        return {
+            "hits": self.stats["hits"],
+            "misses": self.stats["misses"],
+            "fallbacks": self.stats["fallbacks"],
+            "hit_rate": f"{hit_rate:.1f}%",
+        }
+
+
+def get_region_from_proxy(proxy_url: Optional[str]) -> Optional[str]:
+    """
+    Extract region identifier from proxy URL.
+
+    Args:
+        proxy_url: The proxy URL string
+
+    Returns:
+        Region identifier or None
+    """
+    if not proxy_url:
+        return None
+
+    # Try to extract region from common proxy patterns
+    # e.g., "us123.nordvpn.com", "gb-proxy.example.com"
+    import re
+
+    # Pattern for NordVPN style
+    nord_match = re.search(r"([a-z]{2})\d+\.nordvpn", proxy_url.lower())
+    if nord_match:
+        return nord_match.group(1)
+
+    # Pattern for country code at start
+    cc_match = re.search(r"([a-z]{2})[-_]", proxy_url.lower())
+    if cc_match:
+        return cc_match.group(1)
+
+    # Pattern for country code subdomain
+    subdomain_match = re.search(r"://([a-z]{2})\.", proxy_url.lower())
+    if subdomain_match:
+        return subdomain_match.group(1)
+
+    return None
+
+
+def get_account_hash(credential) -> Optional[str]:
+    """
+    Generate a hash for account identification.
+
+    Args:
+        credential: Credential object
+
+    Returns:
+        SHA1 hash of the credential or None
+    """
+    if not credential:
+        return None
+
+    # Use existing sha1 property if available
+    if hasattr(credential, "sha1"):
+        return credential.sha1
+
+    # Otherwise generate hash from username
+    if hasattr(credential, "username"):
+        return hashlib.sha1(credential.username.encode()).hexdigest()
+
+    return None
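
Rough usage of the module-level helpers above, with made-up proxy hostnames;
the NordVPN-style and country-prefix patterns are exactly the ones matched by
get_region_from_proxy():

    from unshackle.core.title_cacher import TitleCacher, get_region_from_proxy

    assert get_region_from_proxy("https://us1234.nordvpn.com:89") == "us"
    assert get_region_from_proxy("http://gb-proxy.example.com:8080") == "gb"
    assert get_region_from_proxy(None) is None

    cacher = TitleCacher("EXAMPLE")  # hypothetical service name
    print(cacher.get_cache_stats())
    # -> {'hits': 0, 'misses': 0, 'fallbacks': 0, 'hit_rate': '0.0%'}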

@@ -15,6 +15,12 @@ update_checks: true
 # How often to check for updates, in hours (default: 24)
 update_check_interval: 24
 
+# Title caching configuration
+# Cache title metadata to reduce redundant API calls
+title_cache_enabled: true # Enable/disable title caching globally (default: true)
+title_cache_time: 1800 # Cache duration in seconds (default: 1800 = 30 minutes)
+title_cache_max_retention: 86400 # Maximum cache retention for fallback when API fails (default: 86400 = 24 hours)
+
 # Muxing configuration
 muxing:
   set_title: false
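
Note that the YAML switch and the per-run CLI flag compose: caching runs only
when title_cache_enabled is true and --no-cache was not passed. A tiny sketch
of the guard (mirroring the check at the top of TitleCacher.get_cached_titles):

    def should_use_cache(title_cache_enabled: bool, no_cache: bool) -> bool:
        # Mirrors: if not config.title_cache_enabled or no_cache: bypass
        return title_cache_enabled and not no_cache

    assert should_use_cache(True, False) is True    # defaults: cache is used
    assert should_use_cache(True, True) is False    # --no-cache wins for one run
    assert should_use_cache(False, False) is False  # disabled globally in YAML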