feat: Implement title caching system to reduce API calls

- Add configurable title caching with fallback support
- Cache titles for 30 minutes by default, with 24-hour fallback on API failures
- Add --no-cache and --reset-cache CLI flags for cache control
- Implement region-aware caching to handle geo-restricted content
- Use SHA256 hashing for cache keys to handle complex title IDs
- Add cache configuration variables to config system
- Document new caching options in example config

This caching system significantly reduces redundant API calls when re-running
the same download while debugging or adjusting CLI parameters, improving both
performance and reliability.
Author: Andy
Date: 2025-08-06 17:08:58 +00:00
Parent: ead05d08ac
Commit: f0493292af

5 changed files with 318 additions and 1 deletion
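
A note on the two timing windows this commit introduces: a cached entry is
served unconditionally while it is fresh (title_cache_time), and remains
eligible as a fallback when the live API call fails until
title_cache_max_retention has elapsed since it was written. A minimal sketch
of that arithmetic, using the defaults above (variable names here are
illustrative):

    from datetime import datetime, timedelta

    title_cache_time = 1800            # fresh for 30 minutes (default)
    title_cache_max_retention = 86400  # fallback window: 24 hours (default)

    cached_at = datetime.now()
    fresh_until = cached_at + timedelta(seconds=title_cache_time)
    fallback_until = cached_at + timedelta(seconds=title_cache_max_retention)
    # Before fresh_until: the cache is served without hitting the API.
    # Between fresh_until and fallback_until: the cache is served only if
    # the live get_titles() call raises.
    # After fallback_until: the entry is never served and the error propagates.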

@@ -240,6 +240,8 @@ class dl:
         help="Max workers/threads to download with per-track. Default depends on the downloader.",
     )
     @click.option("--downloads", type=int, default=1, help="Amount of tracks to download concurrently.")
+    @click.option("--no-cache", "no_cache", is_flag=True, default=False, help="Bypass title cache for this download.")
+    @click.option("--reset-cache", "reset_cache", is_flag=True, default=False, help="Clear title cache before fetching.")
     @click.pass_context
     def cli(ctx: click.Context, **kwargs: Any) -> dl:
         return dl(ctx, **kwargs)
@@ -460,7 +462,7 @@ class dl:
         self.log.info("Authenticated with Service")
         with console.status("Fetching Title Metadata...", spinner="dots"):
-            titles = service.get_titles()
+            titles = service.get_titles_cached()
 
         if not titles:
             self.log.error("No titles returned, nothing to download...")
             sys.exit(1)
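
The new flags are declared on the parent dl command, and services later read
them through click's context chain (see get_titles_cached in the Service diff
below). A minimal standalone sketch of that pattern; the group and command
names here are illustrative, not the project's actual CLI:

    import click

    @click.group()
    @click.option("--no-cache", "no_cache", is_flag=True, default=False)
    @click.pass_context
    def cli(ctx: click.Context, no_cache: bool) -> None:
        pass

    @cli.command()
    @click.pass_context
    def fetch(ctx: click.Context) -> None:
        # A subcommand's ctx.parent is the group's context; its params
        # dict holds the parsed group-level flags.
        click.echo(f"no_cache={ctx.parent.params.get('no_cache', False)}")

    if __name__ == "__main__":
        cli()  # e.g. `python sketch.py --no-cache fetch` prints no_cache=True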

@@ -90,6 +90,10 @@ class Config:
         self.update_check_interval: int = kwargs.get("update_check_interval", 24)
         self.scene_naming: bool = kwargs.get("scene_naming", True)
+        self.title_cache_time: int = kwargs.get("title_cache_time", 1800)  # 30 minutes default
+        self.title_cache_max_retention: int = kwargs.get("title_cache_max_retention", 86400)  # 24 hours default
+        self.title_cache_enabled: bool = kwargs.get("title_cache_enabled", True)
 
     @classmethod
     def from_yaml(cls, path: Path) -> Config:
         if not path.exists():

@@ -21,6 +21,7 @@ from unshackle.core.constants import AnyTrack
 from unshackle.core.credential import Credential
 from unshackle.core.drm import DRM_T
 from unshackle.core.search_result import SearchResult
+from unshackle.core.title_cacher import TitleCacher, get_account_hash, get_region_from_proxy
 from unshackle.core.titles import Title_T, Titles_T
 from unshackle.core.tracks import Chapters, Tracks
 from unshackle.core.utilities import get_ip_info
@@ -42,6 +43,12 @@ class Service(metaclass=ABCMeta):
         self.session = self.get_session()
         self.cache = Cacher(self.__class__.__name__)
+        self.title_cache = TitleCacher(self.__class__.__name__)
+
+        # Store context for cache control flags and credential
+        self.ctx = ctx
+        self.credential = None  # Will be set in authenticate()
+        self.current_region = None  # Will be set based on proxy/geolocation
 
         if not ctx.parent or not ctx.parent.params.get("no_proxy"):
             if ctx.parent:
@@ -79,6 +86,15 @@ class Service(metaclass=ABCMeta):
                             ).decode()
                         }
                     )
+                # Store region from proxy
+                self.current_region = get_region_from_proxy(proxy)
+            else:
+                # No proxy, try to get current region
+                try:
+                    ip_info = get_ip_info(self.session)
+                    self.current_region = ip_info.get("country", "").lower() if ip_info else None
+                except Exception:
+                    self.current_region = None
 
     # Optional Abstract functions
     # The following functions may be implemented by the Service.
@@ -123,6 +139,9 @@ class Service(metaclass=ABCMeta):
             raise TypeError(f"Expected cookies to be a {CookieJar}, not {cookies!r}.")
         self.session.cookies.update(cookies)
 
+        # Store credential for cache key generation
+        self.credential = credential
+
     def search(self) -> Generator[SearchResult, None, None]:
         """
         Search by query for titles from the Service.
@@ -187,6 +206,52 @@ class Service(metaclass=ABCMeta):
         This can be useful to store information on each title that will be required like any sub-asset IDs, or such.
         """
 
+    def get_titles_cached(self, title_id: str = None) -> Titles_T:
+        """
+        Cached wrapper around get_titles() to reduce redundant API calls.
+
+        This method checks the cache before calling get_titles() and handles
+        fallback to cached data when API calls fail.
+
+        Args:
+            title_id: Optional title ID for cache key generation.
+                If not provided, will try to extract from service instance.
+
+        Returns:
+            Titles object (Movies, Series, or Album)
+        """
+        # Try to get title_id from service instance if not provided
+        if title_id is None:
+            # Different services store the title ID in different attributes
+            if hasattr(self, "title"):
+                title_id = self.title
+            elif hasattr(self, "title_id"):
+                title_id = self.title_id
+            else:
+                # If we can't determine title_id, just call get_titles directly
+                self.log.debug("Cannot determine title_id for caching, bypassing cache")
+                return self.get_titles()
+
+        # Get cache control flags from context
+        no_cache = False
+        reset_cache = False
+        if self.ctx and self.ctx.parent:
+            no_cache = self.ctx.parent.params.get("no_cache", False)
+            reset_cache = self.ctx.parent.params.get("reset_cache", False)
+
+        # Get account hash for cache key
+        account_hash = get_account_hash(self.credential)
+
+        # Use title cache to get titles with fallback support
+        return self.title_cache.get_cached_titles(
+            title_id=str(title_id),
+            fetch_function=self.get_titles,
+            region=self.current_region,
+            account_hash=account_hash,
+            no_cache=no_cache,
+            reset_cache=reset_cache,
+        )
+
     @abstractmethod
     def get_tracks(self, title: Title_T) -> Tracks:
         """

@@ -0,0 +1,240 @@
+from __future__ import annotations
+
+import hashlib
+import logging
+from datetime import datetime, timedelta
+from typing import Optional
+
+from unshackle.core.cacher import Cacher
+from unshackle.core.config import config
+from unshackle.core.titles import Titles_T
+
+
+class TitleCacher:
+    """
+    Handles caching of Title objects to reduce redundant API calls.
+
+    This wrapper provides:
+    - Region-aware caching to handle geo-restricted content
+    - Automatic fallback to cached data when API calls fail
+    - Cache lifetime extension during failures
+    - Cache hit/miss statistics for debugging
+    """
+
+    def __init__(self, service_name: str):
+        self.service_name = service_name
+        self.log = logging.getLogger(f"{service_name}.TitleCache")
+        self.cacher = Cacher(service_name)
+        self.stats = {"hits": 0, "misses": 0, "fallbacks": 0}
+
+    def _generate_cache_key(
+        self, title_id: str, region: Optional[str] = None, account_hash: Optional[str] = None
+    ) -> str:
+        """
+        Generate a unique cache key for title data.
+
+        Args:
+            title_id: The title identifier
+            region: The region/proxy identifier
+            account_hash: Hash of account credentials (if applicable)
+
+        Returns:
+            A unique cache key string
+        """
+        # Hash the title_id to handle complex IDs (URLs, dots, special chars)
+        # This ensures consistent length and filesystem-safe keys
+        title_hash = hashlib.sha256(title_id.encode()).hexdigest()[:16]
+
+        # Start with base key using hash
+        key_parts = ["titles", title_hash]
+
+        # Add region if available
+        if region:
+            key_parts.append(region.lower())
+
+        # Add account hash if available
+        if account_hash:
+            key_parts.append(account_hash[:8])  # Use first 8 chars of hash
+
+        # Join with underscores
+        cache_key = "_".join(key_parts)
+
+        # Log the mapping for debugging
+        self.log.debug(f"Cache key mapping: {title_id} -> {cache_key}")
+
+        return cache_key
+
+    def get_cached_titles(
+        self,
+        title_id: str,
+        fetch_function,
+        region: Optional[str] = None,
+        account_hash: Optional[str] = None,
+        no_cache: bool = False,
+        reset_cache: bool = False,
+    ) -> Optional[Titles_T]:
+        """
+        Get titles from cache or fetch from API with fallback support.
+
+        Args:
+            title_id: The title identifier
+            fetch_function: Function to call to fetch fresh titles
+            region: The region/proxy identifier
+            account_hash: Hash of account credentials
+            no_cache: Bypass cache completely
+            reset_cache: Clear cache before fetching
+
+        Returns:
+            Titles object (Movies, Series, or Album)
+        """
+        # If caching is globally disabled or no_cache flag is set
+        if not config.title_cache_enabled or no_cache:
+            self.log.debug("Cache bypassed, fetching fresh titles")
+            return fetch_function()
+
+        # Generate cache key
+        cache_key = self._generate_cache_key(title_id, region, account_hash)
+
+        # If reset_cache flag is set, clear the cache entry
+        if reset_cache:
+            self.log.info(f"Clearing cache for {cache_key}")
+            cache_path = (config.directories.cache / self.service_name / cache_key).with_suffix(".json")
+            if cache_path.exists():
+                cache_path.unlink()
+
+        # Try to get from cache
+        cache = self.cacher.get(cache_key, version=1)
+
+        # Check if we have valid cached data
+        if cache and not cache.expired:
+            self.stats["hits"] += 1
+            self.log.debug(f"Cache hit for {title_id} (hits: {self.stats['hits']}, misses: {self.stats['misses']})")
+            return cache.data
+
+        # Cache miss or expired, try to fetch fresh data
+        self.stats["misses"] += 1
+        self.log.debug(f"Cache miss for {title_id}, fetching fresh data")
+
+        try:
+            # Attempt to fetch fresh titles
+            titles = fetch_function()
+            if titles:
+                # Successfully fetched, update cache
+                self.log.debug(f"Successfully fetched titles for {title_id}, updating cache")
+                cache = self.cacher.get(cache_key, version=1)
+                cache.set(titles, expiration=datetime.now() + timedelta(seconds=config.title_cache_time))
+            return titles
+        except Exception as e:
+            # API call failed, check if we have fallback cached data
+            if cache and cache.data:
+                # We have expired cached data, use it as fallback
+                current_time = datetime.now()
+                max_retention_time = cache.expiration + timedelta(
+                    seconds=config.title_cache_max_retention - config.title_cache_time
+                )
+                if current_time < max_retention_time:
+                    self.stats["fallbacks"] += 1
+                    self.log.warning(
+                        f"API call failed for {title_id}, using cached data as fallback "
+                        f"(fallbacks: {self.stats['fallbacks']})"
+                    )
+                    self.log.debug(f"Error was: {e}")
+                    # Extend cache lifetime
+                    extended_expiration = current_time + timedelta(minutes=5)
+                    if extended_expiration < max_retention_time:
+                        cache.expiration = extended_expiration
+                        cache.set(cache.data, expiration=extended_expiration)
+                    return cache.data
+                else:
+                    self.log.error(f"API call failed and cached data for {title_id} exceeded maximum retention time")
+            # Re-raise the exception if no fallback available
+            raise
+
+    def clear_all_title_cache(self):
+        """Clear all title caches for this service."""
+        cache_dir = config.directories.cache / self.service_name
+        if cache_dir.exists():
+            for cache_file in cache_dir.glob("titles_*.json"):
+                cache_file.unlink()
+                self.log.info(f"Cleared cache file: {cache_file.name}")
+
+    def get_cache_stats(self) -> dict:
+        """Get cache statistics."""
+        total = sum(self.stats.values())
+        if total > 0:
+            hit_rate = (self.stats["hits"] / total) * 100
+        else:
+            hit_rate = 0
+        return {
+            "hits": self.stats["hits"],
+            "misses": self.stats["misses"],
+            "fallbacks": self.stats["fallbacks"],
+            "hit_rate": f"{hit_rate:.1f}%",
+        }
+
+
+def get_region_from_proxy(proxy_url: Optional[str]) -> Optional[str]:
+    """
+    Extract region identifier from proxy URL.
+
+    Args:
+        proxy_url: The proxy URL string
+
+    Returns:
+        Region identifier or None
+    """
+    if not proxy_url:
+        return None
+
+    # Try to extract region from common proxy patterns
+    # e.g., "us123.nordvpn.com", "gb-proxy.example.com"
+    import re
+
+    # Pattern for NordVPN style
+    nord_match = re.search(r"([a-z]{2})\d+\.nordvpn", proxy_url.lower())
+    if nord_match:
+        return nord_match.group(1)
+
+    # Pattern for country code at start
+    cc_match = re.search(r"([a-z]{2})[-_]", proxy_url.lower())
+    if cc_match:
+        return cc_match.group(1)
+
+    # Pattern for country code subdomain
+    subdomain_match = re.search(r"://([a-z]{2})\.", proxy_url.lower())
+    if subdomain_match:
+        return subdomain_match.group(1)
+
+    return None
+
+
+def get_account_hash(credential) -> Optional[str]:
+    """
+    Generate a hash for account identification.
+
+    Args:
+        credential: Credential object
+
+    Returns:
+        SHA1 hash of the credential or None
+    """
+    if not credential:
+        return None
+
+    # Use existing sha1 property if available
+    if hasattr(credential, "sha1"):
+        return credential.sha1
+
+    # Otherwise generate hash from username
+    if hasattr(credential, "username"):
+        return hashlib.sha1(credential.username.encode()).hexdigest()
+
+    return None
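
Rough usage of the module-level helpers above, with made-up proxy hostnames;
the NordVPN-style and country-prefix patterns are exactly the ones matched by
get_region_from_proxy():

    from unshackle.core.title_cacher import TitleCacher, get_region_from_proxy

    assert get_region_from_proxy("https://us1234.nordvpn.com:89") == "us"
    assert get_region_from_proxy("http://gb-proxy.example.com:8080") == "gb"
    assert get_region_from_proxy(None) is None

    cacher = TitleCacher("EXAMPLE")  # hypothetical service name
    print(cacher.get_cache_stats())
    # -> {'hits': 0, 'misses': 0, 'fallbacks': 0, 'hit_rate': '0.0%'}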

@@ -15,6 +15,12 @@ update_checks: true
 # How often to check for updates, in hours (default: 24)
 update_check_interval: 24
 
+# Title caching configuration
+# Cache title metadata to reduce redundant API calls
+title_cache_enabled: true # Enable/disable title caching globally (default: true)
+title_cache_time: 1800 # Cache duration in seconds (default: 1800 = 30 minutes)
+title_cache_max_retention: 86400 # Maximum cache retention for fallback when API fails (default: 86400 = 24 hours)
+
 # Muxing configuration
 muxing:
   set_title: false
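
Note that the YAML switch and the per-run CLI flag compose: caching runs only
when title_cache_enabled is true and --no-cache was not passed. A tiny sketch
of the guard (mirroring the check at the top of TitleCacher.get_cached_titles):

    def should_use_cache(title_cache_enabled: bool, no_cache: bool) -> bool:
        # Mirrors: if not config.title_cache_enabled or no_cache: bypass
        return title_cache_enabled and not no_cache

    assert should_use_cache(True, False) is True    # defaults: cache is used
    assert should_use_cache(True, True) is False    # --no-cache wins for one run
    assert should_use_cache(False, False) is False  # disabled globally in YAML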