feat: Implement title caching system to reduce API calls
- Add configurable title caching with fallback support
- Cache titles for 30 minutes by default, with 24-hour fallback on API failures
- Add --no-cache and --reset-cache CLI flags for cache control
- Implement region-aware caching to handle geo-restricted content
- Use SHA256 hashing for cache keys to handle complex title IDs
- Add cache configuration variables to config system
- Document new caching options in example config

This caching system significantly reduces redundant API calls when debugging or modifying CLI parameters, improving both performance and reliability.
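The cache-key scheme described here (SHA256-hashed title ID plus optional region and account components) reduces to roughly the following standalone sketch. `make_cache_key` is an illustrative name only; the real logic lives in `TitleCacher._generate_cache_key` in the new `title_cacher.py` further down:

import hashlib

def make_cache_key(title_id: str, region: str | None = None, account_hash: str | None = None) -> str:
    # SHA256-hash the title ID so URLs, dots, and other special characters
    # become a fixed-length, filesystem-safe token (first 16 hex chars).
    parts = ["titles", hashlib.sha256(title_id.encode()).hexdigest()[:16]]
    if region:
        parts.append(region.lower())    # region-aware keys for geo-restricted content
    if account_hash:
        parts.append(account_hash[:8])  # first 8 chars of the account hash
    return "_".join(parts)

# make_cache_key("https://svc.example/title/123", region="US", account_hash="deadbeefcafef00d")
# -> "titles_<16 hex chars>_us_deadbeef"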
@@ -240,6 +240,8 @@ class dl:
         help="Max workers/threads to download with per-track. Default depends on the downloader.",
     )
     @click.option("--downloads", type=int, default=1, help="Amount of tracks to download concurrently.")
+    @click.option("--no-cache", "no_cache", is_flag=True, default=False, help="Bypass title cache for this download.")
+    @click.option("--reset-cache", "reset_cache", is_flag=True, default=False, help="Clear title cache before fetching.")
    @click.pass_context
    def cli(ctx: click.Context, **kwargs: Any) -> dl:
        return dl(ctx, **kwargs)
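Usage note: with these flags in place, an invocation along the lines of `unshackle dl --no-cache SERVICE TITLE` would bypass the title cache for one run, while `--reset-cache` would clear the cached entry before fetching fresh metadata. The `unshackle dl` command shape and positional arguments are assumptions for illustration; only the flags themselves come from this diff.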
@@ -460,7 +462,7 @@ class dl:
         self.log.info("Authenticated with Service")
 
         with console.status("Fetching Title Metadata...", spinner="dots"):
-            titles = service.get_titles()
+            titles = service.get_titles_cached()
             if not titles:
                 self.log.error("No titles returned, nothing to download...")
                 sys.exit(1)
@@ -90,6 +90,10 @@ class Config:
         self.update_check_interval: int = kwargs.get("update_check_interval", 24)
         self.scene_naming: bool = kwargs.get("scene_naming", True)
 
+        self.title_cache_time: int = kwargs.get("title_cache_time", 1800)  # 30 minutes default
+        self.title_cache_max_retention: int = kwargs.get("title_cache_max_retention", 86400)  # 24 hours default
+        self.title_cache_enabled: bool = kwargs.get("title_cache_enabled", True)
+
     @classmethod
     def from_yaml(cls, path: Path) -> Config:
         if not path.exists():
@@ -21,6 +21,7 @@ from unshackle.core.constants import AnyTrack
 from unshackle.core.credential import Credential
 from unshackle.core.drm import DRM_T
 from unshackle.core.search_result import SearchResult
+from unshackle.core.title_cacher import TitleCacher, get_account_hash, get_region_from_proxy
 from unshackle.core.titles import Title_T, Titles_T
 from unshackle.core.tracks import Chapters, Tracks
 from unshackle.core.utilities import get_ip_info
@@ -42,6 +43,12 @@ class Service(metaclass=ABCMeta):
 
         self.session = self.get_session()
         self.cache = Cacher(self.__class__.__name__)
+        self.title_cache = TitleCacher(self.__class__.__name__)
+
+        # Store context for cache control flags and credential
+        self.ctx = ctx
+        self.credential = None  # Will be set in authenticate()
+        self.current_region = None  # Will be set based on proxy/geolocation
 
         if not ctx.parent or not ctx.parent.params.get("no_proxy"):
             if ctx.parent:
@@ -79,6 +86,15 @@ class Service(metaclass=ABCMeta):
                     ).decode()
                 }
             )
+            # Store region from proxy
+            self.current_region = get_region_from_proxy(proxy)
+        else:
+            # No proxy, try to get current region
+            try:
+                ip_info = get_ip_info(self.session)
+                self.current_region = ip_info.get("country", "").lower() if ip_info else None
+            except Exception:
+                self.current_region = None
 
     # Optional Abstract functions
     # The following functions may be implemented by the Service.
@@ -123,6 +139,9 @@ class Service(metaclass=ABCMeta):
             raise TypeError(f"Expected cookies to be a {CookieJar}, not {cookies!r}.")
         self.session.cookies.update(cookies)
 
+        # Store credential for cache key generation
+        self.credential = credential
+
     def search(self) -> Generator[SearchResult, None, None]:
         """
         Search by query for titles from the Service.
@@ -187,6 +206,52 @@ class Service(metaclass=ABCMeta):
         This can be useful to store information on each title that will be required like any sub-asset IDs, or such.
         """
 
+    def get_titles_cached(self, title_id: str = None) -> Titles_T:
+        """
+        Cached wrapper around get_titles() to reduce redundant API calls.
+
+        This method checks the cache before calling get_titles() and handles
+        fallback to cached data when API calls fail.
+
+        Args:
+            title_id: Optional title ID for cache key generation.
+                If not provided, will try to extract from service instance.
+
+        Returns:
+            Titles object (Movies, Series, or Album)
+        """
+        # Try to get title_id from service instance if not provided
+        if title_id is None:
+            # Different services store the title ID in different attributes
+            if hasattr(self, "title"):
+                title_id = self.title
+            elif hasattr(self, "title_id"):
+                title_id = self.title_id
+            else:
+                # If we can't determine title_id, just call get_titles directly
+                self.log.debug("Cannot determine title_id for caching, bypassing cache")
+                return self.get_titles()
+
+        # Get cache control flags from context
+        no_cache = False
+        reset_cache = False
+        if self.ctx and self.ctx.parent:
+            no_cache = self.ctx.parent.params.get("no_cache", False)
+            reset_cache = self.ctx.parent.params.get("reset_cache", False)
+
+        # Get account hash for cache key
+        account_hash = get_account_hash(self.credential)
+
+        # Use title cache to get titles with fallback support
+        return self.title_cache.get_cached_titles(
+            title_id=str(title_id),
+            fetch_function=self.get_titles,
+            region=self.current_region,
+            account_hash=account_hash,
+            no_cache=no_cache,
+            reset_cache=reset_cache,
+        )
+
     @abstractmethod
     def get_tracks(self, title: Title_T) -> Tracks:
         """
unshackle/core/title_cacher.py (new file, 240 lines)
@@ -0,0 +1,240 @@
+from __future__ import annotations
+
+import hashlib
+import logging
+from datetime import datetime, timedelta
+from typing import Optional
+
+from unshackle.core.cacher import Cacher
+from unshackle.core.config import config
+from unshackle.core.titles import Titles_T
+
+
+class TitleCacher:
+    """
+    Handles caching of Title objects to reduce redundant API calls.
+
+    This wrapper provides:
+    - Region-aware caching to handle geo-restricted content
+    - Automatic fallback to cached data when API calls fail
+    - Cache lifetime extension during failures
+    - Cache hit/miss statistics for debugging
+    """
+
+    def __init__(self, service_name: str):
+        self.service_name = service_name
+        self.log = logging.getLogger(f"{service_name}.TitleCache")
+        self.cacher = Cacher(service_name)
+        self.stats = {"hits": 0, "misses": 0, "fallbacks": 0}
+
+    def _generate_cache_key(
+        self, title_id: str, region: Optional[str] = None, account_hash: Optional[str] = None
+    ) -> str:
+        """
+        Generate a unique cache key for title data.
+
+        Args:
+            title_id: The title identifier
+            region: The region/proxy identifier
+            account_hash: Hash of account credentials (if applicable)
+
+        Returns:
+            A unique cache key string
+        """
+        # Hash the title_id to handle complex IDs (URLs, dots, special chars)
+        # This ensures consistent length and filesystem-safe keys
+        title_hash = hashlib.sha256(title_id.encode()).hexdigest()[:16]
+
+        # Start with base key using hash
+        key_parts = ["titles", title_hash]
+
+        # Add region if available
+        if region:
+            key_parts.append(region.lower())
+
+        # Add account hash if available
+        if account_hash:
+            key_parts.append(account_hash[:8])  # Use first 8 chars of hash
+
+        # Join with underscores
+        cache_key = "_".join(key_parts)
+
+        # Log the mapping for debugging
+        self.log.debug(f"Cache key mapping: {title_id} -> {cache_key}")
+
+        return cache_key
+
+    def get_cached_titles(
+        self,
+        title_id: str,
+        fetch_function,
+        region: Optional[str] = None,
+        account_hash: Optional[str] = None,
+        no_cache: bool = False,
+        reset_cache: bool = False,
+    ) -> Optional[Titles_T]:
+        """
+        Get titles from cache or fetch from API with fallback support.
+
+        Args:
+            title_id: The title identifier
+            fetch_function: Function to call to fetch fresh titles
+            region: The region/proxy identifier
+            account_hash: Hash of account credentials
+            no_cache: Bypass cache completely
+            reset_cache: Clear cache before fetching
+
+        Returns:
+            Titles object (Movies, Series, or Album)
+        """
+        # If caching is globally disabled or no_cache flag is set
+        if not config.title_cache_enabled or no_cache:
+            self.log.debug("Cache bypassed, fetching fresh titles")
+            return fetch_function()
+
+        # Generate cache key
+        cache_key = self._generate_cache_key(title_id, region, account_hash)
+
+        # If reset_cache flag is set, clear the cache entry
+        if reset_cache:
+            self.log.info(f"Clearing cache for {cache_key}")
+            cache_path = (config.directories.cache / self.service_name / cache_key).with_suffix(".json")
+            if cache_path.exists():
+                cache_path.unlink()
+
+        # Try to get from cache
+        cache = self.cacher.get(cache_key, version=1)
+
+        # Check if we have valid cached data
+        if cache and not cache.expired:
+            self.stats["hits"] += 1
+            self.log.debug(f"Cache hit for {title_id} (hits: {self.stats['hits']}, misses: {self.stats['misses']})")
+            return cache.data
+
+        # Cache miss or expired, try to fetch fresh data
+        self.stats["misses"] += 1
+        self.log.debug(f"Cache miss for {title_id}, fetching fresh data")
+
+        try:
+            # Attempt to fetch fresh titles
+            titles = fetch_function()
+
+            if titles:
+                # Successfully fetched, update cache
+                self.log.debug(f"Successfully fetched titles for {title_id}, updating cache")
+                cache = self.cacher.get(cache_key, version=1)
+                cache.set(titles, expiration=datetime.now() + timedelta(seconds=config.title_cache_time))
+
+            return titles
+
+        except Exception as e:
+            # API call failed, check if we have fallback cached data
+            if cache and cache.data:
+                # We have expired cached data, use it as fallback
+                current_time = datetime.now()
+                max_retention_time = cache.expiration + timedelta(
+                    seconds=config.title_cache_max_retention - config.title_cache_time
+                )
+
+                if current_time < max_retention_time:
+                    self.stats["fallbacks"] += 1
+                    self.log.warning(
+                        f"API call failed for {title_id}, using cached data as fallback "
+                        f"(fallbacks: {self.stats['fallbacks']})"
+                    )
+                    self.log.debug(f"Error was: {e}")
+
+                    # Extend cache lifetime
+                    extended_expiration = current_time + timedelta(minutes=5)
+                    if extended_expiration < max_retention_time:
+                        cache.expiration = extended_expiration
+                        cache.set(cache.data, expiration=extended_expiration)
+
+                    return cache.data
+                else:
+                    self.log.error(f"API call failed and cached data for {title_id} exceeded maximum retention time")
+
+            # Re-raise the exception if no fallback available
+            raise
+
+    def clear_all_title_cache(self):
+        """Clear all title caches for this service."""
+        cache_dir = config.directories.cache / self.service_name
+        if cache_dir.exists():
+            for cache_file in cache_dir.glob("titles_*.json"):
+                cache_file.unlink()
+                self.log.info(f"Cleared cache file: {cache_file.name}")
+
+    def get_cache_stats(self) -> dict:
+        """Get cache statistics."""
+        total = sum(self.stats.values())
+        if total > 0:
+            hit_rate = (self.stats["hits"] / total) * 100
+        else:
+            hit_rate = 0
+
+        return {
+            "hits": self.stats["hits"],
+            "misses": self.stats["misses"],
+            "fallbacks": self.stats["fallbacks"],
+            "hit_rate": f"{hit_rate:.1f}%",
+        }
+
+
+def get_region_from_proxy(proxy_url: Optional[str]) -> Optional[str]:
+    """
+    Extract region identifier from proxy URL.
+
+    Args:
+        proxy_url: The proxy URL string
+
+    Returns:
+        Region identifier or None
+    """
+    if not proxy_url:
+        return None
+
+    # Try to extract region from common proxy patterns
+    # e.g., "us123.nordvpn.com", "gb-proxy.example.com"
+    import re
+
+    # Pattern for NordVPN style
+    nord_match = re.search(r"([a-z]{2})\d+\.nordvpn", proxy_url.lower())
+    if nord_match:
+        return nord_match.group(1)
+
+    # Pattern for country code at start
+    cc_match = re.search(r"([a-z]{2})[-_]", proxy_url.lower())
+    if cc_match:
+        return cc_match.group(1)
+
+    # Pattern for country code subdomain
+    subdomain_match = re.search(r"://([a-z]{2})\.", proxy_url.lower())
+    if subdomain_match:
+        return subdomain_match.group(1)
+
+    return None
+
+
+def get_account_hash(credential) -> Optional[str]:
+    """
+    Generate a hash for account identification.
+
+    Args:
+        credential: Credential object
+
+    Returns:
+        SHA1 hash of the credential or None
+    """
+    if not credential:
+        return None
+
+    # Use existing sha1 property if available
+    if hasattr(credential, "sha1"):
+        return credential.sha1
+
+    # Otherwise generate hash from username
+    if hasattr(credential, "username"):
+        return hashlib.sha1(credential.username.encode()).hexdigest()
+
+    return None
@@ -15,6 +15,12 @@ update_checks: true
 # How often to check for updates, in hours (default: 24)
 update_check_interval: 24
 
+# Title caching configuration
+# Cache title metadata to reduce redundant API calls
+title_cache_enabled: true  # Enable/disable title caching globally (default: true)
+title_cache_time: 1800  # Cache duration in seconds (default: 1800 = 30 minutes)
+title_cache_max_retention: 86400  # Maximum cache retention for fallback when API fails (default: 86400 = 24 hours)
+
 # Muxing configuration
 muxing:
   set_title: false