diff --git a/unshackle/commands/dl.py b/unshackle/commands/dl.py index 8d8caeb..9a99bfc 100644 --- a/unshackle/commands/dl.py +++ b/unshackle/commands/dl.py @@ -1564,6 +1564,9 @@ class dl: @staticmethod def save_cookies(path: Path, cookies: CookieJar): + if hasattr(cookies, 'jar'): + cookies = cookies.jar + cookie_jar = MozillaCookieJar(path) cookie_jar.load() for cookie in cookies: diff --git a/unshackle/core/drm/clearkey.py b/unshackle/core/drm/clearkey.py index 5652501..089fa71 100644 --- a/unshackle/core/drm/clearkey.py +++ b/unshackle/core/drm/clearkey.py @@ -8,6 +8,7 @@ from urllib.parse import urljoin from Cryptodome.Cipher import AES from Cryptodome.Util.Padding import unpad +from curl_cffi.requests import Session as CurlSession from m3u8.model import Key from requests import Session @@ -69,8 +70,8 @@ class ClearKey: """ if not isinstance(m3u_key, Key): raise ValueError(f"Provided M3U Key is in an unexpected type {m3u_key!r}") - if not isinstance(session, (Session, type(None))): - raise TypeError(f"Expected session to be a {Session}, not a {type(session)}") + if not isinstance(session, (Session, CurlSession, type(None))): + raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not a {type(session)}") if not m3u_key.method.startswith("AES"): raise ValueError(f"Provided M3U Key is not an AES Clear Key, {m3u_key.method}") diff --git a/unshackle/core/manifests/dash.py b/unshackle/core/manifests/dash.py index 0e494ee..ec19e25 100644 --- a/unshackle/core/manifests/dash.py +++ b/unshackle/core/manifests/dash.py @@ -15,6 +15,7 @@ from uuid import UUID from zlib import crc32 import requests +from curl_cffi.requests import Session as CurlSession from langcodes import Language, tag_is_valid from lxml.etree import Element, ElementTree from pyplayready.system.pssh import PSSH as PR_PSSH @@ -47,7 +48,7 @@ class DASH: self.url = url @classmethod - def from_url(cls, url: str, session: Optional[Session] = None, **args: Any) -> DASH: + def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **args: Any) -> DASH: if not url: raise requests.URLRequired("DASH manifest URL must be provided for relative path computations.") if not isinstance(url, str): @@ -55,8 +56,8 @@ class DASH: if not session: session = Session() - elif not isinstance(session, Session): - raise TypeError(f"Expected session to be a {Session}, not {session!r}") + elif not isinstance(session, (Session, CurlSession)): + raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}") res = session.get(url, **args) if res.url != url: @@ -103,6 +104,10 @@ class DASH: continue if next(iter(period.xpath("SegmentType/@value")), "content") != "content": continue + if "urn:amazon:primevideo:cachingBreadth" in [ + x.get("schemeIdUri") for x in period.findall("SupplementalProperty") + ]: + continue for adaptation_set in period.findall("AdaptationSet"): if self.is_trick_mode(adaptation_set): diff --git a/unshackle/core/manifests/hls.py b/unshackle/core/manifests/hls.py index 48a8a33..d48d96e 100644 --- a/unshackle/core/manifests/hls.py +++ b/unshackle/core/manifests/hls.py @@ -14,9 +14,10 @@ from typing import Any, Callable, Optional, Union from urllib.parse import urljoin from zlib import crc32 -import httpx import m3u8 import requests +from curl_cffi.requests import Response as CurlResponse +from curl_cffi.requests import Session as CurlSession from langcodes import Language, tag_is_valid from m3u8 import M3U8 from pyplayready.cdm import Cdm as PlayReadyCdm @@ -35,7 +36,7 @@ from unshackle.core.utilities import get_extension, is_close_match, try_ensure_u class HLS: - def __init__(self, manifest: M3U8, session: Optional[Union[Session, httpx.Client]] = None): + def __init__(self, manifest: M3U8, session: Optional[Union[Session, CurlSession]] = None): if not manifest: raise ValueError("HLS manifest must be provided.") if not isinstance(manifest, M3U8): @@ -47,7 +48,7 @@ class HLS: self.session = session or Session() @classmethod - def from_url(cls, url: str, session: Optional[Union[Session, httpx.Client]] = None, **args: Any) -> HLS: + def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **args: Any) -> HLS: if not url: raise requests.URLRequired("HLS manifest URL must be provided.") if not isinstance(url, str): @@ -55,22 +56,22 @@ class HLS: if not session: session = Session() - elif not isinstance(session, (Session, httpx.Client)): - raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {session!r}") + elif not isinstance(session, (Session, CurlSession)): + raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}") res = session.get(url, **args) - # Handle both requests and httpx response objects + # Handle requests and curl_cffi response objects if isinstance(res, requests.Response): if not res.ok: raise requests.ConnectionError("Failed to request the M3U(8) document.", response=res) content = res.text - elif isinstance(res, httpx.Response): - if res.status_code >= 400: + elif isinstance(res, CurlResponse): + if not res.ok: raise requests.ConnectionError("Failed to request the M3U(8) document.", response=res) content = res.text else: - raise TypeError(f"Expected response to be a requests.Response or httpx.Response, not {type(res)}") + raise TypeError(f"Expected response to be a requests.Response or curl_cffi.Response, not {type(res)}") master = m3u8.loads(content, uri=url) @@ -229,7 +230,7 @@ class HLS: save_path: Path, save_dir: Path, progress: partial, - session: Optional[Union[Session, httpx.Client]] = None, + session: Optional[Union[Session, CurlSession]] = None, proxy: Optional[str] = None, max_workers: Optional[int] = None, license_widevine: Optional[Callable] = None, @@ -238,15 +239,13 @@ class HLS: ) -> None: if not session: session = Session() - elif not isinstance(session, (Session, httpx.Client)): - raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {session!r}") + elif not isinstance(session, (Session, CurlSession)): + raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}") if proxy: # Handle proxies differently based on session type if isinstance(session, Session): session.proxies.update({"all": proxy}) - elif isinstance(session, httpx.Client): - session.proxies = {"http://": proxy, "https://": proxy} log = logging.getLogger("HLS") @@ -257,13 +256,8 @@ class HLS: log.error(f"Failed to request the invariant M3U8 playlist: {response.status_code}") sys.exit(1) playlist_text = response.text - elif isinstance(response, httpx.Response): - if response.status_code >= 400: - log.error(f"Failed to request the invariant M3U8 playlist: {response.status_code}") - sys.exit(1) - playlist_text = response.text else: - raise TypeError(f"Expected response to be a requests.Response or httpx.Response, not {type(response)}") + raise TypeError(f"Expected response to be a requests.Response or curl_cffi.Response, not {type(response)}") master = m3u8.loads(playlist_text, uri=track.url) @@ -533,13 +527,9 @@ class HLS: if isinstance(res, requests.Response): res.raise_for_status() init_content = res.content - elif isinstance(res, httpx.Response): - if res.status_code >= 400: - raise requests.HTTPError(f"HTTP Error: {res.status_code}", response=res) - init_content = res.content else: raise TypeError( - f"Expected response to be requests.Response or httpx.Response, not {type(res)}" + f"Expected response to be requests.Response or curl_cffi.Response, not {type(res)}" ) map_data = (segment.init_section, init_content) @@ -707,7 +697,7 @@ class HLS: @staticmethod def parse_session_data_keys( - manifest: M3U8, session: Optional[Union[Session, httpx.Client]] = None + manifest: M3U8, session: Optional[Union[Session, CurlSession]] = None ) -> list[m3u8.model.Key]: """Parse `com.apple.hls.keys` session data and return Key objects.""" keys: list[m3u8.model.Key] = [] @@ -798,7 +788,8 @@ class HLS: @staticmethod def get_drm( - key: Union[m3u8.model.SessionKey, m3u8.model.Key], session: Optional[Union[Session, httpx.Client]] = None + key: Union[m3u8.model.SessionKey, m3u8.model.Key], + session: Optional[Union[Session, CurlSession]] = None, ) -> DRM_T: """ Convert HLS EXT-X-KEY data to an initialized DRM object. @@ -810,8 +801,8 @@ class HLS: Raises a NotImplementedError if the key system is not supported. """ - if not isinstance(session, (Session, httpx.Client, type(None))): - raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {type(session)}") + if not isinstance(session, (Session, CurlSession, type(None))): + raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {type(session)}") if not session: session = Session() diff --git a/unshackle/core/manifests/ism.py b/unshackle/core/manifests/ism.py index b291dc6..346c9e6 100644 --- a/unshackle/core/manifests/ism.py +++ b/unshackle/core/manifests/ism.py @@ -10,6 +10,7 @@ from pathlib import Path from typing import Any, Callable, Optional, Union import requests +from curl_cffi.requests import Session as CurlSession from langcodes import Language, tag_is_valid from lxml.etree import Element from pyplayready.system.pssh import PSSH as PR_PSSH @@ -34,11 +35,13 @@ class ISM: self.url = url @classmethod - def from_url(cls, url: str, session: Optional[Session] = None, **kwargs: Any) -> "ISM": + def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **kwargs: Any) -> "ISM": if not url: raise requests.URLRequired("ISM manifest URL must be provided") if not session: session = Session() + elif not isinstance(session, (Session, CurlSession)): + raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}") res = session.get(url, **kwargs) if res.url != url: url = res.url diff --git a/unshackle/core/manifests/m3u8.py b/unshackle/core/manifests/m3u8.py index 16fad1d..761d73c 100644 --- a/unshackle/core/manifests/m3u8.py +++ b/unshackle/core/manifests/m3u8.py @@ -2,9 +2,10 @@ from __future__ import annotations -from typing import Optional +from typing import Optional, Union import m3u8 +from curl_cffi.requests import Session as CurlSession from requests import Session from unshackle.core.manifests.hls import HLS @@ -15,7 +16,7 @@ def parse( master: m3u8.M3U8, language: str, *, - session: Optional[Session] = None, + session: Optional[Union[Session, CurlSession]] = None, ) -> Tracks: """Parse a variant playlist to ``Tracks`` with basic information, defer DRM loading.""" tracks = HLS(master, session=session).to_tracks(language) diff --git a/unshackle/core/session.py b/unshackle/core/session.py new file mode 100644 index 0000000..4cda472 --- /dev/null +++ b/unshackle/core/session.py @@ -0,0 +1,79 @@ +"""Session utilities for creating HTTP sessions with different backends.""" + +from __future__ import annotations + +import warnings + +from curl_cffi.requests import Session as CurlSession + +from unshackle.core.config import config + +# Globally suppress curl_cffi HTTPS proxy warnings since some proxy providers +# (like NordVPN) require HTTPS URLs but curl_cffi expects HTTP format +warnings.filterwarnings( + "ignore", message="Make sure you are using https over https proxy.*", category=RuntimeWarning, module="curl_cffi.*" +) + + +class Session(CurlSession): + """curl_cffi Session with warning suppression.""" + + def request(self, method, url, **kwargs): + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", message="Make sure you are using https over https proxy.*", category=RuntimeWarning + ) + return super().request(method, url, **kwargs) + + +def session(browser: str | None = None, **kwargs) -> Session: + """ + Create a curl_cffi session that impersonates a browser. + + This is a full replacement for requests.Session with browser impersonation + and anti-bot capabilities. The session uses curl-impersonate under the hood + to mimic real browser behavior. + + Args: + browser: Browser to impersonate (e.g. "chrome124", "firefox", "safari"). + Uses the configured default from curl_impersonate.browser if not specified. + See https://github.com/lexiforest/curl_cffi#sessions for available options. + **kwargs: Additional arguments passed to CurlSession constructor: + - headers: Additional headers (dict) + - cookies: Cookie jar or dict + - auth: HTTP basic auth tuple (username, password) + - proxies: Proxy configuration dict + - verify: SSL certificate verification (bool, default True) + - timeout: Request timeout in seconds (float or tuple) + - allow_redirects: Follow redirects (bool, default True) + - max_redirects: Maximum redirect count (int) + - cert: Client certificate (str or tuple) + + Returns: + curl_cffi.requests.Session configured with browser impersonation, common headers, + and equivalent retry behavior to requests.Session. + + Example: + from unshackle.core.session import session + + class MyService(Service): + @staticmethod + def get_session(): + return session() # Uses config default browser + """ + if browser is None: + browser = config.curl_impersonate.get("browser", "chrome124") + + session_config = { + "impersonate": browser, + "timeout": 30.0, + "allow_redirects": True, + "max_redirects": 15, + "verify": True, + } + + session_config.update(kwargs) + session_obj = Session(**session_config) + session_obj.headers.update(config.headers) + + return session_obj diff --git a/unshackle/core/tracks/track.py b/unshackle/core/tracks/track.py index 9ff4939..12c7af0 100644 --- a/unshackle/core/tracks/track.py +++ b/unshackle/core/tracks/track.py @@ -13,6 +13,7 @@ from typing import Any, Callable, Iterable, Optional, Union from uuid import UUID from zlib import crc32 +from curl_cffi.requests import Session as CurlSession from langcodes import Language from pyplayready.cdm import Cdm as PlayReadyCdm from pywidevine.cdm import Cdm as WidevineCdm @@ -585,8 +586,8 @@ class Track: raise TypeError(f"Expected url to be a {str}, not {type(url)}") if not isinstance(byte_range, (str, type(None))): raise TypeError(f"Expected byte_range to be a {str}, not {type(byte_range)}") - if not isinstance(session, (Session, type(None))): - raise TypeError(f"Expected session to be a {Session}, not {type(session)}") + if not isinstance(session, (Session, CurlSession, type(None))): + raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {type(session)}") if not url: if self.descriptor != self.Descriptor.URL: