feat: add curl_cffi session support with browser impersonation

Add new session utility with curl_cffi support for anti-bot protection
Update all manifest parsers (DASH, HLS, ISM, M3U8) to accept curl_cffi sessions
Add browser impersonation support (Chrome, Firefox, Safari)
Fix cookie handling compatibility between requests and curl_cffi
Suppress HTTPS proxy warnings for better UX
Maintain full backward compatibility with requests.Session
This commit is contained in:
Andy
2025-09-25 06:27:14 +00:00
parent 63b7a49c1a
commit 35efdbff6d
8 changed files with 123 additions and 39 deletions

View File

@@ -1564,6 +1564,9 @@ class dl:
@staticmethod @staticmethod
def save_cookies(path: Path, cookies: CookieJar): def save_cookies(path: Path, cookies: CookieJar):
if hasattr(cookies, 'jar'):
cookies = cookies.jar
cookie_jar = MozillaCookieJar(path) cookie_jar = MozillaCookieJar(path)
cookie_jar.load() cookie_jar.load()
for cookie in cookies: for cookie in cookies:

View File

@@ -8,6 +8,7 @@ from urllib.parse import urljoin
from Cryptodome.Cipher import AES from Cryptodome.Cipher import AES
from Cryptodome.Util.Padding import unpad from Cryptodome.Util.Padding import unpad
from curl_cffi.requests import Session as CurlSession
from m3u8.model import Key from m3u8.model import Key
from requests import Session from requests import Session
@@ -69,8 +70,8 @@ class ClearKey:
""" """
if not isinstance(m3u_key, Key): if not isinstance(m3u_key, Key):
raise ValueError(f"Provided M3U Key is in an unexpected type {m3u_key!r}") raise ValueError(f"Provided M3U Key is in an unexpected type {m3u_key!r}")
if not isinstance(session, (Session, type(None))): if not isinstance(session, (Session, CurlSession, type(None))):
raise TypeError(f"Expected session to be a {Session}, not a {type(session)}") raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not a {type(session)}")
if not m3u_key.method.startswith("AES"): if not m3u_key.method.startswith("AES"):
raise ValueError(f"Provided M3U Key is not an AES Clear Key, {m3u_key.method}") raise ValueError(f"Provided M3U Key is not an AES Clear Key, {m3u_key.method}")

View File

@@ -15,6 +15,7 @@ from uuid import UUID
from zlib import crc32 from zlib import crc32
import requests import requests
from curl_cffi.requests import Session as CurlSession
from langcodes import Language, tag_is_valid from langcodes import Language, tag_is_valid
from lxml.etree import Element, ElementTree from lxml.etree import Element, ElementTree
from pyplayready.system.pssh import PSSH as PR_PSSH from pyplayready.system.pssh import PSSH as PR_PSSH
@@ -47,7 +48,7 @@ class DASH:
self.url = url self.url = url
@classmethod @classmethod
def from_url(cls, url: str, session: Optional[Session] = None, **args: Any) -> DASH: def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **args: Any) -> DASH:
if not url: if not url:
raise requests.URLRequired("DASH manifest URL must be provided for relative path computations.") raise requests.URLRequired("DASH manifest URL must be provided for relative path computations.")
if not isinstance(url, str): if not isinstance(url, str):
@@ -55,8 +56,8 @@ class DASH:
if not session: if not session:
session = Session() session = Session()
elif not isinstance(session, Session): elif not isinstance(session, (Session, CurlSession)):
raise TypeError(f"Expected session to be a {Session}, not {session!r}") raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
res = session.get(url, **args) res = session.get(url, **args)
if res.url != url: if res.url != url:
@@ -103,6 +104,10 @@ class DASH:
continue continue
if next(iter(period.xpath("SegmentType/@value")), "content") != "content": if next(iter(period.xpath("SegmentType/@value")), "content") != "content":
continue continue
if "urn:amazon:primevideo:cachingBreadth" in [
x.get("schemeIdUri") for x in period.findall("SupplementalProperty")
]:
continue
for adaptation_set in period.findall("AdaptationSet"): for adaptation_set in period.findall("AdaptationSet"):
if self.is_trick_mode(adaptation_set): if self.is_trick_mode(adaptation_set):

View File

@@ -14,9 +14,10 @@ from typing import Any, Callable, Optional, Union
from urllib.parse import urljoin from urllib.parse import urljoin
from zlib import crc32 from zlib import crc32
import httpx
import m3u8 import m3u8
import requests import requests
from curl_cffi.requests import Response as CurlResponse
from curl_cffi.requests import Session as CurlSession
from langcodes import Language, tag_is_valid from langcodes import Language, tag_is_valid
from m3u8 import M3U8 from m3u8 import M3U8
from pyplayready.cdm import Cdm as PlayReadyCdm from pyplayready.cdm import Cdm as PlayReadyCdm
@@ -35,7 +36,7 @@ from unshackle.core.utilities import get_extension, is_close_match, try_ensure_u
class HLS: class HLS:
def __init__(self, manifest: M3U8, session: Optional[Union[Session, httpx.Client]] = None): def __init__(self, manifest: M3U8, session: Optional[Union[Session, CurlSession]] = None):
if not manifest: if not manifest:
raise ValueError("HLS manifest must be provided.") raise ValueError("HLS manifest must be provided.")
if not isinstance(manifest, M3U8): if not isinstance(manifest, M3U8):
@@ -47,7 +48,7 @@ class HLS:
self.session = session or Session() self.session = session or Session()
@classmethod @classmethod
def from_url(cls, url: str, session: Optional[Union[Session, httpx.Client]] = None, **args: Any) -> HLS: def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **args: Any) -> HLS:
if not url: if not url:
raise requests.URLRequired("HLS manifest URL must be provided.") raise requests.URLRequired("HLS manifest URL must be provided.")
if not isinstance(url, str): if not isinstance(url, str):
@@ -55,22 +56,22 @@ class HLS:
if not session: if not session:
session = Session() session = Session()
elif not isinstance(session, (Session, httpx.Client)): elif not isinstance(session, (Session, CurlSession)):
raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {session!r}") raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
res = session.get(url, **args) res = session.get(url, **args)
# Handle both requests and httpx response objects # Handle requests and curl_cffi response objects
if isinstance(res, requests.Response): if isinstance(res, requests.Response):
if not res.ok: if not res.ok:
raise requests.ConnectionError("Failed to request the M3U(8) document.", response=res) raise requests.ConnectionError("Failed to request the M3U(8) document.", response=res)
content = res.text content = res.text
elif isinstance(res, httpx.Response): elif isinstance(res, CurlResponse):
if res.status_code >= 400: if not res.ok:
raise requests.ConnectionError("Failed to request the M3U(8) document.", response=res) raise requests.ConnectionError("Failed to request the M3U(8) document.", response=res)
content = res.text content = res.text
else: else:
raise TypeError(f"Expected response to be a requests.Response or httpx.Response, not {type(res)}") raise TypeError(f"Expected response to be a requests.Response or curl_cffi.Response, not {type(res)}")
master = m3u8.loads(content, uri=url) master = m3u8.loads(content, uri=url)
@@ -229,7 +230,7 @@ class HLS:
save_path: Path, save_path: Path,
save_dir: Path, save_dir: Path,
progress: partial, progress: partial,
session: Optional[Union[Session, httpx.Client]] = None, session: Optional[Union[Session, CurlSession]] = None,
proxy: Optional[str] = None, proxy: Optional[str] = None,
max_workers: Optional[int] = None, max_workers: Optional[int] = None,
license_widevine: Optional[Callable] = None, license_widevine: Optional[Callable] = None,
@@ -238,15 +239,13 @@ class HLS:
) -> None: ) -> None:
if not session: if not session:
session = Session() session = Session()
elif not isinstance(session, (Session, httpx.Client)): elif not isinstance(session, (Session, CurlSession)):
raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {session!r}") raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
if proxy: if proxy:
# Handle proxies differently based on session type # Handle proxies differently based on session type
if isinstance(session, Session): if isinstance(session, Session):
session.proxies.update({"all": proxy}) session.proxies.update({"all": proxy})
elif isinstance(session, httpx.Client):
session.proxies = {"http://": proxy, "https://": proxy}
log = logging.getLogger("HLS") log = logging.getLogger("HLS")
@@ -257,13 +256,8 @@ class HLS:
log.error(f"Failed to request the invariant M3U8 playlist: {response.status_code}") log.error(f"Failed to request the invariant M3U8 playlist: {response.status_code}")
sys.exit(1) sys.exit(1)
playlist_text = response.text playlist_text = response.text
elif isinstance(response, httpx.Response):
if response.status_code >= 400:
log.error(f"Failed to request the invariant M3U8 playlist: {response.status_code}")
sys.exit(1)
playlist_text = response.text
else: else:
raise TypeError(f"Expected response to be a requests.Response or httpx.Response, not {type(response)}") raise TypeError(f"Expected response to be a requests.Response or curl_cffi.Response, not {type(response)}")
master = m3u8.loads(playlist_text, uri=track.url) master = m3u8.loads(playlist_text, uri=track.url)
@@ -533,13 +527,9 @@ class HLS:
if isinstance(res, requests.Response): if isinstance(res, requests.Response):
res.raise_for_status() res.raise_for_status()
init_content = res.content init_content = res.content
elif isinstance(res, httpx.Response):
if res.status_code >= 400:
raise requests.HTTPError(f"HTTP Error: {res.status_code}", response=res)
init_content = res.content
else: else:
raise TypeError( raise TypeError(
f"Expected response to be requests.Response or httpx.Response, not {type(res)}" f"Expected response to be requests.Response or curl_cffi.Response, not {type(res)}"
) )
map_data = (segment.init_section, init_content) map_data = (segment.init_section, init_content)
@@ -707,7 +697,7 @@ class HLS:
@staticmethod @staticmethod
def parse_session_data_keys( def parse_session_data_keys(
manifest: M3U8, session: Optional[Union[Session, httpx.Client]] = None manifest: M3U8, session: Optional[Union[Session, CurlSession]] = None
) -> list[m3u8.model.Key]: ) -> list[m3u8.model.Key]:
"""Parse `com.apple.hls.keys` session data and return Key objects.""" """Parse `com.apple.hls.keys` session data and return Key objects."""
keys: list[m3u8.model.Key] = [] keys: list[m3u8.model.Key] = []
@@ -798,7 +788,8 @@ class HLS:
@staticmethod @staticmethod
def get_drm( def get_drm(
key: Union[m3u8.model.SessionKey, m3u8.model.Key], session: Optional[Union[Session, httpx.Client]] = None key: Union[m3u8.model.SessionKey, m3u8.model.Key],
session: Optional[Union[Session, CurlSession]] = None,
) -> DRM_T: ) -> DRM_T:
""" """
Convert HLS EXT-X-KEY data to an initialized DRM object. Convert HLS EXT-X-KEY data to an initialized DRM object.
@@ -810,8 +801,8 @@ class HLS:
Raises a NotImplementedError if the key system is not supported. Raises a NotImplementedError if the key system is not supported.
""" """
if not isinstance(session, (Session, httpx.Client, type(None))): if not isinstance(session, (Session, CurlSession, type(None))):
raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {type(session)}") raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {type(session)}")
if not session: if not session:
session = Session() session = Session()

View File

@@ -10,6 +10,7 @@ from pathlib import Path
from typing import Any, Callable, Optional, Union from typing import Any, Callable, Optional, Union
import requests import requests
from curl_cffi.requests import Session as CurlSession
from langcodes import Language, tag_is_valid from langcodes import Language, tag_is_valid
from lxml.etree import Element from lxml.etree import Element
from pyplayready.system.pssh import PSSH as PR_PSSH from pyplayready.system.pssh import PSSH as PR_PSSH
@@ -34,11 +35,13 @@ class ISM:
self.url = url self.url = url
@classmethod @classmethod
def from_url(cls, url: str, session: Optional[Session] = None, **kwargs: Any) -> "ISM": def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **kwargs: Any) -> "ISM":
if not url: if not url:
raise requests.URLRequired("ISM manifest URL must be provided") raise requests.URLRequired("ISM manifest URL must be provided")
if not session: if not session:
session = Session() session = Session()
elif not isinstance(session, (Session, CurlSession)):
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
res = session.get(url, **kwargs) res = session.get(url, **kwargs)
if res.url != url: if res.url != url:
url = res.url url = res.url

View File

@@ -2,9 +2,10 @@
from __future__ import annotations from __future__ import annotations
from typing import Optional from typing import Optional, Union
import m3u8 import m3u8
from curl_cffi.requests import Session as CurlSession
from requests import Session from requests import Session
from unshackle.core.manifests.hls import HLS from unshackle.core.manifests.hls import HLS
@@ -15,7 +16,7 @@ def parse(
master: m3u8.M3U8, master: m3u8.M3U8,
language: str, language: str,
*, *,
session: Optional[Session] = None, session: Optional[Union[Session, CurlSession]] = None,
) -> Tracks: ) -> Tracks:
"""Parse a variant playlist to ``Tracks`` with basic information, defer DRM loading.""" """Parse a variant playlist to ``Tracks`` with basic information, defer DRM loading."""
tracks = HLS(master, session=session).to_tracks(language) tracks = HLS(master, session=session).to_tracks(language)

79
unshackle/core/session.py Normal file
View File

@@ -0,0 +1,79 @@
"""Session utilities for creating HTTP sessions with different backends."""
from __future__ import annotations
import warnings
from curl_cffi.requests import Session as CurlSession
from unshackle.core.config import config
# Process-wide filter: some proxy providers (like NordVPN) require HTTPS proxy
# URLs while curl_cffi expects the HTTP format, so curl_cffi emits a
# RuntimeWarning about it. Ignore that one message when it is attributed to a
# curl_cffi module; every other warning is left untouched.
warnings.filterwarnings(
    "ignore",
    message="Make sure you are using https over https proxy.*",
    category=RuntimeWarning,
    module="curl_cffi.*",
)
class Session(CurlSession):
    """curl_cffi ``Session`` that silences the HTTPS-proxy warning on every request."""

    def request(self, method, url, **kwargs):
        """
        Delegate to the parent ``request`` with the HTTPS-proxy warning ignored.

        The filter is installed inside ``warnings.catch_warnings()``, so the
        previous warning filters are restored once the request has finished,
        whether it returned or raised.

        Args:
            method: HTTP method, forwarded unchanged to the parent class.
            url: Target URL, forwarded unchanged to the parent class.
            **kwargs: Any further request options, forwarded unchanged.

        Returns:
            Whatever the parent ``request`` returns.
        """
        with warnings.catch_warnings():
            # No module restriction here: the message is ignored regardless of
            # which module the warning ends up attributed to.
            warnings.filterwarnings(
                action="ignore",
                message="Make sure you are using https over https proxy.*",
                category=RuntimeWarning,
            )
            response = super().request(method, url, **kwargs)
        return response
def session(browser: str | None = None, **kwargs) -> Session:
    """
    Build a browser-impersonating curl_cffi session.

    Intended as a replacement for requests.Session with browser impersonation
    and anti-bot capabilities; curl-impersonate is used under the hood to mimic
    real browser behavior.

    Args:
        browser: Browser to impersonate (e.g. "chrome124", "firefox", "safari").
            When None, the value of curl_impersonate.browser from the config is
            used, falling back to "chrome124" if that key is absent.
            See https://github.com/lexiforest/curl_cffi#sessions for available options.
        **kwargs: Additional arguments passed to the session constructor. They
            take precedence over the defaults set here (impersonate, timeout=30.0,
            allow_redirects=True, max_redirects=15, verify=True). For example:
            - headers: Additional headers (dict)
            - cookies: Cookie jar or dict
            - auth: HTTP basic auth tuple (username, password)
            - proxies: Proxy configuration dict
            - verify: SSL certificate verification (bool, default True)
            - timeout: Request timeout in seconds (float or tuple)
            - allow_redirects: Follow redirects (bool, default True)
            - max_redirects: Maximum redirect count (int)
            - cert: Client certificate (str or tuple)

    Returns:
        A Session configured with browser impersonation and the options above,
        with the headers from the config merged into its headers afterwards.

    Example:
        from unshackle.core.session import session

        class MyService(Service):
            @staticmethod
            def get_session():
                return session()  # Uses config default browser
    """
    impersonate = browser
    if impersonate is None:
        impersonate = config.curl_impersonate.get("browser", "chrome124")

    defaults = {
        "impersonate": impersonate,
        "timeout": 30.0,
        "allow_redirects": True,
        "max_redirects": 15,
        "verify": True,
    }
    # Caller-supplied options win over the defaults on key collisions.
    options = {**defaults, **kwargs}

    client = Session(**options)
    # Applied after construction, so configured headers overwrite same-named
    # headers already present on the session.
    client.headers.update(config.headers)
    return client

View File

@@ -13,6 +13,7 @@ from typing import Any, Callable, Iterable, Optional, Union
from uuid import UUID from uuid import UUID
from zlib import crc32 from zlib import crc32
from curl_cffi.requests import Session as CurlSession
from langcodes import Language from langcodes import Language
from pyplayready.cdm import Cdm as PlayReadyCdm from pyplayready.cdm import Cdm as PlayReadyCdm
from pywidevine.cdm import Cdm as WidevineCdm from pywidevine.cdm import Cdm as WidevineCdm
@@ -585,8 +586,8 @@ class Track:
raise TypeError(f"Expected url to be a {str}, not {type(url)}") raise TypeError(f"Expected url to be a {str}, not {type(url)}")
if not isinstance(byte_range, (str, type(None))): if not isinstance(byte_range, (str, type(None))):
raise TypeError(f"Expected byte_range to be a {str}, not {type(byte_range)}") raise TypeError(f"Expected byte_range to be a {str}, not {type(byte_range)}")
if not isinstance(session, (Session, type(None))): if not isinstance(session, (Session, CurlSession, type(None))):
raise TypeError(f"Expected session to be a {Session}, not {type(session)}") raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {type(session)}")
if not url: if not url:
if self.descriptor != self.Descriptor.URL: if self.descriptor != self.Descriptor.URL: