feat: add curl_cffi session support with browser impersonation

Add new session utility with curl_cffi support for anti-bot protection
Update all manifest parsers (DASH, HLS, ISM, M3U8) to accept curl_cffi sessions
Add browser impersonation support (Chrome, Firefox, Safari)
Fix cookie handling compatibility between requests and curl_cffi
Suppress HTTPS proxy warnings for better UX
Maintain full backward compatibility with requests.Session
This commit is contained in:
Andy
2025-09-25 06:27:14 +00:00
parent 63b7a49c1a
commit 35efdbff6d
8 changed files with 123 additions and 39 deletions

View File

@@ -1564,6 +1564,9 @@ class dl:
@staticmethod
def save_cookies(path: Path, cookies: CookieJar):
if hasattr(cookies, 'jar'):
cookies = cookies.jar
cookie_jar = MozillaCookieJar(path)
cookie_jar.load()
for cookie in cookies:

View File

@@ -8,6 +8,7 @@ from urllib.parse import urljoin
from Cryptodome.Cipher import AES
from Cryptodome.Util.Padding import unpad
from curl_cffi.requests import Session as CurlSession
from m3u8.model import Key
from requests import Session
@@ -69,8 +70,8 @@ class ClearKey:
"""
if not isinstance(m3u_key, Key):
raise ValueError(f"Provided M3U Key is in an unexpected type {m3u_key!r}")
if not isinstance(session, (Session, type(None))):
raise TypeError(f"Expected session to be a {Session}, not a {type(session)}")
if not isinstance(session, (Session, CurlSession, type(None))):
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not a {type(session)}")
if not m3u_key.method.startswith("AES"):
raise ValueError(f"Provided M3U Key is not an AES Clear Key, {m3u_key.method}")

View File

@@ -15,6 +15,7 @@ from uuid import UUID
from zlib import crc32
import requests
from curl_cffi.requests import Session as CurlSession
from langcodes import Language, tag_is_valid
from lxml.etree import Element, ElementTree
from pyplayready.system.pssh import PSSH as PR_PSSH
@@ -47,7 +48,7 @@ class DASH:
self.url = url
@classmethod
def from_url(cls, url: str, session: Optional[Session] = None, **args: Any) -> DASH:
def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **args: Any) -> DASH:
if not url:
raise requests.URLRequired("DASH manifest URL must be provided for relative path computations.")
if not isinstance(url, str):
@@ -55,8 +56,8 @@ class DASH:
if not session:
session = Session()
elif not isinstance(session, Session):
raise TypeError(f"Expected session to be a {Session}, not {session!r}")
elif not isinstance(session, (Session, CurlSession)):
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
res = session.get(url, **args)
if res.url != url:
@@ -103,6 +104,10 @@ class DASH:
continue
if next(iter(period.xpath("SegmentType/@value")), "content") != "content":
continue
if "urn:amazon:primevideo:cachingBreadth" in [
x.get("schemeIdUri") for x in period.findall("SupplementalProperty")
]:
continue
for adaptation_set in period.findall("AdaptationSet"):
if self.is_trick_mode(adaptation_set):

View File

@@ -14,9 +14,10 @@ from typing import Any, Callable, Optional, Union
from urllib.parse import urljoin
from zlib import crc32
import httpx
import m3u8
import requests
from curl_cffi.requests import Response as CurlResponse
from curl_cffi.requests import Session as CurlSession
from langcodes import Language, tag_is_valid
from m3u8 import M3U8
from pyplayready.cdm import Cdm as PlayReadyCdm
@@ -35,7 +36,7 @@ from unshackle.core.utilities import get_extension, is_close_match, try_ensure_u
class HLS:
def __init__(self, manifest: M3U8, session: Optional[Union[Session, httpx.Client]] = None):
def __init__(self, manifest: M3U8, session: Optional[Union[Session, CurlSession]] = None):
if not manifest:
raise ValueError("HLS manifest must be provided.")
if not isinstance(manifest, M3U8):
@@ -47,7 +48,7 @@ class HLS:
self.session = session or Session()
@classmethod
def from_url(cls, url: str, session: Optional[Union[Session, httpx.Client]] = None, **args: Any) -> HLS:
def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **args: Any) -> HLS:
if not url:
raise requests.URLRequired("HLS manifest URL must be provided.")
if not isinstance(url, str):
@@ -55,22 +56,22 @@ class HLS:
if not session:
session = Session()
elif not isinstance(session, (Session, httpx.Client)):
raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {session!r}")
elif not isinstance(session, (Session, CurlSession)):
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
res = session.get(url, **args)
# Handle both requests and httpx response objects
# Handle requests and curl_cffi response objects
if isinstance(res, requests.Response):
if not res.ok:
raise requests.ConnectionError("Failed to request the M3U(8) document.", response=res)
content = res.text
elif isinstance(res, httpx.Response):
if res.status_code >= 400:
elif isinstance(res, CurlResponse):
if not res.ok:
raise requests.ConnectionError("Failed to request the M3U(8) document.", response=res)
content = res.text
else:
raise TypeError(f"Expected response to be a requests.Response or httpx.Response, not {type(res)}")
raise TypeError(f"Expected response to be a requests.Response or curl_cffi.Response, not {type(res)}")
master = m3u8.loads(content, uri=url)
@@ -229,7 +230,7 @@ class HLS:
save_path: Path,
save_dir: Path,
progress: partial,
session: Optional[Union[Session, httpx.Client]] = None,
session: Optional[Union[Session, CurlSession]] = None,
proxy: Optional[str] = None,
max_workers: Optional[int] = None,
license_widevine: Optional[Callable] = None,
@@ -238,15 +239,13 @@ class HLS:
) -> None:
if not session:
session = Session()
elif not isinstance(session, (Session, httpx.Client)):
raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {session!r}")
elif not isinstance(session, (Session, CurlSession)):
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
if proxy:
# Handle proxies differently based on session type
if isinstance(session, Session):
session.proxies.update({"all": proxy})
elif isinstance(session, httpx.Client):
session.proxies = {"http://": proxy, "https://": proxy}
log = logging.getLogger("HLS")
@@ -257,13 +256,8 @@ class HLS:
log.error(f"Failed to request the invariant M3U8 playlist: {response.status_code}")
sys.exit(1)
playlist_text = response.text
elif isinstance(response, httpx.Response):
if response.status_code >= 400:
log.error(f"Failed to request the invariant M3U8 playlist: {response.status_code}")
sys.exit(1)
playlist_text = response.text
else:
raise TypeError(f"Expected response to be a requests.Response or httpx.Response, not {type(response)}")
raise TypeError(f"Expected response to be a requests.Response or curl_cffi.Response, not {type(response)}")
master = m3u8.loads(playlist_text, uri=track.url)
@@ -533,13 +527,9 @@ class HLS:
if isinstance(res, requests.Response):
res.raise_for_status()
init_content = res.content
elif isinstance(res, httpx.Response):
if res.status_code >= 400:
raise requests.HTTPError(f"HTTP Error: {res.status_code}", response=res)
init_content = res.content
else:
raise TypeError(
f"Expected response to be requests.Response or httpx.Response, not {type(res)}"
f"Expected response to be requests.Response or curl_cffi.Response, not {type(res)}"
)
map_data = (segment.init_section, init_content)
@@ -707,7 +697,7 @@ class HLS:
@staticmethod
def parse_session_data_keys(
manifest: M3U8, session: Optional[Union[Session, httpx.Client]] = None
manifest: M3U8, session: Optional[Union[Session, CurlSession]] = None
) -> list[m3u8.model.Key]:
"""Parse `com.apple.hls.keys` session data and return Key objects."""
keys: list[m3u8.model.Key] = []
@@ -798,7 +788,8 @@ class HLS:
@staticmethod
def get_drm(
key: Union[m3u8.model.SessionKey, m3u8.model.Key], session: Optional[Union[Session, httpx.Client]] = None
key: Union[m3u8.model.SessionKey, m3u8.model.Key],
session: Optional[Union[Session, CurlSession]] = None,
) -> DRM_T:
"""
Convert HLS EXT-X-KEY data to an initialized DRM object.
@@ -810,8 +801,8 @@ class HLS:
Raises a NotImplementedError if the key system is not supported.
"""
if not isinstance(session, (Session, httpx.Client, type(None))):
raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {type(session)}")
if not isinstance(session, (Session, CurlSession, type(None))):
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {type(session)}")
if not session:
session = Session()

View File

@@ -10,6 +10,7 @@ from pathlib import Path
from typing import Any, Callable, Optional, Union
import requests
from curl_cffi.requests import Session as CurlSession
from langcodes import Language, tag_is_valid
from lxml.etree import Element
from pyplayready.system.pssh import PSSH as PR_PSSH
@@ -34,11 +35,13 @@ class ISM:
self.url = url
@classmethod
def from_url(cls, url: str, session: Optional[Session] = None, **kwargs: Any) -> "ISM":
def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **kwargs: Any) -> "ISM":
if not url:
raise requests.URLRequired("ISM manifest URL must be provided")
if not session:
session = Session()
elif not isinstance(session, (Session, CurlSession)):
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
res = session.get(url, **kwargs)
if res.url != url:
url = res.url

View File

@@ -2,9 +2,10 @@
from __future__ import annotations
from typing import Optional
from typing import Optional, Union
import m3u8
from curl_cffi.requests import Session as CurlSession
from requests import Session
from unshackle.core.manifests.hls import HLS
@@ -15,7 +16,7 @@ def parse(
master: m3u8.M3U8,
language: str,
*,
session: Optional[Session] = None,
session: Optional[Union[Session, CurlSession]] = None,
) -> Tracks:
"""Parse a variant playlist to ``Tracks`` with basic information, defer DRM loading."""
tracks = HLS(master, session=session).to_tracks(language)

79
unshackle/core/session.py Normal file
View File

@@ -0,0 +1,79 @@
"""Session utilities for creating HTTP sessions with different backends."""
from __future__ import annotations
import warnings
from curl_cffi.requests import Session as CurlSession
from unshackle.core.config import config
# Globally suppress curl_cffi HTTPS proxy warnings since some proxy providers
# (like NordVPN) require HTTPS URLs but curl_cffi expects HTTP format
warnings.filterwarnings(
"ignore", message="Make sure you are using https over https proxy.*", category=RuntimeWarning, module="curl_cffi.*"
)
class Session(CurlSession):
"""curl_cffi Session with warning suppression."""
def request(self, method, url, **kwargs):
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore", message="Make sure you are using https over https proxy.*", category=RuntimeWarning
)
return super().request(method, url, **kwargs)
def session(browser: str | None = None, **kwargs) -> Session:
"""
Create a curl_cffi session that impersonates a browser.
This is a full replacement for requests.Session with browser impersonation
and anti-bot capabilities. The session uses curl-impersonate under the hood
to mimic real browser behavior.
Args:
browser: Browser to impersonate (e.g. "chrome124", "firefox", "safari").
Uses the configured default from curl_impersonate.browser if not specified.
See https://github.com/lexiforest/curl_cffi#sessions for available options.
**kwargs: Additional arguments passed to CurlSession constructor:
- headers: Additional headers (dict)
- cookies: Cookie jar or dict
- auth: HTTP basic auth tuple (username, password)
- proxies: Proxy configuration dict
- verify: SSL certificate verification (bool, default True)
- timeout: Request timeout in seconds (float or tuple)
- allow_redirects: Follow redirects (bool, default True)
- max_redirects: Maximum redirect count (int)
- cert: Client certificate (str or tuple)
Returns:
curl_cffi.requests.Session configured with browser impersonation, common headers,
and equivalent retry behavior to requests.Session.
Example:
from unshackle.core.session import session
class MyService(Service):
@staticmethod
def get_session():
return session() # Uses config default browser
"""
if browser is None:
browser = config.curl_impersonate.get("browser", "chrome124")
session_config = {
"impersonate": browser,
"timeout": 30.0,
"allow_redirects": True,
"max_redirects": 15,
"verify": True,
}
session_config.update(kwargs)
session_obj = Session(**session_config)
session_obj.headers.update(config.headers)
return session_obj

View File

@@ -13,6 +13,7 @@ from typing import Any, Callable, Iterable, Optional, Union
from uuid import UUID
from zlib import crc32
from curl_cffi.requests import Session as CurlSession
from langcodes import Language
from pyplayready.cdm import Cdm as PlayReadyCdm
from pywidevine.cdm import Cdm as WidevineCdm
@@ -585,8 +586,8 @@ class Track:
raise TypeError(f"Expected url to be a {str}, not {type(url)}")
if not isinstance(byte_range, (str, type(None))):
raise TypeError(f"Expected byte_range to be a {str}, not {type(byte_range)}")
if not isinstance(session, (Session, type(None))):
raise TypeError(f"Expected session to be a {Session}, not {type(session)}")
if not isinstance(session, (Session, CurlSession, type(None))):
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {type(session)}")
if not url:
if self.descriptor != self.Descriptor.URL: