feat: add curl_cffi session support with browser impersonation

- Add new session utility with curl_cffi support for anti-bot protection
- Update all manifest parsers (DASH, HLS, ISM, M3U8) to accept curl_cffi sessions
- Add browser impersonation support (Chrome, Firefox, Safari)
- Fix cookie handling compatibility between requests and curl_cffi
- Suppress HTTPS proxy warnings for better UX
- Maintain full backward compatibility with requests.Session
This commit is contained in:
Andy
2025-09-25 06:27:14 +00:00
parent 63b7a49c1a
commit 35efdbff6d
8 changed files with 123 additions and 39 deletions

View File

@@ -15,6 +15,7 @@ from uuid import UUID
from zlib import crc32
import requests
from curl_cffi.requests import Session as CurlSession
from langcodes import Language, tag_is_valid
from lxml.etree import Element, ElementTree
from pyplayready.system.pssh import PSSH as PR_PSSH
@@ -47,7 +48,7 @@ class DASH:
self.url = url
@classmethod
def from_url(cls, url: str, session: Optional[Session] = None, **args: Any) -> DASH:
def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **args: Any) -> DASH:
if not url:
raise requests.URLRequired("DASH manifest URL must be provided for relative path computations.")
if not isinstance(url, str):
@@ -55,8 +56,8 @@ class DASH:
if not session:
session = Session()
elif not isinstance(session, Session):
raise TypeError(f"Expected session to be a {Session}, not {session!r}")
elif not isinstance(session, (Session, CurlSession)):
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
res = session.get(url, **args)
if res.url != url:
@@ -103,6 +104,10 @@ class DASH:
continue
if next(iter(period.xpath("SegmentType/@value")), "content") != "content":
continue
if "urn:amazon:primevideo:cachingBreadth" in [
x.get("schemeIdUri") for x in period.findall("SupplementalProperty")
]:
continue
for adaptation_set in period.findall("AdaptationSet"):
if self.is_trick_mode(adaptation_set):

View File

@@ -14,9 +14,10 @@ from typing import Any, Callable, Optional, Union
from urllib.parse import urljoin
from zlib import crc32
import httpx
import m3u8
import requests
from curl_cffi.requests import Response as CurlResponse
from curl_cffi.requests import Session as CurlSession
from langcodes import Language, tag_is_valid
from m3u8 import M3U8
from pyplayready.cdm import Cdm as PlayReadyCdm
@@ -35,7 +36,7 @@ from unshackle.core.utilities import get_extension, is_close_match, try_ensure_u
class HLS:
def __init__(self, manifest: M3U8, session: Optional[Union[Session, httpx.Client]] = None):
def __init__(self, manifest: M3U8, session: Optional[Union[Session, CurlSession]] = None):
if not manifest:
raise ValueError("HLS manifest must be provided.")
if not isinstance(manifest, M3U8):
@@ -47,7 +48,7 @@ class HLS:
self.session = session or Session()
@classmethod
def from_url(cls, url: str, session: Optional[Union[Session, httpx.Client]] = None, **args: Any) -> HLS:
def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **args: Any) -> HLS:
if not url:
raise requests.URLRequired("HLS manifest URL must be provided.")
if not isinstance(url, str):
@@ -55,22 +56,22 @@ class HLS:
if not session:
session = Session()
elif not isinstance(session, (Session, httpx.Client)):
raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {session!r}")
elif not isinstance(session, (Session, CurlSession)):
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
res = session.get(url, **args)
# Handle both requests and httpx response objects
# Handle requests and curl_cffi response objects
if isinstance(res, requests.Response):
if not res.ok:
raise requests.ConnectionError("Failed to request the M3U(8) document.", response=res)
content = res.text
elif isinstance(res, httpx.Response):
if res.status_code >= 400:
elif isinstance(res, CurlResponse):
if not res.ok:
raise requests.ConnectionError("Failed to request the M3U(8) document.", response=res)
content = res.text
else:
raise TypeError(f"Expected response to be a requests.Response or httpx.Response, not {type(res)}")
raise TypeError(f"Expected response to be a requests.Response or curl_cffi.Response, not {type(res)}")
master = m3u8.loads(content, uri=url)
@@ -229,7 +230,7 @@ class HLS:
save_path: Path,
save_dir: Path,
progress: partial,
session: Optional[Union[Session, httpx.Client]] = None,
session: Optional[Union[Session, CurlSession]] = None,
proxy: Optional[str] = None,
max_workers: Optional[int] = None,
license_widevine: Optional[Callable] = None,
@@ -238,15 +239,13 @@ class HLS:
) -> None:
if not session:
session = Session()
elif not isinstance(session, (Session, httpx.Client)):
raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {session!r}")
elif not isinstance(session, (Session, CurlSession)):
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
if proxy:
# Handle proxies differently based on session type
if isinstance(session, Session):
session.proxies.update({"all": proxy})
elif isinstance(session, httpx.Client):
session.proxies = {"http://": proxy, "https://": proxy}
log = logging.getLogger("HLS")
@@ -257,13 +256,8 @@ class HLS:
log.error(f"Failed to request the invariant M3U8 playlist: {response.status_code}")
sys.exit(1)
playlist_text = response.text
elif isinstance(response, httpx.Response):
if response.status_code >= 400:
log.error(f"Failed to request the invariant M3U8 playlist: {response.status_code}")
sys.exit(1)
playlist_text = response.text
else:
raise TypeError(f"Expected response to be a requests.Response or httpx.Response, not {type(response)}")
raise TypeError(f"Expected response to be a requests.Response or curl_cffi.Response, not {type(response)}")
master = m3u8.loads(playlist_text, uri=track.url)
@@ -533,13 +527,9 @@ class HLS:
if isinstance(res, requests.Response):
res.raise_for_status()
init_content = res.content
elif isinstance(res, httpx.Response):
if res.status_code >= 400:
raise requests.HTTPError(f"HTTP Error: {res.status_code}", response=res)
init_content = res.content
else:
raise TypeError(
f"Expected response to be requests.Response or httpx.Response, not {type(res)}"
f"Expected response to be requests.Response or curl_cffi.Response, not {type(res)}"
)
map_data = (segment.init_section, init_content)
@@ -707,7 +697,7 @@ class HLS:
@staticmethod
def parse_session_data_keys(
manifest: M3U8, session: Optional[Union[Session, httpx.Client]] = None
manifest: M3U8, session: Optional[Union[Session, CurlSession]] = None
) -> list[m3u8.model.Key]:
"""Parse `com.apple.hls.keys` session data and return Key objects."""
keys: list[m3u8.model.Key] = []
@@ -798,7 +788,8 @@ class HLS:
@staticmethod
def get_drm(
key: Union[m3u8.model.SessionKey, m3u8.model.Key], session: Optional[Union[Session, httpx.Client]] = None
key: Union[m3u8.model.SessionKey, m3u8.model.Key],
session: Optional[Union[Session, CurlSession]] = None,
) -> DRM_T:
"""
Convert HLS EXT-X-KEY data to an initialized DRM object.
@@ -810,8 +801,8 @@ class HLS:
Raises a NotImplementedError if the key system is not supported.
"""
if not isinstance(session, (Session, httpx.Client, type(None))):
raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {type(session)}")
if not isinstance(session, (Session, CurlSession, type(None))):
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {type(session)}")
if not session:
session = Session()

View File

@@ -10,6 +10,7 @@ from pathlib import Path
from typing import Any, Callable, Optional, Union
import requests
from curl_cffi.requests import Session as CurlSession
from langcodes import Language, tag_is_valid
from lxml.etree import Element
from pyplayready.system.pssh import PSSH as PR_PSSH
@@ -34,11 +35,13 @@ class ISM:
self.url = url
@classmethod
def from_url(cls, url: str, session: Optional[Session] = None, **kwargs: Any) -> "ISM":
def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **kwargs: Any) -> "ISM":
if not url:
raise requests.URLRequired("ISM manifest URL must be provided")
if not session:
session = Session()
elif not isinstance(session, (Session, CurlSession)):
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
res = session.get(url, **kwargs)
if res.url != url:
url = res.url

View File

@@ -2,9 +2,10 @@
from __future__ import annotations
from typing import Optional
from typing import Optional, Union
import m3u8
from curl_cffi.requests import Session as CurlSession
from requests import Session
from unshackle.core.manifests.hls import HLS
@@ -15,7 +16,7 @@ def parse(
master: m3u8.M3U8,
language: str,
*,
session: Optional[Session] = None,
session: Optional[Union[Session, CurlSession]] = None,
) -> Tracks:
"""Parse a variant playlist to ``Tracks`` with basic information, defer DRM loading."""
tracks = HLS(master, session=session).to_tracks(language)