mirror of
https://github.com/unshackle-dl/unshackle.git
synced 2025-10-23 15:11:08 +00:00
feat: add curl_cffi session support with browser impersonation
Add new session utility with curl_cffi support for anti-bot protection Update all manifest parsers (DASH, HLS, ISM, M3U8) to accept curl_cffi sessions Add browser impersonation support (Chrome, Firefox, Safari) Fix cookie handling compatibility between requests and curl_cffi Suppress HTTPS proxy warnings for better UX Maintain full backward compatibility with requests.Session
This commit is contained in:
@@ -1564,6 +1564,9 @@ class dl:
|
||||
|
||||
@staticmethod
|
||||
def save_cookies(path: Path, cookies: CookieJar):
|
||||
if hasattr(cookies, 'jar'):
|
||||
cookies = cookies.jar
|
||||
|
||||
cookie_jar = MozillaCookieJar(path)
|
||||
cookie_jar.load()
|
||||
for cookie in cookies:
|
||||
|
||||
@@ -8,6 +8,7 @@ from urllib.parse import urljoin
|
||||
|
||||
from Cryptodome.Cipher import AES
|
||||
from Cryptodome.Util.Padding import unpad
|
||||
from curl_cffi.requests import Session as CurlSession
|
||||
from m3u8.model import Key
|
||||
from requests import Session
|
||||
|
||||
@@ -69,8 +70,8 @@ class ClearKey:
|
||||
"""
|
||||
if not isinstance(m3u_key, Key):
|
||||
raise ValueError(f"Provided M3U Key is in an unexpected type {m3u_key!r}")
|
||||
if not isinstance(session, (Session, type(None))):
|
||||
raise TypeError(f"Expected session to be a {Session}, not a {type(session)}")
|
||||
if not isinstance(session, (Session, CurlSession, type(None))):
|
||||
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not a {type(session)}")
|
||||
|
||||
if not m3u_key.method.startswith("AES"):
|
||||
raise ValueError(f"Provided M3U Key is not an AES Clear Key, {m3u_key.method}")
|
||||
|
||||
@@ -15,6 +15,7 @@ from uuid import UUID
|
||||
from zlib import crc32
|
||||
|
||||
import requests
|
||||
from curl_cffi.requests import Session as CurlSession
|
||||
from langcodes import Language, tag_is_valid
|
||||
from lxml.etree import Element, ElementTree
|
||||
from pyplayready.system.pssh import PSSH as PR_PSSH
|
||||
@@ -47,7 +48,7 @@ class DASH:
|
||||
self.url = url
|
||||
|
||||
@classmethod
|
||||
def from_url(cls, url: str, session: Optional[Session] = None, **args: Any) -> DASH:
|
||||
def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **args: Any) -> DASH:
|
||||
if not url:
|
||||
raise requests.URLRequired("DASH manifest URL must be provided for relative path computations.")
|
||||
if not isinstance(url, str):
|
||||
@@ -55,8 +56,8 @@ class DASH:
|
||||
|
||||
if not session:
|
||||
session = Session()
|
||||
elif not isinstance(session, Session):
|
||||
raise TypeError(f"Expected session to be a {Session}, not {session!r}")
|
||||
elif not isinstance(session, (Session, CurlSession)):
|
||||
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
|
||||
|
||||
res = session.get(url, **args)
|
||||
if res.url != url:
|
||||
@@ -103,6 +104,10 @@ class DASH:
|
||||
continue
|
||||
if next(iter(period.xpath("SegmentType/@value")), "content") != "content":
|
||||
continue
|
||||
if "urn:amazon:primevideo:cachingBreadth" in [
|
||||
x.get("schemeIdUri") for x in period.findall("SupplementalProperty")
|
||||
]:
|
||||
continue
|
||||
|
||||
for adaptation_set in period.findall("AdaptationSet"):
|
||||
if self.is_trick_mode(adaptation_set):
|
||||
|
||||
@@ -14,9 +14,10 @@ from typing import Any, Callable, Optional, Union
|
||||
from urllib.parse import urljoin
|
||||
from zlib import crc32
|
||||
|
||||
import httpx
|
||||
import m3u8
|
||||
import requests
|
||||
from curl_cffi.requests import Response as CurlResponse
|
||||
from curl_cffi.requests import Session as CurlSession
|
||||
from langcodes import Language, tag_is_valid
|
||||
from m3u8 import M3U8
|
||||
from pyplayready.cdm import Cdm as PlayReadyCdm
|
||||
@@ -35,7 +36,7 @@ from unshackle.core.utilities import get_extension, is_close_match, try_ensure_u
|
||||
|
||||
|
||||
class HLS:
|
||||
def __init__(self, manifest: M3U8, session: Optional[Union[Session, httpx.Client]] = None):
|
||||
def __init__(self, manifest: M3U8, session: Optional[Union[Session, CurlSession]] = None):
|
||||
if not manifest:
|
||||
raise ValueError("HLS manifest must be provided.")
|
||||
if not isinstance(manifest, M3U8):
|
||||
@@ -47,7 +48,7 @@ class HLS:
|
||||
self.session = session or Session()
|
||||
|
||||
@classmethod
|
||||
def from_url(cls, url: str, session: Optional[Union[Session, httpx.Client]] = None, **args: Any) -> HLS:
|
||||
def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **args: Any) -> HLS:
|
||||
if not url:
|
||||
raise requests.URLRequired("HLS manifest URL must be provided.")
|
||||
if not isinstance(url, str):
|
||||
@@ -55,22 +56,22 @@ class HLS:
|
||||
|
||||
if not session:
|
||||
session = Session()
|
||||
elif not isinstance(session, (Session, httpx.Client)):
|
||||
raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {session!r}")
|
||||
elif not isinstance(session, (Session, CurlSession)):
|
||||
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
|
||||
|
||||
res = session.get(url, **args)
|
||||
|
||||
# Handle both requests and httpx response objects
|
||||
# Handle requests and curl_cffi response objects
|
||||
if isinstance(res, requests.Response):
|
||||
if not res.ok:
|
||||
raise requests.ConnectionError("Failed to request the M3U(8) document.", response=res)
|
||||
content = res.text
|
||||
elif isinstance(res, httpx.Response):
|
||||
if res.status_code >= 400:
|
||||
elif isinstance(res, CurlResponse):
|
||||
if not res.ok:
|
||||
raise requests.ConnectionError("Failed to request the M3U(8) document.", response=res)
|
||||
content = res.text
|
||||
else:
|
||||
raise TypeError(f"Expected response to be a requests.Response or httpx.Response, not {type(res)}")
|
||||
raise TypeError(f"Expected response to be a requests.Response or curl_cffi.Response, not {type(res)}")
|
||||
|
||||
master = m3u8.loads(content, uri=url)
|
||||
|
||||
@@ -229,7 +230,7 @@ class HLS:
|
||||
save_path: Path,
|
||||
save_dir: Path,
|
||||
progress: partial,
|
||||
session: Optional[Union[Session, httpx.Client]] = None,
|
||||
session: Optional[Union[Session, CurlSession]] = None,
|
||||
proxy: Optional[str] = None,
|
||||
max_workers: Optional[int] = None,
|
||||
license_widevine: Optional[Callable] = None,
|
||||
@@ -238,15 +239,13 @@ class HLS:
|
||||
) -> None:
|
||||
if not session:
|
||||
session = Session()
|
||||
elif not isinstance(session, (Session, httpx.Client)):
|
||||
raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {session!r}")
|
||||
elif not isinstance(session, (Session, CurlSession)):
|
||||
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
|
||||
|
||||
if proxy:
|
||||
# Handle proxies differently based on session type
|
||||
if isinstance(session, Session):
|
||||
session.proxies.update({"all": proxy})
|
||||
elif isinstance(session, httpx.Client):
|
||||
session.proxies = {"http://": proxy, "https://": proxy}
|
||||
|
||||
log = logging.getLogger("HLS")
|
||||
|
||||
@@ -257,13 +256,8 @@ class HLS:
|
||||
log.error(f"Failed to request the invariant M3U8 playlist: {response.status_code}")
|
||||
sys.exit(1)
|
||||
playlist_text = response.text
|
||||
elif isinstance(response, httpx.Response):
|
||||
if response.status_code >= 400:
|
||||
log.error(f"Failed to request the invariant M3U8 playlist: {response.status_code}")
|
||||
sys.exit(1)
|
||||
playlist_text = response.text
|
||||
else:
|
||||
raise TypeError(f"Expected response to be a requests.Response or httpx.Response, not {type(response)}")
|
||||
raise TypeError(f"Expected response to be a requests.Response or curl_cffi.Response, not {type(response)}")
|
||||
|
||||
master = m3u8.loads(playlist_text, uri=track.url)
|
||||
|
||||
@@ -533,13 +527,9 @@ class HLS:
|
||||
if isinstance(res, requests.Response):
|
||||
res.raise_for_status()
|
||||
init_content = res.content
|
||||
elif isinstance(res, httpx.Response):
|
||||
if res.status_code >= 400:
|
||||
raise requests.HTTPError(f"HTTP Error: {res.status_code}", response=res)
|
||||
init_content = res.content
|
||||
else:
|
||||
raise TypeError(
|
||||
f"Expected response to be requests.Response or httpx.Response, not {type(res)}"
|
||||
f"Expected response to be requests.Response or curl_cffi.Response, not {type(res)}"
|
||||
)
|
||||
|
||||
map_data = (segment.init_section, init_content)
|
||||
@@ -707,7 +697,7 @@ class HLS:
|
||||
|
||||
@staticmethod
|
||||
def parse_session_data_keys(
|
||||
manifest: M3U8, session: Optional[Union[Session, httpx.Client]] = None
|
||||
manifest: M3U8, session: Optional[Union[Session, CurlSession]] = None
|
||||
) -> list[m3u8.model.Key]:
|
||||
"""Parse `com.apple.hls.keys` session data and return Key objects."""
|
||||
keys: list[m3u8.model.Key] = []
|
||||
@@ -798,7 +788,8 @@ class HLS:
|
||||
|
||||
@staticmethod
|
||||
def get_drm(
|
||||
key: Union[m3u8.model.SessionKey, m3u8.model.Key], session: Optional[Union[Session, httpx.Client]] = None
|
||||
key: Union[m3u8.model.SessionKey, m3u8.model.Key],
|
||||
session: Optional[Union[Session, CurlSession]] = None,
|
||||
) -> DRM_T:
|
||||
"""
|
||||
Convert HLS EXT-X-KEY data to an initialized DRM object.
|
||||
@@ -810,8 +801,8 @@ class HLS:
|
||||
|
||||
Raises a NotImplementedError if the key system is not supported.
|
||||
"""
|
||||
if not isinstance(session, (Session, httpx.Client, type(None))):
|
||||
raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {type(session)}")
|
||||
if not isinstance(session, (Session, CurlSession, type(None))):
|
||||
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {type(session)}")
|
||||
if not session:
|
||||
session = Session()
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ from pathlib import Path
|
||||
from typing import Any, Callable, Optional, Union
|
||||
|
||||
import requests
|
||||
from curl_cffi.requests import Session as CurlSession
|
||||
from langcodes import Language, tag_is_valid
|
||||
from lxml.etree import Element
|
||||
from pyplayready.system.pssh import PSSH as PR_PSSH
|
||||
@@ -34,11 +35,13 @@ class ISM:
|
||||
self.url = url
|
||||
|
||||
@classmethod
|
||||
def from_url(cls, url: str, session: Optional[Session] = None, **kwargs: Any) -> "ISM":
|
||||
def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **kwargs: Any) -> "ISM":
|
||||
if not url:
|
||||
raise requests.URLRequired("ISM manifest URL must be provided")
|
||||
if not session:
|
||||
session = Session()
|
||||
elif not isinstance(session, (Session, CurlSession)):
|
||||
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
|
||||
res = session.get(url, **kwargs)
|
||||
if res.url != url:
|
||||
url = res.url
|
||||
|
||||
@@ -2,9 +2,10 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
from typing import Optional, Union
|
||||
|
||||
import m3u8
|
||||
from curl_cffi.requests import Session as CurlSession
|
||||
from requests import Session
|
||||
|
||||
from unshackle.core.manifests.hls import HLS
|
||||
@@ -15,7 +16,7 @@ def parse(
|
||||
master: m3u8.M3U8,
|
||||
language: str,
|
||||
*,
|
||||
session: Optional[Session] = None,
|
||||
session: Optional[Union[Session, CurlSession]] = None,
|
||||
) -> Tracks:
|
||||
"""Parse a variant playlist to ``Tracks`` with basic information, defer DRM loading."""
|
||||
tracks = HLS(master, session=session).to_tracks(language)
|
||||
|
||||
79
unshackle/core/session.py
Normal file
79
unshackle/core/session.py
Normal file
@@ -0,0 +1,79 @@
|
||||
"""Session utilities for creating HTTP sessions with different backends."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import warnings
|
||||
|
||||
from curl_cffi.requests import Session as CurlSession
|
||||
|
||||
from unshackle.core.config import config
|
||||
|
||||
# Globally suppress curl_cffi HTTPS proxy warnings since some proxy providers
|
||||
# (like NordVPN) require HTTPS URLs but curl_cffi expects HTTP format
|
||||
warnings.filterwarnings(
|
||||
"ignore", message="Make sure you are using https over https proxy.*", category=RuntimeWarning, module="curl_cffi.*"
|
||||
)
|
||||
|
||||
|
||||
class Session(CurlSession):
|
||||
"""curl_cffi Session with warning suppression."""
|
||||
|
||||
def request(self, method, url, **kwargs):
|
||||
with warnings.catch_warnings():
|
||||
warnings.filterwarnings(
|
||||
"ignore", message="Make sure you are using https over https proxy.*", category=RuntimeWarning
|
||||
)
|
||||
return super().request(method, url, **kwargs)
|
||||
|
||||
|
||||
def session(browser: str | None = None, **kwargs) -> Session:
|
||||
"""
|
||||
Create a curl_cffi session that impersonates a browser.
|
||||
|
||||
This is a full replacement for requests.Session with browser impersonation
|
||||
and anti-bot capabilities. The session uses curl-impersonate under the hood
|
||||
to mimic real browser behavior.
|
||||
|
||||
Args:
|
||||
browser: Browser to impersonate (e.g. "chrome124", "firefox", "safari").
|
||||
Uses the configured default from curl_impersonate.browser if not specified.
|
||||
See https://github.com/lexiforest/curl_cffi#sessions for available options.
|
||||
**kwargs: Additional arguments passed to CurlSession constructor:
|
||||
- headers: Additional headers (dict)
|
||||
- cookies: Cookie jar or dict
|
||||
- auth: HTTP basic auth tuple (username, password)
|
||||
- proxies: Proxy configuration dict
|
||||
- verify: SSL certificate verification (bool, default True)
|
||||
- timeout: Request timeout in seconds (float or tuple)
|
||||
- allow_redirects: Follow redirects (bool, default True)
|
||||
- max_redirects: Maximum redirect count (int)
|
||||
- cert: Client certificate (str or tuple)
|
||||
|
||||
Returns:
|
||||
curl_cffi.requests.Session configured with browser impersonation, common headers,
|
||||
and equivalent retry behavior to requests.Session.
|
||||
|
||||
Example:
|
||||
from unshackle.core.session import session
|
||||
|
||||
class MyService(Service):
|
||||
@staticmethod
|
||||
def get_session():
|
||||
return session() # Uses config default browser
|
||||
"""
|
||||
if browser is None:
|
||||
browser = config.curl_impersonate.get("browser", "chrome124")
|
||||
|
||||
session_config = {
|
||||
"impersonate": browser,
|
||||
"timeout": 30.0,
|
||||
"allow_redirects": True,
|
||||
"max_redirects": 15,
|
||||
"verify": True,
|
||||
}
|
||||
|
||||
session_config.update(kwargs)
|
||||
session_obj = Session(**session_config)
|
||||
session_obj.headers.update(config.headers)
|
||||
|
||||
return session_obj
|
||||
@@ -13,6 +13,7 @@ from typing import Any, Callable, Iterable, Optional, Union
|
||||
from uuid import UUID
|
||||
from zlib import crc32
|
||||
|
||||
from curl_cffi.requests import Session as CurlSession
|
||||
from langcodes import Language
|
||||
from pyplayready.cdm import Cdm as PlayReadyCdm
|
||||
from pywidevine.cdm import Cdm as WidevineCdm
|
||||
@@ -585,8 +586,8 @@ class Track:
|
||||
raise TypeError(f"Expected url to be a {str}, not {type(url)}")
|
||||
if not isinstance(byte_range, (str, type(None))):
|
||||
raise TypeError(f"Expected byte_range to be a {str}, not {type(byte_range)}")
|
||||
if not isinstance(session, (Session, type(None))):
|
||||
raise TypeError(f"Expected session to be a {Session}, not {type(session)}")
|
||||
if not isinstance(session, (Session, CurlSession, type(None))):
|
||||
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {type(session)}")
|
||||
|
||||
if not url:
|
||||
if self.descriptor != self.Descriptor.URL:
|
||||
|
||||
Reference in New Issue
Block a user