mirror of
https://github.com/unshackle-dl/unshackle.git
synced 2025-10-23 15:11:08 +00:00
feat: add curl_cffi session support with browser impersonation
Add new session utility with curl_cffi support for anti-bot protection. Update all manifest parsers (DASH, HLS, ISM, M3U8) to accept curl_cffi sessions. Add browser impersonation support (Chrome, Firefox, Safari). Fix cookie handling compatibility between requests and curl_cffi. Suppress HTTPS proxy warnings for better UX. Maintain full backward compatibility with requests.Session.
This commit is contained in:
@@ -1564,6 +1564,9 @@ class dl:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def save_cookies(path: Path, cookies: CookieJar):
|
def save_cookies(path: Path, cookies: CookieJar):
|
||||||
|
if hasattr(cookies, 'jar'):
|
||||||
|
cookies = cookies.jar
|
||||||
|
|
||||||
cookie_jar = MozillaCookieJar(path)
|
cookie_jar = MozillaCookieJar(path)
|
||||||
cookie_jar.load()
|
cookie_jar.load()
|
||||||
for cookie in cookies:
|
for cookie in cookies:
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ from urllib.parse import urljoin
|
|||||||
|
|
||||||
from Cryptodome.Cipher import AES
|
from Cryptodome.Cipher import AES
|
||||||
from Cryptodome.Util.Padding import unpad
|
from Cryptodome.Util.Padding import unpad
|
||||||
|
from curl_cffi.requests import Session as CurlSession
|
||||||
from m3u8.model import Key
|
from m3u8.model import Key
|
||||||
from requests import Session
|
from requests import Session
|
||||||
|
|
||||||
@@ -69,8 +70,8 @@ class ClearKey:
|
|||||||
"""
|
"""
|
||||||
if not isinstance(m3u_key, Key):
|
if not isinstance(m3u_key, Key):
|
||||||
raise ValueError(f"Provided M3U Key is in an unexpected type {m3u_key!r}")
|
raise ValueError(f"Provided M3U Key is in an unexpected type {m3u_key!r}")
|
||||||
if not isinstance(session, (Session, type(None))):
|
if not isinstance(session, (Session, CurlSession, type(None))):
|
||||||
raise TypeError(f"Expected session to be a {Session}, not a {type(session)}")
|
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not a {type(session)}")
|
||||||
|
|
||||||
if not m3u_key.method.startswith("AES"):
|
if not m3u_key.method.startswith("AES"):
|
||||||
raise ValueError(f"Provided M3U Key is not an AES Clear Key, {m3u_key.method}")
|
raise ValueError(f"Provided M3U Key is not an AES Clear Key, {m3u_key.method}")
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ from uuid import UUID
|
|||||||
from zlib import crc32
|
from zlib import crc32
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
from curl_cffi.requests import Session as CurlSession
|
||||||
from langcodes import Language, tag_is_valid
|
from langcodes import Language, tag_is_valid
|
||||||
from lxml.etree import Element, ElementTree
|
from lxml.etree import Element, ElementTree
|
||||||
from pyplayready.system.pssh import PSSH as PR_PSSH
|
from pyplayready.system.pssh import PSSH as PR_PSSH
|
||||||
@@ -47,7 +48,7 @@ class DASH:
|
|||||||
self.url = url
|
self.url = url
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_url(cls, url: str, session: Optional[Session] = None, **args: Any) -> DASH:
|
def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **args: Any) -> DASH:
|
||||||
if not url:
|
if not url:
|
||||||
raise requests.URLRequired("DASH manifest URL must be provided for relative path computations.")
|
raise requests.URLRequired("DASH manifest URL must be provided for relative path computations.")
|
||||||
if not isinstance(url, str):
|
if not isinstance(url, str):
|
||||||
@@ -55,8 +56,8 @@ class DASH:
|
|||||||
|
|
||||||
if not session:
|
if not session:
|
||||||
session = Session()
|
session = Session()
|
||||||
elif not isinstance(session, Session):
|
elif not isinstance(session, (Session, CurlSession)):
|
||||||
raise TypeError(f"Expected session to be a {Session}, not {session!r}")
|
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
|
||||||
|
|
||||||
res = session.get(url, **args)
|
res = session.get(url, **args)
|
||||||
if res.url != url:
|
if res.url != url:
|
||||||
@@ -103,6 +104,10 @@ class DASH:
|
|||||||
continue
|
continue
|
||||||
if next(iter(period.xpath("SegmentType/@value")), "content") != "content":
|
if next(iter(period.xpath("SegmentType/@value")), "content") != "content":
|
||||||
continue
|
continue
|
||||||
|
if "urn:amazon:primevideo:cachingBreadth" in [
|
||||||
|
x.get("schemeIdUri") for x in period.findall("SupplementalProperty")
|
||||||
|
]:
|
||||||
|
continue
|
||||||
|
|
||||||
for adaptation_set in period.findall("AdaptationSet"):
|
for adaptation_set in period.findall("AdaptationSet"):
|
||||||
if self.is_trick_mode(adaptation_set):
|
if self.is_trick_mode(adaptation_set):
|
||||||
|
|||||||
@@ -14,9 +14,10 @@ from typing import Any, Callable, Optional, Union
|
|||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
from zlib import crc32
|
from zlib import crc32
|
||||||
|
|
||||||
import httpx
|
|
||||||
import m3u8
|
import m3u8
|
||||||
import requests
|
import requests
|
||||||
|
from curl_cffi.requests import Response as CurlResponse
|
||||||
|
from curl_cffi.requests import Session as CurlSession
|
||||||
from langcodes import Language, tag_is_valid
|
from langcodes import Language, tag_is_valid
|
||||||
from m3u8 import M3U8
|
from m3u8 import M3U8
|
||||||
from pyplayready.cdm import Cdm as PlayReadyCdm
|
from pyplayready.cdm import Cdm as PlayReadyCdm
|
||||||
@@ -35,7 +36,7 @@ from unshackle.core.utilities import get_extension, is_close_match, try_ensure_u
|
|||||||
|
|
||||||
|
|
||||||
class HLS:
|
class HLS:
|
||||||
def __init__(self, manifest: M3U8, session: Optional[Union[Session, httpx.Client]] = None):
|
def __init__(self, manifest: M3U8, session: Optional[Union[Session, CurlSession]] = None):
|
||||||
if not manifest:
|
if not manifest:
|
||||||
raise ValueError("HLS manifest must be provided.")
|
raise ValueError("HLS manifest must be provided.")
|
||||||
if not isinstance(manifest, M3U8):
|
if not isinstance(manifest, M3U8):
|
||||||
@@ -47,7 +48,7 @@ class HLS:
|
|||||||
self.session = session or Session()
|
self.session = session or Session()
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_url(cls, url: str, session: Optional[Union[Session, httpx.Client]] = None, **args: Any) -> HLS:
|
def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **args: Any) -> HLS:
|
||||||
if not url:
|
if not url:
|
||||||
raise requests.URLRequired("HLS manifest URL must be provided.")
|
raise requests.URLRequired("HLS manifest URL must be provided.")
|
||||||
if not isinstance(url, str):
|
if not isinstance(url, str):
|
||||||
@@ -55,22 +56,22 @@ class HLS:
|
|||||||
|
|
||||||
if not session:
|
if not session:
|
||||||
session = Session()
|
session = Session()
|
||||||
elif not isinstance(session, (Session, httpx.Client)):
|
elif not isinstance(session, (Session, CurlSession)):
|
||||||
raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {session!r}")
|
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
|
||||||
|
|
||||||
res = session.get(url, **args)
|
res = session.get(url, **args)
|
||||||
|
|
||||||
# Handle both requests and httpx response objects
|
# Handle requests and curl_cffi response objects
|
||||||
if isinstance(res, requests.Response):
|
if isinstance(res, requests.Response):
|
||||||
if not res.ok:
|
if not res.ok:
|
||||||
raise requests.ConnectionError("Failed to request the M3U(8) document.", response=res)
|
raise requests.ConnectionError("Failed to request the M3U(8) document.", response=res)
|
||||||
content = res.text
|
content = res.text
|
||||||
elif isinstance(res, httpx.Response):
|
elif isinstance(res, CurlResponse):
|
||||||
if res.status_code >= 400:
|
if not res.ok:
|
||||||
raise requests.ConnectionError("Failed to request the M3U(8) document.", response=res)
|
raise requests.ConnectionError("Failed to request the M3U(8) document.", response=res)
|
||||||
content = res.text
|
content = res.text
|
||||||
else:
|
else:
|
||||||
raise TypeError(f"Expected response to be a requests.Response or httpx.Response, not {type(res)}")
|
raise TypeError(f"Expected response to be a requests.Response or curl_cffi.Response, not {type(res)}")
|
||||||
|
|
||||||
master = m3u8.loads(content, uri=url)
|
master = m3u8.loads(content, uri=url)
|
||||||
|
|
||||||
@@ -229,7 +230,7 @@ class HLS:
|
|||||||
save_path: Path,
|
save_path: Path,
|
||||||
save_dir: Path,
|
save_dir: Path,
|
||||||
progress: partial,
|
progress: partial,
|
||||||
session: Optional[Union[Session, httpx.Client]] = None,
|
session: Optional[Union[Session, CurlSession]] = None,
|
||||||
proxy: Optional[str] = None,
|
proxy: Optional[str] = None,
|
||||||
max_workers: Optional[int] = None,
|
max_workers: Optional[int] = None,
|
||||||
license_widevine: Optional[Callable] = None,
|
license_widevine: Optional[Callable] = None,
|
||||||
@@ -238,15 +239,13 @@ class HLS:
|
|||||||
) -> None:
|
) -> None:
|
||||||
if not session:
|
if not session:
|
||||||
session = Session()
|
session = Session()
|
||||||
elif not isinstance(session, (Session, httpx.Client)):
|
elif not isinstance(session, (Session, CurlSession)):
|
||||||
raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {session!r}")
|
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
|
||||||
|
|
||||||
if proxy:
|
if proxy:
|
||||||
# Handle proxies differently based on session type
|
# Handle proxies differently based on session type
|
||||||
if isinstance(session, Session):
|
if isinstance(session, Session):
|
||||||
session.proxies.update({"all": proxy})
|
session.proxies.update({"all": proxy})
|
||||||
elif isinstance(session, httpx.Client):
|
|
||||||
session.proxies = {"http://": proxy, "https://": proxy}
|
|
||||||
|
|
||||||
log = logging.getLogger("HLS")
|
log = logging.getLogger("HLS")
|
||||||
|
|
||||||
@@ -257,13 +256,8 @@ class HLS:
|
|||||||
log.error(f"Failed to request the invariant M3U8 playlist: {response.status_code}")
|
log.error(f"Failed to request the invariant M3U8 playlist: {response.status_code}")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
playlist_text = response.text
|
playlist_text = response.text
|
||||||
elif isinstance(response, httpx.Response):
|
|
||||||
if response.status_code >= 400:
|
|
||||||
log.error(f"Failed to request the invariant M3U8 playlist: {response.status_code}")
|
|
||||||
sys.exit(1)
|
|
||||||
playlist_text = response.text
|
|
||||||
else:
|
else:
|
||||||
raise TypeError(f"Expected response to be a requests.Response or httpx.Response, not {type(response)}")
|
raise TypeError(f"Expected response to be a requests.Response or curl_cffi.Response, not {type(response)}")
|
||||||
|
|
||||||
master = m3u8.loads(playlist_text, uri=track.url)
|
master = m3u8.loads(playlist_text, uri=track.url)
|
||||||
|
|
||||||
@@ -533,13 +527,9 @@ class HLS:
|
|||||||
if isinstance(res, requests.Response):
|
if isinstance(res, requests.Response):
|
||||||
res.raise_for_status()
|
res.raise_for_status()
|
||||||
init_content = res.content
|
init_content = res.content
|
||||||
elif isinstance(res, httpx.Response):
|
|
||||||
if res.status_code >= 400:
|
|
||||||
raise requests.HTTPError(f"HTTP Error: {res.status_code}", response=res)
|
|
||||||
init_content = res.content
|
|
||||||
else:
|
else:
|
||||||
raise TypeError(
|
raise TypeError(
|
||||||
f"Expected response to be requests.Response or httpx.Response, not {type(res)}"
|
f"Expected response to be requests.Response or curl_cffi.Response, not {type(res)}"
|
||||||
)
|
)
|
||||||
|
|
||||||
map_data = (segment.init_section, init_content)
|
map_data = (segment.init_section, init_content)
|
||||||
@@ -707,7 +697,7 @@ class HLS:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def parse_session_data_keys(
|
def parse_session_data_keys(
|
||||||
manifest: M3U8, session: Optional[Union[Session, httpx.Client]] = None
|
manifest: M3U8, session: Optional[Union[Session, CurlSession]] = None
|
||||||
) -> list[m3u8.model.Key]:
|
) -> list[m3u8.model.Key]:
|
||||||
"""Parse `com.apple.hls.keys` session data and return Key objects."""
|
"""Parse `com.apple.hls.keys` session data and return Key objects."""
|
||||||
keys: list[m3u8.model.Key] = []
|
keys: list[m3u8.model.Key] = []
|
||||||
@@ -798,7 +788,8 @@ class HLS:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_drm(
|
def get_drm(
|
||||||
key: Union[m3u8.model.SessionKey, m3u8.model.Key], session: Optional[Union[Session, httpx.Client]] = None
|
key: Union[m3u8.model.SessionKey, m3u8.model.Key],
|
||||||
|
session: Optional[Union[Session, CurlSession]] = None,
|
||||||
) -> DRM_T:
|
) -> DRM_T:
|
||||||
"""
|
"""
|
||||||
Convert HLS EXT-X-KEY data to an initialized DRM object.
|
Convert HLS EXT-X-KEY data to an initialized DRM object.
|
||||||
@@ -810,8 +801,8 @@ class HLS:
|
|||||||
|
|
||||||
Raises a NotImplementedError if the key system is not supported.
|
Raises a NotImplementedError if the key system is not supported.
|
||||||
"""
|
"""
|
||||||
if not isinstance(session, (Session, httpx.Client, type(None))):
|
if not isinstance(session, (Session, CurlSession, type(None))):
|
||||||
raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {type(session)}")
|
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {type(session)}")
|
||||||
if not session:
|
if not session:
|
||||||
session = Session()
|
session = Session()
|
||||||
|
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ from pathlib import Path
|
|||||||
from typing import Any, Callable, Optional, Union
|
from typing import Any, Callable, Optional, Union
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
from curl_cffi.requests import Session as CurlSession
|
||||||
from langcodes import Language, tag_is_valid
|
from langcodes import Language, tag_is_valid
|
||||||
from lxml.etree import Element
|
from lxml.etree import Element
|
||||||
from pyplayready.system.pssh import PSSH as PR_PSSH
|
from pyplayready.system.pssh import PSSH as PR_PSSH
|
||||||
@@ -34,11 +35,13 @@ class ISM:
|
|||||||
self.url = url
|
self.url = url
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_url(cls, url: str, session: Optional[Session] = None, **kwargs: Any) -> "ISM":
|
def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **kwargs: Any) -> "ISM":
|
||||||
if not url:
|
if not url:
|
||||||
raise requests.URLRequired("ISM manifest URL must be provided")
|
raise requests.URLRequired("ISM manifest URL must be provided")
|
||||||
if not session:
|
if not session:
|
||||||
session = Session()
|
session = Session()
|
||||||
|
elif not isinstance(session, (Session, CurlSession)):
|
||||||
|
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
|
||||||
res = session.get(url, **kwargs)
|
res = session.get(url, **kwargs)
|
||||||
if res.url != url:
|
if res.url != url:
|
||||||
url = res.url
|
url = res.url
|
||||||
|
|||||||
@@ -2,9 +2,10 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import Optional
|
from typing import Optional, Union
|
||||||
|
|
||||||
import m3u8
|
import m3u8
|
||||||
|
from curl_cffi.requests import Session as CurlSession
|
||||||
from requests import Session
|
from requests import Session
|
||||||
|
|
||||||
from unshackle.core.manifests.hls import HLS
|
from unshackle.core.manifests.hls import HLS
|
||||||
@@ -15,7 +16,7 @@ def parse(
|
|||||||
master: m3u8.M3U8,
|
master: m3u8.M3U8,
|
||||||
language: str,
|
language: str,
|
||||||
*,
|
*,
|
||||||
session: Optional[Session] = None,
|
session: Optional[Union[Session, CurlSession]] = None,
|
||||||
) -> Tracks:
|
) -> Tracks:
|
||||||
"""Parse a variant playlist to ``Tracks`` with basic information, defer DRM loading."""
|
"""Parse a variant playlist to ``Tracks`` with basic information, defer DRM loading."""
|
||||||
tracks = HLS(master, session=session).to_tracks(language)
|
tracks = HLS(master, session=session).to_tracks(language)
|
||||||
|
|||||||
79
unshackle/core/session.py
Normal file
79
unshackle/core/session.py
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
"""Session utilities for creating HTTP sessions with different backends."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
from curl_cffi.requests import Session as CurlSession
|
||||||
|
|
||||||
|
from unshackle.core.config import config
|
||||||
|
|
||||||
|
# Some proxy providers (like NordVPN) require HTTPS proxy URLs, while curl_cffi
# expects the HTTP format and emits a RuntimeWarning about it. Install a
# process-wide filter that ignores that one message when it is attributed to a
# curl_cffi module.
warnings.filterwarnings(
    action="ignore",
    message="Make sure you are using https over https proxy.*",
    category=RuntimeWarning,
    module="curl_cffi.*",
)
|
||||||
|
|
||||||
|
|
||||||
|
class Session(CurlSession):
    """curl_cffi session that mutes the HTTPS-proxy RuntimeWarning on every request."""

    def request(self, method, url, **kwargs):
        """
        Forward the call to ``CurlSession.request`` with the proxy warning ignored.

        The filter is installed inside ``warnings.catch_warnings()``, so the
        warning filters that were active before the call are restored once the
        request has returned or raised.
        """
        # NOTE: warnings.catch_warnings() swaps the global filter list and is
        # documented as not thread-safe.
        with warnings.catch_warnings():
            warnings.filterwarnings(
                action="ignore",
                message="Make sure you are using https over https proxy.*",
                category=RuntimeWarning,
            )
            response = super().request(method, url, **kwargs)
        return response
|
||||||
|
|
||||||
|
|
||||||
|
def session(browser: str | None = None, **kwargs) -> Session:
    """
    Create a curl_cffi session that impersonates a browser.

    The returned object is intended as a drop-in replacement for
    requests.Session with browser impersonation and anti-bot capabilities;
    curl_cffi uses curl-impersonate under the hood to mimic real browsers.

    Args:
        browser: Browser to impersonate (e.g. "chrome124", "firefox", "safari").
            When None, the value of ``config.curl_impersonate["browser"]`` is
            used, falling back to "chrome124" if that key is absent.
            See https://github.com/lexiforest/curl_cffi#sessions for available options.
        **kwargs: Additional arguments passed to the session constructor. They
            override the defaults set here (timeout 30.0, allow_redirects True,
            max_redirects 15, verify True), for example:
            - headers: Additional headers (dict); these take precedence over
              same-named entries in ``config.headers``
            - cookies: Cookie jar or dict
            - auth: HTTP basic auth tuple (username, password)
            - proxies: Proxy configuration dict
            - verify: SSL certificate verification (bool, default True)
            - timeout: Request timeout in seconds (float or tuple)
            - allow_redirects: Follow redirects (bool, default True)
            - max_redirects: Maximum redirect count (int)
            - cert: Client certificate (str or tuple)

    Returns:
        A Session configured with browser impersonation and the common headers
        from ``config.headers``. No retry policy is configured by this function.

    Example:
        from unshackle.core.session import session

        class MyService(Service):
            @staticmethod
            def get_session():
                return session()  # Uses config default browser
    """
    if browser is None:
        browser = config.curl_impersonate.get("browser", "chrome124")

    # Hold the caller's headers back so they can be layered on top of the
    # configured defaults. Previously config.headers was applied last and
    # silently overwrote any same-named header the caller had asked for.
    caller_headers = kwargs.pop("headers", None)

    session_config = {
        "impersonate": browser,
        "timeout": 30.0,
        "allow_redirects": True,
        "max_redirects": 15,
        "verify": True,
        **kwargs,  # explicit arguments win over the defaults above
    }

    session_obj = Session(**session_config)
    session_obj.headers.update(config.headers)
    if caller_headers:
        session_obj.headers.update(caller_headers)

    return session_obj
|
||||||
@@ -13,6 +13,7 @@ from typing import Any, Callable, Iterable, Optional, Union
|
|||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
from zlib import crc32
|
from zlib import crc32
|
||||||
|
|
||||||
|
from curl_cffi.requests import Session as CurlSession
|
||||||
from langcodes import Language
|
from langcodes import Language
|
||||||
from pyplayready.cdm import Cdm as PlayReadyCdm
|
from pyplayready.cdm import Cdm as PlayReadyCdm
|
||||||
from pywidevine.cdm import Cdm as WidevineCdm
|
from pywidevine.cdm import Cdm as WidevineCdm
|
||||||
@@ -585,8 +586,8 @@ class Track:
|
|||||||
raise TypeError(f"Expected url to be a {str}, not {type(url)}")
|
raise TypeError(f"Expected url to be a {str}, not {type(url)}")
|
||||||
if not isinstance(byte_range, (str, type(None))):
|
if not isinstance(byte_range, (str, type(None))):
|
||||||
raise TypeError(f"Expected byte_range to be a {str}, not {type(byte_range)}")
|
raise TypeError(f"Expected byte_range to be a {str}, not {type(byte_range)}")
|
||||||
if not isinstance(session, (Session, type(None))):
|
if not isinstance(session, (Session, CurlSession, type(None))):
|
||||||
raise TypeError(f"Expected session to be a {Session}, not {type(session)}")
|
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {type(session)}")
|
||||||
|
|
||||||
if not url:
|
if not url:
|
||||||
if self.descriptor != self.Descriptor.URL:
|
if self.descriptor != self.Descriptor.URL:
|
||||||
|
|||||||
Reference in New Issue
Block a user