mirror of
https://github.com/unshackle-dl/unshackle.git
synced 2025-10-23 15:11:08 +00:00
feat: add curl_cffi session support with browser impersonation
- Add a new session utility with curl_cffi support for anti-bot protection.
- Update all manifest parsers (DASH, HLS, ISM, M3U8) to accept curl_cffi sessions.
- Add browser impersonation support (Chrome, Firefox, Safari).
- Fix cookie-handling compatibility between requests and curl_cffi.
- Suppress HTTPS proxy warnings for better UX.
- Maintain full backward compatibility with requests.Session.
This commit is contained in:
@@ -15,6 +15,7 @@ from uuid import UUID
|
||||
from zlib import crc32
|
||||
|
||||
import requests
|
||||
from curl_cffi.requests import Session as CurlSession
|
||||
from langcodes import Language, tag_is_valid
|
||||
from lxml.etree import Element, ElementTree
|
||||
from pyplayready.system.pssh import PSSH as PR_PSSH
|
||||
@@ -47,7 +48,7 @@ class DASH:
|
||||
self.url = url
|
||||
|
||||
@classmethod
|
||||
def from_url(cls, url: str, session: Optional[Session] = None, **args: Any) -> DASH:
|
||||
def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **args: Any) -> DASH:
|
||||
if not url:
|
||||
raise requests.URLRequired("DASH manifest URL must be provided for relative path computations.")
|
||||
if not isinstance(url, str):
|
||||
@@ -55,8 +56,8 @@ class DASH:
|
||||
|
||||
if not session:
|
||||
session = Session()
|
||||
elif not isinstance(session, Session):
|
||||
raise TypeError(f"Expected session to be a {Session}, not {session!r}")
|
||||
elif not isinstance(session, (Session, CurlSession)):
|
||||
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
|
||||
|
||||
res = session.get(url, **args)
|
||||
if res.url != url:
|
||||
@@ -103,6 +104,10 @@ class DASH:
|
||||
continue
|
||||
if next(iter(period.xpath("SegmentType/@value")), "content") != "content":
|
||||
continue
|
||||
if "urn:amazon:primevideo:cachingBreadth" in [
|
||||
x.get("schemeIdUri") for x in period.findall("SupplementalProperty")
|
||||
]:
|
||||
continue
|
||||
|
||||
for adaptation_set in period.findall("AdaptationSet"):
|
||||
if self.is_trick_mode(adaptation_set):
|
||||
|
||||
@@ -14,9 +14,10 @@ from typing import Any, Callable, Optional, Union
|
||||
from urllib.parse import urljoin
|
||||
from zlib import crc32
|
||||
|
||||
import httpx
|
||||
import m3u8
|
||||
import requests
|
||||
from curl_cffi.requests import Response as CurlResponse
|
||||
from curl_cffi.requests import Session as CurlSession
|
||||
from langcodes import Language, tag_is_valid
|
||||
from m3u8 import M3U8
|
||||
from pyplayready.cdm import Cdm as PlayReadyCdm
|
||||
@@ -35,7 +36,7 @@ from unshackle.core.utilities import get_extension, is_close_match, try_ensure_u
|
||||
|
||||
|
||||
class HLS:
|
||||
def __init__(self, manifest: M3U8, session: Optional[Union[Session, httpx.Client]] = None):
|
||||
def __init__(self, manifest: M3U8, session: Optional[Union[Session, CurlSession]] = None):
|
||||
if not manifest:
|
||||
raise ValueError("HLS manifest must be provided.")
|
||||
if not isinstance(manifest, M3U8):
|
||||
@@ -47,7 +48,7 @@ class HLS:
|
||||
self.session = session or Session()
|
||||
|
||||
@classmethod
|
||||
def from_url(cls, url: str, session: Optional[Union[Session, httpx.Client]] = None, **args: Any) -> HLS:
|
||||
def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **args: Any) -> HLS:
|
||||
if not url:
|
||||
raise requests.URLRequired("HLS manifest URL must be provided.")
|
||||
if not isinstance(url, str):
|
||||
@@ -55,22 +56,22 @@ class HLS:
|
||||
|
||||
if not session:
|
||||
session = Session()
|
||||
elif not isinstance(session, (Session, httpx.Client)):
|
||||
raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {session!r}")
|
||||
elif not isinstance(session, (Session, CurlSession)):
|
||||
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
|
||||
|
||||
res = session.get(url, **args)
|
||||
|
||||
# Handle both requests and httpx response objects
|
||||
# Handle requests and curl_cffi response objects
|
||||
if isinstance(res, requests.Response):
|
||||
if not res.ok:
|
||||
raise requests.ConnectionError("Failed to request the M3U(8) document.", response=res)
|
||||
content = res.text
|
||||
elif isinstance(res, httpx.Response):
|
||||
if res.status_code >= 400:
|
||||
elif isinstance(res, CurlResponse):
|
||||
if not res.ok:
|
||||
raise requests.ConnectionError("Failed to request the M3U(8) document.", response=res)
|
||||
content = res.text
|
||||
else:
|
||||
raise TypeError(f"Expected response to be a requests.Response or httpx.Response, not {type(res)}")
|
||||
raise TypeError(f"Expected response to be a requests.Response or curl_cffi.Response, not {type(res)}")
|
||||
|
||||
master = m3u8.loads(content, uri=url)
|
||||
|
||||
@@ -229,7 +230,7 @@ class HLS:
|
||||
save_path: Path,
|
||||
save_dir: Path,
|
||||
progress: partial,
|
||||
session: Optional[Union[Session, httpx.Client]] = None,
|
||||
session: Optional[Union[Session, CurlSession]] = None,
|
||||
proxy: Optional[str] = None,
|
||||
max_workers: Optional[int] = None,
|
||||
license_widevine: Optional[Callable] = None,
|
||||
@@ -238,15 +239,13 @@ class HLS:
|
||||
) -> None:
|
||||
if not session:
|
||||
session = Session()
|
||||
elif not isinstance(session, (Session, httpx.Client)):
|
||||
raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {session!r}")
|
||||
elif not isinstance(session, (Session, CurlSession)):
|
||||
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
|
||||
|
||||
if proxy:
|
||||
# Handle proxies differently based on session type
|
||||
if isinstance(session, Session):
|
||||
session.proxies.update({"all": proxy})
|
||||
elif isinstance(session, httpx.Client):
|
||||
session.proxies = {"http://": proxy, "https://": proxy}
|
||||
|
||||
log = logging.getLogger("HLS")
|
||||
|
||||
@@ -257,13 +256,8 @@ class HLS:
|
||||
log.error(f"Failed to request the invariant M3U8 playlist: {response.status_code}")
|
||||
sys.exit(1)
|
||||
playlist_text = response.text
|
||||
elif isinstance(response, httpx.Response):
|
||||
if response.status_code >= 400:
|
||||
log.error(f"Failed to request the invariant M3U8 playlist: {response.status_code}")
|
||||
sys.exit(1)
|
||||
playlist_text = response.text
|
||||
else:
|
||||
raise TypeError(f"Expected response to be a requests.Response or httpx.Response, not {type(response)}")
|
||||
raise TypeError(f"Expected response to be a requests.Response or curl_cffi.Response, not {type(response)}")
|
||||
|
||||
master = m3u8.loads(playlist_text, uri=track.url)
|
||||
|
||||
@@ -533,13 +527,9 @@ class HLS:
|
||||
if isinstance(res, requests.Response):
|
||||
res.raise_for_status()
|
||||
init_content = res.content
|
||||
elif isinstance(res, httpx.Response):
|
||||
if res.status_code >= 400:
|
||||
raise requests.HTTPError(f"HTTP Error: {res.status_code}", response=res)
|
||||
init_content = res.content
|
||||
else:
|
||||
raise TypeError(
|
||||
f"Expected response to be requests.Response or httpx.Response, not {type(res)}"
|
||||
f"Expected response to be requests.Response or curl_cffi.Response, not {type(res)}"
|
||||
)
|
||||
|
||||
map_data = (segment.init_section, init_content)
|
||||
@@ -707,7 +697,7 @@ class HLS:
|
||||
|
||||
@staticmethod
|
||||
def parse_session_data_keys(
|
||||
manifest: M3U8, session: Optional[Union[Session, httpx.Client]] = None
|
||||
manifest: M3U8, session: Optional[Union[Session, CurlSession]] = None
|
||||
) -> list[m3u8.model.Key]:
|
||||
"""Parse `com.apple.hls.keys` session data and return Key objects."""
|
||||
keys: list[m3u8.model.Key] = []
|
||||
@@ -798,7 +788,8 @@ class HLS:
|
||||
|
||||
@staticmethod
|
||||
def get_drm(
|
||||
key: Union[m3u8.model.SessionKey, m3u8.model.Key], session: Optional[Union[Session, httpx.Client]] = None
|
||||
key: Union[m3u8.model.SessionKey, m3u8.model.Key],
|
||||
session: Optional[Union[Session, CurlSession]] = None,
|
||||
) -> DRM_T:
|
||||
"""
|
||||
Convert HLS EXT-X-KEY data to an initialized DRM object.
|
||||
@@ -810,8 +801,8 @@ class HLS:
|
||||
|
||||
Raises a NotImplementedError if the key system is not supported.
|
||||
"""
|
||||
if not isinstance(session, (Session, httpx.Client, type(None))):
|
||||
raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {type(session)}")
|
||||
if not isinstance(session, (Session, CurlSession, type(None))):
|
||||
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {type(session)}")
|
||||
if not session:
|
||||
session = Session()
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ from pathlib import Path
|
||||
from typing import Any, Callable, Optional, Union
|
||||
|
||||
import requests
|
||||
from curl_cffi.requests import Session as CurlSession
|
||||
from langcodes import Language, tag_is_valid
|
||||
from lxml.etree import Element
|
||||
from pyplayready.system.pssh import PSSH as PR_PSSH
|
||||
@@ -34,11 +35,13 @@ class ISM:
|
||||
self.url = url
|
||||
|
||||
@classmethod
|
||||
def from_url(cls, url: str, session: Optional[Session] = None, **kwargs: Any) -> "ISM":
|
||||
def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **kwargs: Any) -> "ISM":
|
||||
if not url:
|
||||
raise requests.URLRequired("ISM manifest URL must be provided")
|
||||
if not session:
|
||||
session = Session()
|
||||
elif not isinstance(session, (Session, CurlSession)):
|
||||
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
|
||||
res = session.get(url, **kwargs)
|
||||
if res.url != url:
|
||||
url = res.url
|
||||
|
||||
@@ -2,9 +2,10 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
from typing import Optional, Union
|
||||
|
||||
import m3u8
|
||||
from curl_cffi.requests import Session as CurlSession
|
||||
from requests import Session
|
||||
|
||||
from unshackle.core.manifests.hls import HLS
|
||||
@@ -15,7 +16,7 @@ def parse(
|
||||
master: m3u8.M3U8,
|
||||
language: str,
|
||||
*,
|
||||
session: Optional[Session] = None,
|
||||
session: Optional[Union[Session, CurlSession]] = None,
|
||||
) -> Tracks:
|
||||
"""Parse a variant playlist to ``Tracks`` with basic information, defer DRM loading."""
|
||||
tracks = HLS(master, session=session).to_tracks(language)
|
||||
|
||||
Reference in New Issue
Block a user