mirror of
https://github.com/unshackle-dl/unshackle.git
synced 2025-10-23 15:11:08 +00:00
Add new session utility with curl_cffi support for anti-bot protection Update all manifest parsers (DASH, HLS, ISM, M3U8) to accept curl_cffi sessions Add browser impersonation support (Chrome, Firefox, Safari) Fix cookie handling compatibility between requests and curl_cffi Suppress HTTPS proxy warnings for better UX Maintain full backward compatibility with requests.Session
339 lines
14 KiB
Python
339 lines
14 KiB
Python
from __future__ import annotations
|
|
|
|
import base64
|
|
import hashlib
|
|
import html
|
|
import shutil
|
|
import urllib.parse
|
|
from functools import partial
|
|
from pathlib import Path
|
|
from typing import Any, Callable, Optional, Union
|
|
|
|
import requests
|
|
from curl_cffi.requests import Session as CurlSession
|
|
from langcodes import Language, tag_is_valid
|
|
from lxml.etree import Element
|
|
from pyplayready.system.pssh import PSSH as PR_PSSH
|
|
from pywidevine.pssh import PSSH
|
|
from requests import Session
|
|
|
|
from unshackle.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY, AnyTrack
|
|
from unshackle.core.drm import DRM_T, PlayReady, Widevine
|
|
from unshackle.core.events import events
|
|
from unshackle.core.tracks import Audio, Subtitle, Track, Tracks, Video
|
|
from unshackle.core.utilities import try_ensure_utf8
|
|
from unshackle.core.utils.xml import load_xml
|
|
|
|
|
|
class ISM:
|
|
def __init__(self, manifest: Element, url: str) -> None:
|
|
if manifest.tag != "SmoothStreamingMedia":
|
|
raise TypeError(f"Expected 'SmoothStreamingMedia' document, got '{manifest.tag}'")
|
|
if not url:
|
|
raise requests.URLRequired("ISM manifest URL must be provided for relative paths")
|
|
self.manifest = manifest
|
|
self.url = url
|
|
|
|
@classmethod
|
|
def from_url(cls, url: str, session: Optional[Union[Session, CurlSession]] = None, **kwargs: Any) -> "ISM":
|
|
if not url:
|
|
raise requests.URLRequired("ISM manifest URL must be provided")
|
|
if not session:
|
|
session = Session()
|
|
elif not isinstance(session, (Session, CurlSession)):
|
|
raise TypeError(f"Expected session to be a {Session} or {CurlSession}, not {session!r}")
|
|
res = session.get(url, **kwargs)
|
|
if res.url != url:
|
|
url = res.url
|
|
res.raise_for_status()
|
|
return cls(load_xml(res.content), url)
|
|
|
|
@classmethod
|
|
def from_text(cls, text: str, url: str) -> "ISM":
|
|
if not text:
|
|
raise ValueError("ISM manifest text must be provided")
|
|
if not url:
|
|
raise requests.URLRequired("ISM manifest URL must be provided for relative paths")
|
|
return cls(load_xml(text), url)
|
|
|
|
@staticmethod
|
|
def _get_drm(headers: list[Element]) -> list[DRM_T]:
|
|
drm: list[DRM_T] = []
|
|
for header in headers:
|
|
system_id = (header.get("SystemID") or header.get("SystemId") or "").lower()
|
|
data = "".join(header.itertext()).strip()
|
|
if not data:
|
|
continue
|
|
if system_id == "edef8ba9-79d6-4ace-a3c8-27dcd51d21ed":
|
|
try:
|
|
pssh = PSSH(base64.b64decode(data))
|
|
except Exception:
|
|
continue
|
|
kid = next(iter(pssh.key_ids), None)
|
|
drm.append(Widevine(pssh=pssh, kid=kid))
|
|
elif system_id == "9a04f079-9840-4286-ab92-e65be0885f95":
|
|
try:
|
|
pr_pssh = PR_PSSH(data)
|
|
except Exception:
|
|
continue
|
|
drm.append(PlayReady(pssh=pr_pssh, pssh_b64=data))
|
|
return drm
|
|
|
|
def to_tracks(self, language: Optional[Union[str, Language]] = None) -> Tracks:
|
|
tracks = Tracks()
|
|
base_url = self.url
|
|
duration = int(self.manifest.get("Duration") or 0)
|
|
drm = self._get_drm(self.manifest.xpath(".//ProtectionHeader"))
|
|
|
|
for stream_index in self.manifest.findall("StreamIndex"):
|
|
content_type = stream_index.get("Type")
|
|
if not content_type:
|
|
raise ValueError("No content type value could be found")
|
|
for ql in stream_index.findall("QualityLevel"):
|
|
codec = ql.get("FourCC")
|
|
if codec == "TTML":
|
|
codec = "STPP"
|
|
track_lang = None
|
|
lang = (stream_index.get("Language") or "").strip()
|
|
if lang and tag_is_valid(lang) and not lang.startswith("und"):
|
|
track_lang = Language.get(lang)
|
|
|
|
track_urls: list[str] = []
|
|
fragment_time = 0
|
|
fragments = stream_index.findall("c")
|
|
# Some manifests omit the first fragment in the <c> list but
|
|
# still expect a request for start time 0 which contains the
|
|
# initialization segment. If the first declared fragment is not
|
|
# at time 0, prepend the missing initialization URL.
|
|
if fragments:
|
|
first_time = int(fragments[0].get("t") or 0)
|
|
if first_time != 0:
|
|
track_urls.append(
|
|
urllib.parse.urljoin(
|
|
base_url,
|
|
stream_index.get("Url").format_map(
|
|
{
|
|
"bitrate": ql.get("Bitrate"),
|
|
"start time": "0",
|
|
}
|
|
),
|
|
)
|
|
)
|
|
|
|
for idx, frag in enumerate(fragments):
|
|
fragment_time = int(frag.get("t", fragment_time))
|
|
repeat = int(frag.get("r", 1))
|
|
duration_frag = int(frag.get("d") or 0)
|
|
if not duration_frag:
|
|
try:
|
|
next_time = int(fragments[idx + 1].get("t"))
|
|
except (IndexError, AttributeError):
|
|
next_time = duration
|
|
duration_frag = (next_time - fragment_time) / repeat
|
|
for _ in range(repeat):
|
|
track_urls.append(
|
|
urllib.parse.urljoin(
|
|
base_url,
|
|
stream_index.get("Url").format_map(
|
|
{
|
|
"bitrate": ql.get("Bitrate"),
|
|
"start time": str(fragment_time),
|
|
}
|
|
),
|
|
)
|
|
)
|
|
fragment_time += duration_frag
|
|
|
|
track_id = hashlib.md5(
|
|
f"{codec}-{track_lang}-{ql.get('Bitrate') or 0}-{ql.get('Index') or 0}".encode()
|
|
).hexdigest()
|
|
|
|
data = {
|
|
"ism": {
|
|
"manifest": self.manifest,
|
|
"stream_index": stream_index,
|
|
"quality_level": ql,
|
|
"segments": track_urls,
|
|
}
|
|
}
|
|
|
|
if content_type == "video":
|
|
try:
|
|
vcodec = Video.Codec.from_mime(codec) if codec else None
|
|
except ValueError:
|
|
vcodec = None
|
|
tracks.add(
|
|
Video(
|
|
id_=track_id,
|
|
url=self.url,
|
|
codec=vcodec,
|
|
language=track_lang or language,
|
|
is_original_lang=bool(language and track_lang and str(track_lang) == str(language)),
|
|
bitrate=ql.get("Bitrate"),
|
|
width=int(ql.get("MaxWidth") or 0) or int(stream_index.get("MaxWidth") or 0),
|
|
height=int(ql.get("MaxHeight") or 0) or int(stream_index.get("MaxHeight") or 0),
|
|
descriptor=Video.Descriptor.ISM,
|
|
drm=drm,
|
|
data=data,
|
|
)
|
|
)
|
|
elif content_type == "audio":
|
|
try:
|
|
acodec = Audio.Codec.from_mime(codec) if codec else None
|
|
except ValueError:
|
|
acodec = None
|
|
tracks.add(
|
|
Audio(
|
|
id_=track_id,
|
|
url=self.url,
|
|
codec=acodec,
|
|
language=track_lang or language,
|
|
is_original_lang=bool(language and track_lang and str(track_lang) == str(language)),
|
|
bitrate=ql.get("Bitrate"),
|
|
channels=ql.get("Channels"),
|
|
descriptor=Track.Descriptor.ISM,
|
|
drm=drm,
|
|
data=data,
|
|
)
|
|
)
|
|
else:
|
|
try:
|
|
scodec = Subtitle.Codec.from_mime(codec) if codec else None
|
|
except ValueError:
|
|
scodec = None
|
|
tracks.add(
|
|
Subtitle(
|
|
id_=track_id,
|
|
url=self.url,
|
|
codec=scodec,
|
|
language=track_lang or language,
|
|
is_original_lang=bool(language and track_lang and str(track_lang) == str(language)),
|
|
descriptor=Track.Descriptor.ISM,
|
|
drm=drm,
|
|
data=data,
|
|
)
|
|
)
|
|
return tracks
|
|
|
|
@staticmethod
|
|
def download_track(
|
|
track: AnyTrack,
|
|
save_path: Path,
|
|
save_dir: Path,
|
|
progress: partial,
|
|
session: Optional[Session] = None,
|
|
proxy: Optional[str] = None,
|
|
max_workers: Optional[int] = None,
|
|
license_widevine: Optional[Callable] = None,
|
|
*,
|
|
cdm: Optional[object] = None,
|
|
) -> None:
|
|
if not session:
|
|
session = Session()
|
|
elif not isinstance(session, Session):
|
|
raise TypeError(f"Expected session to be a {Session}, not {session!r}")
|
|
|
|
if proxy:
|
|
session.proxies.update({"all": proxy})
|
|
|
|
segments: list[str] = track.data["ism"]["segments"]
|
|
|
|
session_drm = None
|
|
if track.drm:
|
|
# Mirror HLS.download_track: pick the DRM matching the provided CDM
|
|
# (or the first available) and license it if supported.
|
|
session_drm = track.get_drm_for_cdm(cdm)
|
|
if isinstance(session_drm, (Widevine, PlayReady)):
|
|
try:
|
|
if not license_widevine:
|
|
raise ValueError("license_widevine func must be supplied to use DRM")
|
|
progress(downloaded="LICENSING")
|
|
license_widevine(session_drm)
|
|
progress(downloaded="[yellow]LICENSED")
|
|
except Exception:
|
|
DOWNLOAD_CANCELLED.set()
|
|
progress(downloaded="[red]FAILED")
|
|
raise
|
|
|
|
if DOWNLOAD_LICENCE_ONLY.is_set():
|
|
progress(downloaded="[yellow]SKIPPED")
|
|
return
|
|
|
|
progress(total=len(segments))
|
|
|
|
downloader = track.downloader
|
|
skip_merge = False
|
|
downloader_args = dict(
|
|
urls=[{"url": url} for url in segments],
|
|
output_dir=save_dir,
|
|
filename="{i:0%d}.mp4" % len(str(len(segments))),
|
|
headers=session.headers,
|
|
cookies=session.cookies,
|
|
proxy=proxy,
|
|
max_workers=max_workers,
|
|
)
|
|
|
|
if downloader.__name__ == "n_m3u8dl_re":
|
|
skip_merge = True
|
|
downloader_args.update(
|
|
{
|
|
"filename": track.id,
|
|
"track": track,
|
|
"content_keys": session_drm.content_keys if session_drm else None,
|
|
}
|
|
)
|
|
|
|
for status_update in downloader(**downloader_args):
|
|
file_downloaded = status_update.get("file_downloaded")
|
|
if file_downloaded:
|
|
events.emit(events.Types.SEGMENT_DOWNLOADED, track=track, segment=file_downloaded)
|
|
else:
|
|
downloaded = status_update.get("downloaded")
|
|
if downloaded and downloaded.endswith("/s"):
|
|
status_update["downloaded"] = f"ISM {downloaded}"
|
|
progress(**status_update)
|
|
|
|
for control_file in save_dir.glob("*.aria2__temp"):
|
|
control_file.unlink()
|
|
|
|
segments_to_merge = [x for x in sorted(save_dir.iterdir()) if x.is_file()]
|
|
|
|
if skip_merge:
|
|
shutil.move(segments_to_merge[0], save_path)
|
|
else:
|
|
with open(save_path, "wb") as f:
|
|
for segment_file in segments_to_merge:
|
|
segment_data = segment_file.read_bytes()
|
|
if (
|
|
not session_drm
|
|
and isinstance(track, Subtitle)
|
|
and track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML)
|
|
):
|
|
segment_data = try_ensure_utf8(segment_data)
|
|
segment_data = (
|
|
segment_data.decode("utf8")
|
|
.replace("‎", html.unescape("‎"))
|
|
.replace("‏", html.unescape("‏"))
|
|
.encode("utf8")
|
|
)
|
|
f.write(segment_data)
|
|
f.flush()
|
|
segment_file.unlink()
|
|
progress(advance=1)
|
|
|
|
track.path = save_path
|
|
events.emit(events.Types.TRACK_DOWNLOADED, track=track)
|
|
|
|
if not skip_merge and session_drm:
|
|
progress(downloaded="Decrypting", completed=0, total=100)
|
|
session_drm.decrypt(save_path)
|
|
track.drm = None
|
|
events.emit(events.Types.TRACK_DECRYPTED, track=track, drm=session_drm, segment=None)
|
|
progress(downloaded="Decrypting", advance=100)
|
|
|
|
save_dir.rmdir()
|
|
progress(downloaded="Downloaded")
|
|
|
|
|
|
__all__ = ("ISM",)
|