Initial Commit

commit d37014f53f
Author: Andy
Date: 2025-07-18 00:46:05 +00:00
94 changed files with 17458 additions and 0 deletions

unshackle/__main__.py (new file, 4 lines)

@@ -0,0 +1,4 @@
if __name__ == "__main__":
from unshackle.core.__main__ import main
main()

unshackle/commands/cfg.py (new file, 90 lines)

@@ -0,0 +1,90 @@
import ast
import logging
import sys
import click
from ruamel.yaml import YAML
from unshackle.core.config import config, get_config_path
from unshackle.core.constants import context_settings
@click.command(
short_help="Manage configuration values for the program and its services.", context_settings=context_settings
)
@click.argument("key", type=str, required=False)
@click.argument("value", type=str, required=False)
@click.option("--unset", is_flag=True, default=False, help="Unset/remove the configuration value.")
@click.option("--list", "list_", is_flag=True, default=False, help="List all set configuration values.")
@click.pass_context
def cfg(ctx: click.Context, key: str, value: str, unset: bool, list_: bool) -> None:
"""
Manage configuration values for the program and its services.
\b
Known Issues:
- Config changes remove all comments from the changed files, which may hold critical data. (#14)
"""
if not key and not value and not list_:
raise click.UsageError("Nothing to do.", ctx)
if value:
try:
value = ast.literal_eval(value)
except (ValueError, SyntaxError):
pass # probably a str without quotes or similar, assume it's a string value
log = logging.getLogger("cfg")
yaml, data = YAML(), None
yaml.default_flow_style = False
config_path = get_config_path() or config.directories.user_configs / config.filenames.root_config
if config_path.exists():
data = yaml.load(config_path)
if not data:
log.warning("No config file was found, or it has no data yet")
# yaml.load() returns `None` if the input data is blank instead of a usable object
# force a usable object by making one and removing the only item within it
data = yaml.load("""__TEMP__: null""")
del data["__TEMP__"]
if list_:
yaml.dump(data, sys.stdout)
return
key_items = key.split(".")
parent_key = key_items[:-1]
trailing_key = key_items[-1]
is_write = value is not None
is_delete = unset
if is_write and is_delete:
raise click.ClickException("You cannot set a value and use --unset at the same time.")
if not is_write and not is_delete:
data = data.mlget(key_items, default=KeyError)
if data == KeyError:
raise click.ClickException(f"Key '{key}' does not exist in the config.")
yaml.dump(data, sys.stdout)
else:
try:
parent_data = data
if parent_key:
parent_data = data.mlget(parent_key, default=data)
if parent_data == data:
for key in parent_key:
if not hasattr(parent_data, key):
parent_data[key] = {}
parent_data = parent_data[key]
if is_write:
parent_data[trailing_key] = value
log.info(f"Set {key} to {repr(value)}")
elif is_delete:
del parent_data[trailing_key]
log.info(f"Unset {key}")
except KeyError:
raise click.ClickException(f"Key '{key}' does not exist in the config.")
config_path.parent.mkdir(parents=True, exist_ok=True)
yaml.dump(data, config_path)
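
For illustration, a minimal standalone sketch of the value coercion used above: `ast.literal_eval` parses anything that is a valid Python literal and falls back to the raw string otherwise (the `coerce` helper is hypothetical, not part of the commit):

import ast

def coerce(value: str):
    # Mirror cfg's behavior: parse Python literals, otherwise keep the raw string.
    try:
        return ast.literal_eval(value)
    except (ValueError, SyntaxError):
        return value

assert coerce("300") == 300                 # int literal
assert coerce("True") is True               # bool literal
assert coerce("['a', 'b']") == ["a", "b"]   # list literal
assert coerce("true") == "true"             # not a Python literal, stays a string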

unshackle/commands/dl.py (new file, 1250 lines)

File diff suppressed because it is too large.

unshackle/commands/env.py (new file, 139 lines)

@@ -0,0 +1,139 @@
import logging
import os
import shutil
import sys
from pathlib import Path
from typing import Optional
import click
from rich.padding import Padding
from rich.table import Table
from rich.tree import Tree
from unshackle.core.config import POSSIBLE_CONFIG_PATHS, config, config_path
from unshackle.core.console import console
from unshackle.core.constants import context_settings
from unshackle.core.services import Services
from unshackle.core.utils.osenvironment import get_os_arch
@click.group(short_help="Manage and configure the project environment.", context_settings=context_settings)
def env() -> None:
"""Manage and configure the project environment."""
@env.command()
def check() -> None:
"""Checks environment for the required dependencies."""
table = Table(title="Dependencies", expand=True)
table.add_column("Name", no_wrap=True)
table.add_column("Installed", justify="center")
table.add_column("Path", no_wrap=False, overflow="fold")
# build the shaka-packager binary name for the current OS and architecture
packager_dep = get_os_arch("packager")
# Helper function to find binary with multiple possible names
def find_binary(*names):
for name in names:
if shutil.which(name):
return name
return names[0] # Return first name as fallback for display
dependencies = [
{"name": "CCExtractor", "binary": "ccextractor"},
{"name": "FFMpeg", "binary": "ffmpeg"},
{"name": "MKVToolNix", "binary": "mkvmerge"},
{"name": "Shaka-Packager", "binary": packager_dep},
{"name": "N_m3u8DL-RE", "binary": find_binary("N_m3u8DL-RE", "n-m3u8dl-re")},
{"name": "Aria2(c)", "binary": "aria2c"},
]
for dep in dependencies:
path = shutil.which(dep["binary"])
if path:
installed = "[green]:heavy_check_mark:[/green]"
path_output = path.lower()
else:
installed = "[red]:x:[/red]"
path_output = "Not Found"
# Add to the table
table.add_row(dep["name"], installed, path_output)
# Display the result
console.print(Padding(table, (1, 5)))
@env.command()
def info() -> None:
"""Displays information about the current environment."""
log = logging.getLogger("env")
if config_path:
log.info(f"Config loaded from {config_path}")
else:
tree = Tree("No config file found, you can use any of the following locations:")
for i, path in enumerate(POSSIBLE_CONFIG_PATHS, start=1):
tree.add(f"[repr.number]{i}.[/] [text2]{path.resolve()}[/]")
console.print(Padding(tree, (0, 5)))
table = Table(title="Directories", expand=True)
table.add_column("Name", no_wrap=True)
table.add_column("Path", no_wrap=False, overflow="fold")
path_vars = {
x: Path(os.getenv(x))
for x in ("TEMP", "APPDATA", "LOCALAPPDATA", "USERPROFILE")
if sys.platform == "win32" and os.getenv(x)
}
for name in sorted(dir(config.directories)):
if name.startswith("__") or name == "app_dirs":
continue
path = getattr(config.directories, name).resolve()
for var, var_path in path_vars.items():
if path.is_relative_to(var_path):
path = rf"%{var}%\{path.relative_to(var_path)}"
break
table.add_row(name.title(), str(path))
console.print(Padding(table, (1, 5)))
@env.group(name="clear", short_help="Clear an environment directory.", context_settings=context_settings)
def clear() -> None:
"""Clear an environment directory."""
@clear.command()
@click.argument("service", type=str, required=False)
def cache(service: Optional[str]) -> None:
"""Clear the environment cache directory."""
log = logging.getLogger("env")
cache_dir = config.directories.cache
if service:
cache_dir = cache_dir / Services.get_tag(service)
log.info(f"Clearing cache directory: {cache_dir}")
files_count = len(list(cache_dir.glob("**/*")))
if not files_count:
log.info("No files to delete")
else:
log.info(f"Deleting {files_count} files...")
shutil.rmtree(cache_dir)
log.info("Cleared")
@clear.command()
def temp() -> None:
"""Clear the environment temp directory."""
log = logging.getLogger("env")
log.info(f"Clearing temp directory: {config.directories.temp}")
files_count = len(list(config.directories.temp.glob("**/*")))
if not files_count:
log.info("No files to delete")
else:
log.info(f"Deleting {files_count} files...")
shutil.rmtree(config.directories.temp)
log.info("Cleared")

unshackle/commands/kv.py (new file, 200 lines)

@@ -0,0 +1,200 @@
import logging
import re
from pathlib import Path
from typing import Optional
import click
from unshackle.core.config import config
from unshackle.core.constants import context_settings
from unshackle.core.services import Services
from unshackle.core.vault import Vault
from unshackle.core.vaults import Vaults
@click.group(short_help="Manage and configure Key Vaults.", context_settings=context_settings)
def kv() -> None:
"""Manage and configure Key Vaults."""
@kv.command()
@click.argument("to_vault", type=str)
@click.argument("from_vaults", nargs=-1, type=click.UNPROCESSED)
@click.option("-s", "--service", type=str, default=None, help="Only copy data to and from a specific service.")
def copy(to_vault: str, from_vaults: list[str], service: Optional[str] = None) -> None:
"""
Copy data from multiple Key Vaults into a single Key Vault.
Rows with matching KIDs are skipped unless there's no KEY set.
Existing data is not deleted or altered.
The `to_vault` argument is the key vault you wish to copy data to.
It should be the name of a Key Vault defined in the config.
The `from_vaults` argument is the key vault(s) you wish to take
data from. You may supply multiple key vaults.
"""
if not from_vaults:
raise click.ClickException("No Vaults were specified to copy data from.")
log = logging.getLogger("kv")
vaults = Vaults()
for vault_name in [to_vault] + list(from_vaults):
vault = next((x for x in config.key_vaults if x["name"] == vault_name), None)
if not vault:
raise click.ClickException(f"Vault ({vault_name}) is not defined in the config.")
vault_type = vault["type"]
vault_args = vault.copy()
del vault_args["type"]
vaults.load(vault_type, **vault_args)
to_vault: Vault = vaults.vaults[0]
from_vaults: list[Vault] = vaults.vaults[1:]
log.info(f"Copying data from {', '.join([x.name for x in from_vaults])}, into {to_vault.name}")
if service:
service = Services.get_tag(service)
log.info(f"Only copying data for service {service}")
total_added = 0
for from_vault in from_vaults:
if service:
services = [service]
else:
services = from_vault.get_services()
for service_ in services:
log.info(f"Getting data from {from_vault} for {service_}")
content_keys = list(from_vault.get_keys(service_)) # important as it's a generator we iterate twice
bad_keys = {kid: key for kid, key in content_keys if not key or key.count("0") == len(key)}
for kid, key in bad_keys.items():
log.warning(f"Cannot add a NULL Content Key to a Vault, skipping: {kid}:{key}")
content_keys = {kid: key for kid, key in content_keys if kid not in bad_keys}
total_count = len(content_keys)
log.info(f"Adding {total_count} Content Keys to {to_vault} for {service_}")
try:
added = to_vault.add_keys(service_, content_keys)
except PermissionError:
log.warning(f" - No permission to create table ({service_}) in {to_vault}, skipping...")
continue
total_added += added
existed = total_count - added
log.info(f"{to_vault} ({service_}): {added} newly added, {existed} already existed (skipped)")
log.info(f"{to_vault}: {total_added} total newly added")
@kv.command()
@click.argument("vaults", nargs=-1, type=click.UNPROCESSED)
@click.option("-s", "--service", type=str, default=None, help="Only sync data to and from a specific service.")
@click.pass_context
def sync(ctx: click.Context, vaults: list[str], service: Optional[str] = None) -> None:
"""
Ensure multiple Key Vaults copies of all keys as each other.
It's essentially just a bi-way copy between each vault.
To see the precise details of what it's doing between each
provided vault, see the documentation for the `copy` command.
"""
if not len(vaults) > 1:
raise click.ClickException("You must provide more than one Vault to sync.")
ctx.invoke(copy, to_vault=vaults[0], from_vaults=vaults[1:], service=service)
for i in range(1, len(vaults)):
ctx.invoke(copy, to_vault=vaults[i], from_vaults=[vaults[i - 1]], service=service)
@kv.command()
@click.argument("file", type=Path)
@click.argument("service", type=str)
@click.argument("vaults", nargs=-1, type=click.UNPROCESSED)
def add(file: Path, service: str, vaults: list[str]) -> None:
"""
Add new Content Keys to Key Vault(s) by service.
File should contain one key per line in the format KID:KEY (HEX:HEX).
Each line should have nothing else within it except for the KID:KEY.
Encoding is presumed to be UTF8.
"""
if not file.exists():
raise click.ClickException(f"File provided ({file}) does not exist.")
if not file.is_file():
raise click.ClickException(f"File provided ({file}) is not a file.")
if not service or not isinstance(service, str):
raise click.ClickException(f"Service provided ({service}) is invalid.")
if len(vaults) < 1:
raise click.ClickException("You must provide at least one Vault.")
log = logging.getLogger("kv")
service = Services.get_tag(service)
vaults_ = Vaults()
for vault_name in vaults:
vault = next((x for x in config.key_vaults if x["name"] == vault_name), None)
if not vault:
raise click.ClickException(f"Vault ({vault_name}) is not defined in the config.")
vault_type = vault["type"]
vault_args = vault.copy()
del vault_args["type"]
vaults_.load(vault_type, **vault_args)
data = file.read_text(encoding="utf8")
kid_keys: dict[str, str] = {}
for line in data.splitlines(keepends=False):
line = line.strip()
match = re.search(r"^(?P<kid>[0-9a-fA-F]{32}):(?P<key>[0-9a-fA-F]{32})$", line)
if not match:
continue
kid = match.group("kid").lower()
key = match.group("key").lower()
kid_keys[kid] = key
total_count = len(kid_keys)
for vault in vaults_:
log.info(f"Adding {total_count} Content Keys to {vault}")
added_count = vault.add_keys(service, kid_keys)
existed_count = total_count - added_count
log.info(f"{vault}: {added_count} newly added, {existed_count} already existed (skipped)")
log.info("Done!")
@kv.command()
@click.argument("vaults", nargs=-1, type=click.UNPROCESSED)
def prepare(vaults: list[str]) -> None:
"""Create Service Tables on Vaults if not yet created."""
log = logging.getLogger("kv")
vaults_ = Vaults()
for vault_name in vaults:
vault = next((x for x in config.key_vaults if x["name"] == vault_name), None)
if not vault:
raise click.ClickException(f"Vault ({vault_name}) is not defined in the config.")
vault_type = vault["type"]
vault_args = vault.copy()
del vault_args["type"]
vaults_.load(vault_type, **vault_args)
for vault in vaults_:
if hasattr(vault, "has_table") and hasattr(vault, "create_table"):
for service_tag in Services.get_tags():
if vault.has_table(service_tag):
log.info(f"{vault} already has a {service_tag} Table")
else:
try:
vault.create_table(service_tag, commit=True)
log.info(f"{vault}: Created {service_tag} Table")
except PermissionError:
log.error(f"{vault} user has no create table permission, skipping...")
continue
else:
log.info(f"{vault} does not use tables, skipping...")
log.info("Done!")

unshackle/commands/prd.py (new file, 271 lines)

@@ -0,0 +1,271 @@
import logging
from pathlib import Path
from typing import Optional
import click
import requests
from Crypto.Random import get_random_bytes
from pyplayready.cdm import Cdm
from pyplayready.crypto.ecc_key import ECCKey
from pyplayready.device import Device
from pyplayready.exceptions import InvalidCertificateChain, OutdatedDevice
from pyplayready.system.bcert import Certificate, CertificateChain
from pyplayready.system.pssh import PSSH
from unshackle.core.config import config
from unshackle.core.constants import context_settings
@click.group(
short_help="Manage creation of PRD (Playready Device) files.",
context_settings=context_settings,
)
def prd() -> None:
"""Manage creation of PRD (Playready Device) files."""
@prd.command()
@click.argument("paths", type=Path, nargs=-1)
@click.option(
"-e",
"--encryption_key",
type=Path,
required=False,
help="Optional Device ECC private encryption key",
)
@click.option(
"-s",
"--signing_key",
type=Path,
required=False,
help="Optional Device ECC private signing key",
)
@click.option("-o", "--output", type=Path, default=None, help="Output Directory")
@click.pass_context
def new(
ctx: click.Context,
paths: tuple[Path, ...],
encryption_key: Optional[Path],
signing_key: Optional[Path],
output: Optional[Path],
) -> None:
"""Create a new .PRD PlayReady Device file.
Accepts either paths to a group key and certificate or a single directory
containing ``zgpriv.dat`` and ``bgroupcert.dat``.
"""
if len(paths) == 1 and paths[0].is_dir():
device_dir = paths[0]
group_key = device_dir / "zgpriv.dat"
group_certificate = device_dir / "bgroupcert.dat"
if not group_key.is_file() or not group_certificate.is_file():
raise click.UsageError("Folder must contain zgpriv.dat and bgroupcert.dat", ctx)
elif len(paths) == 2:
group_key, group_certificate = paths
if not group_key.is_file():
raise click.UsageError("group_key: Not a path to a file, or it doesn't exist.", ctx)
if not group_certificate.is_file():
raise click.UsageError("group_certificate: Not a path to a file, or it doesn't exist.", ctx)
device_dir = None
else:
raise click.UsageError(
"Provide either a folder path or paths to group_key and group_certificate",
ctx,
)
if encryption_key and not encryption_key.is_file():
raise click.UsageError("encryption_key: Not a path to a file, or it doesn't exist.", ctx)
if signing_key and not signing_key.is_file():
raise click.UsageError("signing_key: Not a path to a file, or it doesn't exist.", ctx)
log = logging.getLogger("prd")
encryption_key_obj = ECCKey.load(encryption_key) if encryption_key else ECCKey.generate()
signing_key_obj = ECCKey.load(signing_key) if signing_key else ECCKey.generate()
group_key_obj = ECCKey.load(group_key)
certificate_chain = CertificateChain.load(group_certificate)
if certificate_chain.get(0).get_issuer_key() != group_key_obj.public_bytes():
raise InvalidCertificateChain("Group key does not match this certificate")
new_certificate = Certificate.new_leaf_cert(
cert_id=get_random_bytes(16),
security_level=certificate_chain.get_security_level(),
client_id=get_random_bytes(16),
signing_key=signing_key_obj,
encryption_key=encryption_key_obj,
group_key=group_key_obj,
parent=certificate_chain,
)
certificate_chain.prepend(new_certificate)
certificate_chain.verify()
device = Device(
group_key=group_key_obj.dumps(),
encryption_key=encryption_key_obj.dumps(),
signing_key=signing_key_obj.dumps(),
group_certificate=certificate_chain.dumps(),
)
if output and output.suffix:
if output.suffix.lower() != ".prd":
log.warning(
"Saving PRD with the file extension '%s' but '.prd' is recommended.",
output.suffix,
)
out_path = output
else:
out_dir = output or (device_dir or config.directories.prds)
out_path = out_dir / f"{device.get_name()}.prd"
if out_path.exists():
log.error("A file already exists at the path '%s', cannot overwrite.", out_path)
return
out_path.parent.mkdir(parents=True, exist_ok=True)
out_path.write_bytes(device.dumps())
log.info("Created Playready Device (.prd) file, %s", out_path.name)
log.info(" + Security Level: %s", device.security_level)
log.info(" + Group Key: %s bytes", len(device.group_key.dumps()))
log.info(" + Encryption Key: %s bytes", len(device.encryption_key.dumps()))
log.info(" + Signing Key: %s bytes", len(device.signing_key.dumps()))
log.info(" + Group Certificate: %s bytes", len(device.group_certificate.dumps()))
log.info(" + Saved to: %s", out_path.absolute())
@prd.command(name="reprovision")
@click.argument("prd_path", type=Path)
@click.option(
"-e",
"--encryption_key",
type=Path,
required=False,
help="Optional Device ECC private encryption key",
)
@click.option(
"-s",
"--signing_key",
type=Path,
required=False,
help="Optional Device ECC private signing key",
)
@click.option("-o", "--output", type=Path, default=None, help="Output Path or Directory")
@click.pass_context
def reprovision_device(
ctx: click.Context,
prd_path: Path,
encryption_key: Optional[Path],
signing_key: Optional[Path],
output: Optional[Path] = None,
) -> None:
"""Reprovision a Playready Device (.prd) file."""
if not prd_path.is_file():
raise click.UsageError("prd_path: Not a path to a file, or it doesn't exist.", ctx)
log = logging.getLogger("prd")
log.info("Reprovisioning Playready Device (.prd) file, %s", prd_path.name)
device = Device.load(prd_path)
if device.group_key is None:
raise OutdatedDevice(
"Device does not support reprovisioning, re-create it or use a Device with a version of 3 or higher"
)
device.group_certificate.remove(0)
encryption_key_obj = ECCKey.load(encryption_key) if encryption_key else ECCKey.generate()
signing_key_obj = ECCKey.load(signing_key) if signing_key else ECCKey.generate()
device.encryption_key = encryption_key_obj
device.signing_key = signing_key_obj
new_certificate = Certificate.new_leaf_cert(
cert_id=get_random_bytes(16),
security_level=device.group_certificate.get_security_level(),
client_id=get_random_bytes(16),
signing_key=signing_key_obj,
encryption_key=encryption_key_obj,
group_key=device.group_key,
parent=device.group_certificate,
)
device.group_certificate.prepend(new_certificate)
if output and output.suffix:
if output.suffix.lower() != ".prd":
log.warning(
"Saving PRD with the file extension '%s' but '.prd' is recommended.",
output.suffix,
)
out_path = output
else:
out_path = prd_path
out_path.parent.mkdir(parents=True, exist_ok=True)
out_path.write_bytes(device.dumps())
log.info("Reprovisioned Playready Device (.prd) file, %s", out_path.name)
@prd.command()
@click.argument("device", type=Path)
@click.option(
"-c",
"--ckt",
type=click.Choice(["aesctr", "aescbc"], case_sensitive=False),
default="aesctr",
help="Content Key Encryption Type",
)
@click.option(
"-sl",
"--security-level",
type=click.Choice(["150", "2000", "3000"], case_sensitive=False),
default="2000",
help="Minimum Security Level",
)
@click.pass_context
def test(
ctx: click.Context,
device: Path,
ckt: str,
security_level: str,
) -> None:
"""Test a Playready Device on the Microsoft demo server."""
if not device.is_file():
raise click.UsageError("device: Not a path to a file, or it doesn't exist.", ctx)
log = logging.getLogger("prd")
prd_device = Device.load(device)
log.info("Loaded Device: %s", prd_device.get_name())
cdm = Cdm.from_device(prd_device)
log.info("Loaded CDM")
session_id = cdm.open()
log.info("Opened Session")
pssh_b64 = "AAADfHBzc2gAAAAAmgTweZhAQoarkuZb4IhflQAAA1xcAwAAAQABAFIDPABXAFIATQBIAEUAQQBEAEUAUgAgAHgAbQBsAG4AcwA9ACIAaAB0AHQAcAA6AC8ALwBzAGMAaABlAG0AYQBzAC4AbQBpAGMAcgBvAHMAbwBmAHQALgBjAG8AbQAvAEQAUgBNAC8AMgAwADAANwAvADAAMwAvAFAAbABhAHkAUgBlAGEAZAB5AEgAZQBhAGQAZQByACIAIAB2AGUAcgBzAGkAbwBuAD0AIgA0AC4AMAAuADAALgAwACIAPgA8AEQAQQBUAEEAPgA8AFAAUgBPAFQARQBDAFQASQBOAEYATwA+ADwASwBFAFkATABFAE4APgAxADYAPAAvAEsARQBZAEwARQBOAD4APABBAEwARwBJAEQAPgBBAEUAUwBDAFQAUgA8AC8AQQBMAEcASQBEAD4APAAvAFAAUgBPAFQARQBDAFQASQBOAEYATwA+ADwASwBJAEQAPgA0AFIAcABsAGIAKwBUAGIATgBFAFMAOAB0AEcAawBOAEYAVwBUAEUASABBAD0APQA8AC8ASwBJAEQAPgA8AEMASABFAEMASwBTAFUATQA+AEsATABqADMAUQB6AFEAUAAvAE4AQQA9ADwALwBDAEgARQBDAEsAUwBVAE0APgA8AEwAQQBfAFUAUgBMAD4AaAB0AHQAcABzADoALwAvAHAAcgBvAGYAZgBpAGMAaQBhAGwAcwBpAHQAZQAuAGsAZQB5AGQAZQBsAGkAdgBlAHIAeQAuAG0AZQBkAGkAYQBzAGUAcgB2AGkAYwBlAHMALgB3AGkAbgBkAG8AdwBzAC4AbgBlAHQALwBQAGwAYQB5AFIAZQBhAGQAeQAvADwALwBMAEEAXwBVAFIATAA+ADwAQwBVAFMAVABPAE0AQQBUAFQAUgBJAEIAVQBUAEUAUwA+ADwASQBJAFMAXwBEAFIATQBfAFYARQBSAFMASQBPAE4APgA4AC4AMQAuADIAMwAwADQALgAzADEAPAAvAEkASQBTAF8ARABSAE0AXwBWAEUAUgBTAEkATwBOAD4APAAvAEMAVQBTAFQATwBNAEEAVABUAFIASQBCAFUAVABFAFMAPgA8AC8ARABBAFQAQQA+ADwALwBXAFIATQBIAEUAQQBEAEUAUgA+AA=="
pssh = PSSH(pssh_b64)
challenge = cdm.get_license_challenge(session_id, pssh.wrm_headers[0])
log.info("Created License Request")
license_server = f"https://test.playready.microsoft.com/service/rightsmanager.asmx?cfg=(persist:false,sl:{security_level},ckt:{ckt})"
response = requests.post(
url=license_server,
headers={"Content-Type": "text/xml; charset=UTF-8"},
data=challenge,
)
cdm.parse_license(session_id, response.text)
log.info("License Parsed Successfully")
for key in cdm.get_keys(session_id):
log.info(f"{key.key_id.hex}:{key.key.hex()}")
cdm.close(session_id)
log.info("Closed Session")

unshackle/commands/search.py (new file, 149 lines)

@@ -0,0 +1,149 @@
from __future__ import annotations
import logging
import re
import sys
from typing import Any, Optional
import click
import yaml
from rich.padding import Padding
from rich.rule import Rule
from rich.tree import Tree
from unshackle.commands.dl import dl
from unshackle.core import binaries
from unshackle.core.config import config
from unshackle.core.console import console
from unshackle.core.constants import context_settings
from unshackle.core.proxies import Basic, Hola, NordVPN
from unshackle.core.service import Service
from unshackle.core.services import Services
from unshackle.core.utils.click_types import ContextData
from unshackle.core.utils.collections import merge_dict
@click.command(
short_help="Search for titles from a Service.",
cls=Services,
context_settings=dict(**context_settings, token_normalize_func=Services.get_tag),
)
@click.option(
"-p", "--profile", type=str, default=None, help="Profile to use for Credentials and Cookies (if available)."
)
@click.option(
"--proxy",
type=str,
default=None,
help="Proxy URI to use. If a 2-letter country is provided, it will try get a proxy from the config.",
)
@click.option("--no-proxy", is_flag=True, default=False, help="Force disable all proxy use.")
@click.pass_context
def search(ctx: click.Context, no_proxy: bool, profile: Optional[str] = None, proxy: Optional[str] = None):
if not ctx.invoked_subcommand:
raise ValueError("A subcommand to invoke was not specified, the main code cannot continue.")
log = logging.getLogger("search")
service = Services.get_tag(ctx.invoked_subcommand)
if profile:
log.info(f"Using profile: '{profile}'")
with console.status("Loading Service Config...", spinner="dots"):
service_config_path = Services.get_path(service) / config.filenames.config
if service_config_path.exists():
service_config = yaml.safe_load(service_config_path.read_text(encoding="utf8"))
log.info("Service Config loaded")
else:
service_config = {}
merge_dict(config.services.get(service), service_config)
proxy_providers = []
if no_proxy:
ctx.params["proxy"] = None
else:
with console.status("Loading Proxy Providers...", spinner="dots"):
if config.proxy_providers.get("basic"):
proxy_providers.append(Basic(**config.proxy_providers["basic"]))
if config.proxy_providers.get("nordvpn"):
proxy_providers.append(NordVPN(**config.proxy_providers["nordvpn"]))
if binaries.HolaProxy:
proxy_providers.append(Hola())
for proxy_provider in proxy_providers:
log.info(f"Loaded {proxy_provider.__class__.__name__}: {proxy_provider}")
if proxy:
requested_provider = None
if re.match(r"^[a-z]+:.+$", proxy, re.IGNORECASE):
# requesting proxy from a specific proxy provider
requested_provider, proxy = proxy.split(":", maxsplit=1)
if re.match(r"^[a-z]{2}(?:\d+)?$", proxy, re.IGNORECASE):
proxy = proxy.lower()
with console.status(f"Getting a Proxy to {proxy}...", spinner="dots"):
if requested_provider:
proxy_provider = next(
(x for x in proxy_providers if x.__class__.__name__.lower() == requested_provider), None
)
if not proxy_provider:
log.error(f"The proxy provider '{requested_provider}' was not recognised.")
sys.exit(1)
proxy_uri = proxy_provider.get_proxy(proxy)
if not proxy_uri:
log.error(f"The proxy provider {requested_provider} had no proxy for {proxy}")
sys.exit(1)
proxy = ctx.params["proxy"] = proxy_uri
log.info(f"Using {proxy_provider.__class__.__name__} Proxy: {proxy}")
else:
for proxy_provider in proxy_providers:
proxy_uri = proxy_provider.get_proxy(proxy)
if proxy_uri:
proxy = ctx.params["proxy"] = proxy_uri
log.info(f"Using {proxy_provider.__class__.__name__} Proxy: {proxy}")
break
else:
log.info(f"Using explicit Proxy: {proxy}")
ctx.obj = ContextData(config=service_config, cdm=None, proxy_providers=proxy_providers, profile=profile)
@search.result_callback()
def result(service: Service, profile: Optional[str] = None, **_: Any) -> None:
log = logging.getLogger("search")
service_tag = service.__class__.__name__
with console.status("Authenticating with Service...", spinner="dots"):
cookies = dl.get_cookie_jar(service_tag, profile)
credential = dl.get_credentials(service_tag, profile)
service.authenticate(cookies, credential)
if cookies or credential:
log.info("Authenticated with Service")
search_results = Tree("Search Results", hide_root=True)
with console.status("Searching...", spinner="dots"):
for result in service.search():
result_text = f"[bold text]{result.title}[/]"
if result.url:
result_text = f"[link={result.url}]{result_text}[/link]"
if result.label:
result_text += f" [pink]{result.label}[/]"
if result.description:
result_text += f"\n[text2]{result.description}[/]"
result_text += f"\n[bright_black]id: {result.id}[/]"
search_results.add(result_text + "\n")
# update cookies
cookie_file = dl.get_cookie_path(service_tag, profile)
if cookie_file:
dl.save_cookies(cookie_file, service.session.cookies)
console.print(Padding(Rule(f"[rule.text]{len(search_results.children)} Search Results"), (1, 2)))
if search_results.children:
console.print(Padding(search_results, (0, 5)))
else:
console.print(
Padding("[bold text]No matches[/]\n[bright_black]Please check spelling and search again....[/]", (0, 5))
)
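
A standalone sketch of how the --proxy argument above is interpreted: an optional `provider:` prefix is split off first, then a bare 2-letter country code is distinguished from an explicit proxy URI (`classify_proxy` is a hypothetical helper, not part of the commit):

import re
from typing import Optional

def classify_proxy(proxy: str) -> tuple[Optional[str], str]:
    # Hypothetical helper mirroring the branching in `search` above.
    provider = None
    if re.match(r"^[a-z]+:.+$", proxy, re.IGNORECASE):
        provider, proxy = proxy.split(":", maxsplit=1)  # e.g. "nordvpn:us"
    if re.match(r"^[a-z]{2}(?:\d+)?$", proxy, re.IGNORECASE):
        return provider, proxy.lower()  # country code: ask a provider for a proxy
    return provider, proxy  # anything else is treated as an explicit proxy

print(classify_proxy("nordvpn:us"))  # ('nordvpn', 'us')
print(classify_proxy("de"))          # (None, 'de')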

unshackle/commands/serve.py (new file, 45 lines)

@@ -0,0 +1,45 @@
import subprocess
import click
from unshackle.core import binaries
from unshackle.core.config import config
from unshackle.core.constants import context_settings
@click.command(short_help="Serve your Local Widevine Devices for Remote Access.", context_settings=context_settings)
@click.option("-h", "--host", type=str, default="0.0.0.0", help="Host to serve from.")
@click.option("-p", "--port", type=int, default=8786, help="Port to serve from.")
@click.option("--caddy", is_flag=True, default=False, help="Also serve with Caddy.")
def serve(host: str, port: int, caddy: bool) -> None:
"""
Serve your Local Widevine Devices for Remote Access.
\b
Hosting on 127.0.0.1 may block remote access even if port-forwarded.
Instead, use 0.0.0.0 and ensure the TCP port you choose is forwarded.
\b
You may serve with Caddy at the same time with --caddy. You can use Caddy
as a reverse-proxy to serve with HTTPS. The config used will be the Caddyfile
next to the unshackle config.
"""
from pywidevine import serve
if caddy:
if not binaries.Caddy:
raise click.ClickException('Caddy executable "caddy" not found but is required for --caddy.')
caddy_p = subprocess.Popen(
[binaries.Caddy, "run", "--config", str(config.directories.user_configs / "Caddyfile")]
)
else:
caddy_p = None
try:
if not config.serve.get("devices"):
config.serve["devices"] = []
config.serve["devices"].extend(list(config.directories.wvds.glob("*.wvd")))
serve.run(config.serve, host, port)
finally:
if caddy_p:
caddy_p.kill()

unshackle/commands/util.py (new file, 267 lines)

@@ -0,0 +1,267 @@
import subprocess
from pathlib import Path
import click
from pymediainfo import MediaInfo
from unshackle.core import binaries
from unshackle.core.constants import context_settings
@click.group(short_help="Various helper scripts and programs.", context_settings=context_settings)
def util() -> None:
"""Various helper scripts and programs."""
@util.command()
@click.argument("path", type=Path)
@click.argument("aspect", type=str)
@click.option(
"--letter/--pillar",
default=True,
help="Specify which direction to crop. Top and Bottom would be --letter, Sides would be --pillar.",
)
@click.option("-o", "--offset", type=int, default=0, help="Fine tune the computed crop area if not perfectly centered.")
@click.option(
"-p",
"--preview",
is_flag=True,
default=False,
help="Instantly preview the newly-set aspect crop in MPV (or ffplay if mpv is unavailable).",
)
def crop(path: Path, aspect: str, letter: bool, offset: int, preview: bool) -> None:
"""
Losslessly crop H.264 and H.265 video files at the bit-stream level.
You may provide a path to a file, or a folder of mkv and/or mp4 files.
Note: If the values you provide are not quite working, try tuning
-o/--offset. This may be necessary on videos with sub-sampled chroma.
You may not get an ideal lossless cropping result in some cases, again
due to sub-sampled chroma.
It's recommended to start -o at around 10 pixels and lower it until you
get as close as possible. Make sure it's not over-cropping either, as it
may go from being 2px away from a perfect crop to 20px over-cropped,
again due to sub-sampled chroma.
"""
if not binaries.FFMPEG:
raise click.ClickException('FFmpeg executable "ffmpeg" not found but is required.')
if path.is_dir():
paths = list(path.glob("*.mkv")) + list(path.glob("*.mp4"))
else:
paths = [path]
for video_path in paths:
try:
video_track = next(iter(MediaInfo.parse(video_path).video_tracks or []))
except StopIteration:
raise click.ClickException("There's no video tracks in the provided file.")
crop_filter = {"HEVC": "hevc_metadata", "AVC": "h264_metadata"}.get(video_track.commercial_name)
if not crop_filter:
raise click.ClickException(f"{video_track.commercial_name} Codec not supported.")
aspect_w, aspect_h = list(map(float, aspect.split(":")))
if letter:
crop_value = (video_track.height - (video_track.width / (aspect_w / aspect_h))) / 2
left, top, right, bottom = map(int, [0, crop_value + offset, 0, crop_value - offset])
else:
crop_value = (video_track.width - (video_track.height * (aspect_w / aspect_h))) / 2
left, top, right, bottom = map(int, [crop_value + offset, 0, crop_value - offset, 0])
crop_filter += f"=crop_left={left}:crop_top={top}:crop_right={right}:crop_bottom={bottom}"
if min(left, top, right, bottom) < 0:
raise click.ClickException("Cannot crop less than 0, are you cropping in the right direction?")
if preview:
out_path = ["-f", "mpegts", "-"] # pipe
else:
out_path = [
str(
video_path.with_name(
".".join(
filter(
bool,
[
video_path.stem,
video_track.language,
"crop",
str(offset or ""),
{
# ffmpeg's MKV muxer does not yet support HDR
"HEVC": "h265",
"AVC": "h264",
}.get(video_track.commercial_name, ".mp4"),
],
)
)
)
)
]
ffmpeg_call = subprocess.Popen(
[binaries.FFMPEG, "-y", "-i", str(video_path), "-map", "0:v:0", "-c", "copy", "-bsf:v", crop_filter]
+ out_path,
stdout=subprocess.PIPE,
)
try:
if preview:
previewer = binaries.MPV or binaries.FFPlay
if not previewer:
raise click.ClickException("MPV/FFplay executables weren't found but are required for previewing.")
subprocess.Popen((previewer, "-"), stdin=ffmpeg_call.stdout)
finally:
if ffmpeg_call.stdout:
ffmpeg_call.stdout.close()
ffmpeg_call.wait()
@util.command(name="range")
@click.argument("path", type=Path)
@click.option("--full/--limited", is_flag=True, help="Full: 0..255, Limited: 16..235 (16..240 YUV luma)")
@click.option(
"-p",
"--preview",
is_flag=True,
default=False,
help="Instantly preview the newly-set video range in MPV (or ffplay if mpv is unavailable).",
)
def range_(path: Path, full: bool, preview: bool) -> None:
"""
Losslessly set the Video Range flag to full or limited at the bit-stream level.
You may provide a path to a file, or a folder of mkv and/or mp4 files.
If you ever notice blacks not being quite black, and whites not being quite white,
then your video may have the range set to the wrong value. Flip its range to the
opposite value and see if that fixes it.
"""
if not binaries.FFMPEG:
raise click.ClickException('FFmpeg executable "ffmpeg" not found but is required.')
if path.is_dir():
paths = list(path.glob("*.mkv")) + list(path.glob("*.mp4"))
else:
paths = [path]
for video_path in paths:
try:
video_track = next(iter(MediaInfo.parse(video_path).video_tracks or []))
except StopIteration:
raise click.ClickException("There's no video tracks in the provided file.")
metadata_key = {"HEVC": "hevc_metadata", "AVC": "h264_metadata"}.get(video_track.commercial_name)
if not metadata_key:
raise click.ClickException(f"{video_track.commercial_name} Codec not supported.")
if preview:
out_path = ["-f", "mpegts", "-"] # pipe
else:
out_path = [
str(
video_path.with_name(
".".join(
filter(
bool,
[
video_path.stem,
video_track.language,
"range",
["limited", "full"][full],
{
# ffmpeg's MKV muxer does not yet support HDR
"HEVC": "h265",
"AVC": "h264",
}.get(video_track.commercial_name, ".mp4"),
],
)
)
)
)
]
ffmpeg_call = subprocess.Popen(
[
binaries.FFMPEG,
"-y",
"-i",
str(video_path),
"-map",
"0:v:0",
"-c",
"copy",
"-bsf:v",
f"{metadata_key}=video_full_range_flag={int(full)}",
]
+ out_path,
stdout=subprocess.PIPE,
)
try:
if preview:
previewer = binaries.MPV or binaries.FFPlay
if not previewer:
raise click.ClickException("MPV/FFplay executables weren't found but are required for previewing.")
subprocess.Popen((previewer, "-"), stdin=ffmpeg_call.stdout)
finally:
if ffmpeg_call.stdout:
ffmpeg_call.stdout.close()
ffmpeg_call.wait()
@util.command()
@click.argument("path", type=Path)
@click.option(
"-m", "--map", "map_", type=str, default="0", help="Test specific streams by setting FFmpeg's -map parameter."
)
def test(path: Path, map_: str) -> None:
"""
Decode an entire video and check for any corruptions or errors using FFmpeg.
You may provide a path to a file, or a folder of mkv and/or mp4 files.
Tests all streams within the file by default. Subtitles cannot be tested.
You may choose specific streams using the -m/--map parameter. E.g.,
'0:v:0' to test the first video stream, or '0:a' to test all audio streams.
"""
if not binaries.FFMPEG:
raise click.ClickException('FFmpeg executable "ffmpeg" not found but is required.')
if path.is_dir():
paths = list(path.glob("*.mkv")) + list(path.glob("*.mp4"))
else:
paths = [path]
for video_path in paths:
print("Starting...")
p = subprocess.Popen(
[
binaries.FFMPEG,
"-hide_banner",
"-benchmark",
"-i",
str(video_path),
"-map",
map_,
"-sn",
"-f",
"null",
"-",
],
stderr=subprocess.PIPE,
universal_newlines=True,
)
reached_output = False
errors = 0
for line in p.stderr:
line = line.strip()
if "speed=" in line:
reached_output = True
if not reached_output:
continue
if line.startswith("["): # error of some kind
errors += 1
stream, error = line.split("] ", maxsplit=1)
stream = stream.split(" @ ")[0]
line = f"{stream} ERROR: {error}"
print(line)
p.stderr.close()
print(f"Finished with {errors} Errors, Cleaning up...")
p.terminate()
p.wait()
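
For intuition, a worked example of the letterbox arithmetic in `crop` above, removing 2.39:1 letterboxing from a 1920x1080 encode (the numbers are illustrative only):

# Illustrative numbers only; mirrors the --letter branch of `util crop`.
width, height = 1920, 1080
aspect_w, aspect_h = 2.39, 1.0
offset = 0

target_height = width / (aspect_w / aspect_h)  # height the 2.39:1 picture occupies: ~803.3 px
crop_value = (height - target_height) / 2      # black bar thickness per side: ~138.3 px
top, bottom = int(crop_value + offset), int(crop_value - offset)
print(top, bottom)  # 138 138 -> crop_top=138, crop_bottom=138 in the bitstream filter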

unshackle/commands/wvd.py (new file, 272 lines)

@@ -0,0 +1,272 @@
import logging
import shutil
from pathlib import Path
from typing import Optional
import click
import yaml
from google.protobuf.json_format import MessageToDict
from pywidevine.device import Device, DeviceTypes
from pywidevine.license_protocol_pb2 import FileHashes
from rich.prompt import Prompt
from unidecode import UnidecodeError, unidecode
from unshackle.core.config import config
from unshackle.core.console import console
from unshackle.core.constants import context_settings
@click.group(
short_help="Manage configuration and creation of WVD (Widevine Device) files.", context_settings=context_settings
)
def wvd() -> None:
"""Manage configuration and creation of WVD (Widevine Device) files."""
@wvd.command()
@click.argument("paths", type=Path, nargs=-1)
def add(paths: list[Path]) -> None:
"""Add one or more WVD (Widevine Device) files to the WVDs Directory."""
log = logging.getLogger("wvd")
for path in paths:
dst_path = config.directories.wvds / path.name
if not path.exists():
log.error(f"The WVD path '{path}' does not exist...")
elif dst_path.exists():
log.error(f"WVD named '{path.stem}' already exists...")
else:
# TODO: Check for and log errors
_ = Device.load(path) # test if WVD is valid
dst_path.parent.mkdir(parents=True, exist_ok=True)
shutil.move(path, dst_path)
log.info(f"Added {path.stem}")
@wvd.command()
@click.argument("names", type=str, nargs=-1)
def delete(names: list[str]) -> None:
"""Delete one or more WVD (Widevine Device) files from the WVDs Directory."""
log = logging.getLogger("wvd")
for name in names:
path = (config.directories.wvds / name).with_suffix(".wvd")
if not path.exists():
log.error(f"No WVD file exists by the name '{name}'...")
continue
answer = Prompt.ask(
f"[red]Deleting '{name}'[/], are you sure you want to continue?",
choices=["y", "n"],
default="n",
console=console,
)
if answer == "n":
log.info("Aborting...")
continue
Path.unlink(path)
log.info(f"Deleted {name}")
@wvd.command()
@click.argument("path", type=Path)
def parse(path: Path) -> None:
"""
Parse a .WVD Widevine Device file to check information.
Relative paths are relative to the WVDs directory.
"""
try:
named = not path.suffix and path.relative_to(Path(""))
except ValueError:
named = False
if named:
path = config.directories.wvds / f"{path.name}.wvd"
log = logging.getLogger("wvd")
if not path.exists():
console.log(f"[bright_blue]{path.absolute()}[/] does not exist...")
return
device = Device.load(path)
log.info(f"System ID: {device.system_id}")
log.info(f"Security Level: {device.security_level}")
log.info(f"Type: {device.type}")
log.info(f"Flags: {device.flags}")
log.info(f"Private Key: {bool(device.private_key)}")
log.info(f"Client ID: {bool(device.client_id)}")
log.info(f"VMP: {bool(device.client_id.vmp_data)}")
log.info("Client ID:")
log.info(device.client_id)
log.info("VMP:")
if device.client_id.vmp_data:
file_hashes = FileHashes()
file_hashes.ParseFromString(device.client_id.vmp_data)
log.info(str(file_hashes))
else:
log.info("None")
@wvd.command()
@click.argument("wvd_paths", type=Path, nargs=-1)
@click.argument("out_dir", type=Path, nargs=1)
def dump(wvd_paths: list[Path], out_dir: Path) -> None:
"""
Extract data from a .WVD Widevine Device file to a folder structure.
If a path is relative and has no file extension, the WVD is loaded from the
WVDs directory.
"""
log = logging.getLogger("wvd")
if wvd_paths == ():
if not config.directories.wvds.exists():
console.log(f"[bright_blue]{config.directories.wvds.absolute()}[/] does not exist...")
wvd_paths = list(x for x in config.directories.wvds.iterdir() if x.is_file() and x.suffix.lower() == ".wvd")
if not wvd_paths:
console.log(f"[bright_blue]{config.directories.wvds.absolute()}[/] is empty...")
for i, (wvd_path, out_path) in enumerate(zip(wvd_paths, (out_dir / x.stem for x in wvd_paths))):
if i > 0:
log.info("")
try:
named = not wvd_path.suffix and wvd_path.relative_to(Path(""))
except ValueError:
named = False
if named:
wvd_path = config.directories.wvds / f"{wvd_path.stem}.wvd"
out_path.mkdir(parents=True, exist_ok=True)
log.info(f"Dumping: {wvd_path}")
device = Device.load(wvd_path)
log.info(f"L{device.security_level} {device.system_id} {device.type.name}")
log.info(f"Saving to: {out_path}")
device_meta = {
"wvd": {"device_type": device.type.name, "security_level": device.security_level, **device.flags},
"client_info": {},
"capabilities": MessageToDict(device.client_id, preserving_proto_field_name=True)["client_capabilities"],
}
for client_info in device.client_id.client_info:
device_meta["client_info"][client_info.name] = client_info.value
device_meta_path = out_path / "metadata.yml"
device_meta_path.write_text(yaml.dump(device_meta), encoding="utf8")
log.info(" + Device Metadata")
if device.private_key:
private_key_path = out_path / "private_key.pem"
private_key_path.write_text(data=device.private_key.export_key().decode(), encoding="utf8")
private_key_path.with_suffix(".der").write_bytes(device.private_key.export_key(format="DER"))
log.info(" + Private Key")
else:
log.warning(" - No Private Key available")
if device.client_id:
client_id_path = out_path / "client_id.bin"
client_id_path.write_bytes(device.client_id.SerializeToString())
log.info(" + Client ID")
else:
log.warning(" - No Client ID available")
if device.client_id.vmp_data:
vmp_path = out_path / "vmp.bin"
vmp_path.write_bytes(device.client_id.vmp_data)
log.info(" + VMP (File Hashes)")
else:
log.info(" - No VMP (File Hashes) available")
@wvd.command()
@click.argument("name", type=str)
@click.argument("private_key", type=Path)
@click.argument("client_id", type=Path)
@click.argument("file_hashes", type=Path, required=False)
@click.option(
"-t",
"--type",
"type_",
type=click.Choice([x.name for x in DeviceTypes], case_sensitive=False),
default="Android",
help="Device Type",
)
@click.option("-l", "--level", type=click.IntRange(1, 3), default=1, help="Device Security Level")
@click.option("-o", "--output", type=Path, default=None, help="Output Directory")
@click.pass_context
def new(
ctx: click.Context,
name: str,
private_key: Path,
client_id: Path,
file_hashes: Optional[Path],
type_: str,
level: int,
output: Optional[Path],
) -> None:
"""
Create a new .WVD Widevine provision file.
name: The origin device name of the provided data. e.g. `Nexus 6P`. You do not need to
specify the security level, that will be done automatically.
private_key: A PEM file of a Device's private key.
client_id: A binary blob file which follows the Widevine ClientIdentification protobuf
schema.
file_hashes: A binary blob file which follows the Widevine FileHashes protobuf
Also known as VMP as it's used for VMP (Verified Media Path) assurance.
"""
try:
# TODO: Remove need for name, create name based on Client IDs ClientInfo values
name = unidecode(name.strip().lower().replace(" ", "_"))
except UnidecodeError as e:
raise click.UsageError(f"name: Failed to sanitize name, {e}", ctx)
if not name:
raise click.UsageError("name: Empty after sanitizing, please make sure the name is valid.", ctx)
if not private_key.is_file():
raise click.UsageError("private_key: Not a path to a file, or it doesn't exist.", ctx)
if not client_id.is_file():
raise click.UsageError("client_id: Not a path to a file, or it doesn't exist.", ctx)
if file_hashes and not file_hashes.is_file():
raise click.UsageError("file_hashes: Not a path to a file, or it doesn't exist.", ctx)
device = Device(
type_=DeviceTypes[type_.upper()],
security_level=level,
flags=None,
private_key=private_key.read_bytes(),
client_id=client_id.read_bytes(),
)
if file_hashes:
device.client_id.vmp_data = file_hashes.read_bytes()
out_path = (output or config.directories.wvds) / f"{name}_{device.system_id}_l{device.security_level}.wvd"
device.dump(out_path)
log = logging.getLogger("wvd")
log.info(f"Created binary WVD file, {out_path.name}")
log.info(f" + Saved to: {out_path.absolute()}")
log.info(f"System ID: {device.system_id}")
log.info(f"Security Level: {device.security_level}")
log.info(f"Type: {device.type}")
log.info(f"Flags: {device.flags}")
log.info(f"Private Key: {bool(device.private_key)}")
log.info(f"Client ID: {bool(device.client_id)}")
log.info(f"VMP: {bool(device.client_id.vmp_data)}")
log.info("Client ID:")
log.info(device.client_id)
log.info("VMP:")
if device.client_id.vmp_data:
file_hashes = FileHashes()
file_hashes.ParseFromString(device.client_id.vmp_data)
log.info(str(file_hashes))
else:
log.info("None")

unshackle/core/__init__.py (new file, 1 line)

@@ -0,0 +1 @@
__version__ = "1.0.1"

unshackle/core/__main__.py (new file, 92 lines)

@@ -0,0 +1,92 @@
import atexit
import logging
from datetime import datetime
from pathlib import Path
import click
import urllib3
from rich import traceback
from rich.console import Group
from rich.padding import Padding
from rich.text import Text
from urllib3.exceptions import InsecureRequestWarning
from unshackle.core import __version__
from unshackle.core.commands import Commands
from unshackle.core.config import config
from unshackle.core.console import ComfyRichHandler, console
from unshackle.core.constants import context_settings
from unshackle.core.utilities import rotate_log_file
LOGGING_PATH = None
@click.command(cls=Commands, invoke_without_command=True, context_settings=context_settings)
@click.option("-v", "--version", is_flag=True, default=False, help="Print version information.")
@click.option("-d", "--debug", is_flag=True, default=False, help="Enable DEBUG level logs.")
@click.option(
"--log",
"log_path",
type=Path,
default=config.directories.logs / config.filenames.log,
help="Log path (or filename). Path can contain the following f-string args: {name} {time}.",
)
def main(version: bool, debug: bool, log_path: Path) -> None:
"""unshackle—Modular Movie, TV, and Music Archival Software."""
logging.basicConfig(
level=logging.DEBUG if debug else logging.INFO,
format="%(message)s",
handlers=[
ComfyRichHandler(
show_time=False,
show_path=debug,
console=console,
rich_tracebacks=True,
tracebacks_suppress=[click],
log_renderer=console._log_render, # noqa
)
],
)
if log_path:
global LOGGING_PATH
console.record = True
new_log_path = rotate_log_file(log_path)
LOGGING_PATH = new_log_path
urllib3.disable_warnings(InsecureRequestWarning)
traceback.install(console=console, width=80, suppress=[click])
console.print(
Padding(
Group(
Text(
r"▄• ▄▌ ▐ ▄ .▄▄ · ▄ .▄ ▄▄▄· ▄▄· ▄ •▄ ▄▄▌ ▄▄▄ ." + "\n"
r"█▪██▌•█▌▐█▐█ ▀. ██▪▐█▐█ ▀█ ▐█ ▌▪█▌▄▌▪██• ▀▄.▀·" + "\n"
r"█▌▐█▌▐█▐▐▌▄▀▀▀█▄██▀▐█▄█▀▀█ ██ ▄▄▐▀▀▄·██▪ ▐▀▀▪▄" + "\n"
r"▐█▄█▌██▐█▌▐█▄▪▐███▌▐▀▐█ ▪▐▌▐███▌▐█.█▌▐█▌▐▌▐█▄▄▌" + "\n"
r" ▀▀▀ ▀▀ █▪ ▀▀▀▀ ▀▀▀ · ▀ ▀ ·▀▀▀ ·▀ ▀.▀▀▀ ▀▀▀ " + "\n",
style="ascii.art",
),
f"v[repr.number]{__version__}[/] - {datetime.now().year} - sp4rk.y",
),
(1, 11, 1, 10),
expand=True,
),
justify="center",
)
if version:
return
@atexit.register
def save_log():
if console.record and LOGGING_PATH:
# TODO: Currently semi-bust. Everything that refreshes gets duplicated.
console.save_text(LOGGING_PATH)
if __name__ == "__main__":
main()

unshackle/core/binaries.py (new file, 50 lines)

@@ -0,0 +1,50 @@
import shutil
import sys
from pathlib import Path
from typing import Optional
__shaka_platform = {"win32": "win", "darwin": "osx"}.get(sys.platform, sys.platform)
def find(*names: str) -> Optional[Path]:
"""Find the path of the first found binary name."""
for name in names:
path = shutil.which(name)
if path:
return Path(path)
return None
FFMPEG = find("ffmpeg")
FFProbe = find("ffprobe")
FFPlay = find("ffplay")
SubtitleEdit = find("SubtitleEdit")
ShakaPackager = find(
"shaka-packager",
"packager",
f"packager-{__shaka_platform}",
f"packager-{__shaka_platform}-arm64",
f"packager-{__shaka_platform}-x64",
)
Aria2 = find("aria2c", "aria2")
CCExtractor = find("ccextractor", "ccextractorwin", "ccextractorwinfull")
HolaProxy = find("hola-proxy")
MPV = find("mpv")
Caddy = find("caddy")
N_m3u8DL_RE = find("N_m3u8DL-RE", "n-m3u8dl-re")
__all__ = (
"FFMPEG",
"FFProbe",
"FFPlay",
"SubtitleEdit",
"ShakaPackager",
"Aria2",
"CCExtractor",
"HolaProxy",
"MPV",
"Caddy",
"N_m3u8DL_RE",
"find",
)

unshackle/core/cacher.py (new file, 156 lines)

@@ -0,0 +1,156 @@
from __future__ import annotations
import zlib
from datetime import datetime, timedelta
from os import stat_result
from pathlib import Path
from typing import Any, Optional, Union
import jsonpickle
import jwt
from unshackle.core.config import config
EXP_T = Union[datetime, str, int, float]
class Cacher:
"""Cacher for Services to get and set arbitrary data with expiration dates."""
def __init__(
self,
service_tag: str,
key: Optional[str] = None,
version: Optional[int] = 1,
data: Optional[Any] = None,
expiration: Optional[datetime] = None,
) -> None:
self.service_tag = service_tag
self.key = key
self.version = version
self.data = data or {}
self.expiration = expiration
if self.expiration and self.expired:
# if it's expired, remove the data for safety and delete the cache file
self.data = None
self.path.unlink()
def __bool__(self) -> bool:
return bool(self.data)
@property
def path(self) -> Path:
"""Get the path at which the cache will be read and written."""
return (config.directories.cache / self.service_tag / self.key).with_suffix(".json")
@property
def expired(self) -> bool:
return self.expiration and self.expiration < datetime.now()
def get(self, key: str, version: int = 1) -> Cacher:
"""
Get Cached data for the Service by Key.
:param key: the filename to save the data to, should be url-safe.
:param version: the config data version you expect to use.
:returns: Cacher object containing the cached data, or an empty Cacher if the file does not exist.
"""
cache = Cacher(self.service_tag, key, version)
if cache.path.is_file():
data = jsonpickle.loads(cache.path.read_text(encoding="utf8"))
payload = data.copy()
del payload["crc32"]
checksum = data["crc32"]
calculated = zlib.crc32(jsonpickle.dumps(payload).encode("utf8"))
if calculated != checksum:
raise ValueError(
f"The checksum of the Cache payload mismatched. Checksum: {checksum} !== Calculated: {calculated}"
)
cache.data = data["data"]
cache.expiration = data["expiration"]
cache.version = data["version"]
if cache.version != version:
raise ValueError(
f"The version of your {self.service_tag} {key} cache is outdated. Please delete: {cache.path}"
)
return cache
def set(self, data: Any, expiration: Optional[EXP_T] = None) -> Any:
"""
Set Cached data for the Service by Key.
:param data: absolutely anything including None.
:param expiration: when the data expires, optional. Can be ISO 8601, seconds
until expiration, a unix timestamp, or a datetime object.
:returns: the data provided for quick wrapping of functions or vars.
"""
self.data = data
if not expiration:
try:
expiration = jwt.decode(self.data, options={"verify_signature": False})["exp"]
except jwt.DecodeError:
pass
self.expiration = self._resolve_datetime(expiration) if expiration else None
payload = {"data": self.data, "expiration": self.expiration, "version": self.version}
payload["crc32"] = zlib.crc32(jsonpickle.dumps(payload).encode("utf8"))
self.path.parent.mkdir(parents=True, exist_ok=True)
self.path.write_text(jsonpickle.dumps(payload))
return self.data
def stat(self) -> stat_result:
"""
Get Cache file OS Stat data like Creation Time, Modified Time, and such.
:returns: an os.stat_result tuple
"""
return self.path.stat()
@staticmethod
def _resolve_datetime(timestamp: EXP_T) -> datetime:
"""
Resolve multiple formats of a Datetime or Timestamp to an absolute Datetime.
Examples:
>>> now = datetime.now()
datetime.datetime(2022, 6, 27, 9, 49, 13, 657208)
>>> iso8601 = now.isoformat()
'2022-06-27T09:49:13.657208'
>>> Cacher._resolve_datetime(iso8601)
datetime.datetime(2022, 6, 27, 9, 49, 13, 657208)
>>> Cacher._resolve_datetime(iso8601 + "Z")
datetime.datetime(2022, 6, 27, 9, 49, 13, 657208)
>>> Cacher._resolve_datetime(3600)
datetime.datetime(2022, 6, 27, 10, 52, 50, 657208)
>>> Cacher._resolve_datetime('3600')
datetime.datetime(2022, 6, 27, 10, 52, 51, 657208)
>>> Cacher._resolve_datetime(7800.113)
datetime.datetime(2022, 6, 27, 11, 59, 13, 770208)
In the int/float examples you may notice that it did not return now + 3600 seconds
but rather something a bit more than that. This is because it did not resolve 3600
seconds from the `now` variable but from right now as the function was called.
"""
if isinstance(timestamp, datetime):
return timestamp
if isinstance(timestamp, str):
if timestamp.endswith("Z"):
# fromisoformat doesn't accept the final Z
timestamp = timestamp.split("Z")[0]
try:
return datetime.fromisoformat(timestamp)
except ValueError:
timestamp = float(timestamp)
try:
if len(str(int(timestamp))) == 13: # JS-style timestamp
timestamp /= 1000
timestamp = datetime.fromtimestamp(timestamp)
except ValueError:
raise ValueError(f"Unrecognized Timestamp value {timestamp!r}")
if timestamp < datetime.now():
# timestamp is likely an amount of seconds til expiration
# or, it's an already expired timestamp which is unlikely
timestamp = timestamp + timedelta(seconds=datetime.now().timestamp())
return timestamp
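
A minimal usage sketch of the cache round-trip above (the service tag, key, and payload are made up):

from datetime import datetime, timedelta

from unshackle.core.cacher import Cacher

# Made-up service tag and cache key.
cache = Cacher("EXAMPLE").get("auth_token")
if cache and not cache.expired:
    token = cache.data
else:
    token = {"access_token": "..."}  # fetch a fresh value here
    cache.set(token, expiration=datetime.now() + timedelta(hours=1))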

unshackle/core/cdms/__init__.py (new file, 3 lines)

@@ -0,0 +1,3 @@
from .decrypt_labs_remote_cdm import DecryptLabsRemoteCDM
__all__ = ["DecryptLabsRemoteCDM"]

unshackle/core/cdms/decrypt_labs_remote_cdm.py (new file, 143 lines)

@@ -0,0 +1,143 @@
import base64
import secrets
from typing import Optional, Type, Union
from uuid import UUID
import requests
from pywidevine import PSSH, Device, DeviceTypes, Key, RemoteCdm
from pywidevine.license_protocol_pb2 import SignedDrmCertificate, SignedMessage
# Copyright 2024 by DevYukine.
class DecryptLabsRemoteCDM(RemoteCdm):
def __init__(
self,
device_type: Union[DeviceTypes, str],
system_id: int,
security_level: int,
host: str,
secret: str,
device_name: str,
service_name: str,
):
self.response_counter = 0
self.pssh = None
self.api_session_ids = {}
self.license_request = None
self.service_name = service_name
self.keys = {}
try:
super().__init__(device_type, system_id, security_level, host, secret, device_name)
except Exception:
pass
self.req_session = requests.Session()
self.req_session.headers.update({"decrypt-labs-api-key": secret})
@classmethod
def from_device(cls, device: Device) -> Type["DecryptLabsRemoteCDM"]:
raise NotImplementedError("You cannot load a DecryptLabsRemoteCDM from a local Device file.")
def open(self) -> bytes:
# Stub: return a random session ID; the API's real session ID is stored later and looked up via this randomly generated one.
return bytes.fromhex(secrets.token_hex(16))
def close(self, session_id: bytes) -> None:
# We stub this method to do nothing.
pass
def set_service_certificate(self, session_id: bytes, certificate: Optional[Union[bytes, str]]) -> str:
if isinstance(certificate, bytes):
certificate = base64.b64encode(certificate).decode()
# certificate needs to be base64 to be sent off to the API.
# it needs to intentionally be kept as base64 encoded SignedMessage.
self.req_session.signed_device_certificate = certificate
self.req_session.privacy_mode = True
return "success"
def get_service_certificate(self, session_id: bytes) -> Optional[SignedDrmCertificate]:
raise NotImplementedError("This method is not implemented in this CDM")
def get_license_challenge(
self, session_id: bytes, pssh: PSSH, license_type: str = "STREAMING", privacy_mode: bool = True
) -> bytes:
self.pssh = pssh
res = self.session(
self.host + "/get-request",
{
"init_data": self.pssh.dumps(),
"service_certificate": self.req_session.signed_device_certificate,
"scheme": "widevine",
"service": self.service_name,
},
)
self.license_request = res["challenge"]
self.api_session_ids[session_id] = res["session_id"]
return base64.b64decode(self.license_request)
def parse_license(self, session_id: bytes, license_message: Union[SignedMessage, bytes, str]) -> None:
session_id_api = self.api_session_ids[session_id]
if session_id not in self.keys:
self.keys[session_id] = []
session_keys = self.keys[session_id]
if isinstance(license_message, dict) and "keys" in license_message:
session_keys.extend(
[
Key(kid=Key.kid_to_uuid(x["kid"]), type_=x.get("type", "CONTENT"), key=bytes.fromhex(x["key"]))
for x in license_message["keys"]
]
)
else:
res = self.session(
self.host + "/decrypt-response",
{
"session_id": session_id_api,
"init_data": self.pssh.dumps(),
"license_request": self.license_request,
"license_response": license_message,
"scheme": "widevine",
},
)
            # the API returns the keys as a shell-style string of `--key KID:KEY`
            # arguments, so split it back into individual KID:KEY pairs
            original_keys = res["keys"].replace("\n", " ")
            formatted_keys = [k.strip() for k in original_keys.split("--key ") if ":" in k]
            for pair in formatted_keys:
                kid, key = pair.split(":", 1)
                session_keys.append(
                    Key(kid=UUID(bytes=bytes.fromhex(kid)), type_="CONTENT", key=bytes.fromhex(key))
                )
def get_keys(self, session_id: bytes, type_: Optional[Union[int, str]] = None) -> list[Key]:
return self.keys[session_id]
    def session(self, url: str, data: dict, retries: int = 3) -> dict:
        """POST to the CDM API, retrying a known transient decryption failure."""
        res = self.req_session.post(url, json=data).json()
        if res.get("message") != "success":
            transient = "License Response Decryption Process Failed at the very beginning"
            if transient in res.get("Error", "") and retries > 0:
                return self.session(url, data, retries=retries - 1)
            raise ValueError(f"CDM API returned an error: {res['Error']}")
        return res
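# A minimal usage sketch (illustrative only: the host, secret, and the
# certificate/PSSH/license variables below are placeholders, and a service
# certificate must be set before requesting a challenge, since
# get_license_challenge reads it off the session):
#
#   cdm = DecryptLabsRemoteCDM(
#       device_type=DeviceTypes.CHROME,
#       system_id=4464,  # placeholder
#       security_level=3,
#       host="https://cdm-api.example.com",  # placeholder
#       secret="my-api-key",  # placeholder
#       device_name="chrome",
#       service_name="EXAMPLE",
#   )
#   session_id = cdm.open()
#   cdm.set_service_certificate(session_id, service_certificate)  # base64 SignedMessage
#   challenge = cdm.get_license_challenge(session_id, PSSH(init_data))
#   cdm.parse_license(session_id, license_response)
#   keys = cdm.get_keys(session_id)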

View File

@@ -0,0 +1,35 @@
from typing import Optional
import click
from unshackle.core.config import config
from unshackle.core.utilities import import_module_by_path
_COMMANDS = sorted(
(path for path in config.directories.commands.glob("*.py") if path.stem.lower() != "__init__"), key=lambda x: x.stem
)
_MODULES = {path.stem: getattr(import_module_by_path(path), path.stem) for path in _COMMANDS}
class Commands(click.MultiCommand):
"""Lazy-loaded command group of project commands."""
def list_commands(self, ctx: click.Context) -> list[str]:
"""Returns a list of command names from the command filenames."""
return [x.stem for x in _COMMANDS]
def get_command(self, ctx: click.Context, name: str) -> Optional[click.Command]:
"""Load the command code and return the main click command function."""
module = _MODULES.get(name)
if not module:
raise click.ClickException(f"Unable to find command by the name '{name}'")
if hasattr(module, "cli"):
return module.cli
return module
# Hide direct access to the command modules from quick imports; they shouldn't be accessed directly
__all__ = ("Commands",)
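# A minimal sketch of wiring this lazy group into a click entry point
# (illustrative; the real entry point lives in unshackle.core.__main__):
#
#   @click.command(cls=Commands)
#   def main() -> None:
#       """unshackle CLI."""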

117
unshackle/core/config.py Normal file
View File

@@ -0,0 +1,117 @@
from __future__ import annotations
from pathlib import Path
from typing import Any, Optional
import yaml
from appdirs import AppDirs
class Config:
class _Directories:
# default directories, do not modify here, set via config
app_dirs = AppDirs("unshackle", False)
core_dir = Path(__file__).resolve().parent
namespace_dir = core_dir.parent
commands = namespace_dir / "commands"
services = namespace_dir / "services"
vaults = namespace_dir / "vaults"
fonts = namespace_dir / "fonts"
user_configs = core_dir.parent
data = core_dir.parent
downloads = core_dir.parent.parent / "downloads"
temp = core_dir.parent.parent / "temp"
cache = data / "cache"
cookies = data / "cookies"
logs = data / "logs"
wvds = data / "WVDs"
prds = data / "PRDs"
dcsl = data / "DCSL"
class _Filenames:
# default filenames, do not modify here, set via config
log = "unshackle_{name}_{time}.log" # Directories.logs
config = "config.yaml" # Directories.services / tag
root_config = "unshackle.yaml" # Directories.user_configs
chapters = "Chapters_{title}_{random}.txt" # Directories.temp
subtitle = "Subtitle_{id}_{language}.srt" # Directories.temp
def __init__(self, **kwargs: Any):
self.dl: dict = kwargs.get("dl") or {}
self.aria2c: dict = kwargs.get("aria2c") or {}
self.n_m3u8dl_re: dict = kwargs.get("n_m3u8dl_re") or {}
self.cdm: dict = kwargs.get("cdm") or {}
self.chapter_fallback_name: str = kwargs.get("chapter_fallback_name") or ""
self.curl_impersonate: dict = kwargs.get("curl_impersonate") or {}
self.remote_cdm: list[dict] = kwargs.get("remote_cdm") or []
self.credentials: dict = kwargs.get("credentials") or {}
self.directories = self._Directories()
for name, path in (kwargs.get("directories") or {}).items():
if name.lower() in ("app_dirs", "core_dir", "namespace_dir", "user_configs", "data"):
# these must not be modified by the user
continue
setattr(self.directories, name, Path(path).expanduser())
downloader_cfg = kwargs.get("downloader") or "requests"
if isinstance(downloader_cfg, dict):
self.downloader_map = {k.upper(): v for k, v in downloader_cfg.items()}
self.downloader = self.downloader_map.get("DEFAULT", "requests")
else:
self.downloader_map = {}
self.downloader = downloader_cfg
self.filenames = self._Filenames()
for name, filename in (kwargs.get("filenames") or {}).items():
setattr(self.filenames, name, filename)
self.headers: dict = kwargs.get("headers") or {}
self.key_vaults: list[dict[str, Any]] = kwargs.get("key_vaults", [])
self.muxing: dict = kwargs.get("muxing") or {}
self.nordvpn: dict = kwargs.get("nordvpn") or {}
self.proxy_providers: dict = kwargs.get("proxy_providers") or {}
self.serve: dict = kwargs.get("serve") or {}
self.services: dict = kwargs.get("services") or {}
self.set_terminal_bg: bool = kwargs.get("set_terminal_bg", True)
self.tag: str = kwargs.get("tag") or ""
self.tmdb_api_key: str = kwargs.get("tmdb_api_key") or ""
@classmethod
def from_yaml(cls, path: Path) -> Config:
if not path.exists():
raise FileNotFoundError(f"Config file path ({path}) was not found")
if not path.is_file():
            raise FileNotFoundError(f"Config file path ({path}) does not point to a file.")
return cls(**yaml.safe_load(path.read_text(encoding="utf8")) or {})
# noinspection PyProtectedMember
POSSIBLE_CONFIG_PATHS = (
# The unshackle Namespace Folder (e.g., %appdata%/Python/Python311/site-packages/unshackle)
Config._Directories.namespace_dir / Config._Filenames.root_config,
# The Parent Folder to the unshackle Namespace Folder (e.g., %appdata%/Python/Python311/site-packages)
Config._Directories.namespace_dir.parent / Config._Filenames.root_config,
# The AppDirs User Config Folder (e.g., %localappdata%/unshackle)
Config._Directories.user_configs / Config._Filenames.root_config,
)
def get_config_path() -> Optional[Path]:
"""
Get Path to Config from any one of the possible locations.
Returns None if no config file could be found.
"""
for path in POSSIBLE_CONFIG_PATHS:
if path.exists():
return path
return None
config_path = get_config_path()
if config_path:
config = Config.from_yaml(config_path)
else:
config = Config()
__all__ = ("config",)
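# An illustrative unshackle.yaml exercising keys that Config() reads above
# (all paths and values here are made-up examples):
#
#   tag: MyGroup
#   downloader:
#     default: requests
#     EXAMPLE: aria2c
#   directories:
#     downloads: ~/Videos/unshackle
#   set_terminal_bg: false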

351
unshackle/core/console.py Normal file
View File

@@ -0,0 +1,351 @@
import logging
from datetime import datetime
from types import ModuleType
from typing import IO, Callable, Iterable, List, Literal, Mapping, Optional, Union
from rich._log_render import FormatTimeCallable, LogRender
from rich.console import Console, ConsoleRenderable, HighlighterType, RenderableType
from rich.emoji import EmojiVariant
from rich.highlighter import Highlighter, ReprHighlighter
from rich.live import Live
from rich.logging import RichHandler
from rich.padding import Padding, PaddingDimensions
from rich.status import Status
from rich.style import StyleType
from rich.table import Table
from rich.text import Text, TextType
from rich.theme import Theme
from unshackle.core.config import config
class ComfyLogRenderer(LogRender):
def __call__(
self,
console: "Console",
renderables: Iterable["ConsoleRenderable"],
log_time: Optional[datetime] = None,
time_format: Optional[Union[str, FormatTimeCallable]] = None,
level: TextType = "",
path: Optional[str] = None,
line_no: Optional[int] = None,
link_path: Optional[str] = None,
) -> "Table":
from rich.containers import Renderables
output = Table.grid(padding=(0, 5), pad_edge=True)
output.expand = True
if self.show_time:
output.add_column(style="log.time")
if self.show_level:
output.add_column(style="log.level", width=self.level_width)
output.add_column(ratio=1, style="log.message", overflow="fold")
if self.show_path and path:
output.add_column(style="log.path")
row: List["RenderableType"] = []
if self.show_time:
log_time = log_time or console.get_datetime()
time_format = time_format or self.time_format
if callable(time_format):
log_time_display = time_format(log_time)
else:
log_time_display = Text(log_time.strftime(time_format))
if log_time_display == self._last_time and self.omit_repeated_times:
row.append(Text(" " * len(log_time_display)))
else:
row.append(log_time_display)
self._last_time = log_time_display
if self.show_level:
row.append(level)
row.append(Renderables(renderables))
if self.show_path and path:
path_text = Text()
path_text.append(path, style=f"link file://{link_path}" if link_path else "")
if line_no:
path_text.append(":")
path_text.append(
f"{line_no}",
style=f"link file://{link_path}#{line_no}" if link_path else "",
)
row.append(path_text)
output.add_row(*row)
return output
class ComfyRichHandler(RichHandler):
def __init__(
self,
level: Union[int, str] = logging.NOTSET,
console: Optional[Console] = None,
*,
show_time: bool = True,
omit_repeated_times: bool = True,
show_level: bool = True,
show_path: bool = True,
enable_link_path: bool = True,
highlighter: Optional[Highlighter] = None,
markup: bool = False,
rich_tracebacks: bool = False,
tracebacks_width: Optional[int] = None,
tracebacks_extra_lines: int = 3,
tracebacks_theme: Optional[str] = None,
tracebacks_word_wrap: bool = True,
tracebacks_show_locals: bool = False,
tracebacks_suppress: Iterable[Union[str, ModuleType]] = (),
locals_max_length: int = 10,
locals_max_string: int = 80,
log_time_format: Union[str, FormatTimeCallable] = "[%x %X]",
keywords: Optional[List[str]] = None,
log_renderer: Optional[LogRender] = None,
) -> None:
super().__init__(
level=level,
console=console,
show_time=show_time,
omit_repeated_times=omit_repeated_times,
show_level=show_level,
show_path=show_path,
enable_link_path=enable_link_path,
highlighter=highlighter,
markup=markup,
rich_tracebacks=rich_tracebacks,
tracebacks_width=tracebacks_width,
tracebacks_extra_lines=tracebacks_extra_lines,
tracebacks_theme=tracebacks_theme,
tracebacks_word_wrap=tracebacks_word_wrap,
tracebacks_show_locals=tracebacks_show_locals,
tracebacks_suppress=tracebacks_suppress,
locals_max_length=locals_max_length,
locals_max_string=locals_max_string,
log_time_format=log_time_format,
keywords=keywords,
)
if log_renderer:
self._log_render = log_renderer
class ComfyConsole(Console):
"""A comfy high level console interface.
Args:
color_system (str, optional): The color system supported by your terminal,
either ``"standard"``, ``"256"`` or ``"truecolor"``. Leave as ``"auto"`` to autodetect.
force_terminal (Optional[bool], optional): Enable/disable terminal control codes, or None to auto-detect
terminal. Defaults to None.
force_jupyter (Optional[bool], optional): Enable/disable Jupyter rendering, or None to auto-detect Jupyter.
Defaults to None.
force_interactive (Optional[bool], optional): Enable/disable interactive mode, or None to auto-detect.
Defaults to None.
soft_wrap (Optional[bool], optional): Set soft wrap default on print method. Defaults to False.
theme (Theme, optional): An optional style theme object, or ``None`` for default theme.
stderr (bool, optional): Use stderr rather than stdout if ``file`` is not specified. Defaults to False.
file (IO, optional): A file object where the console should write to. Defaults to stdout.
quiet (bool, Optional): Boolean to suppress all output. Defaults to False.
width (int, optional): The width of the terminal. Leave as default to auto-detect width.
height (int, optional): The height of the terminal. Leave as default to auto-detect height.
style (StyleType, optional): Style to apply to all output, or None for no style. Defaults to None.
no_color (Optional[bool], optional): Enabled no color mode, or None to auto-detect. Defaults to None.
tab_size (int, optional): Number of spaces used to replace a tab character. Defaults to 8.
record (bool, optional): Boolean to enable recording of terminal output,
required to call :meth:`export_html`, :meth:`export_svg`, and :meth:`export_text`. Defaults to False.
markup (bool, optional): Boolean to enable :ref:`console_markup`. Defaults to True.
emoji (bool, optional): Enable emoji code. Defaults to True.
emoji_variant (str, optional): Optional emoji variant, either "text" or "emoji". Defaults to None.
highlight (bool, optional): Enable automatic highlighting. Defaults to True.
log_time (bool, optional): Boolean to enable logging of time by :meth:`log` methods. Defaults to True.
log_path (bool, optional): Boolean to enable the logging of the caller by :meth:`log`. Defaults to True.
log_time_format (Union[str, TimeFormatterCallable], optional): If ``log_time`` is enabled, either string for
strftime or callable that formats the time. Defaults to "[%X] ".
highlighter (HighlighterType, optional): Default highlighter.
legacy_windows (bool, optional): Enable legacy Windows mode, or ``None`` to auto-detect. Defaults to ``None``.
safe_box (bool, optional): Restrict box options that don't render on legacy Windows.
get_datetime (Callable[[], datetime], optional): Callable that gets the current time as a datetime.datetime
object (used by Console.log), or None for datetime.now.
get_time (Callable[[], time], optional): Callable that gets the current time in seconds, default uses
time.monotonic.
"""
def __init__(
self,
*,
color_system: Optional[Literal["auto", "standard", "256", "truecolor", "windows"]] = "auto",
force_terminal: Optional[bool] = None,
force_jupyter: Optional[bool] = None,
force_interactive: Optional[bool] = None,
soft_wrap: bool = False,
theme: Optional[Theme] = None,
stderr: bool = False,
file: Optional[IO[str]] = None,
quiet: bool = False,
width: Optional[int] = None,
height: Optional[int] = None,
style: Optional[StyleType] = None,
no_color: Optional[bool] = None,
tab_size: int = 8,
record: bool = False,
markup: bool = True,
emoji: bool = True,
emoji_variant: Optional[EmojiVariant] = None,
highlight: bool = True,
log_time: bool = True,
log_path: bool = True,
log_time_format: Union[str, FormatTimeCallable] = "[%X]",
highlighter: Optional["HighlighterType"] = ReprHighlighter(),
legacy_windows: Optional[bool] = None,
safe_box: bool = True,
get_datetime: Optional[Callable[[], datetime]] = None,
get_time: Optional[Callable[[], float]] = None,
_environ: Optional[Mapping[str, str]] = None,
log_renderer: Optional[LogRender] = None,
):
super().__init__(
color_system=color_system,
force_terminal=force_terminal,
force_jupyter=force_jupyter,
force_interactive=force_interactive,
soft_wrap=soft_wrap,
theme=theme,
stderr=stderr,
file=file,
quiet=quiet,
width=width,
height=height,
style=style,
no_color=no_color,
tab_size=tab_size,
record=record,
markup=markup,
emoji=emoji,
emoji_variant=emoji_variant,
highlight=highlight,
log_time=log_time,
log_path=log_path,
log_time_format=log_time_format,
highlighter=highlighter,
legacy_windows=legacy_windows,
safe_box=safe_box,
get_datetime=get_datetime,
get_time=get_time,
_environ=_environ,
)
if log_renderer:
self._log_render = log_renderer
def status(
self,
status: RenderableType,
*,
spinner: str = "dots",
spinner_style: str = "status.spinner",
speed: float = 1.0,
refresh_per_second: float = 12.5,
pad: PaddingDimensions = (0, 5),
) -> Union[Live, Status]:
"""Display a comfy status and spinner.
Args:
status (RenderableType): A status renderable (str or Text typically).
spinner (str, optional): Name of spinner animation (see python -m rich.spinner). Defaults to "dots".
spinner_style (StyleType, optional): Style of spinner. Defaults to "status.spinner".
speed (float, optional): Speed factor for spinner animation. Defaults to 1.0.
refresh_per_second (float, optional): Number of refreshes per second. Defaults to 12.5.
pad (Union[int, Tuple[int]]): Padding for top, right, bottom, and left borders.
May be specified with 1, 2, or 4 integers (CSS style).
Returns:
Status: A Status object that may be used as a context manager.
"""
status_renderable = super().status(
status=status,
spinner=spinner,
spinner_style=spinner_style,
speed=speed,
refresh_per_second=refresh_per_second,
)
if pad:
top, right, bottom, left = Padding.unpack(pad)
renderable_width = len(status_renderable.status)
spinner_width = len(status_renderable.renderable.text)
status_width = spinner_width + renderable_width
available_width = self.width - status_width
if available_width > right:
# fill up the available width with padding to apply bg color
right = available_width - right
padding = Padding(status_renderable, (top, right, bottom, left))
return Live(padding, console=self, transient=True)
return status_renderable
catppuccin_mocha = {
# Colors based on "CatppuccinMocha" from Gogh themes
"bg": "rgb(30,30,46)",
"text": "rgb(205,214,244)",
"text2": "rgb(162,169,193)", # slightly darker
"black": "rgb(69,71,90)",
"bright_black": "rgb(88,91,112)",
"red": "rgb(243,139,168)",
"green": "rgb(166,227,161)",
"yellow": "rgb(249,226,175)",
"blue": "rgb(137,180,250)",
"pink": "rgb(245,194,231)",
"cyan": "rgb(148,226,213)",
"gray": "rgb(166,173,200)",
"bright_gray": "rgb(186,194,222)",
"dark_gray": "rgb(54,54,84)",
}
primary_scheme = catppuccin_mocha
primary_scheme["none"] = primary_scheme["text"]
primary_scheme["grey23"] = primary_scheme["black"]
primary_scheme["magenta"] = primary_scheme["pink"]
primary_scheme["bright_red"] = primary_scheme["red"]
primary_scheme["bright_green"] = primary_scheme["green"]
primary_scheme["bright_yellow"] = primary_scheme["yellow"]
primary_scheme["bright_blue"] = primary_scheme["blue"]
primary_scheme["bright_magenta"] = primary_scheme["pink"]
primary_scheme["bright_cyan"] = primary_scheme["cyan"]
if config.set_terminal_bg:
primary_scheme["none"] += f" on {primary_scheme['bg']}"
custom_colors = {"ascii.art": primary_scheme["pink"]}
if config.set_terminal_bg:
custom_colors["ascii.art"] += f" on {primary_scheme['bg']}"
console = ComfyConsole(
log_time=False,
log_path=False,
width=80,
theme=Theme(
{
"bar.back": primary_scheme["dark_gray"],
"bar.complete": primary_scheme["pink"],
"bar.finished": primary_scheme["green"],
"bar.pulse": primary_scheme["bright_black"],
"black": primary_scheme["black"],
"inspect.async_def": f"italic {primary_scheme['cyan']}",
"progress.data.speed": "dark_orange",
"repr.number": f"bold not italic {primary_scheme['cyan']}",
"repr.number_complex": f"bold not italic {primary_scheme['cyan']}",
"rule.line": primary_scheme["dark_gray"],
"rule.text": primary_scheme["pink"],
"tree.line": primary_scheme["dark_gray"],
"status.spinner": primary_scheme["pink"],
"progress.spinner": primary_scheme["pink"],
**primary_scheme,
**custom_colors,
}
),
log_renderer=ComfyLogRenderer(show_time=False, show_path=False),
)
__all__ = ("ComfyLogRenderer", "ComfyRichHandler", "ComfyConsole", "console")
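# A minimal sketch of routing stdlib logging through the shared console
# (illustrative; the handler arguments mirror the ComfyRichHandler signature above):
#
#   import logging
#   logging.basicConfig(
#       level=logging.INFO,
#       format="%(message)s",
#       handlers=[ComfyRichHandler(console=console, log_renderer=ComfyLogRenderer(show_time=False))],
#   )
#   logging.getLogger("demo").info("hello")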

View File

@@ -0,0 +1,25 @@
from threading import Event
from typing import TypeVar, Union
DOWNLOAD_CANCELLED = Event()
DOWNLOAD_LICENCE_ONLY = Event()
DRM_SORT_MAP = ["ClearKey", "Widevine"]
LANGUAGE_MAX_DISTANCE = 5  # the maximum distance at which tags count as the "same" language, e.g., en, en-US, en-AU
VIDEO_CODEC_MAP = {"AVC": "H.264", "HEVC": "H.265"}
DYNAMIC_RANGE_MAP = {"HDR10": "HDR", "HDR10+": "HDR", "Dolby Vision": "DV"}
AUDIO_CODEC_MAP = {"E-AC-3": "DDP", "AC-3": "DD"}
context_settings = dict(
help_option_names=["-?", "-h", "--help"], # default only has --help
max_content_width=116, # max PEP8 line-width, -4 to adjust for initial indent
)
# For use in signatures of functions which take one specific type of track at a time
# (it can't be a list that contains e.g. both Video and Audio objects)
TrackT = TypeVar("TrackT", bound="Track") # noqa: F821
# For general use in lists that can contain mixed types of tracks.
# list[Track] won't work because list is invariant.
# TODO: Add Chapter?
AnyTrack = Union["Video", "Audio", "Subtitle"] # noqa: F821

View File

@@ -0,0 +1,87 @@
from __future__ import annotations
import base64
import hashlib
import re
from pathlib import Path
from typing import Optional, Union
class Credential:
"""Username (or Email) and Password Credential."""
def __init__(self, username: str, password: str, extra: Optional[str] = None):
self.username = username
self.password = password
self.extra = extra
self.sha1 = hashlib.sha1(self.dumps().encode()).hexdigest()
def __bool__(self) -> bool:
return bool(self.username) and bool(self.password)
def __str__(self) -> str:
return self.dumps()
def __repr__(self) -> str:
return "{name}({items})".format(
name=self.__class__.__name__, items=", ".join([f"{k}={repr(v)}" for k, v in self.__dict__.items()])
)
def dumps(self) -> str:
"""Return credential data as a string."""
return f"{self.username}:{self.password}" + (f":{self.extra}" if self.extra else "")
def dump(self, path: Union[Path, str]) -> int:
"""Write credential data to a file."""
if isinstance(path, str):
path = Path(path)
return path.write_text(self.dumps(), encoding="utf8")
def as_base64(self, with_extra: bool = False, encode_password: bool = False, encode_extra: bool = False) -> str:
"""
Dump Credential as a Base64-encoded string in Basic Authorization style.
encode_password and encode_extra will also Base64-encode the password and extra respectively.
"""
value = f"{self.username}:"
if encode_password:
value += base64.b64encode(self.password.encode()).decode()
else:
value += self.password
if with_extra and self.extra:
if encode_extra:
value += f":{base64.b64encode(self.extra.encode()).decode()}"
else:
value += f":{self.extra}"
return base64.b64encode(value.encode()).decode()
@classmethod
def loads(cls, text: str) -> Credential:
"""
Load credential from a text string.
Format: {username}:{password}
Rules:
Only one Credential must be in this text contents.
All whitespace before and after all text will be removed.
Any whitespace between text will be kept and used.
The credential can be spanned across one or multiple lines as long as it
abides with all the above rules and the format.
Example that follows the format and rules:
`\tJohnd\noe@gm\n\rail.com\n:Pass1\n23\n\r \t \t`
>>>Credential(username='Johndoe@gmail.com', password='Pass123')
"""
text = "".join([x.strip() for x in text.splitlines(keepends=False)]).strip()
credential = re.fullmatch(r"^([^:]+?):([^:]+?)(?::(.+))?$", text)
if credential:
return cls(*credential.groups())
raise ValueError("No credentials found in text string. Expecting the format `username:password`")
@classmethod
def load(cls, path: Path) -> Credential:
"""
Load Credential from a file path.
        See Credential.loads() for the rules and the format expected of the
        file's contents.
"""
return cls.loads(path.read_text("utf8"))
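# An illustrative round-trip of the helpers above (values are made up):
#
#   cred = Credential.loads("johndoe@example.com:Pass123")
#   cred.dumps()      # -> "johndoe@example.com:Pass123"
#   cred.as_base64()  # -> base64 of "johndoe@example.com:Pass123", Basic-Authorization style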

View File

@@ -0,0 +1,6 @@
from .aria2c import aria2c
from .curl_impersonate import curl_impersonate
from .n_m3u8dl_re import n_m3u8dl_re
from .requests import requests
__all__ = ("aria2c", "curl_impersonate", "requests", "n_m3u8dl_re")

View File

@@ -0,0 +1,331 @@
import os
import subprocess
import textwrap
import time
from functools import partial
from http.cookiejar import CookieJar
from pathlib import Path
from typing import Any, Callable, Generator, MutableMapping, Optional, Union
from urllib.parse import urlparse
import requests
from Crypto.Random import get_random_bytes
from requests import Session
from requests.cookies import cookiejar_from_dict, get_cookie_header
from rich import filesize
from rich.text import Text
from unshackle.core import binaries
from unshackle.core.config import config
from unshackle.core.console import console
from unshackle.core.constants import DOWNLOAD_CANCELLED
from unshackle.core.utilities import get_extension, get_free_port
def rpc(caller: Callable, secret: str, method: str, params: Optional[list[Any]] = None) -> Any:
"""Make a call to Aria2's JSON-RPC API."""
try:
rpc_res = caller(
json={
"jsonrpc": "2.0",
"id": get_random_bytes(16).hex(),
"method": method,
"params": [f"token:{secret}", *(params or [])],
}
).json()
if rpc_res.get("code"):
# wrap to console width - padding - '[Aria2c]: '
error_pretty = "\n ".join(
textwrap.wrap(
f"RPC Error: {rpc_res['message']} ({rpc_res['code']})".strip(),
width=console.width - 20,
initial_indent="",
)
)
console.log(Text.from_ansi("\n[Aria2c]: " + error_pretty))
return rpc_res["result"]
except requests.exceptions.ConnectionError:
        # absorb; the aria2c process likely ended while the RPC call was in flight
return
def download(
urls: Union[str, list[str], dict[str, Any], list[dict[str, Any]]],
output_dir: Path,
filename: str,
headers: Optional[MutableMapping[str, Union[str, bytes]]] = None,
cookies: Optional[Union[MutableMapping[str, str], CookieJar]] = None,
proxy: Optional[str] = None,
max_workers: Optional[int] = None,
) -> Generator[dict[str, Any], None, None]:
if not urls:
raise ValueError("urls must be provided and not empty")
elif not isinstance(urls, (str, dict, list)):
raise TypeError(f"Expected urls to be {str} or {dict} or a list of one of them, not {type(urls)}")
if not output_dir:
raise ValueError("output_dir must be provided")
elif not isinstance(output_dir, Path):
raise TypeError(f"Expected output_dir to be {Path}, not {type(output_dir)}")
if not filename:
raise ValueError("filename must be provided")
elif not isinstance(filename, str):
raise TypeError(f"Expected filename to be {str}, not {type(filename)}")
if not isinstance(headers, (MutableMapping, type(None))):
raise TypeError(f"Expected headers to be {MutableMapping}, not {type(headers)}")
if not isinstance(cookies, (MutableMapping, CookieJar, type(None))):
raise TypeError(f"Expected cookies to be {MutableMapping} or {CookieJar}, not {type(cookies)}")
if not isinstance(proxy, (str, type(None))):
raise TypeError(f"Expected proxy to be {str}, not {type(proxy)}")
if not max_workers:
max_workers = min(32, (os.cpu_count() or 1) + 4)
elif not isinstance(max_workers, int):
raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}")
if not isinstance(urls, list):
urls = [urls]
if not binaries.Aria2:
raise EnvironmentError("Aria2c executable not found...")
if proxy and not proxy.lower().startswith("http://"):
raise ValueError("Only HTTP proxies are supported by aria2(c)")
if cookies and not isinstance(cookies, CookieJar):
cookies = cookiejar_from_dict(cookies)
url_files = []
for i, url in enumerate(urls):
if isinstance(url, str):
url_data = {"url": url}
else:
url_data: dict[str, Any] = url
url_filename = filename.format(i=i, ext=get_extension(url_data["url"]))
url_text = url_data["url"]
url_text += f"\n\tdir={output_dir}"
url_text += f"\n\tout={url_filename}"
if cookies:
mock_request = requests.Request(url=url_data["url"])
cookie_header = get_cookie_header(cookies, mock_request)
if cookie_header:
url_text += f"\n\theader=Cookie: {cookie_header}"
for key, value in url_data.items():
if key == "url":
continue
if key == "headers":
for header_name, header_value in value.items():
url_text += f"\n\theader={header_name}: {header_value}"
else:
url_text += f"\n\t{key}={value}"
url_files.append(url_text)
url_file = "\n".join(url_files)
rpc_port = get_free_port()
rpc_secret = get_random_bytes(16).hex()
rpc_uri = f"http://127.0.0.1:{rpc_port}/jsonrpc"
rpc_session = Session()
max_concurrent_downloads = int(config.aria2c.get("max_concurrent_downloads", max_workers))
max_connection_per_server = int(config.aria2c.get("max_connection_per_server", 1))
split = int(config.aria2c.get("split", 5))
file_allocation = config.aria2c.get("file_allocation", "prealloc")
if len(urls) > 1:
split = 1
file_allocation = "none"
arguments = [
# [Basic Options]
"--input-file",
"-",
"--all-proxy",
proxy or "",
"--continue=true",
# [Connection Options]
f"--max-concurrent-downloads={max_concurrent_downloads}",
f"--max-connection-per-server={max_connection_per_server}",
        f"--split={split}",  # each split uses its own connection
"--max-file-not-found=5", # counted towards --max-tries
"--max-tries=5",
"--retry-wait=2",
# [Advanced Options]
"--allow-overwrite=true",
"--auto-file-renaming=false",
"--console-log-level=warn",
"--download-result=default",
f"--file-allocation={file_allocation}",
"--summary-interval=0",
# [RPC Options]
"--enable-rpc=true",
f"--rpc-listen-port={rpc_port}",
f"--rpc-secret={rpc_secret}",
]
for header, value in (headers or {}).items():
if header.lower() == "cookie":
raise ValueError("You cannot set Cookies as a header manually, please use the `cookies` param.")
if header.lower() == "accept-encoding":
            # if we allow an encoding, the server may respond with compressed data,
            # and this code is not set up to decompress it
continue
if header.lower() == "referer":
arguments.extend(["--referer", value])
continue
if header.lower() == "user-agent":
arguments.extend(["--user-agent", value])
continue
arguments.extend(["--header", f"{header}: {value}"])
yield dict(total=len(urls))
try:
p = subprocess.Popen([binaries.Aria2, *arguments], stdin=subprocess.PIPE, stdout=subprocess.DEVNULL)
p.stdin.write(url_file.encode())
p.stdin.close()
while p.poll() is None:
global_stats: dict[str, Any] = (
rpc(caller=partial(rpc_session.post, url=rpc_uri), secret=rpc_secret, method="aria2.getGlobalStat")
or {}
)
number_stopped = int(global_stats.get("numStoppedTotal", 0))
download_speed = int(global_stats.get("downloadSpeed", -1))
if number_stopped:
yield dict(completed=number_stopped)
if download_speed != -1:
yield dict(downloaded=f"{filesize.decimal(download_speed)}/s")
stopped_downloads: list[dict[str, Any]] = (
rpc(
caller=partial(rpc_session.post, url=rpc_uri),
secret=rpc_secret,
method="aria2.tellStopped",
params=[0, 999999],
)
or []
)
for dl in stopped_downloads:
if dl["status"] == "error":
used_uri = next(
uri["uri"]
for file in dl["files"]
if file["selected"] == "true"
for uri in file["uris"]
if uri["status"] == "used"
)
error = f"Download Error (#{dl['gid']}): {dl['errorMessage']} ({dl['errorCode']}), {used_uri}"
error_pretty = "\n ".join(
textwrap.wrap(error, width=console.width - 20, initial_indent="")
)
console.log(Text.from_ansi("\n[Aria2c]: " + error_pretty))
raise ValueError(error)
if number_stopped == len(urls):
rpc(caller=partial(rpc_session.post, url=rpc_uri), secret=rpc_secret, method="aria2.shutdown")
break
time.sleep(1)
p.wait()
if p.returncode != 0:
raise subprocess.CalledProcessError(p.returncode, arguments)
except ConnectionResetError:
# interrupted while passing URI to download
raise KeyboardInterrupt()
except subprocess.CalledProcessError as e:
if e.returncode in (7, 0xC000013A):
# 7 is when Aria2(c) handled the CTRL+C
# 0xC000013A is when it never got the chance to
raise KeyboardInterrupt()
raise
except KeyboardInterrupt:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[yellow]CANCELLED")
raise
except Exception:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[red]FAILED")
raise
finally:
rpc(caller=partial(rpc_session.post, url=rpc_uri), secret=rpc_secret, method="aria2.shutdown")
def aria2c(
urls: Union[str, list[str], dict[str, Any], list[dict[str, Any]]],
output_dir: Path,
filename: str,
headers: Optional[MutableMapping[str, Union[str, bytes]]] = None,
cookies: Optional[Union[MutableMapping[str, str], CookieJar]] = None,
proxy: Optional[str] = None,
max_workers: Optional[int] = None,
) -> Generator[dict[str, Any], None, None]:
"""
Download files using Aria2(c).
https://aria2.github.io
Yields the following download status updates while chunks are downloading:
- {total: 100} (100% download total)
- {completed: 1} (1% download progress out of 100%)
- {downloaded: "10.1 MB/s"} (currently downloading at a rate of 10.1 MB/s)
The data is in the same format accepted by rich's progress.update() function.
Parameters:
urls: Web URL(s) to file(s) to download. You can use a dictionary with the key
"url" for the URI, and other keys for extra arguments to use per-URL.
output_dir: The folder to save the file into. If the save path's directory does
not exist then it will be made automatically.
filename: The filename or filename template to use for each file. The variables
you can use are `i` for the URL index and `ext` for the URL extension.
headers: A mapping of HTTP Header Key/Values to use for all downloads.
cookies: A mapping of Cookie Key/Values or a Cookie Jar to use for all downloads.
proxy: An optional proxy URI to route connections through for all downloads.
max_workers: The maximum amount of threads to use for downloads. Defaults to
            min(32, cpu_count + 4). Used for the --max-concurrent-downloads option.
"""
if proxy and not proxy.lower().startswith("http://"):
# Only HTTP proxies are supported by aria2(c)
proxy = urlparse(proxy)
port = get_free_port()
username, password = get_random_bytes(8).hex(), get_random_bytes(8).hex()
local_proxy = f"http://{username}:{password}@localhost:{port}"
scheme = {"https": "http+ssl", "socks5h": "socks"}.get(proxy.scheme, proxy.scheme)
remote_server = f"{scheme}://{proxy.hostname}"
if proxy.port:
remote_server += f":{proxy.port}"
if proxy.username or proxy.password:
remote_server += "#"
if proxy.username:
remote_server += proxy.username
if proxy.password:
remote_server += f":{proxy.password}"
p = subprocess.Popen(
["pproxy", "-l", f"http://:{port}#{username}:{password}", "-r", remote_server],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
try:
yield from download(urls, output_dir, filename, headers, cookies, local_proxy, max_workers)
finally:
p.kill()
p.wait()
return
yield from download(urls, output_dir, filename, headers, cookies, proxy, max_workers)
__all__ = ("aria2c",)
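# A minimal consumer sketch (illustrative; the URL and paths are placeholders).
# Each yielded status dict matches rich's progress.update() keyword arguments:
#
#   from rich.progress import Progress
#
#   with Progress() as progress:
#       task = progress.add_task("Downloading")
#       for update in aria2c("https://example.com/file.bin", Path("downloads"), "file{ext}"):
#           progress.update(task, **update)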

View File

@@ -0,0 +1,259 @@
import math
import time
from concurrent import futures
from concurrent.futures.thread import ThreadPoolExecutor
from http.cookiejar import CookieJar
from pathlib import Path
from typing import Any, Generator, MutableMapping, Optional, Union
from curl_cffi.requests import Session
from rich import filesize
from unshackle.core.config import config
from unshackle.core.constants import DOWNLOAD_CANCELLED
from unshackle.core.utilities import get_extension
MAX_ATTEMPTS = 5
RETRY_WAIT = 2
CHUNK_SIZE = 1024
PROGRESS_WINDOW = 5
BROWSER = config.curl_impersonate.get("browser", "chrome124")
def download(url: str, save_path: Path, session: Session, **kwargs: Any) -> Generator[dict[str, Any], None, None]:
"""
    Download a file using Curl Impersonate.
https://github.com/lwthiker/curl-impersonate
Yields the following download status updates while chunks are downloading:
- {total: 123} (there are 123 chunks to download)
- {total: None} (there are an unknown number of chunks to download)
- {advance: 1} (one chunk was downloaded)
- {downloaded: "10.1 MB/s"} (currently downloading at a rate of 10.1 MB/s)
- {file_downloaded: Path(...), written: 1024} (download finished, has the save path and size)
The data is in the same format accepted by rich's progress.update() function. The
`downloaded` key is custom and is not natively accepted by all rich progress bars.
Parameters:
url: Web URL of a file to download.
save_path: The path to save the file to. If the save path's directory does not
exist then it will be made automatically.
session: The Requests or Curl-Impersonate Session to make HTTP requests with.
Useful to set Header, Cookie, and Proxy data. Connections are saved and
re-used with the session so long as the server keeps the connection alive.
kwargs: Any extra keyword arguments to pass to the session.get() call. Use this
for one-time request changes like a header, cookie, or proxy. For example,
to request Byte-ranges use e.g., `headers={"Range": "bytes=0-128"}`.
"""
save_dir = save_path.parent
control_file = save_path.with_name(f"{save_path.name}.!dev")
save_dir.mkdir(parents=True, exist_ok=True)
if control_file.exists():
# consider the file corrupt if the control file exists
save_path.unlink(missing_ok=True)
control_file.unlink()
    elif save_path.exists():
        # if it exists with no control file, the download completed and is safe to keep
        yield dict(file_downloaded=save_path, written=save_path.stat().st_size)
        return  # the file is already complete, don't re-download it
    # TODO: Design a control file format so we know how much of the file is missing
control_file.write_bytes(b"")
attempts = 1
try:
while True:
written = 0
download_sizes = []
last_speed_refresh = time.time()
try:
stream = session.get(url, stream=True, **kwargs)
stream.raise_for_status()
try:
content_length = int(stream.headers.get("Content-Length", "0"))
except ValueError:
content_length = 0
if content_length > 0:
yield dict(total=math.ceil(content_length / CHUNK_SIZE))
else:
# we have no data to calculate total chunks
yield dict(total=None) # indeterminate mode
with open(save_path, "wb") as f:
for chunk in stream.iter_content(chunk_size=CHUNK_SIZE):
download_size = len(chunk)
f.write(chunk)
written += download_size
yield dict(advance=1)
now = time.time()
time_since = now - last_speed_refresh
download_sizes.append(download_size)
if time_since > PROGRESS_WINDOW or download_size < CHUNK_SIZE:
data_size = sum(download_sizes)
download_speed = math.ceil(data_size / (time_since or 1))
yield dict(downloaded=f"{filesize.decimal(download_speed)}/s")
last_speed_refresh = now
download_sizes.clear()
if content_length and written < content_length:
raise IOError(f"Failed to read {content_length} bytes from the track URI.")
yield dict(file_downloaded=save_path, written=written)
break
except Exception as e:
save_path.unlink(missing_ok=True)
if DOWNLOAD_CANCELLED.is_set() or attempts == MAX_ATTEMPTS:
raise e
time.sleep(RETRY_WAIT)
attempts += 1
finally:
control_file.unlink()
def curl_impersonate(
urls: Union[str, list[str], dict[str, Any], list[dict[str, Any]]],
output_dir: Path,
filename: str,
headers: Optional[MutableMapping[str, Union[str, bytes]]] = None,
cookies: Optional[Union[MutableMapping[str, str], CookieJar]] = None,
proxy: Optional[str] = None,
max_workers: Optional[int] = None,
) -> Generator[dict[str, Any], None, None]:
"""
Download files using Curl Impersonate.
https://github.com/lwthiker/curl-impersonate
Yields the following download status updates while chunks are downloading:
- {total: 123} (there are 123 chunks to download)
- {total: None} (there are an unknown number of chunks to download)
- {advance: 1} (one chunk was downloaded)
- {downloaded: "10.1 MB/s"} (currently downloading at a rate of 10.1 MB/s)
- {file_downloaded: Path(...), written: 1024} (download finished, has the save path and size)
The data is in the same format accepted by rich's progress.update() function.
    However, the `downloaded`, `file_downloaded` and `written` keys are custom and not
natively accepted by rich progress bars.
Parameters:
urls: Web URL(s) to file(s) to download. You can use a dictionary with the key
"url" for the URI, and other keys for extra arguments to use per-URL.
output_dir: The folder to save the file into. If the save path's directory does
not exist then it will be made automatically.
filename: The filename or filename template to use for each file. The variables
you can use are `i` for the URL index and `ext` for the URL extension.
headers: A mapping of HTTP Header Key/Values to use for all downloads.
cookies: A mapping of Cookie Key/Values or a Cookie Jar to use for all downloads.
proxy: An optional proxy URI to route connections through for all downloads.
max_workers: The maximum amount of threads to use for downloads. Defaults to
min(32,(cpu_count+4)).
"""
if not urls:
raise ValueError("urls must be provided and not empty")
elif not isinstance(urls, (str, dict, list)):
raise TypeError(f"Expected urls to be {str} or {dict} or a list of one of them, not {type(urls)}")
if not output_dir:
raise ValueError("output_dir must be provided")
elif not isinstance(output_dir, Path):
raise TypeError(f"Expected output_dir to be {Path}, not {type(output_dir)}")
if not filename:
raise ValueError("filename must be provided")
elif not isinstance(filename, str):
raise TypeError(f"Expected filename to be {str}, not {type(filename)}")
if not isinstance(headers, (MutableMapping, type(None))):
raise TypeError(f"Expected headers to be {MutableMapping}, not {type(headers)}")
if not isinstance(cookies, (MutableMapping, CookieJar, type(None))):
raise TypeError(f"Expected cookies to be {MutableMapping} or {CookieJar}, not {type(cookies)}")
if not isinstance(proxy, (str, type(None))):
raise TypeError(f"Expected proxy to be {str}, not {type(proxy)}")
if not isinstance(max_workers, (int, type(None))):
raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}")
if not isinstance(urls, list):
urls = [urls]
urls = [
dict(save_path=save_path, **url) if isinstance(url, dict) else dict(url=url, save_path=save_path)
for i, url in enumerate(urls)
for save_path in [
output_dir / filename.format(i=i, ext=get_extension(url["url"] if isinstance(url, dict) else url))
]
]
session = Session(impersonate=BROWSER)
if headers:
headers = {k: v for k, v in headers.items() if k.lower() != "accept-encoding"}
session.headers.update(headers)
if cookies:
session.cookies.update(cookies)
if proxy:
session.proxies.update({"all": proxy})
yield dict(total=len(urls))
download_sizes = []
last_speed_refresh = time.time()
with ThreadPoolExecutor(max_workers=max_workers) as pool:
for i, future in enumerate(
futures.as_completed((pool.submit(download, session=session, **url) for url in urls))
):
file_path, download_size = None, None
try:
for status_update in future.result():
if status_update.get("file_downloaded") and status_update.get("written"):
file_path = status_update["file_downloaded"]
download_size = status_update["written"]
elif len(urls) == 1:
# these are per-chunk updates, only useful if it's one big file
yield status_update
except KeyboardInterrupt:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[yellow]CANCELLING")
pool.shutdown(wait=True, cancel_futures=True)
yield dict(downloaded="[yellow]CANCELLED")
# tell dl that it was cancelled
# the pool is already shut down, so exiting loop is fine
raise
except Exception:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[red]FAILING")
pool.shutdown(wait=True, cancel_futures=True)
yield dict(downloaded="[red]FAILED")
# tell dl that it failed
# the pool is already shut down, so exiting loop is fine
raise
else:
yield dict(file_downloaded=file_path)
yield dict(advance=1)
now = time.time()
time_since = now - last_speed_refresh
if download_size: # no size == skipped dl
download_sizes.append(download_size)
if download_sizes and (time_since > PROGRESS_WINDOW or i == len(urls)):
data_size = sum(download_sizes)
download_speed = math.ceil(data_size / (time_since or 1))
yield dict(downloaded=f"{filesize.decimal(download_speed)}/s")
last_speed_refresh = now
download_sizes.clear()
__all__ = ("curl_impersonate",)
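# Illustrative per-URL dictionaries (values are placeholders): any key other
# than "url" is forwarded to the underlying session.get() call for that file
# only, e.g. a one-off Range header:
#
#   urls = [
#       {"url": "https://example.com/a.mp4", "headers": {"Range": "bytes=0-1023"}},
#       "https://example.com/b.mp4",
#   ]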

View File

@@ -0,0 +1,299 @@
import logging
import os
import re
import subprocess
import warnings
from http.cookiejar import CookieJar
from itertools import chain
from pathlib import Path
from typing import Any, Generator, MutableMapping, Optional, Union
import requests
from requests.cookies import cookiejar_from_dict, get_cookie_header
from unshackle.core import binaries
from unshackle.core.config import config
from unshackle.core.console import console
from unshackle.core.constants import DOWNLOAD_CANCELLED
# Ignore FutureWarnings
warnings.simplefilter(action="ignore", category=FutureWarning)
AUDIO_CODEC_MAP = {"AAC": "mp4a", "AC3": "ac-3", "EC3": "ec-3"}
VIDEO_CODEC_MAP = {"AVC": "avc", "HEVC": "hvc", "DV": "dvh", "HLG": "hev"}
def track_selection(track: object) -> list[str]:
"""Return the N_m3u8DL-RE stream selection arguments for a track."""
if "dash" in track.data:
adaptation_set = track.data["dash"]["adaptation_set"]
representation = track.data["dash"]["representation"]
track_type = track.__class__.__name__
codec = track.codec.name
bitrate = track.bitrate // 1000
language = track.language
width = track.width if track_type == "Video" else None
height = track.height if track_type == "Video" else None
range = track.range.name if track_type == "Video" else None
elif "ism" in track.data:
stream_index = track.data["ism"]["stream_index"]
quality_level = track.data["ism"]["quality_level"]
track_type = track.__class__.__name__
codec = track.codec.name
bitrate = track.bitrate // 1000
language = track.language
width = track.width if track_type == "Video" else None
height = track.height if track_type == "Video" else None
range = track.range.name if track_type == "Video" else None
adaptation_set = stream_index
representation = quality_level
else:
return []
if track_type == "Audio":
codecs = AUDIO_CODEC_MAP.get(codec)
langs = adaptation_set.findall("lang") + representation.findall("lang")
track_ids = list(
set(
v
for x in chain(adaptation_set, representation)
for v in (x.get("audioTrackId"), x.get("id"))
if v is not None
)
)
roles = adaptation_set.findall("Role") + representation.findall("Role")
role = ":role=main" if next((i for i in roles if i.get("value").lower() == "main"), None) else ""
bandwidth = f"bwMin={bitrate}:bwMax={bitrate + 5}"
if langs:
track_selection = ["-sa", f"lang={language}:codecs={codecs}:{bandwidth}{role}"]
elif len(track_ids) == 1:
track_selection = ["-sa", f"id={track_ids[0]}"]
else:
track_selection = ["-sa", f"for=best{role}"]
return track_selection
if track_type == "Video":
# adjust codec based on range
codec_adjustments = {("HEVC", "DV"): "DV", ("HEVC", "HLG"): "HLG"}
codec = codec_adjustments.get((codec, range), codec)
codecs = VIDEO_CODEC_MAP.get(codec)
bandwidth = f"bwMin={bitrate}:bwMax={bitrate + 5}"
if width and height:
resolution = f"{width}x{height}"
elif width:
resolution = f"{width}*"
else:
resolution = "for=best"
if resolution.startswith("for="):
track_selection = ["-sv", resolution]
track_selection.append(f"codecs={codecs}:{bandwidth}")
else:
track_selection = ["-sv", f"res={resolution}:codecs={codecs}:{bandwidth}"]
return track_selection
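# Illustrative outputs of track_selection() (numbers are placeholders):
#   Audio -> ["-sa", "lang=en:codecs=mp4a:bwMin=128:bwMax=133:role=main"]
#   Video -> ["-sv", "res=1920x1080:codecs=hvc:bwMin=4800:bwMax=4805"]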
def download(
urls: Union[str, dict[str, Any], list[str], list[dict[str, Any]]],
track: object,
output_dir: Path,
filename: str,
headers: Optional[MutableMapping[str, Union[str, bytes]]] = None,
cookies: Optional[Union[MutableMapping[str, str], CookieJar]] = None,
proxy: Optional[str] = None,
max_workers: Optional[int] = None,
content_keys: Optional[dict[str, Any]] = None,
) -> Generator[dict[str, Any], None, None]:
if not urls:
raise ValueError("urls must be provided and not empty")
elif not isinstance(urls, (str, dict, list)):
raise TypeError(f"Expected urls to be {str} or {dict} or a list of one of them, not {type(urls)}")
if not output_dir:
raise ValueError("output_dir must be provided")
elif not isinstance(output_dir, Path):
raise TypeError(f"Expected output_dir to be {Path}, not {type(output_dir)}")
if not filename:
raise ValueError("filename must be provided")
elif not isinstance(filename, str):
raise TypeError(f"Expected filename to be {str}, not {type(filename)}")
if not isinstance(headers, (MutableMapping, type(None))):
raise TypeError(f"Expected headers to be {MutableMapping}, not {type(headers)}")
if not isinstance(cookies, (MutableMapping, CookieJar, type(None))):
raise TypeError(f"Expected cookies to be {MutableMapping} or {CookieJar}, not {type(cookies)}")
if not isinstance(proxy, (str, type(None))):
raise TypeError(f"Expected proxy to be {str}, not {type(proxy)}")
if not max_workers:
max_workers = min(32, (os.cpu_count() or 1) + 4)
elif not isinstance(max_workers, int):
raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}")
if not isinstance(urls, list):
urls = [urls]
if not binaries.N_m3u8DL_RE:
raise EnvironmentError("N_m3u8DL-RE executable not found...")
if cookies and not isinstance(cookies, CookieJar):
cookies = cookiejar_from_dict(cookies)
track_type = track.__class__.__name__
thread_count = str(config.n_m3u8dl_re.get("thread_count", max_workers))
ad_keyword = config.n_m3u8dl_re.get("ad_keyword")
arguments = [
track.url,
"--save-dir",
output_dir,
"--tmp-dir",
output_dir,
"--thread-count",
thread_count,
"--no-log",
"--write-meta-json",
"false",
]
for header, value in (headers or {}).items():
if header.lower() in ("accept-encoding", "cookie"):
continue
arguments.extend(["--header", f"{header}: {value}"])
if cookies:
cookie_header = get_cookie_header(cookies, requests.Request(url=track.url))
if cookie_header:
arguments.extend(["--header", f"Cookie: {cookie_header}"])
if proxy:
arguments.extend(["--custom-proxy", proxy])
if content_keys:
for kid, key in content_keys.items():
keys = f"{kid.hex}:{key.lower()}"
arguments.extend(["--key", keys])
arguments.extend(["--use-shaka-packager"])
if ad_keyword:
arguments.extend(["--ad-keyword", ad_keyword])
if track.descriptor.name == "URL":
error = f"[N_m3u8DL-RE]: {track.descriptor} is currently not supported"
raise ValueError(error)
elif track.descriptor.name == "DASH":
arguments.extend(track_selection(track))
# TODO: improve this nonsense
percent_re = re.compile(r"(\d+\.\d+%)")
speed_re = re.compile(r"(?<!/)(\d+\.\d+MB)(?!.*\/)")
warn = re.compile(r"(WARN : Response.*)")
error = re.compile(r"(ERROR.*)")
size_patterns = [
re.compile(r"(\d+\.\d+MB/\d+\.\d+GB)"),
re.compile(r"(\d+\.\d+GB/\d+\.\d+GB)"),
re.compile(r"(\d+\.\d+MB/\d+\.\d+MB)"),
]
yield dict(total=100)
try:
with subprocess.Popen(
[binaries.N_m3u8DL_RE, *arguments], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
) as p:
for line in p.stdout:
output = line.strip()
if output:
percent = percent_re.search(output)
speed = speed_re.search(output)
size = next(
(pattern.search(output).group(1) for pattern in size_patterns if pattern.search(output)), ""
)
if speed:
yield dict(downloaded=f"{speed.group(1)}ps {size}")
if percent:
progress = int(percent.group(1).split(".")[0])
yield dict(completed=progress) if progress < 100 else dict(downloaded="Merging")
if warn.search(output):
console.log(f"{track_type} " + warn.search(output).group(1))
p.wait()
if p.returncode != 0:
if error.search(output):
raise ValueError(f"[N_m3u8DL-RE]: {error.search(output).group(1)}")
raise subprocess.CalledProcessError(p.returncode, arguments)
except ConnectionResetError:
# interrupted while passing URI to download
raise KeyboardInterrupt()
except KeyboardInterrupt:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[yellow]CANCELLED")
raise
except Exception:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[red]FAILED")
raise
def n_m3u8dl_re(
urls: Union[str, list[str], dict[str, Any], list[dict[str, Any]]],
track: object,
output_dir: Path,
filename: str,
headers: Optional[MutableMapping[str, Union[str, bytes]]] = None,
cookies: Optional[Union[MutableMapping[str, str], CookieJar]] = None,
proxy: Optional[str] = None,
max_workers: Optional[int] = None,
content_keys: Optional[dict[str, Any]] = None,
) -> Generator[dict[str, Any], None, None]:
"""
Download files using N_m3u8DL-RE.
https://github.com/nilaoda/N_m3u8DL-RE
Yields the following download status updates while chunks are downloading:
- {total: 100} (100% download total)
- {completed: 1} (1% download progress out of 100%)
- {downloaded: "10.1 MB/s"} (currently downloading at a rate of 10.1 MB/s)
The data is in the same format accepted by rich's progress.update() function.
Parameters:
urls: Web URL(s) to file(s) to download. You can use a dictionary with the key
"url" for the URI, and other keys for extra arguments to use per-URL.
track: The track to download. Used to get track attributes for the selection
process. Note that Track.Descriptor.URL is not supported by N_m3u8DL-RE.
output_dir: The folder to save the file into. If the save path's directory does
not exist then it will be made automatically.
filename: The filename or filename template to use for each file. The variables
you can use are `i` for the URL index and `ext` for the URL extension.
headers: A mapping of HTTP Header Key/Values to use for the download.
cookies: A mapping of Cookie Key/Values or a Cookie Jar to use for the download.
        proxy: An optional proxy URI to route connections through for the download.
        max_workers: The maximum amount of threads to use for downloads. Defaults to
            min(32, cpu_count + 4). Maps to N_m3u8DL-RE's --thread-count option, which
            can also be set in the config.
content_keys: The content keys to use for decryption.
"""
track_type = track.__class__.__name__
log = logging.getLogger("N_m3u8DL-RE")
if proxy and not config.n_m3u8dl_re.get("use_proxy", True):
log.warning(f"{track_type}: Ignoring proxy as N_m3u8DL-RE is set to use_proxy=False")
proxy = None
yield from download(urls, track, output_dir, filename, headers, cookies, proxy, max_workers, content_keys)
__all__ = ("n_m3u8dl_re",)
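# Illustrative shape of content_keys (the values are the well-known Widevine
# test pair, stand-ins only): a mapping of KID (uuid.UUID; its .hex form is
# used) to a hex key string, emitted as N_m3u8DL-RE `--key KID:KEY` arguments:
#
#   from uuid import UUID
#   content_keys = {UUID("eb676abb-cb34-5e96-bbcf-552b63f1f3cc"): "100b6c20940f779a4589152b57d2dacb"}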

View File

@@ -0,0 +1,266 @@
import math
import os
import time
from concurrent.futures import as_completed
from concurrent.futures.thread import ThreadPoolExecutor
from http.cookiejar import CookieJar
from pathlib import Path
from typing import Any, Generator, MutableMapping, Optional, Union
from requests import Session
from requests.adapters import HTTPAdapter
from rich import filesize
from unshackle.core.constants import DOWNLOAD_CANCELLED
from unshackle.core.utilities import get_extension
MAX_ATTEMPTS = 5
RETRY_WAIT = 2
CHUNK_SIZE = 1024
PROGRESS_WINDOW = 5
# shared across segmented downloads so one aggregate speed figure can be reported
DOWNLOAD_SIZES = []
LAST_SPEED_REFRESH = time.time()
def download(
url: str, save_path: Path, session: Optional[Session] = None, segmented: bool = False, **kwargs: Any
) -> Generator[dict[str, Any], None, None]:
"""
Download a file using Python Requests.
https://requests.readthedocs.io
Yields the following download status updates while chunks are downloading:
- {total: 123} (there are 123 chunks to download)
- {total: None} (there are an unknown number of chunks to download)
- {advance: 1} (one chunk was downloaded)
- {downloaded: "10.1 MB/s"} (currently downloading at a rate of 10.1 MB/s)
- {file_downloaded: Path(...), written: 1024} (download finished, has the save path and size)
The data is in the same format accepted by rich's progress.update() function. The
`downloaded` key is custom and is not natively accepted by all rich progress bars.
Parameters:
url: Web URL of a file to download.
save_path: The path to save the file to. If the save path's directory does not
exist then it will be made automatically.
session: The Requests Session to make HTTP requests with. Useful to set Header,
Cookie, and Proxy data. Connections are saved and re-used with the session
so long as the server keeps the connection alive.
segmented: If downloads are segments or parts of one bigger file.
kwargs: Any extra keyword arguments to pass to the session.get() call. Use this
for one-time request changes like a header, cookie, or proxy. For example,
to request Byte-ranges use e.g., `headers={"Range": "bytes=0-128"}`.
"""
global LAST_SPEED_REFRESH
session = session or Session()
save_dir = save_path.parent
control_file = save_path.with_name(f"{save_path.name}.!dev")
save_dir.mkdir(parents=True, exist_ok=True)
if control_file.exists():
# consider the file corrupt if the control file exists
save_path.unlink(missing_ok=True)
control_file.unlink()
    elif save_path.exists():
        # if it exists with no control file, the download completed and is safe to keep
        yield dict(file_downloaded=save_path, written=save_path.stat().st_size)
        return  # the file is already complete, don't re-download it
    # TODO: Design a control file format so we know how much of the file is missing
    control_file.write_bytes(b"")
attempts = 1
try:
while True:
written = 0
# these are for single-url speed calcs only
download_sizes = []
last_speed_refresh = time.time()
try:
stream = session.get(url, stream=True, **kwargs)
stream.raise_for_status()
if not segmented:
try:
content_length = int(stream.headers.get("Content-Length", "0"))
except ValueError:
content_length = 0
if content_length > 0:
yield dict(total=math.ceil(content_length / CHUNK_SIZE))
else:
# we have no data to calculate total chunks
yield dict(total=None) # indeterminate mode
with open(save_path, "wb") as f:
for chunk in stream.iter_content(chunk_size=CHUNK_SIZE):
download_size = len(chunk)
f.write(chunk)
written += download_size
if not segmented:
yield dict(advance=1)
now = time.time()
time_since = now - last_speed_refresh
download_sizes.append(download_size)
if time_since > PROGRESS_WINDOW or download_size < CHUNK_SIZE:
data_size = sum(download_sizes)
download_speed = math.ceil(data_size / (time_since or 1))
yield dict(downloaded=f"{filesize.decimal(download_speed)}/s")
last_speed_refresh = now
download_sizes.clear()
if content_length and written < content_length:
raise IOError(f"Failed to read {content_length} bytes from the track URI.")
yield dict(file_downloaded=save_path, written=written)
if segmented:
yield dict(advance=1)
now = time.time()
time_since = now - LAST_SPEED_REFRESH
if written: # no size == skipped dl
DOWNLOAD_SIZES.append(written)
if DOWNLOAD_SIZES and time_since > PROGRESS_WINDOW:
data_size = sum(DOWNLOAD_SIZES)
download_speed = math.ceil(data_size / (time_since or 1))
yield dict(downloaded=f"{filesize.decimal(download_speed)}/s")
LAST_SPEED_REFRESH = now
DOWNLOAD_SIZES.clear()
break
except Exception as e:
save_path.unlink(missing_ok=True)
if DOWNLOAD_CANCELLED.is_set() or attempts == MAX_ATTEMPTS:
raise e
time.sleep(RETRY_WAIT)
attempts += 1
finally:
control_file.unlink()
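# Illustrative sketch (not part of this module): consuming the status updates
# yielded by download(). The "total", "advance", and "downloaded" keys map onto
# rich's progress.update() keyword arguments, with "downloaded" being a custom
# speed string. The URL and save path below are hypothetical placeholders.
#
#     from pathlib import Path
#     from rich.progress import Progress
#
#     with Progress() as progress:
#         task = progress.add_task("Downloading", total=None)
#         for update in download("https://example.com/file.bin", Path("file.bin")):
#             if "file_downloaded" in update:
#                 print("saved", update["file_downloaded"], f"({update['written']} bytes)")
#             elif "downloaded" in update:
#                 pass  # speed string like "10.1 MB/s", for a custom progress column
#             else:
#                 progress.update(task, **update)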
def requests(
urls: Union[str, list[str], dict[str, Any], list[dict[str, Any]]],
output_dir: Path,
filename: str,
headers: Optional[MutableMapping[str, Union[str, bytes]]] = None,
cookies: Optional[Union[MutableMapping[str, str], CookieJar]] = None,
proxy: Optional[str] = None,
max_workers: Optional[int] = None,
) -> Generator[dict[str, Any], None, None]:
"""
Download a file using Python Requests.
https://requests.readthedocs.io
Yields the following download status updates while chunks are downloading:
- {total: 123} (there are 123 chunks to download)
- {total: None} (there are an unknown number of chunks to download)
- {advance: 1} (one chunk was downloaded)
- {downloaded: "10.1 MB/s"} (currently downloading at a rate of 10.1 MB/s)
- {file_downloaded: Path(...), written: 1024} (download finished, has the save path and size)
The data is in the same format accepted by rich's progress.update() function.
However, the `downloaded`, `file_downloaded` and `written` keys are custom and not
natively accepted by rich progress bars.
Parameters:
urls: Web URL(s) to file(s) to download. You can use a dictionary with the key
"url" for the URI, and other keys for extra arguments to use per-URL.
output_dir: The folder to save the file into. If the save path's directory does
not exist then it will be made automatically.
filename: The filename or filename template to use for each file. The variables
you can use are `i` for the URL index and `ext` for the URL extension.
headers: A mapping of HTTP Header Key/Values to use for all downloads.
cookies: A mapping of Cookie Key/Values or a Cookie Jar to use for all downloads.
proxy: An optional proxy URI to route connections through for all downloads.
max_workers: The maximum number of threads to use for downloads. Defaults to
min(32, cpu_count + 4).
"""
if not urls:
raise ValueError("urls must be provided and not empty")
elif not isinstance(urls, (str, dict, list)):
raise TypeError(f"Expected urls to be {str} or {dict} or a list of one of them, not {type(urls)}")
if not output_dir:
raise ValueError("output_dir must be provided")
elif not isinstance(output_dir, Path):
raise TypeError(f"Expected output_dir to be {Path}, not {type(output_dir)}")
if not filename:
raise ValueError("filename must be provided")
elif not isinstance(filename, str):
raise TypeError(f"Expected filename to be {str}, not {type(filename)}")
if not isinstance(headers, (MutableMapping, type(None))):
raise TypeError(f"Expected headers to be {MutableMapping}, not {type(headers)}")
if not isinstance(cookies, (MutableMapping, CookieJar, type(None))):
raise TypeError(f"Expected cookies to be {MutableMapping} or {CookieJar}, not {type(cookies)}")
if not isinstance(proxy, (str, type(None))):
raise TypeError(f"Expected proxy to be {str}, not {type(proxy)}")
if not isinstance(max_workers, (int, type(None))):
raise TypeError(f"Expected max_workers to be {int}, not {type(max_workers)}")
if not isinstance(urls, list):
urls = [urls]
if not max_workers:
max_workers = min(32, (os.cpu_count() or 1) + 4)
urls = [
dict(save_path=save_path, **url) if isinstance(url, dict) else dict(url=url, save_path=save_path)
for i, url in enumerate(urls)
for save_path in [
output_dir / filename.format(i=i, ext=get_extension(url["url"] if isinstance(url, dict) else url))
]
]
session = Session()
session.mount("https://", HTTPAdapter(pool_connections=max_workers, pool_maxsize=max_workers, pool_block=True))
session.mount("http://", session.adapters["https://"])
if headers:
headers = {k: v for k, v in headers.items() if k.lower() != "accept-encoding"}
session.headers.update(headers)
if cookies:
session.cookies.update(cookies)
if proxy:
session.proxies.update({"all": proxy})
yield dict(total=len(urls))
try:
with ThreadPoolExecutor(max_workers=max_workers) as pool:
# treat multi-URL jobs as segmented so per-chunk updates don't overflow the total
for future in as_completed(pool.submit(download, session=session, segmented=len(urls) > 1, **url) for url in urls):
try:
yield from future.result()
except KeyboardInterrupt:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[yellow]CANCELLING")
pool.shutdown(wait=True, cancel_futures=True)
yield dict(downloaded="[yellow]CANCELLED")
# tell dl that it was cancelled
# the pool is already shut down, so exiting loop is fine
raise
except Exception:
DOWNLOAD_CANCELLED.set() # skip pending track downloads
yield dict(downloaded="[red]FAILING")
pool.shutdown(wait=True, cancel_futures=True)
yield dict(downloaded="[red]FAILED")
# tell dl that it failed
# the pool is already shut down, so exiting loop is fine
raise
finally:
DOWNLOAD_SIZES.clear()
__all__ = ("requests",)
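# Usage sketch (illustrative only; the URLs and output path are hypothetical
# placeholders): download two files into ./out as 0.mp4 and 1.mp4. Per the
# docstring, the filename template may use `i` (URL index) and `ext` (extension).
#
#     for update in requests(
#         urls=["https://example.com/a.mp4", "https://example.com/b.mp4"],
#         output_dir=Path("out"),
#         filename="{i}.mp4",
#         headers={"User-Agent": "unshackle"},
#     ):
#         print(update)  # e.g. {"total": 2}, then per-file progress updates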

10
unshackle/core/drm/__init__.py Normal file
View File

@@ -0,0 +1,10 @@
from typing import Union
from unshackle.core.drm.clearkey import ClearKey
from unshackle.core.drm.playready import PlayReady
from unshackle.core.drm.widevine import Widevine
DRM_T = Union[ClearKey, Widevine, PlayReady]
__all__ = ("ClearKey", "Widevine", "PlayReady", "DRM_T")
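# Usage sketch (illustrative only): DRM_T is the union type used wherever a
# track's DRM systems are handled, e.g. to pick out a specific system.
#
#     def first_widevine(drm: list[DRM_T]) -> Optional[Widevine]:
#         return next((d for d in drm if isinstance(d, Widevine)), None)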

111
unshackle/core/drm/clearkey.py Normal file
View File

@@ -0,0 +1,111 @@
from __future__ import annotations
import base64
import shutil
from pathlib import Path
from typing import Optional, Union
from urllib.parse import urljoin
from Cryptodome.Cipher import AES
from Cryptodome.Util.Padding import unpad
from m3u8.model import Key
from requests import Session
class ClearKey:
"""AES Clear Key DRM System."""
def __init__(self, key: Union[bytes, str], iv: Optional[Union[bytes, str]] = None):
"""
Generally an IV should be provided where possible. If not provided, it will
be set to NUL (\x00) bytes of the same length as the key.
"""
if isinstance(key, str):
key = bytes.fromhex(key.replace("0x", ""))
if not isinstance(key, bytes):
raise ValueError(f"Expected AES Key to be bytes, not {key!r}")
if not iv:
iv = b"\x00"
if isinstance(iv, str):
iv = bytes.fromhex(iv.replace("0x", ""))
if not isinstance(iv, bytes):
raise ValueError(f"Expected IV to be bytes, not {iv!r}")
if len(iv) < len(key):
# repeat then truncate so the IV is exactly the key length (16 bytes for AES-128)
iv = (iv * len(key))[:len(key)]
self.key: bytes = key
self.iv: bytes = iv
def decrypt(self, path: Path) -> None:
"""Decrypt a Track with AES Clear Key DRM."""
if not path or not path.exists():
raise ValueError("Tried to decrypt a file that does not exist.")
decrypted = AES.new(self.key, AES.MODE_CBC, self.iv).decrypt(path.read_bytes())
try:
decrypted = unpad(decrypted, AES.block_size)
except ValueError:
# the decrypted data is likely already in the block size boundary
pass
decrypted_path = path.with_suffix(f".decrypted{path.suffix}")
decrypted_path.write_bytes(decrypted)
path.unlink()
shutil.move(decrypted_path, path)
@classmethod
def from_m3u_key(cls, m3u_key: Key, session: Optional[Session] = None) -> ClearKey:
"""
Load a ClearKey from an M3U(8) Playlist's EXT-X-KEY.
Parameters:
m3u_key: A Key object parsed from a m3u(8) playlist using
the `m3u8` library.
session: Optional session used to request external URIs with.
Useful to set headers, proxies, cookies, and so forth.
"""
if not isinstance(m3u_key, Key):
raise ValueError(f"Provided M3U Key is of an unexpected type: {m3u_key!r}")
if not isinstance(session, (Session, type(None))):
raise TypeError(f"Expected session to be a {Session}, not a {type(session)}")
if not m3u_key.method.startswith("AES"):
raise ValueError(f"Provided M3U Key is not an AES Clear Key, {m3u_key.method}")
if not m3u_key.uri:
raise ValueError("No URI in M3U Key, unable to get Key.")
if not session:
session = Session()
if not session.headers.get("User-Agent"):
# commonly needed default for HLS playlists
session.headers["User-Agent"] = "smartexoplayer/1.1.0 (Linux;Android 8.0.0) ExoPlayerLib/2.13.3"
if m3u_key.uri.startswith("data:"):
media_types, data = m3u_key.uri[5:].split(",", 1)
media_types = media_types.split(";")
if "base64" in media_types:
data = base64.b64decode(data)
key = data
else:
url = urljoin(m3u_key.base_uri, m3u_key.uri)
res = session.get(url)
res.raise_for_status()
if not res.content:
raise EOFError("Unexpected Empty Response by M3U Key URI.")
if len(res.content) < 16:
raise EOFError(f"Unexpected Length of Key ({len(res.content)} bytes) in M3U Key.")
key = res.content
if m3u_key.iv:
iv = bytes.fromhex(m3u_key.iv.replace("0x", ""))
else:
iv = None
return cls(key=key, iv=iv)
__all__ = ("ClearKey",)
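# Usage sketch (illustrative only; the key, IV, and file path are hypothetical
# placeholders): decrypt an AES-encrypted segment in place.
#
#     drm = ClearKey(
#         key="00112233445566778899aabbccddeeff",  # 128-bit key as hex
#         iv="0x00000000000000000000000000000000",  # optional
#     )
#     drm.decrypt(Path("segment.ts"))  # the file is replaced with its plaintext
#
# Or from a parsed M3U(8) playlist's EXT-X-KEY:
#
#     import m3u8
#     playlist = m3u8.load("https://example.com/media.m3u8")
#     keys = [k for k in playlist.keys if k and k.method.startswith("AES")]
#     if keys:
#         drm = ClearKey.from_m3u_key(keys[0])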

281
unshackle/core/drm/playready.py Normal file
View File

@@ -0,0 +1,281 @@
from __future__ import annotations
import base64
import shutil
import subprocess
import textwrap
from pathlib import Path
from typing import Any, Callable, Optional, Union
from uuid import UUID
import m3u8
from construct import Container
from pymp4.parser import Box
from pyplayready.cdm import Cdm as PlayReadyCdm
from pyplayready.system.pssh import PSSH
from requests import Session
from rich.text import Text
from unshackle.core import binaries
from unshackle.core.config import config
from unshackle.core.console import console
from unshackle.core.constants import AnyTrack
from unshackle.core.utilities import get_boxes
from unshackle.core.utils.subprocess import ffprobe
class PlayReady:
"""PlayReady DRM System."""
def __init__(
self,
pssh: PSSH,
kid: Union[UUID, str, bytes, None] = None,
pssh_b64: Optional[str] = None,
**kwargs: Any,
):
if not pssh:
raise ValueError("Provided PSSH is empty.")
if not isinstance(pssh, PSSH):
raise TypeError(f"Expected pssh to be a {PSSH}, not {pssh!r}")
kids: list[UUID] = []
for header in pssh.wrm_headers:
try:
signed_ids, _, _, _ = header.read_attributes()
except Exception:
continue
for signed_id in signed_ids:
try:
kids.append(UUID(bytes_le=base64.b64decode(signed_id.value)))
except Exception:
continue
if kid:
if isinstance(kid, str):
kid = UUID(hex=kid)
elif isinstance(kid, bytes):
kid = UUID(bytes=kid)
if not isinstance(kid, UUID):
raise ValueError(f"Expected kid to be a {UUID}, str, or bytes, not {kid!r}")
if kid not in kids:
kids.append(kid)
self._pssh = pssh
self._kids = kids
if not self.kids:
raise PlayReady.Exceptions.KIDNotFound("No Key ID was found within PSSH and none were provided.")
self.content_keys: dict[UUID, str] = {}
self.data: dict = kwargs or {}
if pssh_b64:
self.data.setdefault("pssh_b64", pssh_b64)
@classmethod
def from_track(cls, track: AnyTrack, session: Optional[Session] = None) -> PlayReady:
if not session:
session = Session()
session.headers.update(config.headers)
kid: Optional[UUID] = None
pssh_boxes: list[Container] = []
tenc_boxes: list[Container] = []
if track.descriptor == track.Descriptor.HLS:
m3u_url = track.url
master = m3u8.loads(session.get(m3u_url).text, uri=m3u_url)
pssh_boxes.extend(
Box.parse(base64.b64decode(x.uri.split(",")[-1]))
for x in (master.session_keys or master.keys)
if x and x.keyformat and "playready" in x.keyformat.lower()
)
init_data = track.get_init_segment(session=session)
if init_data:
probe = ffprobe(init_data)
if probe:
for stream in probe.get("streams") or []:
enc_key_id = stream.get("tags", {}).get("enc_key_id")
if enc_key_id:
kid = UUID(bytes=base64.b64decode(enc_key_id))
pssh_boxes.extend(list(get_boxes(init_data, b"pssh")))
tenc_boxes.extend(list(get_boxes(init_data, b"tenc")))
pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SYSTEM_ID.bytes), None)
if not pssh:
raise PlayReady.Exceptions.PSSHNotFound("PSSH was not found in track data.")
tenc = next(iter(tenc_boxes), None)
if not kid and tenc and tenc.key_ID.int != 0:
kid = tenc.key_ID
pssh_bytes = Box.build(pssh)
return cls(pssh=PSSH(pssh_bytes), kid=kid, pssh_b64=base64.b64encode(pssh_bytes).decode())
@classmethod
def from_init_data(cls, init_data: bytes) -> PlayReady:
if not init_data:
raise ValueError("Init data should be provided.")
if not isinstance(init_data, bytes):
raise TypeError(f"Expected init data to be bytes, not {init_data!r}")
kid: Optional[UUID] = None
pssh_boxes: list[Container] = list(get_boxes(init_data, b"pssh"))
tenc_boxes: list[Container] = list(get_boxes(init_data, b"tenc"))
probe = ffprobe(init_data)
if probe:
for stream in probe.get("streams") or []:
enc_key_id = stream.get("tags", {}).get("enc_key_id")
if enc_key_id:
kid = UUID(bytes=base64.b64decode(enc_key_id))
pssh = next((b for b in pssh_boxes if b.system_ID == PSSH.SYSTEM_ID.bytes), None)
if not pssh:
raise PlayReady.Exceptions.PSSHNotFound("PSSH was not found in track data.")
tenc = next(iter(tenc_boxes), None)
if not kid and tenc and tenc.key_ID.int != 0:
kid = tenc.key_ID
pssh_bytes = Box.build(pssh)
return cls(pssh=PSSH(pssh_bytes), kid=kid, pssh_b64=base64.b64encode(pssh_bytes).decode())
@property
def pssh(self) -> PSSH:
return self._pssh
@property
def pssh_b64(self) -> Optional[str]:
return self.data.get("pssh_b64")
@property
def kid(self) -> Optional[UUID]:
return next(iter(self.kids), None)
@property
def kids(self) -> list[UUID]:
return self._kids
def get_content_keys(self, cdm: PlayReadyCdm, certificate: Callable, licence: Callable) -> None:
for kid in self.kids:
if kid in self.content_keys:
continue
session_id = cdm.open()
try:
challenge = cdm.get_license_challenge(session_id, self.pssh.wrm_headers[0])
license_res = licence(challenge=challenge)
if isinstance(license_res, bytes):
license_str = license_res.decode(errors="ignore")
else:
license_str = str(license_res)
if "<License>" not in license_str:
try:
license_str = base64.b64decode(license_str + "===").decode()
except Exception:
pass
cdm.parse_license(session_id, license_str)
keys = {key.key_id: key.key.hex() for key in cdm.get_keys(session_id)}
self.content_keys.update(keys)
finally:
cdm.close(session_id)
if not self.content_keys:
raise PlayReady.Exceptions.EmptyLicense("No Content Keys were within the License")
def decrypt(self, path: Path) -> None:
if not self.content_keys:
raise ValueError("Cannot decrypt a Track without any Content Keys...")
if not binaries.ShakaPackager:
raise EnvironmentError("Shaka Packager executable not found but is required.")
if not path or not path.exists():
raise ValueError("Tried to decrypt a file that does not exist.")
output_path = path.with_stem(f"{path.stem}_decrypted")
config.directories.temp.mkdir(parents=True, exist_ok=True)
try:
arguments = [
f"input={path},stream=0,output={output_path},output_format=MP4",
"--enable_raw_key_decryption",
"--keys",
",".join(
[
*[
f"label={i}:key_id={kid.hex}:key={key.lower()}"
for i, (kid, key) in enumerate(self.content_keys.items())
],
*[
f"label={i}:key_id={'00' * 16}:key={key.lower()}"
for i, (kid, key) in enumerate(self.content_keys.items(), len(self.content_keys))
],
]
),
"--temp_dir",
config.directories.temp,
]
p = subprocess.Popen(
[binaries.ShakaPackager, *arguments],
stdout=subprocess.DEVNULL,
stderr=subprocess.PIPE,
universal_newlines=True,
)
stream_skipped = False
had_error = False
shaka_log_buffer = ""
for line in iter(p.stderr.readline, ""):
line = line.strip()
if not line:
continue
if "Skip stream" in line:
stream_skipped = True
if ":INFO:" in line:
continue
if "I0" in line or "W0" in line:
continue
if ":ERROR:" in line:
had_error = True
if "Insufficient bits in bitstream for given AVC profile" in line:
continue
shaka_log_buffer += f"{line.strip()}\n"
if shaka_log_buffer:
shaka_log_buffer = "\n ".join(
textwrap.wrap(shaka_log_buffer.rstrip(), width=console.width - 22, initial_indent="")
)
console.log(Text.from_ansi("\n[PlayReady]: " + shaka_log_buffer))
p.wait()
if p.returncode != 0 or had_error:
raise subprocess.CalledProcessError(p.returncode, arguments)
path.unlink()
if not stream_skipped:
shutil.move(output_path, path)
except subprocess.CalledProcessError as e:
if e.returncode == 0xC000013A:
raise KeyboardInterrupt()
raise
class Exceptions:
class PSSHNotFound(Exception):
pass
class KIDNotFound(Exception):
pass
class CEKNotFound(Exception):
pass
class EmptyLicense(Exception):
pass
__all__ = ("PlayReady",)
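# Usage sketch (illustrative only; the license URL is a hypothetical
# placeholder): get_content_keys() calls `licence(challenge=...)` with the CDM
# challenge and expects the raw license response back. The `certificate`
# callable is accepted for interface parity but is not used by this class.
# `cdm` is assumed to be an initialized pyplayready Cdm instance.
#
#     import requests as req
#
#     def licence(challenge):
#         return req.post("https://example.com/playready/license", data=challenge).content
#
#     drm = PlayReady(pssh=PSSH(pssh_b64), pssh_b64=pssh_b64)  # pssh_b64 from the manifest
#     drm.get_content_keys(cdm, certificate=lambda **_: None, licence=licence)
#     drm.decrypt(Path("track.mp4"))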

334
unshackle/core/drm/widevine.py Normal file
View File

@@ -0,0 +1,334 @@
from __future__ import annotations
import base64
import shutil
import subprocess
import textwrap
from pathlib import Path
from typing import Any, Callable, Optional, Union
from uuid import UUID
import m3u8
from construct import Container
from pymp4.parser import Box
from pywidevine.cdm import Cdm as WidevineCdm
from pywidevine.pssh import PSSH
from requests import Session
from rich.text import Text
from unshackle.core import binaries
from unshackle.core.config import config
from unshackle.core.console import console
from unshackle.core.constants import AnyTrack
from unshackle.core.utilities import get_boxes
from unshackle.core.utils.subprocess import ffprobe
class Widevine:
"""Widevine DRM System."""
def __init__(self, pssh: PSSH, kid: Union[UUID, str, bytes, None] = None, **kwargs: Any):
if not pssh:
raise ValueError("Provided PSSH is empty.")
if not isinstance(pssh, PSSH):
raise TypeError(f"Expected pssh to be a {PSSH}, not {pssh!r}")
if pssh.system_id == PSSH.SystemId.PlayReady:
pssh.to_widevine()
if kid:
if isinstance(kid, str):
kid = UUID(hex=kid)
elif isinstance(kid, bytes):
kid = UUID(bytes=kid)
if not isinstance(kid, UUID):
raise ValueError(f"Expected kid to be a {UUID}, str, or bytes, not {kid!r}")
pssh.set_key_ids([kid])
self._pssh = pssh
if not self.kids:
raise Widevine.Exceptions.KIDNotFound("No Key ID was found within PSSH and none were provided.")
self.content_keys: dict[UUID, str] = {}
self.data: dict = kwargs or {}
@classmethod
def from_track(cls, track: AnyTrack, session: Optional[Session] = None) -> Widevine:
"""
Get PSSH and KID from within the Initiation Segment of the Track Data.
It also tries to get PSSH and KID from other track data like M3U8 data
as well as through ffprobe.
Create a Widevine DRM System object from a track's information.
This should only be used if a PSSH could not be provided directly.
It is *rare* to need to use this.
You may provide your own requests session to be able to use custom
headers and more.
Raises:
PSSHNotFound - If the PSSH was not found within the data.
KIDNotFound - If the KID was not found within the data or PSSH.
"""
if not session:
session = Session()
session.headers.update(config.headers)
kid: Optional[UUID] = None
pssh_boxes: list[Container] = []
tenc_boxes: list[Container] = []
if track.descriptor == track.Descriptor.HLS:
m3u_url = track.url
master = m3u8.loads(session.get(m3u_url).text, uri=m3u_url)
pssh_boxes.extend(
Box.parse(base64.b64decode(x.uri.split(",")[-1]))
for x in (master.session_keys or master.keys)
if x and x.keyformat and x.keyformat.lower() == WidevineCdm.urn
)
init_data = track.get_init_segment(session=session)
if init_data:
# try to get it via ffprobe; needed for non-MP4 data, e.g. WEBM from Google Play
probe = ffprobe(init_data)
if probe:
for stream in probe.get("streams") or []:
enc_key_id = stream.get("tags", {}).get("enc_key_id")
if enc_key_id:
kid = UUID(bytes=base64.b64decode(enc_key_id))
pssh_boxes.extend(list(get_boxes(init_data, b"pssh")))
tenc_boxes.extend(list(get_boxes(init_data, b"tenc")))
pssh_boxes.sort(key=lambda b: {PSSH.SystemId.Widevine: 0, PSSH.SystemId.PlayReady: 1}.get(b.system_ID, 2))
pssh = next(iter(pssh_boxes), None)
if not pssh:
raise Widevine.Exceptions.PSSHNotFound("PSSH was not found in track data.")
tenc = next(iter(tenc_boxes), None)
if not kid and tenc and tenc.key_ID.int != 0:
kid = tenc.key_ID
return cls(pssh=PSSH(pssh), kid=kid)
@classmethod
def from_init_data(cls, init_data: bytes) -> Widevine:
"""
Get PSSH and KID from within Initialization Segment Data.
This should only be used if a PSSH could not be provided directly.
It is *rare* to need to use this.
Raises:
PSSHNotFound - If the PSSH was not found within the data.
KIDNotFound - If the KID was not found within the data or PSSH.
"""
if not init_data:
raise ValueError("Init data should be provided.")
if not isinstance(init_data, bytes):
raise TypeError(f"Expected init data to be bytes, not {init_data!r}")
kid: Optional[UUID] = None
pssh_boxes: list[Container] = list(get_boxes(init_data, b"pssh"))
tenc_boxes: list[Container] = list(get_boxes(init_data, b"tenc"))
# try to get it via ffprobe; needed for non-MP4 data, e.g. WEBM from Google Play
probe = ffprobe(init_data)
if probe:
for stream in probe.get("streams") or []:
enc_key_id = stream.get("tags", {}).get("enc_key_id")
if enc_key_id:
kid = UUID(bytes=base64.b64decode(enc_key_id))
pssh_boxes.sort(key=lambda b: {PSSH.SystemId.Widevine: 0, PSSH.SystemId.PlayReady: 1}.get(b.system_ID, 2))
pssh = next(iter(pssh_boxes), None)
if not pssh:
raise Widevine.Exceptions.PSSHNotFound("PSSH was not found in track data.")
tenc = next(iter(tenc_boxes), None)
if not kid and tenc and tenc.key_ID.int != 0:
kid = tenc.key_ID
return cls(pssh=PSSH(pssh), kid=kid)
@property
def pssh(self) -> PSSH:
"""Get Protection System Specific Header Box."""
return self._pssh
@property
def kid(self) -> Optional[UUID]:
"""Get first Key ID, if any."""
return next(iter(self.kids), None)
@property
def kids(self) -> list[UUID]:
"""Get all Key IDs."""
return self._pssh.key_ids
def get_content_keys(self, cdm: WidevineCdm, certificate: Callable, licence: Callable) -> None:
"""
Create a CDM Session and obtain Content Keys for this DRM Instance.
The certificate and license params are expected to be a function and will
be provided with the challenge and session ID.
"""
for kid in self.kids:
if kid in self.content_keys:
continue
session_id = cdm.open()
try:
cert = certificate(challenge=cdm.service_certificate_challenge)
if cert and hasattr(cdm, "set_service_certificate"):
cdm.set_service_certificate(session_id, cert)
cdm.parse_license(session_id, licence(challenge=cdm.get_license_challenge(session_id, self.pssh)))
self.content_keys = {key.kid: key.key.hex() for key in cdm.get_keys(session_id, "CONTENT")}
if not self.content_keys:
raise Widevine.Exceptions.EmptyLicense("No Content Keys were within the License")
if kid not in self.content_keys:
raise Widevine.Exceptions.CEKNotFound(f"No Content Key for KID {kid.hex} within the License")
finally:
cdm.close(session_id)
def get_NF_content_keys(self, cdm: WidevineCdm, certificate: Callable, licence: Callable) -> None:
"""
Create a CDM Session and obtain Content Keys for this DRM Instance.
The certificate and license params are expected to be a function and will
be provided with the challenge and session ID.
"""
for kid in self.kids:
if kid in self.content_keys:
continue
session_id = cdm.open()
try:
cert = certificate(challenge=cdm.service_certificate_challenge)
if cert and hasattr(cdm, "set_service_certificate"):
cdm.set_service_certificate(session_id, cert)
cdm.parse_license(
session_id,
licence(session_id=session_id, challenge=cdm.get_license_challenge(session_id, self.pssh)),
)
self.content_keys = {key.kid: key.key.hex() for key in cdm.get_keys(session_id, "CONTENT")}
if not self.content_keys:
raise Widevine.Exceptions.EmptyLicense("No Content Keys were within the License")
if kid not in self.content_keys:
raise Widevine.Exceptions.CEKNotFound(f"No Content Key for KID {kid.hex} within the License")
finally:
cdm.close(session_id)
def decrypt(self, path: Path) -> None:
"""
Decrypt a Track with Widevine DRM.
Raises:
EnvironmentError if the Shaka Packager executable could not be found.
ValueError if the track has not yet been downloaded.
SubprocessError if Shaka Packager returned a non-zero exit code.
"""
if not self.content_keys:
raise ValueError("Cannot decrypt a Track without any Content Keys...")
if not binaries.ShakaPackager:
raise EnvironmentError("Shaka Packager executable not found but is required.")
if not path or not path.exists():
raise ValueError("Tried to decrypt a file that does not exist.")
output_path = path.with_stem(f"{path.stem}_decrypted")
config.directories.temp.mkdir(parents=True, exist_ok=True)
try:
arguments = [
f"input={path},stream=0,output={output_path},output_format=MP4",
"--enable_raw_key_decryption",
"--keys",
",".join(
[
*[
"label={}:key_id={}:key={}".format(i, kid.hex, key.lower())
for i, (kid, key) in enumerate(self.content_keys.items())
],
*[
# some services use a blank KID on the file, but real KID for license server
"label={}:key_id={}:key={}".format(i, "00" * 16, key.lower())
for i, (kid, key) in enumerate(self.content_keys.items(), len(self.content_keys))
],
]
),
"--temp_dir",
config.directories.temp,
]
p = subprocess.Popen(
[binaries.ShakaPackager, *arguments],
stdout=subprocess.DEVNULL,
stderr=subprocess.PIPE,
universal_newlines=True,
)
stream_skipped = False
had_error = False
shaka_log_buffer = ""
for line in iter(p.stderr.readline, ""):
line = line.strip()
if not line:
continue
if "Skip stream" in line:
# file/segment was so small that it didn't have any actual data, ignore
stream_skipped = True
if ":INFO:" in line:
continue
if "I0" in line or "W0" in line:
continue
if ":ERROR:" in line:
had_error = True
if "Insufficient bits in bitstream for given AVC profile" in line:
# this is a warning and is something we don't have to worry about
continue
shaka_log_buffer += f"{line.strip()}\n"
if shaka_log_buffer:
# wrap to console width - padding - '[Widevine]: '
shaka_log_buffer = "\n ".join(
textwrap.wrap(shaka_log_buffer.rstrip(), width=console.width - 22, initial_indent="")
)
console.log(Text.from_ansi("\n[Widevine]: " + shaka_log_buffer))
p.wait()
if p.returncode != 0 or had_error:
raise subprocess.CalledProcessError(p.returncode, arguments)
path.unlink()
if not stream_skipped:
shutil.move(output_path, path)
except subprocess.CalledProcessError as e:
if e.returncode == 0xC000013A: # STATUS_CONTROL_C_EXIT
raise KeyboardInterrupt()
raise
class Exceptions:
class PSSHNotFound(Exception):
"""PSSH (Protection System Specific Header) was not found."""
class KIDNotFound(Exception):
"""KID (Encryption Key ID) was not found."""
class CEKNotFound(Exception):
"""CEK (Content Encryption Key) for KID was not found in License."""
class EmptyLicense(Exception):
"""License returned no Content Encryption Keys."""
__all__ = ("Widevine",)
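# Usage sketch (illustrative only; the device path and license URL are
# hypothetical placeholders): get_content_keys() calls `certificate` and
# `licence` with a `challenge=` keyword argument, as documented above.
#
#     import requests as req
#     from pywidevine.device import Device
#
#     cdm = WidevineCdm.from_device(Device.load("device.wvd"))
#
#     def certificate(challenge):
#         return None  # or the service certificate for privacy mode
#
#     def licence(challenge):
#         return req.post("https://example.com/widevine/license", data=challenge).content
#
#     drm = Widevine.from_track(track)  # `track` from a parsed manifest
#     drm.get_content_keys(cdm, certificate=certificate, licence=licence)
#     drm.decrypt(track.path)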

76
unshackle/core/events.py Normal file
View File

@@ -0,0 +1,76 @@
from __future__ import annotations
from copy import deepcopy
from enum import Enum
from typing import Any, Callable
class Events:
class Types(Enum):
_reserved = 0
# A Track's segment has finished downloading
SEGMENT_DOWNLOADED = 1
# Track has finished downloading
TRACK_DOWNLOADED = 2
# Track has finished decrypting
TRACK_DECRYPTED = 3
# Track has finished repacking
TRACK_REPACKED = 4
# Track is about to be Multiplexed into a Container
TRACK_MULTIPLEX = 5
def __init__(self):
self.__subscriptions: dict[Events.Types, list[Callable]] = {}
self.__ephemeral: dict[Events.Types, list[Callable]] = {}
self.reset()
def reset(self):
"""Reset Event Observer clearing all Subscriptions."""
self.__subscriptions = {k: [] for k in Events.Types.__members__.values()}
self.__ephemeral = deepcopy(self.__subscriptions)
def subscribe(self, event_type: Events.Types, callback: Callable, ephemeral: bool = False) -> None:
"""
Subscribe to an Event with a Callback.
Parameters:
event_type: The Events.Type to subscribe to.
callback: The function or lambda to call on event emit.
ephemeral: Unsubscribe the callback from the event after its first emit.
Note that this is not thread-safe, so the callback may still run
multiple times if emits happen at roughly the same time.
"""
[self.__subscriptions, self.__ephemeral][ephemeral][event_type].append(callback)
def unsubscribe(self, event_type: Events.Types, callback: Callable) -> None:
"""
Unsubscribe a Callback from an Event.
Parameters:
event_type: The Events.Type to unsubscribe from.
callback: The function or lambda to remove from event emit.
"""
if callback in self.__subscriptions[event_type]:
self.__subscriptions[event_type].remove(callback)
if callback in self.__ephemeral[event_type]:
self.__ephemeral[event_type].remove(callback)
def emit(self, event_type: Events.Types, *args: Any, **kwargs: Any) -> None:
"""
Emit an Event, executing all subscribed Callbacks.
Parameters:
event_type: The Events.Type to emit.
args: Positional arguments to pass to callbacks.
kwargs: Keyword arguments to pass to callbacks.
"""
if event_type not in self.__subscriptions:
raise ValueError(f'Event type "{event_type}" is invalid')
for callback in self.__subscriptions[event_type] + self.__ephemeral[event_type]:
callback(*args, **kwargs)
self.__ephemeral[event_type].clear()
events = Events()
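if __name__ == "__main__":
    # Illustrative sketch: a regular subscription persists across emits, while
    # an ephemeral one is removed after the first emit.
    events.subscribe(Events.Types.TRACK_DOWNLOADED, lambda track: print(f"downloaded: {track}"))
    events.subscribe(Events.Types.TRACK_DECRYPTED, lambda **kw: print("decrypted (once)"), ephemeral=True)
    events.emit(Events.Types.TRACK_DOWNLOADED, track="example.mp4")
    events.emit(Events.Types.TRACK_DECRYPTED, track="example.mp4", drm=None, segment=None)
    events.emit(Events.Types.TRACK_DECRYPTED, track="example.mp4", drm=None, segment=None)  # prints nothing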

5
unshackle/core/manifests/__init__.py Normal file
View File

@@ -0,0 +1,5 @@
from .dash import DASH
from .hls import HLS
from .ism import ISM
__all__ = ("DASH", "HLS", "ISM")

800
unshackle/core/manifests/dash.py Normal file
View File

@@ -0,0 +1,800 @@
from __future__ import annotations
import base64
import html
import logging
import math
import re
import sys
from copy import copy
from functools import partial
from pathlib import Path
from typing import Any, Callable, Optional, Union
from urllib.parse import urljoin, urlparse
from uuid import UUID
from zlib import crc32
import requests
from langcodes import Language, tag_is_valid
from lxml.etree import Element, ElementTree
from pyplayready.system.pssh import PSSH as PR_PSSH
from pywidevine.cdm import Cdm as WidevineCdm
from pywidevine.pssh import PSSH
from requests import Session
from unshackle.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY, AnyTrack
from unshackle.core.downloaders import requests as requests_downloader
from unshackle.core.drm import DRM_T, PlayReady, Widevine
from unshackle.core.events import events
from unshackle.core.tracks import Audio, Subtitle, Tracks, Video
from unshackle.core.utilities import is_close_match, try_ensure_utf8
from unshackle.core.utils.xml import load_xml
class DASH:
def __init__(self, manifest, url: str):
if manifest is None:
raise ValueError("DASH manifest must be provided.")
if manifest.tag != "MPD":
raise TypeError(f"Expected 'MPD' document, but received a '{manifest.tag}' document instead.")
if not url:
raise requests.URLRequired("DASH manifest URL must be provided for relative path computations.")
if not isinstance(url, str):
raise TypeError(f"Expected url to be a {str}, not {url!r}")
self.manifest = manifest
self.url = url
@classmethod
def from_url(cls, url: str, session: Optional[Session] = None, **args: Any) -> DASH:
if not url:
raise requests.URLRequired("DASH manifest URL must be provided for relative path computations.")
if not isinstance(url, str):
raise TypeError(f"Expected url to be a {str}, not {url!r}")
if not session:
session = Session()
elif not isinstance(session, Session):
raise TypeError(f"Expected session to be a {Session}, not {session!r}")
res = session.get(url, **args)
if res.url != url:
url = res.url
if not res.ok:
raise requests.ConnectionError("Failed to request the MPD document.", response=res)
return DASH.from_text(res.text, url)
@classmethod
def from_text(cls, text: str, url: str) -> DASH:
if not text:
raise ValueError("DASH manifest Text must be provided.")
if not isinstance(text, str):
raise TypeError(f"Expected text to be a {str}, not {text!r}")
if not url:
raise requests.URLRequired("DASH manifest URL must be provided for relative path computations.")
if not isinstance(url, str):
raise TypeError(f"Expected url to be a {str}, not {url!r}")
manifest = load_xml(text)
return cls(manifest, url)
def to_tracks(
self, language: Optional[Union[str, Language]] = None, period_filter: Optional[Callable] = None
) -> Tracks:
"""
Convert an MPEG-DASH document to Video, Audio and Subtitle Track objects.
Parameters:
language: The Title's Original Recorded Language. It will also be used as a fallback
track language value if the manifest does not list language information.
period_filter: Filter out periods within the manifest.
All Track URLs will be a list of segment URLs.
"""
tracks = Tracks()
for period in self.manifest.findall("Period"):
if callable(period_filter) and period_filter(period):
continue
if next(iter(period.xpath("SegmentType/@value")), "content") != "content":
continue
for adaptation_set in period.findall("AdaptationSet"):
if self.is_trick_mode(adaptation_set):
# we don't want trick mode streams (they are only used for fast-forward/rewind)
continue
for rep in adaptation_set.findall("Representation"):
get = partial(self._get, adaptation_set=adaptation_set, representation=rep)
findall = partial(self._findall, adaptation_set=adaptation_set, representation=rep, both=True)
segment_base = rep.find("SegmentBase")
codecs = get("codecs")
content_type = get("contentType")
mime_type = get("mimeType")
if not content_type and mime_type:
content_type = mime_type.split("/")[0]
if not content_type and not mime_type:
raise ValueError("Unable to determine the format of a Representation, cannot continue...")
if mime_type == "application/mp4" or content_type == "application":
# likely mp4-boxed subtitles
# TODO: It may not actually be subtitles
try:
real_codec = Subtitle.Codec.from_mime(codecs)
content_type = "text"
mime_type = f"application/mp4; codecs='{real_codec.value.lower()}'"
except ValueError:
raise ValueError(f"Unsupported content type '{content_type}' with codecs of '{codecs}'")
if content_type == "text" and mime_type and "/mp4" not in mime_type:
# mimeType likely specifies the subtitle codec better than `codecs`
codecs = mime_type.split("/")[1]
if content_type == "video":
track_type = Video
track_codec = Video.Codec.from_codecs(codecs)
track_fps = get("frameRate")
if not track_fps and segment_base is not None:
track_fps = segment_base.get("timescale")
track_args = dict(
range_=self.get_video_range(
codecs, findall("SupplementalProperty"), findall("EssentialProperty")
),
bitrate=get("bandwidth") or None,
width=get("width") or 0,
height=get("height") or 0,
fps=track_fps or None,
)
elif content_type == "audio":
track_type = Audio
track_codec = Audio.Codec.from_codecs(codecs)
track_args = dict(
bitrate=get("bandwidth") or None,
channels=next(
iter(
rep.xpath("AudioChannelConfiguration/@value")
or adaptation_set.xpath("AudioChannelConfiguration/@value")
),
None,
),
joc=self.get_ddp_complexity_index(adaptation_set, rep),
descriptive=self.is_descriptive(adaptation_set),
)
elif content_type == "text":
track_type = Subtitle
track_codec = Subtitle.Codec.from_codecs(codecs or "vtt")
track_args = dict(
cc=self.is_closed_caption(adaptation_set),
sdh=self.is_sdh(adaptation_set),
forced=self.is_forced(adaptation_set),
)
elif content_type == "image":
# we don't want what's likely thumbnails for the seekbar
continue
else:
raise ValueError(f"Unknown Track Type '{content_type}'")
track_lang = self.get_language(adaptation_set, rep, fallback=language)
if not track_lang:
msg = "Language information could not be derived from a Representation."
if language is None:
msg += " No fallback language was provided when calling DASH.to_tracks()."
elif not tag_is_valid(str(language or "").strip()) or str(language).startswith("und"):
msg += f" The fallback language provided is also invalid: {language}"
raise ValueError(msg)
# for some reason it's incredibly common for services to not provide
# a good and actually unique track ID, sometimes because of the lang
# dialect not being represented in the id, or the bitrate, or such.
# this combines all of them as one and hashes it to keep it small(ish).
track_id = hex(
crc32(
"{codec}-{lang}-{bitrate}-{base_url}-{ids}-{track_args}".format(
codec=codecs,
lang=track_lang,
bitrate=get("bitrate"),
base_url=(rep.findtext("BaseURL") or "").split("?")[0],
ids=[get("audioTrackId"), get("id"), period.get("id")],
track_args=track_args,
).encode()
)
)[2:]
tracks.add(
track_type(
id_=track_id,
url=self.url,
codec=track_codec,
language=track_lang,
is_original_lang=bool(language and is_close_match(track_lang, [language])),
descriptor=Video.Descriptor.DASH,
data={
"dash": {
"manifest": self.manifest,
"period": period,
"adaptation_set": adaptation_set,
"representation": rep,
}
},
**track_args,
)
)
# only get tracks from the first main-content period
break
return tracks
@staticmethod
def download_track(
track: AnyTrack,
save_path: Path,
save_dir: Path,
progress: partial,
session: Optional[Session] = None,
proxy: Optional[str] = None,
max_workers: Optional[int] = None,
license_widevine: Optional[Callable] = None,
*,
cdm: Optional[object] = None,
):
if not session:
session = Session()
elif not isinstance(session, Session):
raise TypeError(f"Expected session to be a {Session}, not {session!r}")
if proxy:
session.proxies.update({"all": proxy})
log = logging.getLogger("DASH")
manifest: ElementTree = track.data["dash"]["manifest"]
period: Element = track.data["dash"]["period"]
adaptation_set: Element = track.data["dash"]["adaptation_set"]
representation: Element = track.data["dash"]["representation"]
# Preserve existing DRM if it was set by the service, especially when service set Widevine
# but manifest only contains PlayReady protection (common scenario for some services)
existing_drm = track.drm
manifest_drm = DASH.get_drm(
representation.findall("ContentProtection") + adaptation_set.findall("ContentProtection")
)
# Only override existing DRM if:
# 1. No existing DRM was set, OR
# 2. Existing DRM contains same type as manifest DRM, OR
# 3. Existing DRM is not Widevine (preserve Widevine when service explicitly set it)
should_override_drm = (
not existing_drm
or (
existing_drm
and manifest_drm
and any(isinstance(existing, type(manifest)) for existing in existing_drm for manifest in manifest_drm)
)
or (existing_drm and not any(isinstance(drm, Widevine) for drm in existing_drm))
)
if should_override_drm:
track.drm = manifest_drm
else:
track.drm = existing_drm
manifest_base_url = manifest.findtext("BaseURL")
if not manifest_base_url:
manifest_base_url = track.url
elif not re.match("^https?://", manifest_base_url, re.IGNORECASE):
manifest_base_url = urljoin(track.url, f"./{manifest_base_url}")
period_base_url = urljoin(manifest_base_url, period.findtext("BaseURL"))
rep_base_url = urljoin(period_base_url, representation.findtext("BaseURL"))
period_duration = period.get("duration") or manifest.get("mediaPresentationDuration")
init_data: Optional[bytes] = None
segment_template = representation.find("SegmentTemplate")
if segment_template is None:
segment_template = adaptation_set.find("SegmentTemplate")
segment_list = representation.find("SegmentList")
if segment_list is None:
segment_list = adaptation_set.find("SegmentList")
segment_base = representation.find("SegmentBase")
if segment_base is None:
segment_base = adaptation_set.find("SegmentBase")
segments: list[tuple[str, Optional[str]]] = []
segment_timescale: float = 0
segment_durations: list[int] = []
track_kid: Optional[UUID] = None
if segment_template is not None:
segment_template = copy(segment_template)
start_number = int(segment_template.get("startNumber") or 1)
end_number = int(segment_template.get("endNumber") or 0) or None
segment_timeline = segment_template.find("SegmentTimeline")
segment_timescale = float(segment_template.get("timescale") or 1)
for item in ("initialization", "media"):
value = segment_template.get(item)
if not value:
continue
if not re.match("^https?://", value, re.IGNORECASE):
if not rep_base_url:
raise ValueError("Resolved Segment URL is not absolute, and no Base URL is available.")
value = urljoin(rep_base_url, value)
if not urlparse(value).query:
manifest_url_query = urlparse(track.url).query
if manifest_url_query:
value += f"?{manifest_url_query}"
segment_template.set(item, value)
init_url = segment_template.get("initialization")
if init_url:
res = session.get(
DASH.replace_fields(
init_url, Bandwidth=representation.get("bandwidth"), RepresentationID=representation.get("id")
)
)
res.raise_for_status()
init_data = res.content
track_kid = track.get_key_id(init_data)
if segment_timeline is not None:
current_time = 0
for s in segment_timeline.findall("S"):
if s.get("t"):
current_time = int(s.get("t"))
for _ in range(1 + (int(s.get("r") or 0))):
segment_durations.append(current_time)
current_time += int(s.get("d"))
if not end_number:
end_number = len(segment_durations)
for t, n in zip(segment_durations, range(start_number, end_number + 1)):
segments.append(
(
DASH.replace_fields(
segment_template.get("media"),
Bandwidth=representation.get("bandwidth"),
Number=n,
RepresentationID=representation.get("id"),
Time=t,
),
None,
)
)
else:
if not period_duration:
raise ValueError("Duration of the Period was unable to be determined.")
period_duration = DASH.pt_to_sec(period_duration)
segment_duration = float(segment_template.get("duration") or 1)
if not end_number:
end_number = math.ceil(period_duration / (segment_duration / segment_timescale))
for s in range(start_number, end_number + 1):
segments.append(
(
DASH.replace_fields(
segment_template.get("media"),
Bandwidth=representation.get("bandwidth"),
Number=s,
RepresentationID=representation.get("id"),
Time=s,
),
None,
)
)
# TODO: Should we floor/ceil/round, or is int() ok?
segment_durations.append(int(segment_duration))
elif segment_list is not None:
segment_timescale = float(segment_list.get("timescale") or 1)
init_data = None
initialization = segment_list.find("Initialization")
if initialization is not None:
source_url = initialization.get("sourceURL")
if not source_url:
source_url = rep_base_url
elif not re.match("^https?://", source_url, re.IGNORECASE):
source_url = urljoin(rep_base_url, f"./{source_url}")
if initialization.get("range"):
init_range_header = {"Range": f"bytes={initialization.get('range')}"}
else:
init_range_header = None
res = session.get(url=source_url, headers=init_range_header)
res.raise_for_status()
init_data = res.content
track_kid = track.get_key_id(init_data)
segment_urls = segment_list.findall("SegmentURL")
for segment_url in segment_urls:
media_url = segment_url.get("media")
if not media_url:
media_url = rep_base_url
elif not re.match("^https?://", media_url, re.IGNORECASE):
media_url = urljoin(rep_base_url, f"./{media_url}")
segments.append((media_url, segment_url.get("mediaRange")))
segment_durations.append(int(segment_url.get("duration") or 1))
elif segment_base is not None:
media_range = None
init_data = None
initialization = segment_base.find("Initialization")
if initialization is not None:
if initialization.get("range"):
init_range_header = {"Range": f"bytes={initialization.get('range')}"}
else:
init_range_header = None
res = session.get(url=rep_base_url, headers=init_range_header)
res.raise_for_status()
init_data = res.content
track_kid = track.get_key_id(init_data)
total_size = res.headers.get("Content-Range", "").split("/")[-1]
if total_size:
media_range = f"{len(init_data)}-{total_size}"
segments.append((rep_base_url, media_range))
elif rep_base_url:
segments.append((rep_base_url, None))
else:
log.error("Could not find a way to get segments from this MPD manifest.")
log.debug(track.url)
sys.exit(1)
# TODO: Should we floor/ceil/round, or is int() ok?
track.data["dash"]["timescale"] = int(segment_timescale)
track.data["dash"]["segment_durations"] = segment_durations
if not track.drm and isinstance(track, (Video, Audio)):
try:
track.drm = [Widevine.from_init_data(init_data)]
except Widevine.Exceptions.PSSHNotFound:
# it might not have Widevine DRM, or might not have found the PSSH
log.warning("No Widevine PSSH was found for this track, is it DRM free?")
if track.drm:
track_kid = track_kid or track.get_key_id(url=segments[0][0], session=session)
drm = track.get_drm_for_cdm(cdm)
if isinstance(drm, (Widevine, PlayReady)):
# license and grab content keys
try:
if not license_widevine:
raise ValueError("license_widevine func must be supplied to use DRM")
progress(downloaded="LICENSING")
license_widevine(drm, track_kid=track_kid)
progress(downloaded="[yellow]LICENSED")
except Exception: # noqa
DOWNLOAD_CANCELLED.set() # skip pending track downloads
progress(downloaded="[red]FAILED")
raise
else:
drm = None
if DOWNLOAD_LICENCE_ONLY.is_set():
progress(downloaded="[yellow]SKIPPED")
return
progress(total=len(segments))
downloader = track.downloader
if downloader.__name__ == "aria2c" and any(bytes_range is not None for url, bytes_range in segments):
# aria2(c) does not support the Range header, so fall back to the requests downloader
downloader = requests_downloader
log.warning("Falling back to the requests downloader as aria2(c) doesn't support the Range header")
downloader_args = dict(
urls=[
{"url": url, "headers": {"Range": f"bytes={bytes_range}"} if bytes_range else {}}
for url, bytes_range in segments
],
output_dir=save_dir,
filename="{i:0%d}.mp4" % (len(str(len(segments)))),
headers=session.headers,
cookies=session.cookies,
proxy=proxy,
max_workers=max_workers,
)
if downloader.__name__ == "n_m3u8dl_re":
downloader_args.update({"filename": track.id, "track": track})
for status_update in downloader(**downloader_args):
file_downloaded = status_update.get("file_downloaded")
if file_downloaded:
events.emit(events.Types.SEGMENT_DOWNLOADED, track=track, segment=file_downloaded)
else:
downloaded = status_update.get("downloaded")
if downloaded and downloaded.endswith("/s"):
status_update["downloaded"] = f"DASH {downloaded}"
progress(**status_update)
# see https://github.com/devine-dl/devine/issues/71
for control_file in save_dir.glob("*.aria2__temp"):
control_file.unlink()
segments_to_merge = [x for x in sorted(save_dir.iterdir()) if x.is_file()]
with open(save_path, "wb") as f:
if init_data:
f.write(init_data)
if len(segments_to_merge) > 1:
progress(downloaded="Merging", completed=0, total=len(segments_to_merge))
for segment_file in segments_to_merge:
segment_data = segment_file.read_bytes()
# TODO: fix encoding after decryption?
if (
not drm
and isinstance(track, Subtitle)
and track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML)
):
segment_data = try_ensure_utf8(segment_data)
segment_data = (
segment_data.decode("utf8")
.replace("&lrm;", html.unescape("&lrm;"))
.replace("&rlm;", html.unescape("&rlm;"))
.encode("utf8")
)
f.write(segment_data)
f.flush()
segment_file.unlink()
progress(advance=1)
track.path = save_path
events.emit(events.Types.TRACK_DOWNLOADED, track=track)
if drm:
progress(downloaded="Decrypting", completed=0, total=100)
drm.decrypt(save_path)
track.drm = None
events.emit(events.Types.TRACK_DECRYPTED, track=track, drm=drm, segment=None)
progress(downloaded="Decrypting", advance=100)
save_dir.rmdir()
progress(downloaded="Downloaded")
@staticmethod
def _get(item: str, adaptation_set: Element, representation: Optional[Element] = None) -> Optional[Any]:
"""Helper to get a requested item from the Representation, otherwise from the AdaptationSet."""
adaptation_set_item = adaptation_set.get(item)
if representation is None:
return adaptation_set_item
representation_item = representation.get(item)
if representation_item is not None:
return representation_item
return adaptation_set_item
@staticmethod
def _findall(
item: str, adaptation_set: Element, representation: Optional[Element] = None, both: bool = False
) -> list[Any]:
"""
Helper to get all requested items from the Representation, otherwise from the AdaptationSet.
Optionally, you may pass both=True to keep both values (where available).
"""
adaptation_set_items = adaptation_set.findall(item)
if representation is None:
return adaptation_set_items
representation_items = representation.findall(item)
if both:
return representation_items + adaptation_set_items
if representation_items:
return representation_items
return adaptation_set_items
@staticmethod
def get_language(
adaptation_set: Element,
representation: Optional[Element] = None,
fallback: Optional[Union[str, Language]] = None,
) -> Optional[Language]:
"""
Get Language (if any) from the AdaptationSet or Representation.
A fallback language may be provided if no language information could be
retrieved.
"""
options = []
if representation is not None:
options.append(representation.get("lang"))
# derive language from somewhat common id string format
# the format is typically "{rep_id}_{lang}={bitrate}" or similar
rep_id = representation.get("id")
if rep_id:
m = re.match(r"\w+_(\w+)=\d+", rep_id)
if m:
options.append(m.group(1))
options.append(adaptation_set.get("lang"))
if fallback:
options.append(fallback)
for option in options:
option = str(option or "").strip()
if not option or not tag_is_valid(option) or option.startswith("und"):
continue
return Language.get(option)
@staticmethod
def get_video_range(
codecs: str, all_supplemental_props: list[Element], all_essential_props: list[Element]
) -> Video.Range:
if codecs.startswith(("dva1", "dvav", "dvhe", "dvh1")):
return Video.Range.DV
return Video.Range.from_cicp(
primaries=next(
(
int(x.get("value"))
for x in all_supplemental_props + all_essential_props
if x.get("schemeIdUri") == "urn:mpeg:mpegB:cicp:ColourPrimaries"
),
0,
),
transfer=next(
(
int(x.get("value"))
for x in all_supplemental_props + all_essential_props
if x.get("schemeIdUri") == "urn:mpeg:mpegB:cicp:TransferCharacteristics"
),
0,
),
matrix=next(
(
int(x.get("value"))
for x in all_supplemental_props + all_essential_props
if x.get("schemeIdUri") == "urn:mpeg:mpegB:cicp:MatrixCoefficients"
),
0,
),
)
@staticmethod
def is_trick_mode(adaptation_set: Element) -> bool:
"""Check if contents of Adaptation Set is a Trick-Mode stream."""
essential_props = adaptation_set.findall("EssentialProperty")
supplemental_props = adaptation_set.findall("SupplementalProperty")
return any(
prop.get("schemeIdUri") == "http://dashif.org/guidelines/trickmode"
for prop in essential_props + supplemental_props
)
@staticmethod
def is_descriptive(adaptation_set: Element) -> bool:
"""Check if contents of Adaptation Set is Descriptive."""
return any(
(x.get("schemeIdUri"), x.get("value"))
in (("urn:mpeg:dash:role:2011", "descriptive"), ("urn:tva:metadata:cs:AudioPurposeCS:2007", "1"))
for x in adaptation_set.findall("Accessibility")
)
@staticmethod
def is_forced(adaptation_set: Element) -> bool:
"""Check if contents of Adaptation Set is a Forced Subtitle."""
return any(
x.get("schemeIdUri") == "urn:mpeg:dash:role:2011"
and x.get("value") in ("forced-subtitle", "forced_subtitle")
for x in adaptation_set.findall("Role")
)
@staticmethod
def is_sdh(adaptation_set: Element) -> bool:
"""Check if contents of Adaptation Set is for the Hearing Impaired."""
return any(
(x.get("schemeIdUri"), x.get("value")) == ("urn:tva:metadata:cs:AudioPurposeCS:2007", "2")
for x in adaptation_set.findall("Accessibility")
)
@staticmethod
def is_closed_caption(adaptation_set: Element) -> bool:
"""Check if contents of Adaptation Set is a Closed Caption Subtitle."""
return any(
(x.get("schemeIdUri"), x.get("value")) == ("urn:mpeg:dash:role:2011", "caption")
for x in adaptation_set.findall("Role")
)
@staticmethod
def get_ddp_complexity_index(adaptation_set: Element, representation: Optional[Element]) -> Optional[int]:
"""Get the DD+ Complexity Index (if any) from the AdaptationSet or Representation."""
return next(
(
int(x.get("value"))
for x in DASH._findall("SupplementalProperty", adaptation_set, representation, both=True)
if x.get("schemeIdUri") == "tag:dolby.com,2018:dash:EC3_ExtensionComplexityIndex:2018"
),
None,
)
@staticmethod
def get_drm(protections: list[Element]) -> list[DRM_T]:
drm: list[DRM_T] = []
for protection in protections:
urn = (protection.get("schemeIdUri") or "").lower()
if urn == WidevineCdm.urn:
pssh_text = protection.findtext("pssh")
if not pssh_text:
continue
pssh = PSSH(pssh_text)
kid = protection.get("kid")
if kid:
kid = UUID(bytes=base64.b64decode(kid))
default_kid = protection.get("default_KID")
if default_kid:
kid = UUID(default_kid)
if not pssh.key_ids and not kid:
kid = next((UUID(p.get("default_KID")) for p in protections if p.get("default_KID")), None)
drm.append(Widevine(pssh=pssh, kid=kid))
elif urn in ("urn:uuid:9a04f079-9840-4286-ab92-e65be0885f95", "urn:microsoft:playready"):
pr_pssh_b64 = (
protection.findtext("pssh")
or protection.findtext("pro")
or protection.findtext("{urn:microsoft:playready}pro")
)
if not pr_pssh_b64:
continue
pr_pssh = PR_PSSH(pr_pssh_b64)
kid_b64 = protection.findtext("kid")
kid = None
if kid_b64:
try:
kid = UUID(bytes=base64.b64decode(kid_b64))
except Exception:
kid = None
drm.append(PlayReady(pssh=pr_pssh, kid=kid, pssh_b64=pr_pssh_b64))
return drm
@staticmethod
def pt_to_sec(d: Union[str, float]) -> float:
if isinstance(d, float):
return d
has_ymd = d[0:8] == "P0Y0M0DT"
if d[0:2] != "PT" and not has_ymd:
raise ValueError("Input data is not a valid time string.")
if has_ymd:
d = d[8:].upper()  # skip the 8-char `P0Y0M0DT` prefix
else:
d = d[2:].upper() # skip `PT`
m = re.findall(r"([\d.]+.)", d)
return sum(float(x[0:-1]) * {"H": 60 * 60, "M": 60, "S": 1}[x[-1].upper()] for x in m)
@staticmethod
def replace_fields(url: str, **kwargs: Any) -> str:
for field, value in kwargs.items():
url = url.replace(f"${field}$", str(value))
m = re.search(rf"\${re.escape(field)}%([a-z0-9]+)\$", url, flags=re.I)
if m:
url = url.replace(m.group(), f"{value:{m.group(1)}}")
return url
__all__ = ("DASH",)
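# Usage sketch (illustrative only; the MPD URL and language are hypothetical):
#
#     dash = DASH.from_url("https://example.com/manifest.mpd")
#     tracks = dash.to_tracks(language="en")
#
# The two pure helpers above can be exercised directly:
#
#     DASH.pt_to_sec("PT1H30M4.5S")
#     # -> 5404.5  (1*3600 + 30*60 + 4.5)
#     DASH.replace_fields("seg_$RepresentationID$_$Number%05d$.m4s", RepresentationID="v1", Number=42)
#     # -> "seg_v1_00042.m4s"  ($Number%05d$ expands with the 05d format spec)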

832
unshackle/core/manifests/hls.py Normal file
View File

@@ -0,0 +1,832 @@
from __future__ import annotations
import base64
import html
import json
import logging
import shutil
import subprocess
import sys
from functools import partial
from pathlib import Path
from typing import Any, Callable, Optional, Union
from urllib.parse import urljoin
from zlib import crc32
import httpx
import m3u8
import requests
from langcodes import Language, tag_is_valid
from m3u8 import M3U8
from pyplayready.cdm import Cdm as PlayReadyCdm
from pyplayready.system.pssh import PSSH as PR_PSSH
from pywidevine.cdm import Cdm as WidevineCdm
from pywidevine.pssh import PSSH as WV_PSSH
from requests import Session
from unshackle.core import binaries
from unshackle.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY, AnyTrack
from unshackle.core.downloaders import requests as requests_downloader
from unshackle.core.drm import DRM_T, ClearKey, PlayReady, Widevine
from unshackle.core.events import events
from unshackle.core.tracks import Audio, Subtitle, Tracks, Video
from unshackle.core.utilities import get_extension, is_close_match, try_ensure_utf8
class HLS:
def __init__(self, manifest: M3U8, session: Optional[Union[Session, httpx.Client]] = None):
if not manifest:
raise ValueError("HLS manifest must be provided.")
if not isinstance(manifest, M3U8):
raise TypeError(f"Expected manifest to be a {M3U8}, not {manifest!r}")
if not manifest.is_variant:
raise ValueError("Expected the M3U(8) manifest to be a Variant Playlist.")
self.manifest = manifest
self.session = session or Session()
@classmethod
def from_url(cls, url: str, session: Optional[Union[Session, httpx.Client]] = None, **args: Any) -> HLS:
if not url:
raise requests.URLRequired("HLS manifest URL must be provided.")
if not isinstance(url, str):
raise TypeError(f"Expected url to be a {str}, not {url!r}")
if not session:
session = Session()
elif not isinstance(session, (Session, httpx.Client)):
raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {session!r}")
res = session.get(url, **args)
# Handle both requests and httpx response objects
if isinstance(res, requests.Response):
if not res.ok:
raise requests.ConnectionError("Failed to request the M3U(8) document.", response=res)
content = res.text
elif isinstance(res, httpx.Response):
if res.status_code >= 400:
raise requests.ConnectionError("Failed to request the M3U(8) document.", response=res)
content = res.text
else:
raise TypeError(f"Expected response to be a requests.Response or httpx.Response, not {type(res)}")
master = m3u8.loads(content, uri=url)
return cls(master, session)
@classmethod
def from_text(cls, text: str, url: str) -> HLS:
if not text:
raise ValueError("HLS manifest Text must be provided.")
if not isinstance(text, str):
raise TypeError(f"Expected text to be a {str}, not {text!r}")
if not url:
raise requests.URLRequired("HLS manifest URL must be provided for relative path computations.")
if not isinstance(url, str):
raise TypeError(f"Expected url to be a {str}, not {url!r}")
master = m3u8.loads(text, uri=url)
return cls(master)
def to_tracks(self, language: Union[str, Language]) -> Tracks:
"""
Convert a Variant Playlist M3U(8) document to Video, Audio and Subtitle Track objects.
Parameters:
language: Language you expect the Primary Track to be in.
Each Track object's URL will point to another M3U(8) document. However, these
documents will be Invariant Playlists and contain the list of segment URIs among
other metadata.
"""
session_keys = list(self.manifest.session_keys or [])
if not session_keys:
session_keys = HLS.parse_session_data_keys(self.manifest, self.session)
session_drm = HLS.get_all_drm(session_keys)
audio_codecs_by_group_id: dict[str, Audio.Codec] = {}
tracks = Tracks()
for playlist in self.manifest.playlists:
audio_group = playlist.stream_info.audio
if audio_group:
audio_codec = Audio.Codec.from_codecs(playlist.stream_info.codecs)
audio_codecs_by_group_id[audio_group] = audio_codec
try:
# TODO: Any better way to figure out the primary track type?
if playlist.stream_info.codecs:
Video.Codec.from_codecs(playlist.stream_info.codecs)
except ValueError:
primary_track_type = Audio
else:
primary_track_type = Video
tracks.add(
primary_track_type(
id_=hex(crc32(str(playlist).encode()))[2:],
url=urljoin(playlist.base_uri, playlist.uri),
codec=(
primary_track_type.Codec.from_codecs(playlist.stream_info.codecs)
if playlist.stream_info.codecs
else None
),
language=language, # HLS manifests do not seem to have language info
is_original_lang=True, # TODO: All we can do is assume Yes
bitrate=playlist.stream_info.average_bandwidth or playlist.stream_info.bandwidth,
descriptor=Video.Descriptor.HLS,
drm=session_drm,
data={"hls": {"playlist": playlist}},
# video track args
**(
dict(
range_=Video.Range.DV
if any(
codec.split(".")[0] in ("dva1", "dvav", "dvhe", "dvh1")
for codec in (playlist.stream_info.codecs or "").lower().split(",")
)
else Video.Range.from_m3u_range_tag(playlist.stream_info.video_range),
width=playlist.stream_info.resolution[0] if playlist.stream_info.resolution else None,
height=playlist.stream_info.resolution[1] if playlist.stream_info.resolution else None,
fps=playlist.stream_info.frame_rate,
)
if primary_track_type is Video
else {}
),
)
)
for media in self.manifest.media:
if not media.uri:
continue
joc = 0
if media.type == "AUDIO":
track_type = Audio
codec = audio_codecs_by_group_id.get(media.group_id)
if media.channels and media.channels.endswith("/JOC"):
joc = int(media.channels.split("/JOC")[0])
media.channels = "5.1"
else:
track_type = Subtitle
codec = Subtitle.Codec.WebVTT # assuming WebVTT, codec info isn't shown
track_lang = next(
(
Language.get(option)
for x in (media.language, language)
                    for option in [str(x or "").strip()]
if tag_is_valid(option) and not option.startswith("und")
),
None,
)
if not track_lang:
msg = "Language information could not be derived for a media."
if language is None:
msg += " No fallback language was provided when calling HLS.to_tracks()."
elif not tag_is_valid((str(language) or "").strip()) or str(language).startswith("und"):
msg += f" The fallback language provided is also invalid: {language}"
raise ValueError(msg)
tracks.add(
track_type(
id_=hex(crc32(str(media).encode()))[2:],
url=urljoin(media.base_uri, media.uri),
codec=codec,
language=track_lang, # HLS media may not have language info, fallback if needed
is_original_lang=bool(language and is_close_match(track_lang, [language])),
descriptor=Audio.Descriptor.HLS,
drm=session_drm if media.type == "AUDIO" else None,
data={"hls": {"media": media}},
# audio track args
**(
dict(
bitrate=0, # TODO: M3U doesn't seem to state bitrate?
channels=media.channels,
joc=joc,
descriptive="public.accessibility.describes-video" in (media.characteristics or ""),
)
if track_type is Audio
else dict(
forced=media.forced == "YES",
sdh="public.accessibility.describes-music-and-sound" in (media.characteristics or ""),
)
if track_type is Subtitle
else {}
),
)
)
return tracks
@staticmethod
def download_track(
track: AnyTrack,
save_path: Path,
save_dir: Path,
progress: partial,
session: Optional[Union[Session, httpx.Client]] = None,
proxy: Optional[str] = None,
max_workers: Optional[int] = None,
license_widevine: Optional[Callable] = None,
*,
cdm: Optional[object] = None,
) -> None:
if not session:
session = Session()
elif not isinstance(session, (Session, httpx.Client)):
raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {session!r}")
if proxy:
# Handle proxies differently based on session type
if isinstance(session, Session):
session.proxies.update({"all": proxy})
elif isinstance(session, httpx.Client):
session.proxies = {"http://": proxy, "https://": proxy}
log = logging.getLogger("HLS")
# Get the playlist text and handle both session types
response = session.get(track.url)
if isinstance(response, requests.Response):
if not response.ok:
log.error(f"Failed to request the invariant M3U8 playlist: {response.status_code}")
sys.exit(1)
playlist_text = response.text
elif isinstance(response, httpx.Response):
if response.status_code >= 400:
log.error(f"Failed to request the invariant M3U8 playlist: {response.status_code}")
sys.exit(1)
playlist_text = response.text
else:
raise TypeError(f"Expected response to be a requests.Response or httpx.Response, not {type(response)}")
master = m3u8.loads(playlist_text, uri=track.url)
if not master.segments:
log.error("Track's HLS playlist has no segments, expecting an invariant M3U8 playlist.")
sys.exit(1)
if track.drm:
session_drm = track.get_drm_for_cdm(cdm)
if isinstance(session_drm, (Widevine, PlayReady)):
# license and grab content keys
try:
if not license_widevine:
raise ValueError("license_widevine func must be supplied to use DRM")
progress(downloaded="LICENSING")
license_widevine(session_drm)
progress(downloaded="[yellow]LICENSED")
except Exception: # noqa
DOWNLOAD_CANCELLED.set() # skip pending track downloads
progress(downloaded="[red]FAILED")
raise
else:
session_drm = None
if DOWNLOAD_LICENCE_ONLY.is_set():
progress(downloaded="[yellow]SKIPPED")
return
unwanted_segments = [
segment for segment in master.segments if callable(track.OnSegmentFilter) and track.OnSegmentFilter(segment)
]
total_segments = len(master.segments) - len(unwanted_segments)
progress(total=total_segments)
downloader = track.downloader
if downloader.__name__ == "aria2c" and any(x.byterange for x in master.segments if x not in unwanted_segments):
downloader = requests_downloader
log.warning("Falling back to the requests downloader as aria2(c) doesn't support the Range header")
urls: list[dict[str, Any]] = []
segment_durations: list[int] = []
range_offset = 0
for segment in master.segments:
if segment in unwanted_segments:
continue
segment_durations.append(int(segment.duration))
if segment.byterange:
byte_range = HLS.calculate_byte_range(segment.byterange, range_offset)
                range_offset = int(byte_range.split("-")[1]) + 1  # an omitted offset continues after the previous range's end
else:
byte_range = None
urls.append(
{
"url": urljoin(segment.base_uri, segment.uri),
"headers": {"Range": f"bytes={byte_range}"} if byte_range else {},
}
)
track.data["hls"]["segment_durations"] = segment_durations
segment_save_dir = save_dir / "segments"
skip_merge = False
downloader_args = dict(
urls=urls,
output_dir=segment_save_dir,
filename="{i:0%d}{ext}" % len(str(len(urls))),
headers=session.headers,
cookies=session.cookies,
proxy=proxy,
max_workers=max_workers,
)
if downloader.__name__ == "n_m3u8dl_re":
skip_merge = True
downloader_args.update(
{
"output_dir": save_dir,
"filename": track.id,
"track": track,
"content_keys": session_drm.content_keys if session_drm else None,
}
)
for status_update in downloader(**downloader_args):
file_downloaded = status_update.get("file_downloaded")
if file_downloaded:
events.emit(events.Types.SEGMENT_DOWNLOADED, track=track, segment=file_downloaded)
else:
downloaded = status_update.get("downloaded")
if downloaded and downloaded.endswith("/s"):
status_update["downloaded"] = f"HLS {downloaded}"
progress(**status_update)
# see https://github.com/devine-dl/devine/issues/71
for control_file in segment_save_dir.glob("*.aria2__temp"):
control_file.unlink()
if not skip_merge:
progress(total=total_segments, completed=0, downloaded="Merging")
name_len = len(str(total_segments))
discon_i = 0
range_offset = 0
map_data: Optional[tuple[m3u8.model.InitializationSection, bytes]] = None
if session_drm:
encryption_data: Optional[tuple[Optional[m3u8.Key], DRM_T]] = (None, session_drm)
else:
encryption_data: Optional[tuple[Optional[m3u8.Key], DRM_T]] = None
i = -1
for real_i, segment in enumerate(master.segments):
if segment not in unwanted_segments:
i += 1
is_last_segment = (real_i + 1) == len(master.segments)
def merge(to: Path, via: list[Path], delete: bool = False, include_map_data: bool = False):
"""
Merge all files to a given path, optionally including map data.
Parameters:
to: The output file with all merged data.
via: List of files to merge, in sequence.
delete: Delete the file once it's been merged.
include_map_data: Whether to include the init map data.
"""
with open(to, "wb") as x:
if include_map_data and map_data and map_data[1]:
x.write(map_data[1])
for file in via:
x.write(file.read_bytes())
x.flush()
if delete:
file.unlink()
def decrypt(include_this_segment: bool) -> Path:
"""
                Decrypt all segments that use the currently set DRM.
All segments that will be decrypted with this DRM will be merged together
in sequence, prefixed with the init data (if any), and then deleted. Once
merged they will be decrypted. The merged and decrypted file names state
the range of segments that were used.
Parameters:
include_this_segment: Whether to include the current segment in the
list of segments to merge and decrypt. This should be False if
decrypting on EXT-X-KEY changes, or True when decrypting on the
last segment.
Returns the decrypted path.
"""
drm = encryption_data[1]
first_segment_i = next(
int(file.stem) for file in sorted(segment_save_dir.iterdir()) if file.stem.isdigit()
)
last_segment_i = max(0, i - int(not include_this_segment))
range_len = (last_segment_i - first_segment_i) + 1
segment_range = f"{str(first_segment_i).zfill(name_len)}-{str(last_segment_i).zfill(name_len)}"
merged_path = (
segment_save_dir / f"{segment_range}{get_extension(master.segments[last_segment_i].uri)}"
)
decrypted_path = segment_save_dir / f"{merged_path.stem}_decrypted{merged_path.suffix}"
files = [
file
for file in sorted(segment_save_dir.iterdir())
if file.stem.isdigit() and first_segment_i <= int(file.stem) <= last_segment_i
]
if not files:
raise ValueError(f"None of the segment files for {segment_range} exist...")
elif len(files) != range_len:
raise ValueError(f"Missing {range_len - len(files)} segment files for {segment_range}...")
if isinstance(drm, Widevine):
# with widevine we can merge all segments and decrypt once
merge(to=merged_path, via=files, delete=True, include_map_data=True)
drm.decrypt(merged_path)
merged_path.rename(decrypted_path)
else:
# with other drm we must decrypt separately and then merge them
# for aes this is because each segment likely has 16-byte padding
for file in files:
drm.decrypt(file)
merge(to=merged_path, via=files, delete=True, include_map_data=True)
events.emit(events.Types.TRACK_DECRYPTED, track=track, drm=drm, segment=decrypted_path)
return decrypted_path
def merge_discontinuity(include_this_segment: bool, include_map_data: bool = True):
"""
Merge all segments of the discontinuity.
All segment files for this discontinuity must already be downloaded and
already decrypted (if it needs to be decrypted).
Parameters:
                    include_this_segment: Whether to include the current segment in the
                        list of segments to merge. This should be False when merging on
                        an EXT-X-DISCONTINUITY tag, or True when merging on the last
                        segment.
include_map_data: Whether to prepend the init map data before the
segment files when merging.
"""
last_segment_i = max(0, i - int(not include_this_segment))
files = [
file
for file in sorted(segment_save_dir.iterdir())
if int(file.stem.replace("_decrypted", "").split("-")[-1]) <= last_segment_i
]
if files:
to_dir = segment_save_dir.parent
to_path = to_dir / f"{str(discon_i).zfill(name_len)}{files[-1].suffix}"
merge(to=to_path, via=files, delete=True, include_map_data=include_map_data)
if segment not in unwanted_segments:
if isinstance(track, Subtitle):
segment_file_ext = get_extension(segment.uri)
segment_file_path = segment_save_dir / f"{str(i).zfill(name_len)}{segment_file_ext}"
segment_data = try_ensure_utf8(segment_file_path.read_bytes())
if track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML):
segment_data = (
segment_data.decode("utf8")
.replace("&lrm;", html.unescape("&lrm;"))
.replace("&rlm;", html.unescape("&rlm;"))
.encode("utf8")
)
segment_file_path.write_bytes(segment_data)
if segment.discontinuity and i != 0:
if encryption_data:
decrypt(include_this_segment=False)
merge_discontinuity(
include_this_segment=False, include_map_data=not encryption_data or not encryption_data[1]
)
discon_i += 1
range_offset = 0 # TODO: Should this be reset or not?
map_data = None
if encryption_data:
encryption_data = (encryption_data[0], encryption_data[1])
if segment.init_section and (not map_data or segment.init_section != map_data[0]):
if segment.init_section.byterange:
init_byte_range = HLS.calculate_byte_range(segment.init_section.byterange, range_offset)
                    range_offset = int(init_byte_range.split("-")[1]) + 1
init_range_header = {"Range": f"bytes={init_byte_range}"}
else:
init_range_header = {}
# Handle both session types for init section request
res = session.get(
url=urljoin(segment.init_section.base_uri, segment.init_section.uri),
headers=init_range_header,
)
# Check response based on session type
if isinstance(res, requests.Response):
res.raise_for_status()
init_content = res.content
elif isinstance(res, httpx.Response):
if res.status_code >= 400:
raise requests.HTTPError(f"HTTP Error: {res.status_code}", response=res)
init_content = res.content
else:
raise TypeError(
f"Expected response to be requests.Response or httpx.Response, not {type(res)}"
)
map_data = (segment.init_section, init_content)
segment_keys = getattr(segment, "keys", None)
if segment_keys:
key = HLS.get_supported_key(segment_keys)
if encryption_data and encryption_data[0] != key and i != 0 and segment not in unwanted_segments:
decrypt(include_this_segment=False)
if key is None:
encryption_data = None
elif not encryption_data or encryption_data[0] != key:
drm = HLS.get_drm(key, session)
if isinstance(drm, (Widevine, PlayReady)):
try:
if map_data:
track_kid = track.get_key_id(map_data[1])
else:
track_kid = None
progress(downloaded="LICENSING")
license_widevine(drm, track_kid=track_kid)
progress(downloaded="[yellow]LICENSED")
except Exception: # noqa
DOWNLOAD_CANCELLED.set() # skip pending track downloads
progress(downloaded="[red]FAILED")
raise
encryption_data = (key, drm)
if DOWNLOAD_LICENCE_ONLY.is_set():
continue
if is_last_segment:
# required as it won't end with EXT-X-DISCONTINUITY nor a new key
if encryption_data:
decrypt(include_this_segment=True)
merge_discontinuity(
include_this_segment=True, include_map_data=not encryption_data or not encryption_data[1]
)
progress(advance=1)
if DOWNLOAD_LICENCE_ONLY.is_set():
return
if segment_save_dir.exists():
segment_save_dir.rmdir()
# finally merge all the discontinuity save files together to the final path
segments_to_merge = [x for x in sorted(save_dir.iterdir()) if x.is_file()]
if len(segments_to_merge) == 1:
shutil.move(segments_to_merge[0], save_path)
else:
progress(downloaded="Merging")
if isinstance(track, (Video, Audio)):
HLS.merge_segments(segments=segments_to_merge, save_path=save_path)
else:
with open(save_path, "wb") as f:
for discontinuity_file in segments_to_merge:
discontinuity_data = discontinuity_file.read_bytes()
f.write(discontinuity_data)
f.flush()
discontinuity_file.unlink()
save_dir.rmdir()
progress(downloaded="Downloaded")
track.path = save_path
events.emit(events.Types.TRACK_DOWNLOADED, track=track)
@staticmethod
def merge_segments(segments: list[Path], save_path: Path) -> int:
"""
Concatenate Segments by first demuxing with FFmpeg.
Returns the file size of the merged file.
"""
if not binaries.FFMPEG:
raise EnvironmentError("FFmpeg executable was not found but is required to merge HLS segments.")
demuxer_file = segments[0].parent / "ffmpeg_concat_demuxer.txt"
demuxer_file.write_text("\n".join([f"file '{segment}'" for segment in segments]))
subprocess.check_call(
[
binaries.FFMPEG,
"-hide_banner",
"-loglevel",
"panic",
"-f",
"concat",
"-safe",
"0",
"-i",
demuxer_file,
"-map",
"0",
"-c",
"copy",
save_path,
]
)
demuxer_file.unlink()
for segment in segments:
segment.unlink()
return save_path.stat().st_size
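    # For reference, the concat demuxer file written above is a plain text list
    # of inputs, one per line (paths here are illustrative):
    #
    #   file '/tmp/segments/01.ts'
    #   file '/tmp/segments/02.ts'
    #
    # FFmpeg then concatenates the listed files with stream copy (no re-encode).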
@staticmethod
def parse_session_data_keys(
manifest: M3U8, session: Optional[Union[Session, httpx.Client]] = None
) -> list[m3u8.model.Key]:
"""Parse `com.apple.hls.keys` session data and return Key objects."""
keys: list[m3u8.model.Key] = []
for data in getattr(manifest, "session_data", []) or []:
if getattr(data, "data_id", None) != "com.apple.hls.keys":
continue
value = getattr(data, "value", None)
if not value and data.uri:
if not session:
session = Session()
res = session.get(urljoin(manifest.base_uri or "", data.uri))
value = res.text
if not value:
continue
try:
decoded = base64.b64decode(value).decode()
except Exception:
decoded = value
try:
items = json.loads(decoded)
except Exception:
continue
for item in items if isinstance(items, list) else []:
if not isinstance(item, dict):
continue
key = m3u8.model.Key(
method=item.get("method"),
base_uri=manifest.base_uri or "",
uri=item.get("uri"),
keyformat=item.get("keyformat"),
keyformatversions=",".join(item.get("keyformatversion") or item.get("keyformatversions") or []),
)
if key.method in {"AES-128", "ISO-23001-7"} or (
key.keyformat
and key.keyformat.lower()
in {
WidevineCdm.urn,
PlayReadyCdm,
"com.microsoft.playready",
}
):
keys.append(key)
return keys
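    # For reference, the `com.apple.hls.keys` session-data value parsed above is
    # assumed to be (possibly base64-encoded) JSON of roughly this shape; the
    # values below are illustrative, not from a real manifest:
    #
    #   [{"method": "ISO-23001-7",
    #     "uri": "data:text/plain;base64,<PSSH>",
    #     "keyformat": "urn:uuid:edef8ba9-79d6-4ace-a3c8-27dcd51d21ed",
    #     "keyformatversions": ["1"]}]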
@staticmethod
def get_supported_key(keys: list[Union[m3u8.model.SessionKey, m3u8.model.Key]]) -> Optional[m3u8.Key]:
"""
        Get a supported Key System from a list of Key Systems.
        Note that the key systems are chosen in an opinionated order.
        Returns None if one of the key systems is method=NONE, which means all segments
        from here on should be treated as plain text until another key system is
        encountered, unless it is also method=NONE.
Raises NotImplementedError if none of the key systems are supported.
"""
if any(key.method == "NONE" for key in keys):
return None
unsupported_systems = []
for key in keys:
if not key:
continue
# TODO: Add a way to specify which supported key system to use
# TODO: Add support for 'SAMPLE-AES', 'AES-CTR', 'AES-CBC', 'ClearKey'
elif key.method == "AES-128":
return key
elif key.method == "ISO-23001-7":
return key
elif key.keyformat and key.keyformat.lower() == WidevineCdm.urn:
return key
            elif key.keyformat and "com.microsoft.playready" in key.keyformat.lower():
return key
else:
unsupported_systems.append(key.method + (f" ({key.keyformat})" if key.keyformat else ""))
else:
raise NotImplementedError(f"None of the key systems are supported: {', '.join(unsupported_systems)}")
@staticmethod
def get_drm(
key: Union[m3u8.model.SessionKey, m3u8.model.Key], session: Optional[Union[Session, httpx.Client]] = None
) -> DRM_T:
"""
Convert HLS EXT-X-KEY data to an initialized DRM object.
Parameters:
key: m3u8 key system (EXT-X-KEY) object.
session: Optional session used to request AES-128 URIs.
Useful to set headers, proxies, cookies, and so forth.
Raises a NotImplementedError if the key system is not supported.
"""
if not isinstance(session, (Session, httpx.Client, type(None))):
raise TypeError(f"Expected session to be a {Session} or {httpx.Client}, not {type(session)}")
if not session:
session = Session()
# TODO: Add support for 'SAMPLE-AES', 'AES-CTR', 'AES-CBC', 'ClearKey'
if key.method == "AES-128":
drm = ClearKey.from_m3u_key(key, session)
elif key.method == "ISO-23001-7":
drm = Widevine(pssh=WV_PSSH.new(key_ids=[key.uri.split(",")[-1]], system_id=WV_PSSH.SystemId.Widevine))
elif key.keyformat and key.keyformat.lower() == WidevineCdm.urn:
drm = Widevine(
pssh=WV_PSSH(key.uri.split(",")[-1]),
**key._extra_params, # noqa
)
        elif key.keyformat and "com.microsoft.playready" in key.keyformat.lower():
drm = PlayReady(
pssh=PR_PSSH(key.uri.split(",")[-1]),
pssh_b64=key.uri.split(",")[-1],
)
else:
raise NotImplementedError(f"The key system is not supported: {key}")
return drm
@staticmethod
def get_all_drm(
keys: list[Union[m3u8.model.SessionKey, m3u8.model.Key]], proxy: Optional[str] = None
) -> list[DRM_T]:
"""
Convert HLS EXT-X-KEY data to initialized DRM objects.
Parameters:
keys: m3u8 key system (EXT-X-KEY) objects.
proxy: Optional proxy string used for requesting AES-128 URIs.
Raises a NotImplementedError if none of the key systems are supported.
"""
unsupported_keys: list[m3u8.Key] = []
drm_objects: list[DRM_T] = []
if any(key.method == "NONE" for key in keys):
return []
for key in keys:
try:
drm = HLS.get_drm(key, proxy)
drm_objects.append(drm)
except NotImplementedError:
unsupported_keys.append(key)
if not drm_objects and unsupported_keys:
logging.debug(
"Ignoring unsupported key systems: %s",
", ".join([str(k.keyformat or k.method) for k in unsupported_keys]),
)
return []
return drm_objects
@staticmethod
def calculate_byte_range(m3u_range: str, fallback_offset: int = 0) -> str:
"""
        Convert an HLS EXT-X-BYTERANGE value to a more traditional range value.
E.g., '1433@0' -> '0-1432', '357392@1433' -> '1433-358824'.
"""
parts = [int(x) for x in m3u_range.split("@")]
if len(parts) != 2:
parts.append(fallback_offset)
length, offset = parts
return f"{offset}-{offset + length - 1}"
__all__ = ("HLS",)

View File

@@ -0,0 +1,335 @@
from __future__ import annotations
import base64
import hashlib
import html
import shutil
import urllib.parse
from functools import partial
from pathlib import Path
from typing import Any, Callable, Optional, Union
import requests
from langcodes import Language, tag_is_valid
from lxml.etree import Element
from pyplayready.system.pssh import PSSH as PR_PSSH
from pywidevine.pssh import PSSH
from requests import Session
from unshackle.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY, AnyTrack
from unshackle.core.drm import DRM_T, PlayReady, Widevine
from unshackle.core.events import events
from unshackle.core.tracks import Audio, Subtitle, Track, Tracks, Video
from unshackle.core.utilities import try_ensure_utf8
from unshackle.core.utils.xml import load_xml
class ISM:
def __init__(self, manifest: Element, url: str) -> None:
if manifest.tag != "SmoothStreamingMedia":
raise TypeError(f"Expected 'SmoothStreamingMedia' document, got '{manifest.tag}'")
if not url:
raise requests.URLRequired("ISM manifest URL must be provided for relative paths")
self.manifest = manifest
self.url = url
@classmethod
def from_url(cls, url: str, session: Optional[Session] = None, **kwargs: Any) -> "ISM":
if not url:
raise requests.URLRequired("ISM manifest URL must be provided")
if not session:
session = Session()
res = session.get(url, **kwargs)
if res.url != url:
url = res.url
res.raise_for_status()
return cls(load_xml(res.content), url)
@classmethod
def from_text(cls, text: str, url: str) -> "ISM":
if not text:
raise ValueError("ISM manifest text must be provided")
if not url:
raise requests.URLRequired("ISM manifest URL must be provided for relative paths")
return cls(load_xml(text), url)
@staticmethod
def _get_drm(headers: list[Element]) -> list[DRM_T]:
drm: list[DRM_T] = []
for header in headers:
system_id = (header.get("SystemID") or header.get("SystemId") or "").lower()
data = "".join(header.itertext()).strip()
if not data:
continue
if system_id == "edef8ba9-79d6-4ace-a3c8-27dcd51d21ed":
try:
pssh = PSSH(base64.b64decode(data))
except Exception:
continue
kid = next(iter(pssh.key_ids), None)
drm.append(Widevine(pssh=pssh, kid=kid))
elif system_id == "9a04f079-9840-4286-ab92-e65be0885f95":
try:
pr_pssh = PR_PSSH(data)
except Exception:
continue
drm.append(PlayReady(pssh=pr_pssh, pssh_b64=data))
return drm
def to_tracks(self, language: Optional[Union[str, Language]] = None) -> Tracks:
tracks = Tracks()
base_url = self.url
duration = int(self.manifest.get("Duration") or 0)
drm = self._get_drm(self.manifest.xpath(".//ProtectionHeader"))
for stream_index in self.manifest.findall("StreamIndex"):
content_type = stream_index.get("Type")
if not content_type:
raise ValueError("No content type value could be found")
for ql in stream_index.findall("QualityLevel"):
codec = ql.get("FourCC")
if codec == "TTML":
codec = "STPP"
track_lang = None
lang = (stream_index.get("Language") or "").strip()
if lang and tag_is_valid(lang) and not lang.startswith("und"):
track_lang = Language.get(lang)
track_urls: list[str] = []
fragment_time = 0
fragments = stream_index.findall("c")
# Some manifests omit the first fragment in the <c> list but
# still expect a request for start time 0 which contains the
# initialization segment. If the first declared fragment is not
# at time 0, prepend the missing initialization URL.
if fragments:
first_time = int(fragments[0].get("t") or 0)
if first_time != 0:
track_urls.append(
urllib.parse.urljoin(
base_url,
stream_index.get("Url").format_map(
{
"bitrate": ql.get("Bitrate"),
"start time": "0",
}
),
)
)
for idx, frag in enumerate(fragments):
fragment_time = int(frag.get("t", fragment_time))
repeat = int(frag.get("r", 1))
duration_frag = int(frag.get("d") or 0)
if not duration_frag:
try:
next_time = int(fragments[idx + 1].get("t"))
                        except (IndexError, TypeError):  # last fragment, or a missing "t" attribute
                            next_time = duration
                        duration_frag = (next_time - fragment_time) // repeat
for _ in range(repeat):
track_urls.append(
urllib.parse.urljoin(
base_url,
stream_index.get("Url").format_map(
{
"bitrate": ql.get("Bitrate"),
"start time": str(fragment_time),
}
),
)
)
fragment_time += duration_frag
track_id = hashlib.md5(
f"{codec}-{track_lang}-{ql.get('Bitrate') or 0}-{ql.get('Index') or 0}".encode()
).hexdigest()
data = {
"ism": {
"manifest": self.manifest,
"stream_index": stream_index,
"quality_level": ql,
"segments": track_urls,
}
}
if content_type == "video":
try:
vcodec = Video.Codec.from_mime(codec) if codec else None
except ValueError:
vcodec = None
tracks.add(
Video(
id_=track_id,
url=self.url,
codec=vcodec,
language=track_lang or language,
is_original_lang=bool(language and track_lang and str(track_lang) == str(language)),
bitrate=ql.get("Bitrate"),
width=int(ql.get("MaxWidth") or 0) or int(stream_index.get("MaxWidth") or 0),
height=int(ql.get("MaxHeight") or 0) or int(stream_index.get("MaxHeight") or 0),
descriptor=Video.Descriptor.ISM,
drm=drm,
data=data,
)
)
elif content_type == "audio":
try:
acodec = Audio.Codec.from_mime(codec) if codec else None
except ValueError:
acodec = None
tracks.add(
Audio(
id_=track_id,
url=self.url,
codec=acodec,
language=track_lang or language,
is_original_lang=bool(language and track_lang and str(track_lang) == str(language)),
bitrate=ql.get("Bitrate"),
channels=ql.get("Channels"),
descriptor=Track.Descriptor.ISM,
drm=drm,
data=data,
)
)
else:
try:
scodec = Subtitle.Codec.from_mime(codec) if codec else None
except ValueError:
scodec = None
tracks.add(
Subtitle(
id_=track_id,
url=self.url,
codec=scodec,
language=track_lang or language,
is_original_lang=bool(language and track_lang and str(track_lang) == str(language)),
descriptor=Track.Descriptor.ISM,
drm=drm,
data=data,
)
)
return tracks
@staticmethod
def download_track(
track: AnyTrack,
save_path: Path,
save_dir: Path,
progress: partial,
session: Optional[Session] = None,
proxy: Optional[str] = None,
max_workers: Optional[int] = None,
license_widevine: Optional[Callable] = None,
*,
cdm: Optional[object] = None,
) -> None:
if not session:
session = Session()
elif not isinstance(session, Session):
raise TypeError(f"Expected session to be a {Session}, not {session!r}")
if proxy:
session.proxies.update({"all": proxy})
segments: list[str] = track.data["ism"]["segments"]
session_drm = None
if track.drm:
# Mirror HLS.download_track: pick the DRM matching the provided CDM
# (or the first available) and license it if supported.
session_drm = track.get_drm_for_cdm(cdm)
if isinstance(session_drm, (Widevine, PlayReady)):
try:
if not license_widevine:
raise ValueError("license_widevine func must be supplied to use DRM")
progress(downloaded="LICENSING")
license_widevine(session_drm)
progress(downloaded="[yellow]LICENSED")
except Exception:
DOWNLOAD_CANCELLED.set()
progress(downloaded="[red]FAILED")
raise
if DOWNLOAD_LICENCE_ONLY.is_set():
progress(downloaded="[yellow]SKIPPED")
return
progress(total=len(segments))
downloader = track.downloader
skip_merge = False
downloader_args = dict(
urls=[{"url": url} for url in segments],
output_dir=save_dir,
filename="{i:0%d}.mp4" % len(str(len(segments))),
headers=session.headers,
cookies=session.cookies,
proxy=proxy,
max_workers=max_workers,
)
if downloader.__name__ == "n_m3u8dl_re":
skip_merge = True
downloader_args.update(
{
"filename": track.id,
"track": track,
"content_keys": session_drm.content_keys if session_drm else None,
}
)
for status_update in downloader(**downloader_args):
file_downloaded = status_update.get("file_downloaded")
if file_downloaded:
events.emit(events.Types.SEGMENT_DOWNLOADED, track=track, segment=file_downloaded)
else:
downloaded = status_update.get("downloaded")
if downloaded and downloaded.endswith("/s"):
status_update["downloaded"] = f"ISM {downloaded}"
progress(**status_update)
for control_file in save_dir.glob("*.aria2__temp"):
control_file.unlink()
segments_to_merge = [x for x in sorted(save_dir.iterdir()) if x.is_file()]
if skip_merge:
shutil.move(segments_to_merge[0], save_path)
else:
with open(save_path, "wb") as f:
for segment_file in segments_to_merge:
segment_data = segment_file.read_bytes()
if (
not session_drm
and isinstance(track, Subtitle)
and track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML)
):
segment_data = try_ensure_utf8(segment_data)
segment_data = (
segment_data.decode("utf8")
.replace("&lrm;", html.unescape("&lrm;"))
.replace("&rlm;", html.unescape("&rlm;"))
.encode("utf8")
)
f.write(segment_data)
f.flush()
segment_file.unlink()
progress(advance=1)
track.path = save_path
events.emit(events.Types.TRACK_DOWNLOADED, track=track)
if not skip_merge and session_drm:
progress(downloaded="Decrypting", completed=0, total=100)
session_drm.decrypt(save_path)
track.drm = None
events.emit(events.Types.TRACK_DECRYPTED, track=track, drm=session_drm, segment=None)
progress(downloaded="Decrypting", advance=100)
save_dir.rmdir()
progress(downloaded="Downloaded")
__all__ = ("ISM",)

View File

@@ -0,0 +1,76 @@
"""Utility functions for parsing M3U8 playlists."""
from __future__ import annotations
from typing import Optional, Union
import httpx
import m3u8
from pyplayready.cdm import Cdm as PlayReadyCdm
from pyplayready.system.pssh import PSSH as PR_PSSH
from pywidevine.cdm import Cdm as WidevineCdm
from pywidevine.pssh import PSSH as WV_PSSH
from requests import Session
from unshackle.core.drm import PlayReady, Widevine
from unshackle.core.manifests.hls import HLS
from unshackle.core.tracks import Tracks
def parse(
master: m3u8.M3U8,
language: str,
*,
session: Optional[Union[Session, httpx.Client]] = None,
) -> Tracks:
"""Parse a variant playlist to ``Tracks`` with DRM information."""
tracks = HLS(master, session=session).to_tracks(language)
need_wv = not any(isinstance(d, Widevine) for t in tracks for d in (t.drm or []))
need_pr = not any(isinstance(d, PlayReady) for t in tracks for d in (t.drm or []))
if (need_wv or need_pr) and tracks.videos:
if not session:
session = Session()
session_keys = list(master.session_keys or [])
session_keys.extend(HLS.parse_session_data_keys(master, session))
for drm_obj in HLS.get_all_drm(session_keys):
if need_wv and isinstance(drm_obj, Widevine):
for t in tracks.videos + tracks.audio:
t.drm = [d for d in (t.drm or []) if not isinstance(d, Widevine)] + [drm_obj]
need_wv = False
elif need_pr and isinstance(drm_obj, PlayReady):
for t in tracks.videos + tracks.audio:
t.drm = [d for d in (t.drm or []) if not isinstance(d, PlayReady)] + [drm_obj]
need_pr = False
if not need_wv and not need_pr:
break
if (need_wv or need_pr) and tracks.videos:
first_video = tracks.videos[0]
playlist = m3u8.load(first_video.url)
for key in playlist.keys or []:
if not key or not key.keyformat:
continue
fmt = key.keyformat.lower()
if need_wv and fmt == WidevineCdm.urn:
pssh_b64 = key.uri.split(",")[-1]
drm = Widevine(pssh=WV_PSSH(pssh_b64))
for t in tracks.videos + tracks.audio:
t.drm = [d for d in (t.drm or []) if not isinstance(d, Widevine)] + [drm]
need_wv = False
elif need_pr and (fmt == PlayReadyCdm or "com.microsoft.playready" in fmt):
pssh_b64 = key.uri.split(",")[-1]
drm = PlayReady(pssh=PR_PSSH(pssh_b64), pssh_b64=pssh_b64)
for t in tracks.videos + tracks.audio:
t.drm = [d for d in (t.drm or []) if not isinstance(d, PlayReady)] + [drm]
need_pr = False
if not need_wv and not need_pr:
break
return tracks
__all__ = ["parse"]

View File

@@ -0,0 +1,5 @@
from .basic import Basic
from .hola import Hola
from .nordvpn import NordVPN
__all__ = ("Basic", "Hola", "NordVPN")

View File

@@ -0,0 +1,54 @@
import random
import re
from typing import Optional, Union
from requests.utils import prepend_scheme_if_needed
from urllib3.util import parse_url
from unshackle.core.proxies.proxy import Proxy
class Basic(Proxy):
    def __init__(self, **countries: Union[str, list[str]]):
"""Basic Proxy Service using Proxies specified in the config."""
self.countries = {k.lower(): v for k, v in countries.items()}
def __repr__(self) -> str:
countries = len(self.countries)
        servers = sum(len(v) if isinstance(v, list) else 1 for v in self.countries.values())
return f"{countries} Countr{['ies', 'y'][countries == 1]} ({servers} Server{['s', ''][servers == 1]})"
def get_proxy(self, query: str) -> Optional[str]:
"""Get a proxy URI from the config."""
query = query.lower()
match = re.match(r"^([a-z]{2})(\d+)?$", query, re.IGNORECASE)
if not match:
raise ValueError(f'The query "{query}" was not recognized...')
country_code = match.group(1)
entry = match.group(2)
servers: Optional[Union[str, list[str]]] = self.countries.get(country_code)
if not servers:
return None
if isinstance(servers, str):
proxy = servers
elif entry:
try:
proxy = servers[int(entry) - 1]
except IndexError:
raise ValueError(
f'There\'s only {len(servers)} prox{"y" if len(servers) == 1 else "ies"} for "{country_code}"...'
)
else:
proxy = random.choice(servers)
proxy = prepend_scheme_if_needed(proxy, "http")
parsed_proxy = parse_url(proxy)
if not parsed_proxy.host:
raise ValueError(f"The proxy '{proxy}' is not a valid proxy URI supported by Python-Requests.")
return proxy
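    # Example config mapping and queries (all values illustrative):
    #
    #   basic = Basic(us=["proxy1.example:8080", "proxy2.example:8080"], de="proxy3.example:8080")
    #   basic.get_proxy("us")   -> a random US proxy, e.g. "http://proxy1.example:8080"
    #   basic.get_proxy("us2")  -> "http://proxy2.example:8080"
    #   basic.get_proxy("de")   -> "http://proxy3.example:8080" (scheme prepended if missing)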

View File

@@ -0,0 +1,60 @@
import random
import re
import subprocess
from typing import Optional
from unshackle.core import binaries
from unshackle.core.proxies.proxy import Proxy
class Hola(Proxy):
def __init__(self):
"""
Proxy Service using Hola's direct connections via the hola-proxy project.
https://github.com/Snawoot/hola-proxy
"""
self.binary = binaries.HolaProxy
if not self.binary:
raise EnvironmentError("hola-proxy executable not found but is required for the Hola proxy provider.")
self.countries = self.get_countries()
def __repr__(self) -> str:
countries = len(self.countries)
return f"{countries} Countr{['ies', 'y'][countries == 1]}"
def get_proxy(self, query: str) -> Optional[str]:
"""
Get an HTTP proxy URI for a Datacenter ('direct') or Residential ('lum') Hola server.
TODO: - Add ability to select 'lum' proxies (residential proxies).
- Return and use Proxy Authorization
"""
query = query.lower()
p = subprocess.check_output(
[self.binary, "-country", query, "-list-proxies"], stderr=subprocess.STDOUT
).decode()
if "Transaction error: temporary ban detected." in p:
raise ConnectionError("Hola banned your IP temporarily from it's services. Try change your IP.")
username, password, proxy_authorization = re.search(
r"Login: (.*)\nPassword: (.*)\nProxy-Authorization: (.*)", p
).groups()
servers = re.findall(r"(zagent.*)", p)
proxies = []
for server in servers:
host, ip_address, direct, peer, hola, trial, trial_peer, vendor = server.split(",")
proxies.append(f"http://{username}:{password}@{ip_address}:{peer}")
proxy = random.choice(proxies)
return proxy
def get_countries(self) -> list[dict[str, str]]:
"""Get a list of available Countries."""
p = subprocess.check_output([self.binary, "-list-countries"]).decode("utf8")
return [{code: name} for country in p.splitlines() for (code, name) in [country.split(" - ", maxsplit=1)]]
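    # The `-list-countries` output parsed above is assumed to look like:
    #
    #   us - United States
    #   de - Germany
    #
    # which becomes: [{"us": "United States"}, {"de": "Germany"}]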

View File

@@ -0,0 +1,128 @@
import json
import re
from typing import Optional
import requests
from unshackle.core.proxies.proxy import Proxy
class NordVPN(Proxy):
def __init__(self, username: str, password: str, server_map: Optional[dict[str, int]] = None):
"""
Proxy Service using NordVPN Service Credentials.
A username and password must be provided. These are Service Credentials, not your Login Credentials.
The Service Credentials can be found here: https://my.nordaccount.com/dashboard/nordvpn/
"""
if not username:
raise ValueError("No Username was provided to the NordVPN Proxy Service.")
if not password:
raise ValueError("No Password was provided to the NordVPN Proxy Service.")
if not re.match(r"^[a-z0-9]{48}$", username + password, re.IGNORECASE) or "@" in username:
raise ValueError(
"The Username and Password must be NordVPN Service Credentials, not your Login Credentials. "
"The Service Credentials can be found here: https://my.nordaccount.com/dashboard/nordvpn/"
)
if server_map is not None and not isinstance(server_map, dict):
raise TypeError(f"Expected server_map to be a dict mapping a region to a server ID, not '{server_map!r}'.")
self.username = username
self.password = password
self.server_map = server_map or {}
self.countries = self.get_countries()
def __repr__(self) -> str:
countries = len(self.countries)
servers = sum(x["serverCount"] for x in self.countries)
return f"{countries} Countr{['ies', 'y'][countries == 1]} ({servers} Server{['s', ''][servers == 1]})"
def get_proxy(self, query: str) -> Optional[str]:
"""
Get an HTTP(SSL) proxy URI for a NordVPN server.
HTTP proxies under port 80 were disabled on the 15th of Feb, 2021:
https://nordvpn.com/blog/removing-http-proxies
"""
query = query.lower()
if re.match(r"^[a-z]{2}\d+$", query):
# country and nordvpn server id, e.g., us1, fr1234
hostname = f"{query}.nordvpn.com"
else:
if query.isdigit():
# country id
country = self.get_country(by_id=int(query))
elif re.match(r"^[a-z]+$", query):
# country code
country = self.get_country(by_code=query)
else:
raise ValueError(f"The query provided is unsupported and unrecognized: {query}")
if not country:
                # NordVPN doesn't have servers in this region
return
server_mapping = self.server_map.get(country["code"].lower())
if server_mapping:
# country was set to a specific server ID in config
hostname = f"{country['code'].lower()}{server_mapping}.nordvpn.com"
else:
# get the recommended server ID
recommended_servers = self.get_recommended_servers(country["id"])
if not recommended_servers:
raise ValueError(
f"The NordVPN Country {query} currently has no recommended servers. "
"Try again later. If the issue persists, double-check the query."
)
hostname = recommended_servers[0]["hostname"]
if hostname.startswith("gb"):
# NordVPN uses the alpha2 of 'GB' in API responses, but 'UK' in the hostname
hostname = f"gb{hostname[2:]}"
return f"https://{self.username}:{self.password}@{hostname}:89"
def get_country(self, by_id: Optional[int] = None, by_code: Optional[str] = None) -> Optional[dict]:
"""Search for a Country and it's metadata."""
if all(x is None for x in (by_id, by_code)):
raise ValueError("At least one search query must be made.")
for country in self.countries:
if all(
[by_id is None or country["id"] == int(by_id), by_code is None or country["code"] == by_code.upper()]
):
return country
@staticmethod
def get_recommended_servers(country_id: int) -> list[dict]:
"""
Get the list of recommended Servers for a Country.
        Note: There may be only one recommended server, or occasionally none at all.
"""
res = requests.get(
url="https://api.nordvpn.com/v1/servers/recommendations", params={"filters[country_id]": country_id}
)
if not res.ok:
raise ValueError(f"Failed to get a list of NordVPN countries [{res.status_code}]")
try:
return res.json()
except json.JSONDecodeError:
raise ValueError("Could not decode list of NordVPN countries, not JSON data.")
@staticmethod
def get_countries() -> list[dict]:
"""Get a list of available Countries and their metadata."""
res = requests.get(
url="https://api.nordvpn.com/v1/servers/countries",
)
if not res.ok:
raise ValueError(f"Failed to get a list of NordVPN countries [{res.status_code}]")
try:
return res.json()
except json.JSONDecodeError:
raise ValueError("Could not decode list of NordVPN countries, not JSON data.")

View File

@@ -0,0 +1,31 @@
from abc import abstractmethod
from typing import Optional
class Proxy:
@abstractmethod
def __init__(self, **kwargs):
"""
The constructor initializes the Service using passed configuration data.
Any authorization or pre-fetching of data should be done here.
"""
@abstractmethod
def __repr__(self) -> str:
"""Return a string denoting a list of Countries and Servers (if possible)."""
countries = ...
servers = ...
return f"{countries} Countr{['ies', 'y'][countries == 1]} ({servers} Server{['s', ''][servers == 1]})"
@abstractmethod
def get_proxy(self, query: str) -> Optional[str]:
"""
Get a Proxy URI from the Proxy Service.
Only return None if the query was accepted, but no proxy could be returned.
Otherwise, please use exceptions to denote any errors with the call or query.
The returned Proxy URI must be a string supported by Python-Requests:
'{scheme}://[{user}:{pass}@]{host}:{port}'
"""

View File

@@ -0,0 +1,44 @@
from typing import Optional, Union
class SearchResult:
def __init__(
self,
id_: Union[str, int],
title: str,
description: Optional[str] = None,
label: Optional[str] = None,
url: Optional[str] = None,
):
"""
        A Search Result for any supported Title Type.
Parameters:
id_: The search result's Title ID.
title: The primary display text, e.g., the Title's Name.
description: The secondary display text, e.g., the Title's Description or
further title information.
label: The tertiary display text. This will typically be used to display
an informative label or tag to the result. E.g., "unavailable", the
title's price tag, region, etc.
url: A hyperlink to the search result or title's page.
"""
if not isinstance(id_, (str, int)):
raise TypeError(f"Expected id_ to be a {str} or {int}, not {type(id_)}")
if not isinstance(title, str):
raise TypeError(f"Expected title to be a {str}, not {type(title)}")
if not isinstance(description, (str, type(None))):
raise TypeError(f"Expected description to be a {str}, not {type(description)}")
if not isinstance(label, (str, type(None))):
raise TypeError(f"Expected label to be a {str}, not {type(label)}")
if not isinstance(url, (str, type(None))):
raise TypeError(f"Expected url to be a {str}, not {type(url)}")
self.id = id_
self.title = title
self.description = description
self.label = label
self.url = url
__all__ = ("SearchResult",)

290
unshackle/core/service.py Normal file
View File

@@ -0,0 +1,290 @@
import base64
import logging
from abc import ABCMeta, abstractmethod
from collections.abc import Generator
from http.cookiejar import CookieJar
from pathlib import Path
from typing import Optional, Union
from urllib.parse import urlparse
import click
import m3u8
import requests
from requests.adapters import HTTPAdapter, Retry
from rich.padding import Padding
from rich.rule import Rule
from unshackle.core.cacher import Cacher
from unshackle.core.config import config
from unshackle.core.console import console
from unshackle.core.constants import AnyTrack
from unshackle.core.credential import Credential
from unshackle.core.drm import DRM_T
from unshackle.core.search_result import SearchResult
from unshackle.core.titles import Title_T, Titles_T
from unshackle.core.tracks import Chapters, Tracks
from unshackle.core.utilities import get_ip_info
class Service(metaclass=ABCMeta):
"""The Service Base Class."""
# Abstract class variables
ALIASES: tuple[str, ...] = () # list of aliases for the service; alternatives to the service tag.
GEOFENCE: tuple[str, ...] = () # list of ip regions required to use the service. empty list == no specific region.
def __init__(self, ctx: click.Context):
console.print(Padding(Rule(f"[rule.text]Service: {self.__class__.__name__}"), (1, 2)))
self.config = ctx.obj.config
self.log = logging.getLogger(self.__class__.__name__)
self.session = self.get_session()
self.cache = Cacher(self.__class__.__name__)
if not ctx.parent or not ctx.parent.params.get("no_proxy"):
if ctx.parent:
proxy = ctx.parent.params["proxy"]
else:
proxy = None
if not proxy:
# don't override the explicit proxy set by the user, even if they may be geoblocked
with console.status("Checking if current region is Geoblocked...", spinner="dots"):
if self.GEOFENCE:
# no explicit proxy, let's get one to GEOFENCE if needed
current_region = get_ip_info(self.session)["country"].lower()
if any(x.lower() == current_region for x in self.GEOFENCE):
self.log.info("Service is not Geoblocked in your region")
else:
requested_proxy = self.GEOFENCE[0] # first is likely main region
self.log.info(f"Service is Geoblocked in your region, getting a Proxy to {requested_proxy}")
for proxy_provider in ctx.obj.proxy_providers:
proxy = proxy_provider.get_proxy(requested_proxy)
if proxy:
self.log.info(f"Got Proxy from {proxy_provider.__class__.__name__}")
break
else:
self.log.info("Service has no Geofence")
if proxy:
self.session.proxies.update({"all": proxy})
proxy_parse = urlparse(proxy)
if proxy_parse.username and proxy_parse.password:
self.session.headers.update(
{
"Proxy-Authorization": base64.b64encode(
f"{proxy_parse.username}:{proxy_parse.password}".encode("utf8")
).decode()
}
)
# Optional Abstract functions
# The following functions may be implemented by the Service.
# Otherwise, the base service code (if any) of the function will be executed on call.
# The functions will be executed in shown order.
@staticmethod
def get_session() -> requests.Session:
"""
        Creates a Python-requests Session with the common headers from config
        and a retry handler for transient HTTP errors.
:returns: Prepared Python-requests Session
"""
session = requests.Session()
session.headers.update(config.headers)
session.mount(
"https://",
HTTPAdapter(
max_retries=Retry(total=15, backoff_factor=0.2, status_forcelist=[429, 500, 502, 503, 504]),
pool_block=True,
),
)
session.mount("http://", session.adapters["https://"])
return session
def authenticate(self, cookies: Optional[CookieJar] = None, credential: Optional[Credential] = None) -> None:
"""
Authenticate the Service with Cookies and/or Credentials (Email/Username and Password).
This is effectively a login() function. Any API calls or object initializations
needing to be made, should be made here. This will be run before any of the
following abstract functions.
You should avoid storing or using the Credential outside this function.
Make any calls you need for any Cookies, Tokens, or such, then use those.
The Cookie jar should also not be stored outside this function. However, you may load
the Cookie jar into the service session.
"""
if cookies is not None:
if not isinstance(cookies, CookieJar):
raise TypeError(f"Expected cookies to be a {CookieJar}, not {cookies!r}.")
self.session.cookies.update(cookies)
def search(self) -> Generator[SearchResult, None, None]:
"""
Search by query for titles from the Service.
The query must be taken as a CLI argument by the Service class.
Ideally just re-use the title ID argument (i.e. self.title).
Search results will be displayed in the order yielded.
"""
raise NotImplementedError(f"Search functionality has not been implemented by {self.__class__.__name__}")
def get_widevine_service_certificate(
self, *, challenge: bytes, title: Title_T, track: AnyTrack
) -> Union[bytes, str]:
"""
Get the Widevine Service Certificate used for Privacy Mode.
:param challenge: The service challenge, providing this to a License endpoint should return the
privacy certificate that the service uses.
:param title: The current `Title` from get_titles that is being executed. This is provided in
case it has data needed to be used, e.g. for a HTTP request.
:param track: The current `Track` needing decryption. Provided for same reason as `title`.
:return: The Service Privacy Certificate as Bytes or a Base64 string. Don't Base64 Encode or
Decode the data, return as is to reduce unnecessary computations.
"""
def get_widevine_license(self, *, challenge: bytes, title: Title_T, track: AnyTrack) -> Optional[Union[bytes, str]]:
"""
Get a Widevine License message by sending a License Request (challenge).
This License message contains the encrypted Content Decryption Keys and will be
read by the Cdm and decrypted.
This is a very important request to get correct. A bad, unexpected, or missing
value in the request can cause your key to be detected and promptly banned,
revoked, disabled, or downgraded.
:param challenge: The license challenge from the Widevine CDM.
:param title: The current `Title` from get_titles that is being executed. This is provided in
case it has data needed to be used, e.g. for a HTTP request.
:param track: The current `Track` needing decryption. Provided for same reason as `title`.
:return: The License response as Bytes or a Base64 string. Don't Base64 Encode or
Decode the data, return as is to reduce unnecessary computations.
"""
# Required Abstract functions
# The following functions *must* be implemented by the Service.
# The functions will be executed in shown order.
@abstractmethod
def get_titles(self) -> Titles_T:
"""
Get Titles for the provided title ID.
Return a Movies, Series, or Album objects containing Movie, Episode, or Song title objects respectively.
The returned data must be for the given title ID, or a spawn of the title ID.
At least one object is expected to be returned, or it will presume an invalid Title ID was
provided.
You can use the `data` dictionary class instance attribute of each Title to store data you may need later on.
This can be useful to store information on each title that will be required like any sub-asset IDs, or such.
"""
@abstractmethod
def get_tracks(self, title: Title_T) -> Tracks:
"""
Get Track objects of the Title.
Return a Tracks object, which itself can contain Video, Audio, Subtitle or even Chapters.
Tracks.videos, Tracks.audio, Tracks.subtitles, and Track.chapters should be a List of Track objects.
Each Track in the Tracks should represent a Video/Audio Stream/Representation/Adaptation or
a Subtitle file.
While one Track should only hold information for one stream/downloadable, try to get as many
unique Track objects per stream type so Stream selection by the root code can give you more
        options in terms of Resolution, Bitrate, Codecs, Language, etc.
No decision making or filtering of which Tracks get returned should happen here. It can be
considered an error to filter for e.g. resolution, codec, and such. All filtering based on
arguments will be done by the root code automatically when needed.
Make sure you correctly mark which Tracks are encrypted or not, and by which DRM System
via its `drm` property.
        If you are able to obtain the Track's KID (Key ID) as a 32 char (16 byte) HEX string, provide
it to the Track's `kid` variable as it will speed up the decryption process later on. It may
or may not be needed, that depends on the service. Generally if you can provide it, without
downloading any of the Track's stream data, then do.
:param title: The current `Title` from get_titles that is being executed.
:return: Tracks object containing Video, Audio, Subtitles, and Chapters, if available.
"""
@abstractmethod
def get_chapters(self, title: Title_T) -> Chapters:
"""
Get Chapters for the Title.
Parameters:
title: The current Title from `get_titles` that is being processed.
You must return a Chapters object containing 0 or more Chapter objects.
You do not need to set a Chapter number or sort/order the chapters in any way as
the Chapters class automatically handles all of that for you. If there's no
descriptive name for a Chapter then do not set a name at all.
You must not set Chapter names to "Chapter {n}" or such. If you (or the user)
wants "Chapter {n}" style Chapter names (or similar) then they can use the config
option `chapter_fallback_name`. For example, `"Chapter {i:02}"` for "Chapter 01".
"""
# Optional Event methods
def on_segment_downloaded(self, track: AnyTrack, segment: Path) -> None:
"""
Called when one of a Track's Segments has finished downloading.
Parameters:
track: The Track object that had a Segment downloaded.
segment: The Path to the Segment that was downloaded.
"""
def on_track_downloaded(self, track: AnyTrack) -> None:
"""
Called when a Track has finished downloading.
Parameters:
track: The Track object that was downloaded.
"""
def on_track_decrypted(self, track: AnyTrack, drm: DRM_T, segment: Optional[m3u8.Segment] = None) -> None:
"""
Called when a Track has finished decrypting.
Parameters:
track: The Track object that was decrypted.
drm: The DRM object it decrypted with.
segment: The HLS segment information that was decrypted.
"""
def on_track_repacked(self, track: AnyTrack) -> None:
"""
Called when a Track has finished repacking.
Parameters:
track: The Track object that was repacked.
"""
def on_track_multiplex(self, track: AnyTrack) -> None:
"""
Called when a Track is about to be Multiplexed into a Container.
Note: Right now only MKV containers are multiplexed but in the future
this may also be called when multiplexing to other containers like
MP4 via ffmpeg/mp4box.
Parameters:
            track: The Track object that is about to be multiplexed.
"""
__all__ = ("Service",)

View File

@@ -0,0 +1,83 @@
from pathlib import Path
import click
from unshackle.core.config import config
from unshackle.core.service import Service
from unshackle.core.utilities import import_module_by_path
_SERVICES = sorted((path for path in config.directories.services.glob("*/__init__.py")), key=lambda x: x.parent.stem)
_MODULES = {path.parent.stem: getattr(import_module_by_path(path), path.parent.stem) for path in _SERVICES}
_ALIASES = {tag: getattr(module, "ALIASES") for tag, module in _MODULES.items()}
class Services(click.MultiCommand):
"""Lazy-loaded command group of project services."""
# Click-specific methods
def list_commands(self, ctx: click.Context) -> list[str]:
"""Returns a list of all available Services as command names for Click."""
return Services.get_tags()
def get_command(self, ctx: click.Context, name: str) -> click.Command:
"""Load the Service and return the Click CLI method."""
tag = Services.get_tag(name)
try:
service = Services.load(tag)
except KeyError as e:
available_services = self.list_commands(ctx)
if not available_services:
raise click.ClickException(
f"There are no Services added yet, therefore the '{name}' Service could not be found."
)
raise click.ClickException(f"{e}. Available Services: {', '.join(available_services)}")
if hasattr(service, "cli"):
return service.cli
raise click.ClickException(f"Service '{tag}' has no 'cli' method configured.")
# Methods intended to be used anywhere
@staticmethod
def get_tags() -> list[str]:
"""Returns a list of service tags from all available Services."""
return [x.parent.stem for x in _SERVICES]
@staticmethod
def get_path(name: str) -> Path:
"""Get the directory path of a command."""
tag = Services.get_tag(name)
for service in _SERVICES:
if service.parent.stem == tag:
return service.parent
raise KeyError(f"There is no Service added by the Tag '{name}'")
@staticmethod
def get_tag(value: str) -> str:
"""
Get the Service Tag (e.g. DSNP, not DisneyPlus/Disney+, etc.) by an Alias.
Input value can be of any case-sensitivity.
Original input value is returned if it did not match a service tag.
"""
original_value = value
value = value.lower()
for path in _SERVICES:
tag = path.parent.stem
            if value in (tag.lower(), *(alias.lower() for alias in _ALIASES.get(tag, ()))):
return tag
return original_value
@staticmethod
def load(tag: str) -> Service:
"""Load a Service module by Service tag."""
module = _MODULES.get(tag)
if not module:
raise KeyError(f"There is no Service added by the Tag '{tag}'")
return module
__all__ = ("Services",)

View File

@@ -0,0 +1,11 @@
from typing import Union
from .episode import Episode, Series
from .movie import Movie, Movies
from .song import Album, Song
Title_T = Union[Movie, Episode, Song]
Titles_T = Union[Movies, Series, Album]
__all__ = ("Episode", "Series", "Movie", "Movies", "Album", "Song", "Title_T", "Titles_T")

View File

@@ -0,0 +1,209 @@
import re
from abc import ABC
from collections import Counter
from typing import Any, Iterable, Optional, Union
from langcodes import Language
from pymediainfo import MediaInfo
from rich.tree import Tree
from sortedcontainers import SortedKeyList
from unshackle.core.config import config
from unshackle.core.constants import AUDIO_CODEC_MAP, DYNAMIC_RANGE_MAP, VIDEO_CODEC_MAP
from unshackle.core.titles.title import Title
from unshackle.core.utilities import sanitize_filename
class Episode(Title):
def __init__(
self,
id_: Any,
service: type,
title: str,
season: Union[int, str],
number: Union[int, str],
name: Optional[str] = None,
year: Optional[Union[int, str]] = None,
language: Optional[Union[str, Language]] = None,
data: Optional[Any] = None,
description: Optional[str] = None,
) -> None:
super().__init__(id_, service, language, data)
if not title:
raise ValueError("Episode title must be provided")
if not isinstance(title, str):
raise TypeError(f"Expected title to be a str, not {title!r}")
if season != 0 and not season:
raise ValueError("Episode season must be provided")
if isinstance(season, str) and season.isdigit():
season = int(season)
elif not isinstance(season, int):
raise TypeError(f"Expected season to be an int, not {season!r}")
if number != 0 and not number:
raise ValueError("Episode number must be provided")
if isinstance(number, str) and number.isdigit():
number = int(number)
elif not isinstance(number, int):
raise TypeError(f"Expected number to be an int, not {number!r}")
if name is not None and not isinstance(name, str):
raise TypeError(f"Expected name to be a str, not {name!r}")
if year is not None:
if isinstance(year, str) and year.isdigit():
year = int(year)
elif not isinstance(year, int):
raise TypeError(f"Expected year to be an int, not {year!r}")
title = title.strip()
if name is not None:
name = name.strip()
# ignore episode names that are the episode number or title name
if re.match(r"Episode ?#?\d+", name, re.IGNORECASE):
name = None
elif name.lower() == title.lower():
name = None
if year is not None and year <= 0:
raise ValueError(f"Episode year cannot be {year}")
self.title = title
self.season = season
self.number = number
self.name = name
self.year = year
self.description = description
def __str__(self) -> str:
return "{title}{year} S{season:02}E{number:02} {name}".format(
title=self.title,
year=f" {self.year}" if self.year else "",
season=self.season,
number=self.number,
name=self.name or "",
).strip()
def get_filename(self, media_info: MediaInfo, folder: bool = False, show_service: bool = True) -> str:
primary_video_track = next(iter(media_info.video_tracks), None)
primary_audio_track = next(iter(media_info.audio_tracks), None)
unique_audio_languages = len({x.language.split("-")[0] for x in media_info.audio_tracks if x.language})
# Title [Year] SXXEXX Name (or Title [Year] SXX if folder)
if folder:
name = f"{self.title}"
if self.year:
name += f" {self.year}"
name += f" S{self.season:02}"
else:
name = "{title}{year} S{season:02}E{number:02} {name}".format(
title=self.title.replace("$", "S"), # e.g., Arli$$
year=f" {self.year}" if self.year else "",
season=self.season,
number=self.number,
name=self.name or "",
).strip()
# MULTi
if unique_audio_languages > 1:
name += " MULTi"
# Resolution
if primary_video_track:
resolution = primary_video_track.height
aspect_ratio = [int(float(plane)) for plane in primary_video_track.other_display_aspect_ratio[0].split(":")]
if len(aspect_ratio) == 1:
                    # e.g., an aspect ratio of 2.00:1 is parsed as [2]; append the implied 1
aspect_ratio.append(1)
if aspect_ratio[0] / aspect_ratio[1] not in (16 / 9, 4 / 3):
# We want the resolution represented in a 4:3 or 16:9 canvas.
# If it's not 4:3 or 16:9, calculate as if it's inside a 16:9 canvas,
# otherwise the track's height value is fine.
# We are assuming this title is some weird aspect ratio so most
# likely a movie or HD source, so it's most likely widescreen so
# 16:9 canvas makes the most sense.
resolution = int(primary_video_track.width * (9 / 16))
name += f" {resolution}p"
# Service
if show_service:
name += f" {self.service.__name__}"
# 'WEB-DL'
name += " WEB-DL"
# Audio Codec + Channels (+ feature)
if primary_audio_track:
codec = primary_audio_track.format
channel_layout = primary_audio_track.channel_layout or primary_audio_track.channellayout_original
if channel_layout:
channels = float(sum({"LFE": 0.1}.get(position.upper(), 1) for position in channel_layout.split(" ")))
else:
channel_count = primary_audio_track.channel_s or primary_audio_track.channels or 0
channels = float(channel_count)
features = primary_audio_track.format_additionalfeatures or ""
name += f" {AUDIO_CODEC_MAP.get(codec, codec)}{channels:.1f}"
if "JOC" in features or primary_audio_track.joc:
name += " Atmos"
# Video (dynamic range + hfr +) Codec
if primary_video_track:
codec = primary_video_track.format
hdr_format = primary_video_track.hdr_format_commercial
trc = primary_video_track.transfer_characteristics or primary_video_track.transfer_characteristics_original
frame_rate = float(primary_video_track.frame_rate)
if hdr_format:
name += f" {DYNAMIC_RANGE_MAP.get(hdr_format)} "
elif trc and "HLG" in trc:
name += " HLG"
if frame_rate > 30:
name += " HFR"
name += f" {VIDEO_CODEC_MAP.get(codec, codec)}"
if config.tag:
name += f"-{config.tag}"
return sanitize_filename(name)
class Series(SortedKeyList, ABC):
def __init__(self, iterable: Optional[Iterable] = None):
super().__init__(iterable, key=lambda x: (x.season, x.number, x.year or 0))
def __str__(self) -> str:
if not self:
return super().__str__()
return self[0].title + (f" ({self[0].year})" if self[0].year else "")
def tree(self, verbose: bool = False) -> Tree:
seasons = Counter(x.season for x in self)
num_seasons = len(seasons)
num_episodes = sum(seasons.values())
tree = Tree(
f"{num_seasons} Season{['s', ''][num_seasons == 1]}, {num_episodes} Episode{['s', ''][num_episodes == 1]}",
guide_style="bright_black",
)
if verbose:
for season, episodes in seasons.items():
season_tree = tree.add(
f"[bold]Season {str(season).zfill(len(str(num_seasons)))}[/]: [bright_black]{episodes} episodes",
guide_style="bright_black",
)
for episode in self:
if episode.season == season:
if episode.name:
season_tree.add(
f"[bold]{str(episode.number).zfill(len(str(episodes)))}.[/] "
f"[bright_black]{episode.name}"
)
else:
season_tree.add(f"[bright_black]Episode {str(episode.number).zfill(len(str(episodes)))}")
return tree
__all__ = ("Episode", "Series")

144
unshackle/core/titles/movie.py Normal file
View File

@@ -0,0 +1,144 @@
from abc import ABC
from typing import Any, Iterable, Optional, Union
from langcodes import Language
from pymediainfo import MediaInfo
from rich.tree import Tree
from sortedcontainers import SortedKeyList
from unshackle.core.config import config
from unshackle.core.constants import AUDIO_CODEC_MAP, DYNAMIC_RANGE_MAP, VIDEO_CODEC_MAP
from unshackle.core.titles.title import Title
from unshackle.core.utilities import sanitize_filename
class Movie(Title):
def __init__(
self,
id_: Any,
service: type,
name: str,
year: Optional[Union[int, str]] = None,
language: Optional[Union[str, Language]] = None,
data: Optional[Any] = None,
description: Optional[str] = None,
) -> None:
super().__init__(id_, service, language, data)
if not name:
raise ValueError("Movie name must be provided")
if not isinstance(name, str):
raise TypeError(f"Expected name to be a str, not {name!r}")
if year is not None:
if isinstance(year, str) and year.isdigit():
year = int(year)
elif not isinstance(year, int):
raise TypeError(f"Expected year to be an int, not {year!r}")
name = name.strip()
if year is not None and year <= 0:
raise ValueError(f"Movie year cannot be {year}")
self.name = name
self.year = year
self.description = description
def __str__(self) -> str:
if self.year:
return f"{self.name} ({self.year})"
return self.name
def get_filename(self, media_info: MediaInfo, folder: bool = False, show_service: bool = True) -> str:
primary_video_track = next(iter(media_info.video_tracks), None)
primary_audio_track = next(iter(media_info.audio_tracks), None)
unique_audio_languages = len({x.language.split("-")[0] for x in media_info.audio_tracks if x.language})
# Name (Year)
name = str(self).replace("$", "S") # e.g., Arli$$
# MULTi
if unique_audio_languages > 1:
name += " MULTi"
# Resolution
if primary_video_track:
resolution = primary_video_track.height
aspect_ratio = [int(float(plane)) for plane in primary_video_track.other_display_aspect_ratio[0].split(":")]
if len(aspect_ratio) == 1:
                    # e.g., an aspect ratio of 2.00:1 is parsed as [2]; append the implied 1
aspect_ratio.append(1)
if aspect_ratio[0] / aspect_ratio[1] not in (16 / 9, 4 / 3):
# We want the resolution represented in a 4:3 or 16:9 canvas.
# If it's not 4:3 or 16:9, calculate as if it's inside a 16:9 canvas,
# otherwise the track's height value is fine.
# We are assuming this title is some weird aspect ratio so most
# likely a movie or HD source, so it's most likely widescreen so
# 16:9 canvas makes the most sense.
resolution = int(primary_video_track.width * (9 / 16))
name += f" {resolution}p"
# Service
if show_service:
name += f" {self.service.__name__}"
# 'WEB-DL'
name += " WEB-DL"
# Audio Codec + Channels (+ feature)
if primary_audio_track:
codec = primary_audio_track.format
channel_layout = primary_audio_track.channel_layout or primary_audio_track.channellayout_original
if channel_layout:
channels = float(sum({"LFE": 0.1}.get(position.upper(), 1) for position in channel_layout.split(" ")))
else:
channel_count = primary_audio_track.channel_s or primary_audio_track.channels or 0
channels = float(channel_count)
features = primary_audio_track.format_additionalfeatures or ""
name += f" {AUDIO_CODEC_MAP.get(codec, codec)}{channels:.1f}"
if "JOC" in features or primary_audio_track.joc:
name += " Atmos"
# Video (dynamic range + hfr +) Codec
if primary_video_track:
codec = primary_video_track.format
hdr_format = primary_video_track.hdr_format_commercial
trc = primary_video_track.transfer_characteristics or primary_video_track.transfer_characteristics_original
frame_rate = float(primary_video_track.frame_rate)
if hdr_format:
name += f" {DYNAMIC_RANGE_MAP.get(hdr_format)} "
elif trc and "HLG" in trc:
name += " HLG"
if frame_rate > 30:
name += " HFR"
name += f" {VIDEO_CODEC_MAP.get(codec, codec)}"
if config.tag:
name += f"-{config.tag}"
return sanitize_filename(name)
class Movies(SortedKeyList, ABC):
def __init__(self, iterable: Optional[Iterable] = None):
super().__init__(iterable, key=lambda x: x.year or 0)
def __str__(self) -> str:
if not self:
return super().__str__()
# TODO: Assumes there's only one movie
return self[0].name + (f" ({self[0].year})" if self[0].year else "")
def tree(self, verbose: bool = False) -> Tree:
num_movies = len(self)
tree = Tree(f"{num_movies} Movie{['s', ''][num_movies == 1]}", guide_style="bright_black")
if verbose:
for movie in self:
tree.add(f"[bold]{movie.name}[/] [bright_black]({movie.year or '?'})", guide_style="bright_black")
return tree
__all__ = ("Movie", "Movies")

140
unshackle/core/titles/song.py Normal file
View File

@@ -0,0 +1,140 @@
from abc import ABC
from typing import Any, Iterable, Optional, Union
from langcodes import Language
from pymediainfo import MediaInfo
from rich.tree import Tree
from sortedcontainers import SortedKeyList
from unshackle.core.config import config
from unshackle.core.constants import AUDIO_CODEC_MAP
from unshackle.core.titles.title import Title
from unshackle.core.utilities import sanitize_filename
class Song(Title):
def __init__(
self,
id_: Any,
service: type,
name: str,
artist: str,
album: str,
track: int,
disc: int,
year: int,
language: Optional[Union[str, Language]] = None,
data: Optional[Any] = None,
) -> None:
super().__init__(id_, service, language, data)
if not name:
raise ValueError("Song name must be provided")
if not isinstance(name, str):
raise TypeError(f"Expected name to be a str, not {name!r}")
if not artist:
raise ValueError("Song artist must be provided")
if not isinstance(artist, str):
raise TypeError(f"Expected artist to be a str, not {artist!r}")
if not album:
raise ValueError("Song album must be provided")
if not isinstance(album, str):
raise TypeError(f"Expected album to be a str, not {name!r}")
if not track:
raise ValueError("Song track must be provided")
if not isinstance(track, int):
raise TypeError(f"Expected track to be an int, not {track!r}")
if not disc:
raise ValueError("Song disc must be provided")
if not isinstance(disc, int):
raise TypeError(f"Expected disc to be an int, not {disc!r}")
if not year:
raise ValueError("Song year must be provided")
if not isinstance(year, int):
raise TypeError(f"Expected year to be an int, not {year!r}")
name = name.strip()
artist = artist.strip()
album = album.strip()
if track <= 0:
raise ValueError(f"Song track cannot be {track}")
if disc <= 0:
raise ValueError(f"Song disc cannot be {disc}")
if year <= 0:
raise ValueError(f"Song year cannot be {year}")
self.name = name
self.artist = artist
self.album = album
self.track = track
self.disc = disc
self.year = year
def __str__(self) -> str:
return "{artist} - {album} ({year}) / {track:02}. {name}".format(
artist=self.artist, album=self.album, year=self.year, track=self.track, name=self.name
).strip()
def get_filename(self, media_info: MediaInfo, folder: bool = False, show_service: bool = True) -> str:
audio_track = next(iter(media_info.audio_tracks), None)
codec = audio_track.format
channel_layout = audio_track.channel_layout or audio_track.channellayout_original
if channel_layout:
channels = float(sum({"LFE": 0.1}.get(position.upper(), 1) for position in channel_layout.split(" ")))
else:
channel_count = audio_track.channel_s or audio_track.channels or 0
channels = float(channel_count)
features = audio_track.format_additionalfeatures or ""
if folder:
# Artist - Album (Year)
name = str(self).split(" / ")[0]
else:
# NN. Song Name
name = str(self).split(" / ")[1]
# Service
if show_service:
name += f" {self.service.__name__}"
# 'WEB-DL'
name += " WEB-DL"
# Audio Codec + Channels (+ feature)
name += f" {AUDIO_CODEC_MAP.get(codec, codec)}{channels:.1f}"
if "JOC" in features or audio_track.joc:
name += " Atmos"
if config.tag:
name += f"-{config.tag}"
return sanitize_filename(name, " ")
class Album(SortedKeyList, ABC):
def __init__(self, iterable: Optional[Iterable] = None):
super().__init__(iterable, key=lambda x: (x.album, x.disc, x.track, x.year or 0))
def __str__(self) -> str:
if not self:
return super().__str__()
return f"{self[0].artist} - {self[0].album} ({self[0].year or '?'})"
def tree(self, verbose: bool = False) -> Tree:
num_songs = len(self)
tree = Tree(f"{num_songs} Song{['s', ''][num_songs == 1]}", guide_style="bright_black")
if verbose:
for song in self:
tree.add(f"[bold]Track {song.track:02}.[/] [bright_black]({song.name})", guide_style="bright_black")
return tree
__all__ = ("Song", "Album")

68
unshackle/core/titles/title.py Normal file
View File

@@ -0,0 +1,68 @@
from __future__ import annotations
from abc import abstractmethod
from typing import Any, Optional, Union
from langcodes import Language
from pymediainfo import MediaInfo
from unshackle.core.tracks import Tracks
class Title:
def __init__(
self, id_: Any, service: type, language: Optional[Union[str, Language]] = None, data: Optional[Any] = None
) -> None:
"""
Media Title from a Service.
Parameters:
id_: An identifier for this specific title. It must be unique. Can be of any
value.
service: Service class that this title is from.
language: The original recorded language for the title. If that information
is not available, this should not be set to anything.
data: Arbitrary storage for the title. Often used to store extra metadata
information, IDs, URIs, and so on.
"""
if not id_: # includes 0, false, and similar values, this is intended
raise ValueError("A unique ID must be provided")
if hasattr(id_, "__len__") and len(id_) < 4:
raise ValueError("The unique ID is not large enough, clash likely.")
if not service:
raise ValueError("Service class must be provided")
if not isinstance(service, type):
raise TypeError(f"Expected service to be a Class (type), not {service!r}")
if language is not None:
if isinstance(language, str):
language = Language.get(language)
elif not isinstance(language, Language):
raise TypeError(f"Expected language to be a {Language} or str, not {language!r}")
self.id = id_
self.service = service
self.language = language
self.data = data
self.tracks = Tracks()
    def __eq__(self, other: Any) -> bool:
        return isinstance(other, Title) and self.id == other.id
@abstractmethod
def get_filename(self, media_info: MediaInfo, folder: bool = False, show_service: bool = True) -> str:
"""
Get a Filename for this Title with the provided Media Info.
All filenames should be sanitized with the sanitize_filename() utility function.
Parameters:
media_info: MediaInfo object of the file this name will be used for.
folder: This filename will be used as a folder name. Some changes may want to
be made if this is the case.
show_service: Show the service tag (e.g., iT, NF) in the filename.
"""
__all__ = ("Title",)

10
unshackle/core/tracks/__init__.py Normal file
View File

@@ -0,0 +1,10 @@
from .attachment import Attachment
from .audio import Audio
from .chapter import Chapter
from .chapters import Chapters
from .subtitle import Subtitle
from .track import Track
from .tracks import Tracks
from .video import Video
__all__ = ("Audio", "Attachment", "Chapter", "Chapters", "Subtitle", "Track", "Tracks", "Video")

146
unshackle/core/tracks/attachment.py Normal file
View File

@@ -0,0 +1,146 @@
from __future__ import annotations
import mimetypes
import os
from pathlib import Path
from typing import Optional, Union
from urllib.parse import urlparse
from zlib import crc32
import requests
from unshackle.core.config import config
class Attachment:
def __init__(
self,
path: Union[Path, str, None] = None,
url: Optional[str] = None,
name: Optional[str] = None,
mime_type: Optional[str] = None,
description: Optional[str] = None,
session: Optional[requests.Session] = None,
):
"""
Create a new Attachment.
If providing a path, the file must already exist.
If providing a URL, the file will be downloaded to the temp directory.
Either path or url must be provided.
If name is not provided it will use the file name (without extension).
If mime_type is not provided, it will try to guess it.
Args:
path: Path to an existing file.
url: URL to download the attachment from.
name: Name of the attachment.
mime_type: MIME type of the attachment.
description: Description of the attachment.
session: Optional requests session to use for downloading.
"""
if path is None and url is None:
raise ValueError("Either path or url must be provided.")
if url:
if not isinstance(url, str):
raise ValueError("The attachment URL must be a string.")
# If a URL is provided, download the file to the temp directory
parsed_url = urlparse(url)
file_name = os.path.basename(parsed_url.path) or "attachment"
# Use provided name for the file if available
if name:
file_name = f"{name.replace(' ', '_')}{os.path.splitext(file_name)[1]}"
download_path = config.directories.temp / file_name
# Download the file
try:
session = session or requests.Session()
response = session.get(url, stream=True)
response.raise_for_status()
download_path.parent.mkdir(parents=True, exist_ok=True)
with open(download_path, "wb") as f:
for chunk in response.iter_content(chunk_size=8192):
f.write(chunk)
path = download_path
except Exception as e:
raise ValueError(f"Failed to download attachment from URL: {e}")
if not isinstance(path, (str, Path)):
raise ValueError("The attachment path must be provided.")
path = Path(path)
if not path.exists():
raise ValueError("The attachment file does not exist.")
name = (name or path.stem).strip()
mime_type = (mime_type or "").strip() or None
description = (description or "").strip() or None
if not mime_type:
mime_type = {
".ttf": "application/x-truetype-font",
".otf": "application/vnd.ms-opentype",
".jpg": "image/jpeg",
".jpeg": "image/jpeg",
".png": "image/png",
}.get(path.suffix.lower(), mimetypes.guess_type(path)[0])
if not mime_type:
raise ValueError("The attachment mime-type could not be automatically detected.")
self.path = path
self.name = name
self.mime_type = mime_type
self.description = description
def __repr__(self) -> str:
return "{name}({items})".format(
name=self.__class__.__name__, items=", ".join([f"{k}={repr(v)}" for k, v in self.__dict__.items()])
)
def __str__(self) -> str:
return " | ".join(filter(bool, ["ATT", self.name, self.mime_type, self.description]))
@property
def id(self) -> str:
"""Compute an ID from the attachment data."""
checksum = crc32(self.path.read_bytes())
return hex(checksum)
def delete(self) -> None:
if self.path:
self.path.unlink()
self.path = None
@classmethod
def from_url(
cls,
url: str,
name: Optional[str] = None,
mime_type: Optional[str] = None,
description: Optional[str] = None,
session: Optional[requests.Session] = None,
) -> "Attachment":
"""
Create an attachment from a URL.
Args:
url: URL to download the attachment from.
name: Name of the attachment.
mime_type: MIME type of the attachment.
description: Description of the attachment.
session: Optional requests session to use for downloading.
Returns:
Attachment: A new attachment instance.
"""
return cls(url=url, name=name, mime_type=mime_type, description=description, session=session)
__all__ = ("Attachment",)

188
unshackle/core/tracks/audio.py Normal file
View File

@@ -0,0 +1,188 @@
from __future__ import annotations
import math
from enum import Enum
from typing import Any, Optional, Union
from unshackle.core.tracks.track import Track
class Audio(Track):
class Codec(str, Enum):
AAC = "AAC" # https://wikipedia.org/wiki/Advanced_Audio_Coding
AC3 = "DD" # https://wikipedia.org/wiki/Dolby_Digital
EC3 = "DD+" # https://wikipedia.org/wiki/Dolby_Digital_Plus
OPUS = "OPUS" # https://wikipedia.org/wiki/Opus_(audio_format)
OGG = "VORB" # https://wikipedia.org/wiki/Vorbis
DTS = "DTS" # https://en.wikipedia.org/wiki/DTS_(company)#DTS_Digital_Surround
ALAC = "ALAC" # https://en.wikipedia.org/wiki/Apple_Lossless_Audio_Codec
FLAC = "FLAC" # https://en.wikipedia.org/wiki/FLAC
@property
def extension(self) -> str:
return self.name.lower()
@staticmethod
def from_mime(mime: str) -> Audio.Codec:
mime = mime.lower().strip().split(".")[0]
if mime == "mp4a":
return Audio.Codec.AAC
if mime == "ac-3":
return Audio.Codec.AC3
if mime == "ec-3":
return Audio.Codec.EC3
if mime == "opus":
return Audio.Codec.OPUS
if mime == "dtsc":
return Audio.Codec.DTS
if mime == "alac":
return Audio.Codec.ALAC
if mime == "flac":
return Audio.Codec.FLAC
raise ValueError(f"The MIME '{mime}' is not a supported Audio Codec")
@staticmethod
def from_codecs(codecs: str) -> Audio.Codec:
for codec in codecs.lower().split(","):
mime = codec.strip().split(".")[0]
try:
return Audio.Codec.from_mime(mime)
except ValueError:
pass
raise ValueError(f"No MIME types matched any supported Audio Codecs in '{codecs}'")
@staticmethod
def from_netflix_profile(profile: str) -> Audio.Codec:
profile = profile.lower().strip()
if profile.startswith("heaac"):
return Audio.Codec.AAC
if profile.startswith("dd-"):
return Audio.Codec.AC3
if profile.startswith("ddplus"):
return Audio.Codec.EC3
if profile.startswith("playready-oggvorbis"):
return Audio.Codec.OGG
raise ValueError(f"The Content Profile '{profile}' is not a supported Audio Codec")
def __init__(
self,
*args: Any,
codec: Optional[Audio.Codec] = None,
bitrate: Optional[Union[str, int, float]] = None,
channels: Optional[Union[str, int, float]] = None,
joc: Optional[int] = None,
descriptive: Union[bool, int] = False,
**kwargs: Any,
):
"""
Create a new Audio track object.
Parameters:
codec: An Audio.Codec enum representing the audio codec.
If not specified, MediaInfo will be used to retrieve the codec
once the track has been downloaded.
            bitrate: A number or float representing the average bandwidth in bits/s.
Float values are rounded up to the nearest integer.
channels: A number, float, or string representing the number of audio channels.
                Strings may represent numbers or floats. Expanded layouts like 7.1.1 are
                not supported. All numbers and strings will be cast to float.
joc: The number of Joint-Object-Coding Channels/Objects in the audio stream.
descriptive: Mark this audio as being descriptive audio for the blind.
Note: If codec, bitrate, channels, or joc is not specified some checks may be
skipped or assume a value. Specifying as much information as possible is highly
recommended.
"""
super().__init__(*args, **kwargs)
if not isinstance(codec, (Audio.Codec, type(None))):
raise TypeError(f"Expected codec to be a {Audio.Codec}, not {codec!r}")
if not isinstance(bitrate, (str, int, float, type(None))):
raise TypeError(f"Expected bitrate to be a {str}, {int}, or {float}, not {bitrate!r}")
if not isinstance(channels, (str, int, float, type(None))):
raise TypeError(f"Expected channels to be a {str}, {int}, or {float}, not {channels!r}")
if not isinstance(joc, (int, type(None))):
raise TypeError(f"Expected joc to be a {int}, not {joc!r}")
if not isinstance(descriptive, (bool, int)) or (isinstance(descriptive, int) and descriptive not in (0, 1)):
raise TypeError(f"Expected descriptive to be a {bool} or bool-like {int}, not {descriptive!r}")
self.codec = codec
try:
self.bitrate = int(math.ceil(float(bitrate))) if bitrate else None
except (ValueError, TypeError) as e:
raise ValueError(f"Expected bitrate to be a number or float, {e}")
try:
self.channels = self.parse_channels(channels) if channels else None
except (ValueError, NotImplementedError) as e:
raise ValueError(f"Expected channels to be a number, float, or a string, {e}")
self.joc = joc
self.descriptive = bool(descriptive)
@property
def atmos(self) -> bool:
"""Return True if the audio track contains Dolby Atmos."""
return bool(self.joc)
def __str__(self) -> str:
return " | ".join(
filter(
bool,
[
"AUD",
f"[{self.codec.value}]" if self.codec else None,
str(self.language),
", ".join(
filter(
bool,
[
str(self.channels) if self.channels else None,
"Atmos" if self.atmos else None,
f"JOC {self.joc}" if self.joc else None,
],
)
),
f"{self.bitrate // 1000} kb/s" if self.bitrate else None,
self.get_track_name(),
self.edition,
],
)
)
@staticmethod
def parse_channels(channels: Union[str, int, float]) -> float:
"""
Converts a Channel string to a float representing audio channel count and layout.
E.g. "3" -> "3.0", "2.1" -> "2.1", ".1" -> "0.1".
This does not validate channel strings as genuine channel counts or valid layouts.
It does not convert the value to assume a sub speaker channel layout, e.g. 5.1->6.0.
It also does not support expanded surround sound channel layout strings like 7.1.2.
"""
if isinstance(channels, str):
            # TODO: Support all possible DASH audio channel configuration values
if channels.upper() == "A000":
return 2.0
elif channels.upper() == "F801":
return 5.1
elif channels.replace("ch", "").replace(".", "", 1).isdigit():
# e.g., '2ch', '2', '2.0', '5.1ch', '5.1'
return float(channels.replace("ch", ""))
raise NotImplementedError(f"Unsupported Channels string value, '{channels}'")
return float(channels)
def get_track_name(self) -> Optional[str]:
"""Return the base Track Name."""
track_name = super().get_track_name() or ""
flag = self.descriptive and "Descriptive"
if flag:
if track_name:
flag = f" ({flag})"
track_name += flag
return track_name or None
__all__ = ("Audio",)

77
unshackle/core/tracks/chapter.py Normal file
View File

@@ -0,0 +1,77 @@
from __future__ import annotations
import re
from typing import Optional, Union
from zlib import crc32
TIMESTAMP_FORMAT = re.compile(r"^(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(?P<ms>\.\d{3}|)$")
class Chapter:
def __init__(self, timestamp: Union[str, int, float], name: Optional[str] = None):
"""
Create a new Chapter with a Timestamp and optional name.
The timestamp may be in the following formats:
- "HH:MM:SS" string, e.g., `25:05:23`.
- "HH:MM:SS.mss" string, e.g., `25:05:23.120`.
- a timecode integer in milliseconds, e.g., `90323120` is `25:05:23.120`.
- a timecode float in seconds, e.g., `90323.12` is `25:05:23.120`.
If you have a timecode integer in seconds, just multiply it by 1000.
If you have a timecode float in milliseconds (no decimal value), just convert
it to an integer.
"""
if timestamp is None:
raise ValueError("The timestamp must be provided.")
if not isinstance(timestamp, (str, int, float)):
raise TypeError(f"Expected timestamp to be {str}, {int} or {float}, not {type(timestamp)}")
if not isinstance(name, (str, type(None))):
raise TypeError(f"Expected name to be {str}, not {type(name)}")
if not isinstance(timestamp, str):
if isinstance(timestamp, int): # ms
hours, remainder = divmod(timestamp, 1000 * 60 * 60)
minutes, remainder = divmod(remainder, 1000 * 60)
seconds, ms = divmod(remainder, 1000)
elif isinstance(timestamp, float): # seconds.ms
hours, remainder = divmod(timestamp, 60 * 60)
minutes, remainder = divmod(remainder, 60)
seconds, ms = divmod(int(remainder * 1000), 1000)
else:
raise TypeError
timestamp = f"{int(hours):02}:{int(minutes):02}:{int(seconds):02}.{str(ms).zfill(3)[:3]}"
timestamp_m = TIMESTAMP_FORMAT.match(timestamp)
if not timestamp_m:
raise ValueError(f"The timestamp format is invalid: {timestamp}")
hour, minute, second, ms = timestamp_m.groups()
if not ms:
timestamp += ".000"
self.timestamp = timestamp
self.name = name
def __repr__(self) -> str:
return "{name}({items})".format(
name=self.__class__.__name__, items=", ".join([f"{k}={repr(v)}" for k, v in self.__dict__.items()])
)
def __str__(self) -> str:
return " | ".join(filter(bool, ["CHP", self.timestamp, self.name]))
@property
def id(self) -> str:
"""Compute an ID from the Chapter data."""
checksum = crc32(str(self).encode("utf8"))
return hex(checksum)
@property
def named(self) -> bool:
"""Check if Chapter is named."""
return bool(self.name)
__all__ = ("Chapter",)

144
unshackle/core/tracks/chapters.py Normal file
View File

@@ -0,0 +1,144 @@
from __future__ import annotations
import re
from abc import ABC
from pathlib import Path
from typing import Any, Iterable, Optional, Union
from zlib import crc32
from sortedcontainers import SortedKeyList
from unshackle.core.tracks import Chapter
OGM_SIMPLE_LINE_1_FORMAT = re.compile(r"^CHAPTER(?P<number>\d+)=(?P<timestamp>\d{2,}:\d{2}:\d{2}\.\d{3})$")
OGM_SIMPLE_LINE_2_FORMAT = re.compile(r"^CHAPTER(?P<number>\d+)NAME=(?P<name>.*)$")
class Chapters(SortedKeyList, ABC):
def __init__(self, iterable: Optional[Iterable[Chapter]] = None):
super().__init__(key=lambda x: x.timestamp or 0)
for chapter in iterable or []:
self.add(chapter)
def __repr__(self) -> str:
return "{name}({items})".format(
name=self.__class__.__name__, items=", ".join([f"{k}={repr(v)}" for k, v in self.__dict__.items()])
)
def __str__(self) -> str:
return "\n".join(
[
" | ".join(filter(bool, ["CHP", f"[{i:02}]", chapter.timestamp, chapter.name]))
for i, chapter in enumerate(self, start=1)
]
)
@classmethod
def loads(cls, data: str) -> Chapters:
"""Load chapter data from a string."""
lines = [line.strip() for line in data.strip().splitlines(keepends=False)]
if len(lines) % 2 != 0:
raise ValueError("The number of chapter lines must be even.")
chapters = []
for line_1, line_2 in zip(lines[::2], lines[1::2]):
line_1_match = OGM_SIMPLE_LINE_1_FORMAT.match(line_1)
if not line_1_match:
raise SyntaxError(f"An unexpected syntax error occurred on: {line_1}")
line_2_match = OGM_SIMPLE_LINE_2_FORMAT.match(line_2)
if not line_2_match:
raise SyntaxError(f"An unexpected syntax error occurred on: {line_2}")
line_1_number, timestamp = line_1_match.groups()
line_2_number, name = line_2_match.groups()
if line_1_number != line_2_number:
raise SyntaxError(
f"The chapter numbers {line_1_number} and {line_2_number} do not match on:\n{line_1}\n{line_2}"
)
if not timestamp:
raise SyntaxError(f"The timestamp is missing on: {line_1}")
chapters.append(Chapter(timestamp, name))
return cls(chapters)
@classmethod
def load(cls, path: Union[Path, str]) -> Chapters:
"""Load chapter data from a file."""
if isinstance(path, str):
path = Path(path)
return cls.loads(path.read_text(encoding="utf8"))
def dumps(self, fallback_name: str = "") -> str:
"""
Return chapter data in OGM-based Simple Chapter format.
https://mkvtoolnix.download/doc/mkvmerge.html#mkvmerge.chapters.simple
Parameters:
fallback_name: Name used for Chapters without a Name set.
The fallback name can use the following variables in f-string style:
- {i}: The Chapter number starting at 1.
E.g., `"Chapter {i}"`: "Chapter 1", "Intro", "Chapter 3".
- {j}: A number starting at 1 that increments any time a Chapter has no name.
E.g., `"Chapter {j}"`: "Chapter 1", "Intro", "Chapter 2".
These are formatted with f-strings, directives are supported.
For example, `"Chapter {i:02}"` will result in `"Chapter 01"`.
"""
chapters = []
j = 0
for i, chapter in enumerate(self, start=1):
if not chapter.name:
j += 1
chapters.append(
"CHAPTER{num}={time}\nCHAPTER{num}NAME={name}".format(
num=f"{i:02}", time=chapter.timestamp, name=chapter.name or fallback_name.format(i=i, j=j)
)
)
return "\n".join(chapters)
def dump(self, path: Union[Path, str], *args: Any, **kwargs: Any) -> int:
"""
Write chapter data in OGM-based Simple Chapter format to a file.
Parameters:
path: The file path to write the Chapter data to, overwriting
any existing data.
See `Chapters.dumps` for more parameter documentation.
"""
if isinstance(path, str):
path = Path(path)
path.parent.mkdir(parents=True, exist_ok=True)
ogm_text = self.dumps(*args, **kwargs)
return path.write_text(ogm_text, encoding="utf8")
def add(self, value: Chapter) -> None:
if not isinstance(value, Chapter):
raise TypeError(f"Can only add {Chapter} objects, not {type(value)}")
if any(chapter.timestamp == value.timestamp for chapter in self):
raise ValueError(f"A Chapter with the Timestamp {value.timestamp} already exists")
super().add(value)
if not any(chapter.timestamp == "00:00:00.000" for chapter in self):
self.add(Chapter(0))
@property
def id(self) -> str:
"""Compute an ID from the Chapter data."""
checksum = crc32("\n".join([chapter.id for chapter in self]).encode("utf8"))
return hex(checksum)
__all__ = ("Chapters", "Chapter")

726
unshackle/core/tracks/subtitle.py Normal file
View File

@@ -0,0 +1,726 @@
from __future__ import annotations
import re
import subprocess
from collections import defaultdict
from enum import Enum
from functools import partial
from io import BytesIO
from pathlib import Path
from typing import Any, Callable, Iterable, Optional, Union
import pycaption
import requests
from construct import Container
from pycaption import Caption, CaptionList, CaptionNode, WebVTTReader
from pycaption.geometry import Layout
from pymp4.parser import MP4
from subtitle_filter import Subtitles
from unshackle.core import binaries
from unshackle.core.tracks.track import Track
from unshackle.core.utilities import try_ensure_utf8
from unshackle.core.utils.webvtt import merge_segmented_webvtt
class Subtitle(Track):
class Codec(str, Enum):
SubRip = "SRT" # https://wikipedia.org/wiki/SubRip
SubStationAlpha = "SSA" # https://wikipedia.org/wiki/SubStation_Alpha
SubStationAlphav4 = "ASS" # https://wikipedia.org/wiki/SubStation_Alpha#Advanced_SubStation_Alpha=
TimedTextMarkupLang = "TTML" # https://wikipedia.org/wiki/Timed_Text_Markup_Language
WebVTT = "VTT" # https://wikipedia.org/wiki/WebVTT
# MPEG-DASH box-encapsulated subtitle formats
fTTML = "STPP" # https://www.w3.org/TR/2018/REC-ttml-imsc1.0.1-20180424
fVTT = "WVTT" # https://www.w3.org/TR/webvtt1
@property
def extension(self) -> str:
return self.value.lower()
@staticmethod
def from_mime(mime: str) -> Subtitle.Codec:
mime = mime.lower().strip().split(".")[0]
if mime == "srt":
return Subtitle.Codec.SubRip
elif mime == "ssa":
return Subtitle.Codec.SubStationAlpha
elif mime == "ass":
return Subtitle.Codec.SubStationAlphav4
elif mime == "ttml":
return Subtitle.Codec.TimedTextMarkupLang
elif mime == "vtt":
return Subtitle.Codec.WebVTT
elif mime == "stpp":
return Subtitle.Codec.fTTML
elif mime == "wvtt":
return Subtitle.Codec.fVTT
raise ValueError(f"The MIME '{mime}' is not a supported Subtitle Codec")
@staticmethod
def from_codecs(codecs: str) -> Subtitle.Codec:
for codec in codecs.lower().split(","):
mime = codec.strip().split(".")[0]
try:
return Subtitle.Codec.from_mime(mime)
except ValueError:
pass
raise ValueError(f"No MIME types matched any supported Subtitle Codecs in '{codecs}'")
@staticmethod
def from_netflix_profile(profile: str) -> Subtitle.Codec:
profile = profile.lower().strip()
if profile.startswith("webvtt"):
return Subtitle.Codec.WebVTT
if profile.startswith("dfxp"):
return Subtitle.Codec.TimedTextMarkupLang
raise ValueError(f"The Content Profile '{profile}' is not a supported Subtitle Codec")
def __init__(
self,
*args: Any,
codec: Optional[Subtitle.Codec] = None,
cc: bool = False,
sdh: bool = False,
forced: bool = False,
**kwargs: Any,
):
"""
Create a new Subtitle track object.
Parameters:
codec: A Subtitle.Codec enum representing the subtitle format.
If not specified, MediaInfo will be used to retrieve the format
once the track has been downloaded.
cc: Closed Caption.
- Intended as if you couldn't hear the audio at all.
- Can have Sound as well as Dialogue, but doesn't have to.
- Original source would be from an EIA-CC encoded stream. Typically all
upper-case characters.
Indicators of it being CC without knowing original source:
- Extracted with CCExtractor, or
- >>> (or similar) being used at the start of some or all lines, or
- All text is uppercase or at least the majority, or
- Subtitles are Scrolling-text style (one line appears, oldest line
then disappears).
Just because you downloaded it as a SRT or VTT or such, doesn't mean it
isn't from an EIA-CC stream. And I wouldn't take the streaming services
(CC) as gospel either as they tend to get it wrong too.
sdh: Deaf or Hard-of-Hearing. Also known as HOH in the UK (EU?).
- Intended as if you couldn't hear the audio at all.
- MUST have Sound as well as Dialogue to be considered SDH.
- It has no "syntax" or "format" but is not transmitted using archaic
forms like EIA-CC streams, would be intended for transmission via
SubRip (SRT), WebVTT (VTT), TTML, etc.
If you can see important audio/sound transcriptions and not just dialogue
and it doesn't have the indicators of CC, then it's most likely SDH.
If it doesn't have important audio/sounds transcriptions it might just be
regular subtitling (you wouldn't mark as CC or SDH). This would be the
case for most translation subtitles. Like Anime for example.
forced: Typically used if there's important information at some point in time
like watching Dubbed content and an important Sign or Letter is shown
or someone talking in a different language.
Forced tracks are recommended by the Matroska Spec to be played if
the player's current playback audio language matches a subtitle
marked as "forced".
However, that doesn't mean every player works like this but there is
no other way to reliably work with Forced subtitles where multiple
forced subtitles may be in the output file. Just know what to expect
with "forced" subtitles.
Note: If codec is not specified some checks may be skipped or assume a value.
Specifying as much information as possible is highly recommended.
Information on Subtitle Types:
https://bit.ly/2Oe4fLC (3PlayMedia Blog on SUB vs CC vs SDH).
However, I wouldn't pay much attention to the claims about SDH needing to
be in the original source language. It's logically not true.
CC == Closed Captions. Source: Basically every site.
SDH = Subtitles for the Deaf or Hard-of-Hearing. Source: Basically every site.
HOH = Exact same as SDH. Is a term used in the UK. Source: https://bit.ly/2PGJatz (ICO UK)
More in-depth information, examples, and stuff to look for can be found in the Parameter
explanation list above.
"""
super().__init__(*args, **kwargs)
if not isinstance(codec, (Subtitle.Codec, type(None))):
raise TypeError(f"Expected codec to be a {Subtitle.Codec}, not {codec!r}")
if not isinstance(cc, (bool, int)) or (isinstance(cc, int) and cc not in (0, 1)):
raise TypeError(f"Expected cc to be a {bool} or bool-like {int}, not {cc!r}")
if not isinstance(sdh, (bool, int)) or (isinstance(sdh, int) and sdh not in (0, 1)):
raise TypeError(f"Expected sdh to be a {bool} or bool-like {int}, not {sdh!r}")
if not isinstance(forced, (bool, int)) or (isinstance(forced, int) and forced not in (0, 1)):
raise TypeError(f"Expected forced to be a {bool} or bool-like {int}, not {forced!r}")
self.codec = codec
self.cc = bool(cc)
self.sdh = bool(sdh)
self.forced = bool(forced)
if self.cc and self.sdh:
raise ValueError("A text track cannot be both CC and SDH.")
if self.forced and (self.cc or self.sdh):
raise ValueError("A text track cannot be CC/SDH as well as Forced.")
# TODO: Migrate to new event observer system
# Called after Track has been converted to another format
self.OnConverted: Optional[Callable[[Subtitle.Codec], None]] = None
def __str__(self) -> str:
return " | ".join(
filter(
bool,
["SUB", f"[{self.codec.value}]" if self.codec else None, str(self.language), self.get_track_name()],
)
)
def get_track_name(self) -> Optional[str]:
"""Return the base Track Name."""
track_name = super().get_track_name() or ""
flag = self.cc and "CC" or self.sdh and "SDH" or self.forced and "Forced"
if flag:
if track_name:
flag = f" ({flag})"
track_name += flag
return track_name or None
def download(
self,
session: requests.Session,
prepare_drm: partial,
max_workers: Optional[int] = None,
progress: Optional[partial] = None,
*,
cdm: Optional[object] = None,
):
super().download(session, prepare_drm, max_workers, progress, cdm=cdm)
if not self.path:
return
if self.codec == Subtitle.Codec.fTTML:
self.convert(Subtitle.Codec.TimedTextMarkupLang)
elif self.codec == Subtitle.Codec.fVTT:
self.convert(Subtitle.Codec.WebVTT)
elif self.codec == Subtitle.Codec.WebVTT:
text = self.path.read_text("utf8")
if self.descriptor == Track.Descriptor.DASH:
if len(self.data["dash"]["segment_durations"]) > 1:
text = merge_segmented_webvtt(
text,
segment_durations=self.data["dash"]["segment_durations"],
timescale=self.data["dash"]["timescale"],
)
elif self.descriptor == Track.Descriptor.HLS:
if len(self.data["hls"]["segment_durations"]) > 1:
text = merge_segmented_webvtt(
text,
segment_durations=self.data["hls"]["segment_durations"],
timescale=1, # ?
)
# Sanitize WebVTT timestamps before parsing
text = Subtitle.sanitize_webvtt_timestamps(text)
try:
caption_set = pycaption.WebVTTReader().read(text)
Subtitle.merge_same_cues(caption_set)
subtitle_text = pycaption.WebVTTWriter().write(caption_set)
self.path.write_text(subtitle_text, encoding="utf8")
except pycaption.exceptions.CaptionReadSyntaxError:
# If first attempt fails, try more aggressive sanitization
text = Subtitle.sanitize_webvtt(text)
try:
caption_set = pycaption.WebVTTReader().read(text)
Subtitle.merge_same_cues(caption_set)
subtitle_text = pycaption.WebVTTWriter().write(caption_set)
self.path.write_text(subtitle_text, encoding="utf8")
except Exception:
# Keep the sanitized version even if parsing failed
self.path.write_text(text, encoding="utf8")
@staticmethod
def sanitize_webvtt_timestamps(text: str) -> str:
"""
Fix invalid timestamps in WebVTT files, particularly negative timestamps.
Parameters:
text: The WebVTT content as string
Returns:
Sanitized WebVTT content
"""
# Replace negative timestamps with 00:00:00.000
return re.sub(r"(-\d+:\d+:\d+\.\d+)", "00:00:00.000", text)
@staticmethod
def sanitize_webvtt(text: str) -> str:
"""
More thorough sanitization of WebVTT files to handle multiple potential issues.
Parameters:
text: The WebVTT content as string
Returns:
Sanitized WebVTT content
"""
# Make sure we have a proper WEBVTT header
if not text.strip().startswith("WEBVTT"):
text = "WEBVTT\n\n" + text
lines = text.split("\n")
sanitized_lines = []
timestamp_pattern = re.compile(r"^((?:\d+:)?\d+:\d+\.\d+)\s+-->\s+((?:\d+:)?\d+:\d+\.\d+)")
# Skip invalid headers - keep only WEBVTT
header_done = False
for line in lines:
if not header_done:
if line.startswith("WEBVTT"):
sanitized_lines.append("WEBVTT")
header_done = True
continue
# Replace negative timestamps
if "-" in line and "-->" in line:
line = re.sub(r"(-\d+:\d+:\d+\.\d+)", "00:00:00.000", line)
# Validate timestamp format
match = timestamp_pattern.match(line)
if match:
start_time = match.group(1)
end_time = match.group(2)
# Ensure proper format with hours if missing
if start_time.count(":") == 1:
start_time = f"00:{start_time}"
if end_time.count(":") == 1:
end_time = f"00:{end_time}"
line = f"{start_time} --> {end_time}"
sanitized_lines.append(line)
return "\n".join(sanitized_lines)
def convert(self, codec: Subtitle.Codec) -> Path:
"""
Convert this Subtitle to another Format.
The file path location of the Subtitle data will be kept at the same
location but the file extension will be changed appropriately.
Supported formats:
- SubRip - SubtitleEdit or pycaption.SRTWriter
- TimedTextMarkupLang - SubtitleEdit or pycaption.DFXPWriter
- WebVTT - SubtitleEdit or pycaption.WebVTTWriter
- SubStationAlphav4 - SubtitleEdit
- fTTML* - custom code using some pycaption functions
- fVTT* - custom code using some pycaption functions
*: Can read from format, but cannot convert to format
Note: It currently prioritizes using SubtitleEdit over PyCaption as
I have personally noticed more oddities with PyCaption parsing over
SubtitleEdit. Especially when working with TTML/DFXP where it would
often have timecodes and stuff mixed in/duplicated.
Returns the new file path of the Subtitle.
"""
if not self.path or not self.path.exists():
raise ValueError("You must download the subtitle track first.")
if self.codec == codec:
return self.path
output_path = self.path.with_suffix(f".{codec.value.lower()}")
original_path = self.path
if binaries.SubtitleEdit and self.codec not in (Subtitle.Codec.fTTML, Subtitle.Codec.fVTT):
sub_edit_format = {
Subtitle.Codec.SubStationAlphav4: "AdvancedSubStationAlpha",
Subtitle.Codec.TimedTextMarkupLang: "TimedText1.0",
}.get(codec, codec.name)
sub_edit_args = [
binaries.SubtitleEdit,
"/Convert",
self.path,
sub_edit_format,
f"/outputfilename:{output_path.name}",
"/encoding:utf8",
]
if codec == Subtitle.Codec.SubRip:
sub_edit_args.append("/ConvertColorsToDialog")
subprocess.run(sub_edit_args, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
else:
writer = {
# pycaption generally only supports these subtitle formats
Subtitle.Codec.SubRip: pycaption.SRTWriter,
Subtitle.Codec.TimedTextMarkupLang: pycaption.DFXPWriter,
Subtitle.Codec.WebVTT: pycaption.WebVTTWriter,
}.get(codec)
if writer is None:
raise NotImplementedError(f"Cannot yet convert {self.codec.name} to {codec.name}.")
caption_set = self.parse(self.path.read_bytes(), self.codec)
Subtitle.merge_same_cues(caption_set)
subtitle_text = writer().write(caption_set)
output_path.write_text(subtitle_text, encoding="utf8")
if original_path.exists() and original_path != output_path:
original_path.unlink()
self.path = output_path
self.codec = codec
if callable(self.OnConverted):
self.OnConverted(codec)
return output_path
@staticmethod
def parse(data: bytes, codec: Subtitle.Codec) -> pycaption.CaptionSet:
if not isinstance(data, bytes):
raise ValueError(f"Subtitle data must be parsed as bytes data, not {type(data).__name__}")
try:
if codec == Subtitle.Codec.SubRip:
text = try_ensure_utf8(data).decode("utf8")
caption_set = pycaption.SRTReader().read(text)
elif codec == Subtitle.Codec.fTTML:
caption_lists: dict[str, pycaption.CaptionList] = defaultdict(pycaption.CaptionList)
for segment in (
Subtitle.parse(box.data, Subtitle.Codec.TimedTextMarkupLang)
for box in MP4.parse_stream(BytesIO(data))
if box.type == b"mdat"
):
for lang in segment.get_languages():
caption_lists[lang].extend(segment.get_captions(lang))
caption_set: pycaption.CaptionSet = pycaption.CaptionSet(caption_lists)
elif codec == Subtitle.Codec.TimedTextMarkupLang:
text = try_ensure_utf8(data).decode("utf8")
text = text.replace("tt:", "")
# negative size values aren't allowed in TTML/DFXP spec, replace with 0
text = re.sub(r'"(-\d+(\.\d+)?(px|em|%|c|pt))"', '"0"', text)
caption_set = pycaption.DFXPReader().read(text)
elif codec == Subtitle.Codec.fVTT:
caption_lists: dict[str, pycaption.CaptionList] = defaultdict(pycaption.CaptionList)
caption_list, language = Subtitle.merge_segmented_wvtt(data)
caption_lists[language] = caption_list
caption_set: pycaption.CaptionSet = pycaption.CaptionSet(caption_lists)
elif codec == Subtitle.Codec.WebVTT:
text = try_ensure_utf8(data).decode("utf8")
text = Subtitle.sanitize_broken_webvtt(text)
text = Subtitle.space_webvtt_headers(text)
caption_set = pycaption.WebVTTReader().read(text)
else:
raise ValueError(f'Unknown Subtitle format "{codec}"...')
except pycaption.exceptions.CaptionReadSyntaxError as e:
raise SyntaxError(f'A syntax error has occurred when reading the "{codec}" subtitle: {e}')
except pycaption.exceptions.CaptionReadNoCaptions:
return pycaption.CaptionSet({"en": []})
# remove empty caption lists or some code breaks, especially if it's the first list
for language in caption_set.get_languages():
if not caption_set.get_captions(language):
# noinspection PyProtectedMember
del caption_set._captions[language]
return caption_set
@staticmethod
def sanitize_broken_webvtt(text: str) -> str:
"""
Remove or fix corrupted WebVTT lines, particularly those with invalid timestamps.
Parameters:
text: The WebVTT content as string
Returns:
Sanitized WebVTT content with corrupted lines removed
"""
lines = text.splitlines()
sanitized_lines = []
i = 0
while i < len(lines):
# Skip empty lines
if not lines[i].strip():
sanitized_lines.append(lines[i])
i += 1
continue
# Check for timestamp lines
if "-->" in lines[i]:
# Validate timestamp format
timestamp_parts = lines[i].split("-->")
if len(timestamp_parts) != 2 or not timestamp_parts[1].strip() or timestamp_parts[1].strip() == "0":
# Skip this timestamp and its content until next timestamp or end
j = i + 1
while j < len(lines) and "-->" not in lines[j] and lines[j].strip():
j += 1
i = j
continue
# Add valid timestamp line
sanitized_lines.append(lines[i])
else:
# Add non-timestamp line
sanitized_lines.append(lines[i])
i += 1
return "\n".join(sanitized_lines)
@staticmethod
def space_webvtt_headers(data: Union[str, bytes]):
"""
Space out the WEBVTT Headers from Captions.
        When segmented VTT files are merged, a WEBVTT header may not be separated
        far enough from the previous caption, causing it to be parsed as caption
        text rather than as the header of the next segment.
"""
if isinstance(data, bytes):
data = try_ensure_utf8(data).decode("utf8")
elif not isinstance(data, str):
raise ValueError(f"Expecting data to be a str, not {data!r}")
text = (
data.replace("WEBVTT", "\n\nWEBVTT").replace("\r", "").replace("\n\n\n", "\n \n\n").replace("\n\n<", "\n<")
)
return text
@staticmethod
def merge_same_cues(caption_set: pycaption.CaptionSet):
"""Merge captions with the same timecodes and text as one in-place."""
for lang in caption_set.get_languages():
captions = caption_set.get_captions(lang)
last_caption = None
concurrent_captions = pycaption.CaptionList()
merged_captions = pycaption.CaptionList()
            for caption in captions:
                if last_caption:
                    if (caption.start, caption.end) == (last_caption.start, last_caption.end):
                        if caption.get_text() != last_caption.get_text():
                            concurrent_captions.append(caption)
                        last_caption = caption
                        continue
                    else:
                        merged_captions.append(pycaption.base.merge(concurrent_captions))
                        concurrent_captions = pycaption.CaptionList([caption])
                else:
                    # the first caption starts its own concurrent group, otherwise it would be dropped
                    concurrent_captions = pycaption.CaptionList([caption])
                last_caption = caption
if concurrent_captions:
merged_captions.append(pycaption.base.merge(concurrent_captions))
if merged_captions:
caption_set.set_captions(lang, merged_captions)
@staticmethod
def merge_segmented_wvtt(data: bytes, period_start: float = 0.0) -> tuple[CaptionList, Optional[str]]:
"""
Convert Segmented DASH WebVTT cues into a pycaption Caption List.
Also returns an ISO 639-2 alpha-3 language code if available.
Code ported originally by xhlove to Python from shaka-player.
Has since been improved upon by rlaphoenix using pymp4 and
pycaption functions.
"""
captions = CaptionList()
# init:
saw_wvtt_box = False
timescale = None
language = None
# media:
# > tfhd
default_duration = None
# > tfdt
saw_tfdt_box = False
base_time = 0
# > trun
saw_trun_box = False
samples = []
def flatten_boxes(box: Container) -> Iterable[Container]:
for child in box:
if hasattr(child, "children"):
yield from flatten_boxes(child.children)
del child["children"]
if hasattr(child, "entries"):
yield from flatten_boxes(child.entries)
del child["entries"]
# some boxes (mainly within 'entries') uses format not type
child["type"] = child.get("type") or child.get("format")
yield child
for box in flatten_boxes(MP4.parse_stream(BytesIO(data))):
# init
if box.type == b"mdhd":
timescale = box.timescale
language = box.language
if box.type == b"wvtt":
saw_wvtt_box = True
# media
if box.type == b"styp":
# essentially the start of each segment
# media var resets
# > tfhd
default_duration = None
# > tfdt
saw_tfdt_box = False
base_time = 0
# > trun
saw_trun_box = False
samples = []
if box.type == b"tfhd":
if box.flags.default_sample_duration_present:
default_duration = box.default_sample_duration
if box.type == b"tfdt":
saw_tfdt_box = True
base_time = box.baseMediaDecodeTime
if box.type == b"trun":
saw_trun_box = True
samples = box.sample_info
if box.type == b"mdat":
if not timescale:
raise ValueError("Timescale was not found in the Segmented WebVTT.")
if not saw_wvtt_box:
raise ValueError("The WVTT box was not found in the Segmented WebVTT.")
if not saw_tfdt_box:
raise ValueError("The TFDT box was not found in the Segmented WebVTT.")
if not saw_trun_box:
raise ValueError("The TRUN box was not found in the Segmented WebVTT.")
vttc_boxes = MP4.parse_stream(BytesIO(box.data))
current_time = base_time + period_start
for sample, vttc_box in zip(samples, vttc_boxes):
duration = sample.sample_duration or default_duration
if sample.sample_composition_time_offsets:
current_time += sample.sample_composition_time_offsets
start_time = current_time
end_time = current_time + (duration or 0)
current_time = end_time
if vttc_box.type == b"vtte":
# vtte is a vttc that's empty, skip
continue
layout: Optional[Layout] = None
nodes: list[CaptionNode] = []
for cue_box in vttc_box.children:
if cue_box.type == b"vsid":
# this is a V(?) Source ID box, we don't care
continue
if cue_box.type == b"sttg":
layout = Layout(webvtt_positioning=cue_box.settings)
elif cue_box.type == b"payl":
nodes.extend(
[
node
for line in cue_box.cue_text.split("\n")
for node in [
CaptionNode.create_text(WebVTTReader()._decode(line)),
CaptionNode.create_break(),
]
]
)
nodes.pop()
if nodes:
caption = Caption(
start=start_time * timescale, # as microseconds
end=end_time * timescale,
nodes=nodes,
layout_info=layout,
)
p_caption = captions[-1] if captions else None
if p_caption and caption.start == p_caption.end and str(caption.nodes) == str(p_caption.nodes):
# it's a duplicate, but lets take its end time
p_caption.end = caption.end
continue
captions.append(caption)
return captions, language
def strip_hearing_impaired(self) -> None:
"""
Strip captions for hearing impaired (SDH).
It uses SubtitleEdit if available, otherwise filter-subs.
"""
if not self.path or not self.path.exists():
raise ValueError("You must download the subtitle track first.")
if binaries.SubtitleEdit:
if self.codec == Subtitle.Codec.SubStationAlphav4:
output_format = "AdvancedSubStationAlpha"
elif self.codec == Subtitle.Codec.TimedTextMarkupLang:
output_format = "TimedText1.0"
else:
output_format = self.codec.name
subprocess.run(
[
binaries.SubtitleEdit,
"/Convert",
self.path,
output_format,
"/encoding:utf8",
"/overwrite",
"/RemoveTextForHI",
],
check=True,
stdout=subprocess.DEVNULL,
)
else:
sub = Subtitles(self.path)
sub.filter(rm_fonts=True, rm_ast=True, rm_music=True, rm_effects=True, rm_names=True, rm_author=True)
sub.save()
def reverse_rtl(self) -> None:
"""
Reverse RTL (Right to Left) Start/End on Captions.
This can be used to fix the positioning of sentence-ending characters.
"""
if not self.path or not self.path.exists():
raise ValueError("You must download the subtitle track first.")
if not binaries.SubtitleEdit:
raise EnvironmentError("SubtitleEdit executable not found...")
if self.codec == Subtitle.Codec.SubStationAlphav4:
output_format = "AdvancedSubStationAlpha"
elif self.codec == Subtitle.Codec.TimedTextMarkupLang:
output_format = "TimedText1.0"
else:
output_format = self.codec.name
subprocess.run(
[
binaries.SubtitleEdit,
"/Convert",
self.path,
output_format,
"/ReverseRtlStartEnd",
"/encoding:utf8",
"/overwrite",
],
check=True,
stdout=subprocess.DEVNULL,
)
__all__ = ("Subtitle",)

597
unshackle/core/tracks/track.py Normal file
View File

@@ -0,0 +1,597 @@
import base64
import html
import logging
import re
import shutil
import subprocess
from collections import defaultdict
from copy import copy
from enum import Enum
from functools import partial
from pathlib import Path
from typing import Any, Callable, Iterable, Optional, Union
from uuid import UUID
from zlib import crc32
from langcodes import Language
from pyplayready.cdm import Cdm as PlayReadyCdm
from pywidevine.cdm import Cdm as WidevineCdm
from requests import Session
from unshackle.core import binaries
from unshackle.core.config import config
from unshackle.core.constants import DOWNLOAD_CANCELLED, DOWNLOAD_LICENCE_ONLY
from unshackle.core.downloaders import aria2c, curl_impersonate, n_m3u8dl_re, requests
from unshackle.core.drm import DRM_T, PlayReady, Widevine
from unshackle.core.events import events
from unshackle.core.utilities import get_boxes, try_ensure_utf8
from unshackle.core.utils.subprocess import ffprobe
class Track:
class Descriptor(Enum):
URL = 1 # Direct URL, nothing fancy
HLS = 2 # https://en.wikipedia.org/wiki/HTTP_Live_Streaming
DASH = 3 # https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
ISM = 4 # https://learn.microsoft.com/en-us/silverlight/smooth-streaming
def __init__(
self,
url: Union[str, list[str]],
language: Union[Language, str],
is_original_lang: bool = False,
descriptor: Descriptor = Descriptor.URL,
needs_repack: bool = False,
name: Optional[str] = None,
drm: Optional[Iterable[DRM_T]] = None,
edition: Optional[str] = None,
downloader: Optional[Callable] = None,
data: Optional[Union[dict, defaultdict]] = None,
id_: Optional[str] = None,
extra: Optional[Any] = None,
) -> None:
if not isinstance(url, (str, list)):
raise TypeError(f"Expected url to be a {str}, or list of {str}, not {type(url)}")
if not isinstance(language, (Language, str)):
raise TypeError(f"Expected language to be a {Language} or {str}, not {type(language)}")
if not isinstance(is_original_lang, bool):
raise TypeError(f"Expected is_original_lang to be a {bool}, not {type(is_original_lang)}")
if not isinstance(descriptor, Track.Descriptor):
raise TypeError(f"Expected descriptor to be a {Track.Descriptor}, not {type(descriptor)}")
if not isinstance(needs_repack, bool):
raise TypeError(f"Expected needs_repack to be a {bool}, not {type(needs_repack)}")
if not isinstance(name, (str, type(None))):
raise TypeError(f"Expected name to be a {str}, not {type(name)}")
if not isinstance(id_, (str, type(None))):
raise TypeError(f"Expected id_ to be a {str}, not {type(id_)}")
if not isinstance(edition, (str, type(None))):
raise TypeError(f"Expected edition to be a {str}, not {type(edition)}")
if not isinstance(downloader, (Callable, type(None))):
raise TypeError(f"Expected downloader to be a {Callable}, not {type(downloader)}")
if not isinstance(data, (dict, defaultdict, type(None))):
raise TypeError(f"Expected data to be a {dict} or {defaultdict}, not {type(data)}")
invalid_urls = ", ".join(set(type(x) for x in url if not isinstance(x, str)))
if invalid_urls:
raise TypeError(f"Expected all items in url to be a {str}, but found {invalid_urls}")
if drm is not None:
try:
iter(drm)
except TypeError:
raise TypeError(f"Expected drm to be an iterable, not {type(drm)}")
if downloader is None:
downloader = {
"aria2c": aria2c,
"curl_impersonate": curl_impersonate,
"requests": requests,
"n_m3u8dl_re": n_m3u8dl_re,
}[config.downloader]
self.path: Optional[Path] = None
self.url = url
self.language = Language.get(language)
self.is_original_lang = is_original_lang
self.descriptor = descriptor
self.needs_repack = needs_repack
self.name = name
self.drm = drm
self.edition: str = edition
self.downloader = downloader
self._data: defaultdict[Any, Any] = defaultdict(dict)
self.data = data or {}
self.extra: Any = extra or {} # allow anything for extra, but default to a dict
if self.name is None:
lang = Language.get(self.language)
if (lang.language or "").lower() == (lang.territory or "").lower():
lang.territory = None # e.g. en-en, de-DE
reduced = lang.simplify_script()
extra_parts = []
if reduced.script is not None:
script = reduced.script_name(max_distance=25)
if script and script != "Zzzz":
extra_parts.append(script)
if reduced.territory is not None:
territory = reduced.territory_name(max_distance=25)
if territory and territory != "ZZ":
territory = territory.removesuffix(" SAR China")
extra_parts.append(territory)
self.name = ", ".join(extra_parts) or None
if not id_:
this = copy(self)
this.url = self.url.rsplit("?", maxsplit=1)[0]
checksum = crc32(repr(this).encode("utf8"))
id_ = hex(checksum)[2:]
self.id = id_
# TODO: Currently using OnFoo event naming, change to just segment_filter
self.OnSegmentFilter: Optional[Callable] = None
def __repr__(self) -> str:
return "{name}({items})".format(
name=self.__class__.__name__, items=", ".join([f"{k}={repr(v)}" for k, v in self.__dict__.items()])
)
def __eq__(self, other: Any) -> bool:
return isinstance(other, Track) and self.id == other.id
@property
def data(self) -> defaultdict[Any, Any]:
"""
Arbitrary track data dictionary.
A defaultdict is used with a dict as the factory for easier
nested saving and safer exists-checks.
Reserved keys:
- "hls" used by the HLS class.
- playlist: m3u8.model.Playlist - The primary track information.
- media: m3u8.model.Media - The audio/subtitle track information.
- segment_durations: list[int] - A list of each segment's duration.
- "dash" used by the DASH class.
- manifest: lxml.ElementTree - DASH MPD manifest.
- period: lxml.Element - The period of this track.
- adaptation_set: lxml.Element - The adaptation set of this track.
- representation: lxml.Element - The representation of this track.
- timescale: int - The timescale of the track's segments.
- segment_durations: list[int] - A list of each segment's duration.
You should not add, change, or remove any data within reserved keys.
You may use their data but do note that the values of them may change
or be removed at any point.
"""
return self._data
@data.setter
def data(self, value: Union[dict, defaultdict]) -> None:
if not isinstance(value, (dict, defaultdict)):
raise TypeError(f"Expected data to be a {dict} or {defaultdict}, not {type(value)}")
if isinstance(value, dict):
value = defaultdict(dict, **value)
self._data = value
def download(
self,
session: Session,
prepare_drm: partial,
max_workers: Optional[int] = None,
progress: Optional[partial] = None,
*,
cdm: Optional[object] = None,
):
"""Download and optionally Decrypt this Track."""
from unshackle.core.manifests import DASH, HLS, ISM
if DOWNLOAD_LICENCE_ONLY.is_set():
progress(downloaded="[yellow]SKIPPING")
if DOWNLOAD_CANCELLED.is_set():
progress(downloaded="[yellow]SKIPPED")
return
log = logging.getLogger("track")
proxy = next(iter(session.proxies.values()), None)
track_type = self.__class__.__name__
save_path = config.directories.temp / f"{track_type}_{self.id}.mp4"
if track_type == "Subtitle":
save_path = save_path.with_suffix(f".{self.codec.extension}")
if self.downloader.__name__ == "n_m3u8dl_re":
self.downloader = requests
if self.descriptor != self.Descriptor.URL:
save_dir = save_path.with_name(save_path.name + "_segments")
else:
save_dir = save_path.parent
def cleanup():
# track file (e.g., "foo.mp4")
save_path.unlink(missing_ok=True)
# aria2c control file (e.g., "foo.mp4.aria2" or "foo.mp4.aria2__temp")
save_path.with_suffix(f"{save_path.suffix}.aria2").unlink(missing_ok=True)
save_path.with_suffix(f"{save_path.suffix}.aria2__temp").unlink(missing_ok=True)
if save_dir.exists() and save_dir.name.endswith("_segments"):
shutil.rmtree(save_dir)
if not DOWNLOAD_LICENCE_ONLY.is_set():
if config.directories.temp.is_file():
raise ValueError(f"Temp Directory '{config.directories.temp}' must be a Directory, not a file")
config.directories.temp.mkdir(parents=True, exist_ok=True)
# Delete any pre-existing temp files matching this track.
# We can't re-use or continue downloading these tracks as they do not use a
# lock file. Or at least the majority don't. Even if they did I've encountered
# corruptions caused by sudden interruptions to the lock file.
cleanup()
try:
if self.descriptor == self.Descriptor.HLS:
HLS.download_track(
track=self,
save_path=save_path,
save_dir=save_dir,
progress=progress,
session=session,
proxy=proxy,
max_workers=max_workers,
license_widevine=prepare_drm,
cdm=cdm,
)
elif self.descriptor == self.Descriptor.DASH:
DASH.download_track(
track=self,
save_path=save_path,
save_dir=save_dir,
progress=progress,
session=session,
proxy=proxy,
max_workers=max_workers,
license_widevine=prepare_drm,
cdm=cdm,
)
elif self.descriptor == self.Descriptor.ISM:
ISM.download_track(
track=self,
save_path=save_path,
save_dir=save_dir,
progress=progress,
session=session,
proxy=proxy,
max_workers=max_workers,
license_widevine=prepare_drm,
cdm=cdm,
)
elif self.descriptor == self.Descriptor.URL:
try:
if not self.drm and track_type in ("Video", "Audio"):
# the service might not have explicitly defined the `drm` property
# try to find Widevine DRM information in the init data of the URL
try:
self.drm = [Widevine.from_track(self, session)]
except Widevine.Exceptions.PSSHNotFound:
# it might not have Widevine DRM, or might not have found the PSSH
log.warning("No Widevine PSSH was found for this track, is it DRM free?")
if self.drm:
track_kid = self.get_key_id(session=session)
drm = self.get_drm_for_cdm(cdm)
if isinstance(drm, Widevine):
# license and grab content keys
if not prepare_drm:
raise ValueError("prepare_drm func must be supplied to use Widevine DRM")
progress(downloaded="LICENSING")
prepare_drm(drm, track_kid=track_kid)
progress(downloaded="[yellow]LICENSED")
elif isinstance(drm, PlayReady):
# license and grab content keys
if not prepare_drm:
raise ValueError("prepare_drm func must be supplied to use PlayReady DRM")
progress(downloaded="LICENSING")
prepare_drm(drm, track_kid=track_kid)
progress(downloaded="[yellow]LICENSED")
else:
drm = None
if DOWNLOAD_LICENCE_ONLY.is_set():
progress(downloaded="[yellow]SKIPPED")
elif track_type != "Subtitle" and self.downloader.__name__ == "n_m3u8dl_re":
progress(downloaded="[red]FAILED")
error = f"[N_m3u8DL-RE]: {self.descriptor} is currently not supported"
raise ValueError(error)
else:
for status_update in self.downloader(
urls=self.url,
output_dir=save_path.parent,
filename=save_path.name,
headers=session.headers,
cookies=session.cookies,
proxy=proxy,
max_workers=max_workers,
):
file_downloaded = status_update.get("file_downloaded")
if not file_downloaded:
progress(**status_update)
# see https://github.com/devine-dl/devine/issues/71
save_path.with_suffix(f"{save_path.suffix}.aria2__temp").unlink(missing_ok=True)
self.path = save_path
events.emit(events.Types.TRACK_DOWNLOADED, track=self)
if drm:
progress(downloaded="Decrypting", completed=0, total=100)
drm.decrypt(save_path)
self.drm = None
events.emit(events.Types.TRACK_DECRYPTED, track=self, drm=drm, segment=None)
progress(downloaded="Decrypted", completed=100)
if track_type == "Subtitle" and self.codec.name not in ("fVTT", "fTTML"):
track_data = self.path.read_bytes()
track_data = try_ensure_utf8(track_data)
track_data = (
track_data.decode("utf8")
.replace("&lrm;", html.unescape("&lrm;"))
.replace("&rlm;", html.unescape("&rlm;"))
.encode("utf8")
)
self.path.write_bytes(track_data)
progress(downloaded="Downloaded")
except KeyboardInterrupt:
DOWNLOAD_CANCELLED.set()
progress(downloaded="[yellow]CANCELLED")
raise
except Exception:
DOWNLOAD_CANCELLED.set()
progress(downloaded="[red]FAILED")
raise
except (Exception, KeyboardInterrupt):
if not DOWNLOAD_LICENCE_ONLY.is_set():
cleanup()
raise
if DOWNLOAD_CANCELLED.is_set():
# we stopped during the download, let's exit
return
if not DOWNLOAD_LICENCE_ONLY.is_set():
if self.path.stat().st_size <= 3: # Empty UTF-8 BOM == 3 bytes
raise IOError("Download failed, the downloaded file is empty.")
events.emit(events.Types.TRACK_DOWNLOADED, track=self)
def delete(self) -> None:
if self.path:
self.path.unlink()
self.path = None
def move(self, target: Union[Path, str]) -> Path:
"""
Move the Track's file from current location, to target location.
This will overwrite anything at the target path.
Raises:
TypeError: If the target argument is not the expected type.
ValueError: If track has no file to move, or the target does not exist.
OSError: If the file somehow failed to move.
Returns the new location of the track.
"""
if not isinstance(target, (str, Path)):
raise TypeError(f"Expected {target} to be a {Path} or {str}, not {type(target)}")
if not self.path:
raise ValueError("Track has no file to move")
if not isinstance(target, Path):
target = Path(target)
if not target.exists():
raise ValueError(f"Target file {repr(target)} does not exist")
moved_to = Path(shutil.move(self.path, target))
if moved_to.resolve() != target.resolve():
raise OSError(f"Failed to move {self.path} to {target}")
self.path = target
return target
def get_track_name(self) -> Optional[str]:
"""Get the Track Name."""
return self.name
def get_drm_for_cdm(self, cdm: Optional[object]) -> Optional[DRM_T]:
"""Return the DRM matching the provided CDM, if available."""
if not self.drm:
return None
if isinstance(cdm, WidevineCdm):
for drm in self.drm:
if isinstance(drm, Widevine):
return drm
elif isinstance(cdm, PlayReadyCdm):
for drm in self.drm:
if isinstance(drm, PlayReady):
return drm
return self.drm[0]
def get_key_id(self, init_data: Optional[bytes] = None, *args, **kwargs) -> Optional[UUID]:
"""
Probe the DRM encryption Key ID (KID) for this specific track.
It currently supports finding the Key ID by probing the track's stream
with ffprobe for `enc_key_id` data, as well as for mp4 `tenc` (Track
Encryption) boxes.
It explicitly ignores PSSH information like the `PSSH` box, as the box
is likely to contain multiple Key IDs that may or may not be for this
specific track.
To retrieve the initialization segment, this method calls :meth:`get_init_segment`
with the positional and keyword arguments. The return value of `get_init_segment`
is then used to determine the Key ID.
Returns:
The Key ID as a UUID object, or None if the Key ID could not be determined.
"""
if not init_data:
init_data = self.get_init_segment(*args, **kwargs)
if not isinstance(init_data, bytes):
raise TypeError(f"Expected init_data to be bytes, not {init_data!r}")
probe = ffprobe(init_data)
if probe:
for stream in probe.get("streams") or []:
enc_key_id = stream.get("tags", {}).get("enc_key_id")
if enc_key_id:
return UUID(bytes=base64.b64decode(enc_key_id))
for tenc in get_boxes(init_data, b"tenc"):
if tenc.key_ID.int != 0:
return tenc.key_ID
for uuid_box in get_boxes(init_data, b"uuid"):
if uuid_box.extended_type == UUID("8974dbce-7be7-4c51-84f9-7148f9882554"): # tenc
tenc = uuid_box.data
if tenc.key_ID.int != 0:
return tenc.key_ID
def get_init_segment(
self,
maximum_size: int = 20000,
url: Optional[str] = None,
byte_range: Optional[str] = None,
session: Optional[Session] = None,
) -> bytes:
"""
Get the Track's Initial Segment Data Stream.
HLS and DASH tracks must explicitly provide a URL to the init segment or file.
Providing the byte-range for the init segment is recommended where possible.
If `byte_range` is not set, it will make a HEAD request and check the size of
the file. If the size could not be determined, it will download up to the first
20KB only, which should contain the entirety of the init segment. You may
override this by changing the `maximum_size`.
The default maximum_size of 20000 (20KB) is a tried-and-tested value that
seems to work well across the board.
Parameters:
maximum_size: Size to assume as the content length if byte-range is not
used, the content size could not be determined, or the content size
is larger than it. A value of 20000 (20KB) or higher is recommended.
url: Explicit init map or file URL to probe from.
byte_range: Range of bytes to download from the explicit or implicit URL.
session: Session context, e.g., authorization and headers.
"""
if not isinstance(maximum_size, int):
raise TypeError(f"Expected maximum_size to be an {int}, not {type(maximum_size)}")
if not isinstance(url, (str, type(None))):
raise TypeError(f"Expected url to be a {str}, not {type(url)}")
if not isinstance(byte_range, (str, type(None))):
raise TypeError(f"Expected byte_range to be a {str}, not {type(byte_range)}")
if not isinstance(session, (Session, type(None))):
raise TypeError(f"Expected session to be a {Session}, not {type(session)}")
if not url:
if self.descriptor != self.Descriptor.URL:
raise ValueError(f"An explicit URL must be provided for {self.descriptor.name} tracks")
if not self.url:
raise ValueError("An explicit URL must be provided as the track has no URL")
url = self.url
if not session:
session = Session()
content_length = maximum_size
if byte_range:
if not re.match(r"^\d+-\d+$", byte_range):
    raise ValueError(f"The value of byte_range is unrecognized: '{byte_range}'")
start, end = (int(x) for x in byte_range.split("-"))
if start > end:
    raise ValueError(f"The start range cannot be greater than the end range: {start}>{end}")
else:
size_test = session.head(url)
if "Content-Length" in size_test.headers:
content_length_header = int(size_test.headers["Content-Length"])
if content_length_header > 0:
content_length = min(content_length_header, maximum_size)
range_test = session.head(url, headers={"Range": "bytes=0-1"})
if range_test.status_code == 206:
byte_range = f"0-{content_length - 1}"
if byte_range:
res = session.get(url=url, headers={"Range": f"bytes={byte_range}"})
res.raise_for_status()
init_data = res.content
else:
init_data = None
with session.get(url, stream=True) as s:
for chunk in s.iter_content(content_length):
init_data = chunk
break
if not init_data:
raise ValueError(f"Failed to read {content_length} bytes from the track URI.")
return init_data
def repackage(self) -> None:
if not self.path or not self.path.exists():
raise ValueError("Cannot repackage a Track that has not been downloaded.")
if not binaries.FFMPEG:
raise EnvironmentError('FFmpeg executable "ffmpeg" was not found but is required for this call.')
original_path = self.path
output_path = original_path.with_stem(f"{original_path.stem}_repack")
def _ffmpeg(extra_args: Optional[list[str]] = None):
subprocess.run(
[
binaries.FFMPEG,
"-hide_banner",
"-loglevel",
"error",
"-i",
original_path,
*(extra_args or []),
# Following are very important!
"-map_metadata",
"-1", # don't transfer metadata to output file
"-fflags",
"bitexact", # only have minimal tag data, reproducible mux
"-codec",
"copy",
str(output_path),
],
check=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
try:
_ffmpeg()
except subprocess.CalledProcessError as e:
if b"Malformed AAC bitstream detected" in e.stderr:
# e.g., TruTV's dodgy encodes
_ffmpeg(["-y", "-bsf:a", "aac_adtstoasc"])
else:
raise
original_path.unlink()
self.path = output_path
__all__ = ("Track",)

434
unshackle/core/tracks/tracks.py Normal file
View File

@@ -0,0 +1,434 @@
from __future__ import annotations
import logging
import subprocess
from functools import partial
from pathlib import Path
from typing import Callable, Iterator, Optional, Sequence, Union
from langcodes import Language, closest_supported_match
from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeRemainingColumn
from rich.table import Table
from rich.tree import Tree
from unshackle.core.config import config
from unshackle.core.console import console
from unshackle.core.constants import LANGUAGE_MAX_DISTANCE, AnyTrack, TrackT
from unshackle.core.events import events
from unshackle.core.tracks.attachment import Attachment
from unshackle.core.tracks.audio import Audio
from unshackle.core.tracks.chapters import Chapter, Chapters
from unshackle.core.tracks.subtitle import Subtitle
from unshackle.core.tracks.track import Track
from unshackle.core.tracks.video import Video
from unshackle.core.utilities import is_close_match, sanitize_filename
from unshackle.core.utils.collections import as_list, flatten
class Tracks:
"""
Video, Audio, Subtitle, Chapter, and Attachment Track Store.
It provides convenience functions for listing, sorting, and selecting tracks.
"""
TRACK_ORDER_MAP = {Video: 0, Audio: 1, Subtitle: 2, Chapter: 3, Attachment: 4}
def __init__(
self,
*args: Union[
Tracks, Sequence[Union[AnyTrack, Chapter, Chapters, Attachment]], Track, Chapter, Chapters, Attachment
],
):
self.videos: list[Video] = []
self.audio: list[Audio] = []
self.subtitles: list[Subtitle] = []
self.chapters = Chapters()
self.attachments: list[Attachment] = []
if args:
self.add(args)
def __iter__(self) -> Iterator[AnyTrack]:
return iter(as_list(self.videos, self.audio, self.subtitles))
def __len__(self) -> int:
return len(self.videos) + len(self.audio) + len(self.subtitles)
def __add__(
self,
other: Union[
Tracks, Sequence[Union[AnyTrack, Chapter, Chapters, Attachment]], Track, Chapter, Chapters, Attachment
],
) -> Tracks:
self.add(other)
return self
def __repr__(self) -> str:
return "{name}({items})".format(
name=self.__class__.__name__, items=", ".join([f"{k}={repr(v)}" for k, v in self.__dict__.items()])
)
def __str__(self) -> str:
rep = {Video: [], Audio: [], Subtitle: [], Chapter: [], Attachment: []}
tracks = [*list(self), *self.chapters]
for track in sorted(tracks, key=lambda t: self.TRACK_ORDER_MAP[type(t)]):
if not rep[type(track)]:
count = sum(type(x) is type(track) for x in tracks)
rep[type(track)].append(
"{count} {type} Track{plural}{colon}".format(
count=count,
type=track.__class__.__name__,
plural="s" if count != 1 else "",
colon=":" if count > 0 else "",
)
)
rep[type(track)].append(str(track))
for type_ in list(rep):
if not rep[type_]:
del rep[type_]
continue
rep[type_] = "\n".join([rep[type_][0]] + [f"├─ {x}" for x in rep[type_][1:-1]] + [f"└─ {rep[type_][-1]}"])
rep = "\n".join(list(rep.values()))
return rep
def tree(self, add_progress: bool = False) -> tuple[Tree, list[partial]]:
all_tracks = [*list(self), *self.chapters, *self.attachments]
progress_callables = []
tree = Tree("", hide_root=True)
for track_type in self.TRACK_ORDER_MAP:
tracks = list(x for x in all_tracks if isinstance(x, track_type))
if not tracks:
continue
num_tracks = len(tracks)
track_type_plural = track_type.__name__ + ("s" if track_type != Audio and num_tracks != 1 else "")
tracks_tree = tree.add(f"[repr.number]{num_tracks}[/] {track_type_plural}")
for track in tracks:
if add_progress and track_type not in (Chapter, Attachment):
progress = Progress(
SpinnerColumn(finished_text=""),
BarColumn(),
"",
TimeRemainingColumn(compact=True, elapsed_when_finished=True),
"",
TextColumn("[progress.data.speed]{task.fields[downloaded]}"),
console=console,
speed_estimate_period=10,
)
task = progress.add_task("", downloaded="-")
progress_callables.append(partial(progress.update, task_id=task))
track_table = Table.grid()
track_table.add_row(str(track)[6:], style="text2")
track_table.add_row(progress)
tracks_tree.add(track_table)
else:
tracks_tree.add(str(track)[6:], style="text2")
return tree, progress_callables
def exists(self, by_id: Optional[str] = None, by_url: Optional[Union[str, list[str]]] = None) -> bool:
"""Check if a track already exists by various methods."""
if by_id: # recommended
return any(x.id == by_id for x in self)
if by_url:
return any(x.url == by_url for x in self)
return False
def add(
self,
tracks: Union[
Tracks, Sequence[Union[AnyTrack, Chapter, Chapters, Attachment]], Track, Chapter, Chapters, Attachment
],
warn_only: bool = False,
) -> None:
"""Add a provided track to its appropriate array and ensuring it's not a duplicate."""
if isinstance(tracks, Tracks):
tracks = [*list(tracks), *tracks.chapters, *tracks.attachments]
duplicates = 0
for track in flatten(tracks):
if self.exists(by_id=track.id):
if not warn_only:
raise ValueError(
    "One or more of the provided Tracks is a duplicate. "
    "Track IDs must be unique and derived from static values. The "
    "value should stay the same no matter when you request the same "
    "content. Use a value that relates to the track content itself "
    "and is static or permanent, not random/RNG data that changes "
    "each refresh or may conflict in edge cases."
)
duplicates += 1
continue
if isinstance(track, Video):
self.videos.append(track)
elif isinstance(track, Audio):
self.audio.append(track)
elif isinstance(track, Subtitle):
self.subtitles.append(track)
elif isinstance(track, Chapter):
self.chapters.add(track)
elif isinstance(track, Attachment):
self.attachments.append(track)
else:
raise ValueError("Track type was not set or is invalid.")
log = logging.getLogger("Tracks")
if duplicates:
log.warning(f" - Found and skipped {duplicates} duplicate tracks...")
def sort_videos(self, by_language: Optional[Sequence[Union[str, Language]]] = None) -> None:
"""Sort video tracks by bitrate, and optionally language."""
if not self.videos:
return
# bitrate
self.videos.sort(key=lambda x: float(x.bitrate or 0.0), reverse=True)
# language
for language in reversed(by_language or []):
if str(language) in ("all", "best"):
language = next((x.language for x in self.videos if x.is_original_lang), "")
if not language:
continue
self.videos.sort(key=lambda x: str(x.language))
self.videos.sort(key=lambda x: not is_close_match(language, [x.language]))
def sort_audio(self, by_language: Optional[Sequence[Union[str, Language]]] = None) -> None:
"""Sort audio tracks by bitrate, descriptive, and optionally language."""
if not self.audio:
return
# bitrate
self.audio.sort(key=lambda x: float(x.bitrate or 0.0), reverse=True)
# descriptive
self.audio.sort(key=lambda x: str(x.language) if x.descriptive else "")
# language
for language in reversed(by_language or []):
if str(language) in ("all", "best"):
language = next((x.language for x in self.audio if x.is_original_lang), "")
if not language:
continue
self.audio.sort(key=lambda x: str(x.language))
self.audio.sort(key=lambda x: not is_close_match(language, [x.language]))
def sort_subtitles(self, by_language: Optional[Sequence[Union[str, Language]]] = None) -> None:
"""
Sort subtitle tracks by various track attributes to a common P2P standard.
You may optionally provide a sequence of languages to prioritize to the top.
Section Order:
- by_language groups prioritized to top, and ascending alphabetically
- then rest ascending alphabetically after the prioritized groups
(Each section ascending alphabetically, but separated)
Language Group Order:
- Forced
- Normal
- Hard of Hearing (SDH/CC)
(Least to most captions expected in the subtitle)
"""
if not self.subtitles:
return
# language groups
self.subtitles.sort(key=lambda x: str(x.language))
self.subtitles.sort(key=lambda x: x.sdh or x.cc)
self.subtitles.sort(key=lambda x: x.forced, reverse=True)
# sections
for language in reversed(by_language or []):
if str(language) == "all":
language = next((x.language for x in self.subtitles if x.is_original_lang), "")
if not language:
continue
self.subtitles.sort(key=lambda x: is_close_match(language, [x.language]), reverse=True)
def select_video(self, x: Callable[[Video], bool]) -> None:
self.videos = list(filter(x, self.videos))
def select_audio(self, x: Callable[[Audio], bool]) -> None:
self.audio = list(filter(x, self.audio))
def select_subtitles(self, x: Callable[[Subtitle], bool]) -> None:
self.subtitles = list(filter(x, self.subtitles))
def by_resolutions(self, resolutions: list[int], per_resolution: int = 0) -> None:
# Note: Do not merge these list comprehensions. They must be done separately so the
# results from the 16:9 canvas check are only used if there's no exact height match.
selected = []
for resolution in resolutions:
matches = [ # exact matches
x for x in self.videos if x.height == resolution
]
if not matches:
matches = [ # 16:9 canvas matches
x for x in self.videos if int(x.width * (9 / 16)) == resolution
]
selected.extend(matches[: per_resolution or None])
self.videos = selected
@staticmethod
def by_language(tracks: list[TrackT], languages: list[str], per_language: int = 0) -> list[TrackT]:
selected = []
for language in languages:
selected.extend(
[x for x in tracks if closest_supported_match(x.language, [language], LANGUAGE_MAX_DISTANCE)][
: per_language or None
]
)
return selected
def mux(self, title: str, delete: bool = True, progress: Optional[partial] = None) -> tuple[Path, int, list[str]]:
"""
Multiplex all the Tracks into a Matroska Container file.
Parameters:
title: Set the Matroska Container file title. Usually displayed in players
instead of the filename if set.
delete: Delete all track files after multiplexing.
progress: Update a rich progress bar via `completed=...`. This must be the
progress object's update() func, pre-set with task id via functools.partial.
"""
cl = [
"mkvmerge",
"--no-date", # remove dates from the output for security
]
if config.muxing.get("set_title", True):
cl.extend(["--title", title])
for i, vt in enumerate(self.videos):
if not vt.path or not vt.path.exists():
raise ValueError("Video Track must be downloaded before muxing...")
events.emit(events.Types.TRACK_MULTIPLEX, track=vt)
cl.extend(
[
"--language",
f"0:{vt.language}",
"--default-track",
f"0:{i == 0}",
"--original-flag",
f"0:{vt.is_original_lang}",
"--compression",
"0:none", # disable extra compression
"(",
str(vt.path),
")",
]
)
for i, at in enumerate(self.audio):
if not at.path or not at.path.exists():
raise ValueError("Audio Track must be downloaded before muxing...")
events.emit(events.Types.TRACK_MULTIPLEX, track=at)
cl.extend(
[
"--track-name",
f"0:{at.get_track_name() or ''}",
"--language",
f"0:{at.language}",
"--default-track",
f"0:{at.is_original_lang}",
"--visual-impaired-flag",
f"0:{at.descriptive}",
"--original-flag",
f"0:{at.is_original_lang}",
"--compression",
"0:none", # disable extra compression
"(",
str(at.path),
")",
]
)
for st in self.subtitles:
if not st.path or not st.path.exists():
raise ValueError("Text Track must be downloaded before muxing...")
events.emit(events.Types.TRACK_MULTIPLEX, track=st)
default = bool(self.audio and is_close_match(st.language, [self.audio[0].language]) and st.forced)
cl.extend(
[
"--track-name",
f"0:{st.get_track_name() or ''}",
"--language",
f"0:{st.language}",
"--sub-charset",
"0:UTF-8",
"--forced-track",
f"0:{st.forced}",
"--default-track",
f"0:{default}",
"--hearing-impaired-flag",
f"0:{st.sdh}",
"--original-flag",
f"0:{st.is_original_lang}",
"--compression",
"0:none", # disable extra compression (probably zlib)
"(",
str(st.path),
")",
]
)
if self.chapters:
chapters_path = config.directories.temp / config.filenames.chapters.format(
title=sanitize_filename(title), random=self.chapters.id
)
self.chapters.dump(chapters_path, fallback_name=config.chapter_fallback_name)
cl.extend(["--chapter-charset", "UTF-8", "--chapters", str(chapters_path)])
else:
chapters_path = None
for attachment in self.attachments:
if not attachment.path or not attachment.path.exists():
raise ValueError("Attachment File was not found...")
cl.extend(
[
"--attachment-description",
attachment.description or "",
"--attachment-mime-type",
attachment.mime_type,
"--attachment-name",
attachment.name,
"--attach-file",
str(attachment.path.resolve()),
]
)
output_path = (
self.videos[0].path.with_suffix(".muxed.mkv")
if self.videos
else self.audio[0].path.with_suffix(".muxed.mka")
if self.audio
else self.subtitles[0].path.with_suffix(".muxed.mks")
if self.subtitles
else chapters_path.with_suffix(".muxed.mkv")
if self.chapters
else None
)
if not output_path:
raise ValueError("No tracks provided, at least one track must be provided.")
# let potential failures go to caller, caller should handle
try:
errors = []
p = subprocess.Popen([*cl, "--output", str(output_path), "--gui-mode"], text=True, stdout=subprocess.PIPE)
for line in iter(p.stdout.readline, ""):
if line.startswith(("#GUI#error", "#GUI#warning")):
    errors.append(line)
if progress and "progress" in line:
    progress(total=100, completed=int(line.strip()[14:-1]))
return output_path, p.wait(), errors
finally:
if chapters_path:
chapters_path.unlink()
if delete:
for track in self:
track.delete()
for attachment in self.attachments:
if attachment.path and attachment.path.exists():
attachment.path.unlink()
__all__ = ("Tracks",)

451
unshackle/core/tracks/video.py Normal file
View File

@@ -0,0 +1,451 @@
from __future__ import annotations
import logging
import math
import re
import subprocess
from enum import Enum
from pathlib import Path
from typing import Any, Optional, Union
from langcodes import Language
from unshackle.core import binaries
from unshackle.core.config import config
from unshackle.core.tracks.subtitle import Subtitle
from unshackle.core.tracks.track import Track
from unshackle.core.utilities import FPS, get_boxes
class Video(Track):
class Codec(str, Enum):
AVC = "H.264"
HEVC = "H.265"
VC1 = "VC-1"
VP8 = "VP8"
VP9 = "VP9"
AV1 = "AV1"
@property
def extension(self) -> str:
return self.value.lower().replace(".", "").replace("-", "")
@staticmethod
def from_mime(mime: str) -> Video.Codec:
mime = mime.lower().strip().split(".")[0]
if mime in (
"avc1",
"avc2",
"avc3",
"dva1",
"dvav", # Dolby Vision
):
return Video.Codec.AVC
if mime in (
"hev1",
"hev2",
"hev3",
"hvc1",
"hvc2",
"hvc3",
"dvh1",
"dvhe", # Dolby Vision
"lhv1",
"lhe1", # Layered
):
return Video.Codec.HEVC
if mime == "vc-1":
return Video.Codec.VC1
if mime in ("vp08", "vp8"):
return Video.Codec.VP8
if mime in ("vp09", "vp9"):
return Video.Codec.VP9
if mime == "av01":
return Video.Codec.AV1
raise ValueError(f"The MIME '{mime}' is not a supported Video Codec")
@staticmethod
def from_codecs(codecs: str) -> Video.Codec:
for codec in codecs.lower().split(","):
codec = codec.strip()
mime = codec.split(".")[0]
try:
return Video.Codec.from_mime(mime)
except ValueError:
pass
raise ValueError(f"No MIME types matched any supported Video Codecs in '{codecs}'")
@staticmethod
def from_netflix_profile(profile: str) -> Video.Codec:
profile = profile.lower().strip()
if profile.startswith(("h264", "playready-h264")):
return Video.Codec.AVC
if profile.startswith("hevc"):
return Video.Codec.HEVC
if profile.startswith("vp9"):
return Video.Codec.VP9
if profile.startswith("av1"):
return Video.Codec.AV1
raise ValueError(f"The Content Profile '{profile}' is not a supported Video Codec")
class Range(str, Enum):
SDR = "SDR" # No Dynamic Range
HLG = "HLG" # https://en.wikipedia.org/wiki/Hybrid_log%E2%80%93gamma
HDR10 = "HDR10" # https://en.wikipedia.org/wiki/HDR10
HDR10P = "HDR10+" # https://en.wikipedia.org/wiki/HDR10%2B
DV = "DV" # https://en.wikipedia.org/wiki/Dolby_Vision
@staticmethod
def from_cicp(primaries: int, transfer: int, matrix: int) -> Video.Range:
"""
ISO/IEC 23001-8 Coding-independent code points to Video Range.
Sources:
https://www.itu.int/rec/T-REC-H.Sup19-202104-I
"""
class Primaries(Enum):
Unspecified = 0
BT_709 = 1
BT_601_625 = 5
BT_601_525 = 6
BT_2020_and_2100 = 9
SMPTE_ST_2113_and_EG_4321 = 12 # P3D65
class Transfer(Enum):
Unspecified = 0
BT_709 = 1
BT_601 = 6
BT_2020 = 14
BT_2100 = 15
BT_2100_PQ = 16
BT_2100_HLG = 18
class Matrix(Enum):
RGB = 0
YCbCr_BT_709 = 1
YCbCr_BT_601_625 = 5
YCbCr_BT_601_525 = 6
YCbCr_BT_2020_and_2100 = 9 # YCbCr BT.2100 shares the same CP
ICtCp_BT_2100 = 14
if transfer == 5:
# While not part of any standard, it is typically used as a PAL variant of Transfer.BT_601=6.
# i.e. where Transfer 6 would be for BT.601-NTSC and Transfer 5 would be for BT.601-PAL.
# The codebase is currently agnostic to either, so a manual conversion to 6 is done.
transfer = 6
# a (0, 0, 0) triplet means "unspecified"; check it before converting to
# enums, as the enum members do not compare equal to plain ints
if (primaries, transfer, matrix) == (0, 0, 0):
    return Video.Range.SDR
primaries = Primaries(primaries)
transfer = Transfer(transfer)
matrix = Matrix(matrix)
# primaries and matrix do not strictly correlate to a range
if primaries in (Primaries.BT_601_625, Primaries.BT_601_525):
    return Video.Range.SDR
elif transfer == Transfer.BT_2100_PQ:
    return Video.Range.HDR10
elif transfer == Transfer.BT_2100_HLG:
    return Video.Range.HLG
else:
    return Video.Range.SDR
@staticmethod
def from_m3u_range_tag(tag: str) -> Optional[Video.Range]:
tag = (tag or "").upper().replace('"', "").strip()
if not tag:
return None
if tag == "SDR":
return Video.Range.SDR
elif tag == "PQ":
return Video.Range.HDR10 # technically could be any PQ-transfer range
elif tag == "HLG":
return Video.Range.HLG
# for some reason there's no Dolby Vision info tag
raise ValueError(f"The M3U Range Tag '{tag}' is not a supported Video Range")
def __init__(
self,
*args: Any,
codec: Optional[Video.Codec] = None,
range_: Optional[Video.Range] = None,
bitrate: Optional[Union[str, int, float]] = None,
width: Optional[int] = None,
height: Optional[int] = None,
fps: Optional[Union[str, int, float]] = None,
**kwargs: Any,
) -> None:
"""
Create a new Video track object.
Parameters:
codec: A Video.Codec enum representing the video codec.
If not specified, MediaInfo will be used to retrieve the codec
once the track has been downloaded.
range_: A Video.Range enum representing the video color range.
Defaults to SDR if not specified.
bitrate: A number or float representing the average bandwidth in bits/s.
Float values are rounded up to the nearest integer.
width: The horizontal resolution of the video.
height: The vertical resolution of the video.
fps: A number, float, or string representing the frames/s of the video.
Strings may represent numbers, floats, or a fraction (num/den).
All strings will be cast to either a number or float.
Note: If codec, bitrate, width, height, or fps is not specified some checks
may be skipped or assume a value. Specifying as much information as possible
is highly recommended.
"""
super().__init__(*args, **kwargs)
if not isinstance(codec, (Video.Codec, type(None))):
raise TypeError(f"Expected codec to be a {Video.Codec}, not {codec!r}")
if not isinstance(range_, (Video.Range, type(None))):
raise TypeError(f"Expected range_ to be a {Video.Range}, not {range_!r}")
if not isinstance(bitrate, (str, int, float, type(None))):
raise TypeError(f"Expected bitrate to be a {str}, {int}, or {float}, not {bitrate!r}")
if not isinstance(width, (int, str, type(None))):
    raise TypeError(f"Expected width to be an {int} or {str}, not {width!r}")
if not isinstance(height, (int, str, type(None))):
    raise TypeError(f"Expected height to be an {int} or {str}, not {height!r}")
if not isinstance(fps, (str, int, float, type(None))):
raise TypeError(f"Expected fps to be a {str}, {int}, or {float}, not {fps!r}")
self.codec = codec
self.range = range_ or Video.Range.SDR
try:
self.bitrate = int(math.ceil(float(bitrate))) if bitrate else None
except (ValueError, TypeError) as e:
raise ValueError(f"Expected bitrate to be a number or float, {e}")
try:
self.width = int(width or 0) or None
except ValueError as e:
raise ValueError(f"Expected width to be a number, not {width!r}, {e}")
try:
self.height = int(height or 0) or None
except ValueError as e:
raise ValueError(f"Expected height to be a number, not {height!r}, {e}")
try:
self.fps = (FPS.parse(str(fps)) or None) if fps else None
except Exception as e:
    raise ValueError(f"Expected fps to be a number, float, or a string in numerator/denominator form, {e}")
def __str__(self) -> str:
return " | ".join(
filter(
bool,
[
"VID",
"[" + (", ".join(filter(bool, [self.codec.value if self.codec else None, self.range.name]))) + "]",
str(self.language),
", ".join(
filter(
bool,
[
" @ ".join(
filter(
bool,
[
f"{self.width}x{self.height}" if self.width and self.height else None,
f"{self.bitrate // 1000} kb/s" if self.bitrate else None,
],
)
),
f"{self.fps:.3f} FPS" if self.fps else None,
],
)
),
self.edition,
],
)
)
def change_color_range(self, range_: int) -> None:
"""Change the Video's Color Range to Limited (0) or Full (1)."""
if not self.path or not self.path.exists():
raise ValueError("Cannot change the color range flag on a Video that has not been downloaded.")
if not self.codec:
raise ValueError("Cannot change the color range flag on a Video that has no codec specified.")
if self.codec not in (Video.Codec.AVC, Video.Codec.HEVC):
raise NotImplementedError(
"Cannot change the color range flag on this Video as "
f"it's codec, {self.codec.value}, is not yet supported."
)
if not binaries.FFMPEG:
raise EnvironmentError('FFmpeg executable "ffmpeg" was not found but is required for this call.')
filter_key = {Video.Codec.AVC: "h264_metadata", Video.Codec.HEVC: "hevc_metadata"}[self.codec]
original_path = self.path
output_path = original_path.with_stem(f"{original_path.stem}_{['limited', 'full'][range_]}_range")
subprocess.run(
[
binaries.FFMPEG,
"-hide_banner",
"-loglevel",
"panic",
"-i",
original_path,
"-codec",
"copy",
"-bsf:v",
f"{filter_key}=video_full_range_flag={range_}",
str(output_path),
],
check=True,
)
self.path = output_path
original_path.unlink()
def ccextractor(
self, track_id: Any, out_path: Union[Path, str], language: Language, original: bool = False
) -> Optional[Subtitle]:
"""Return a TextTrack object representing CC track extracted by CCExtractor."""
if not self.path:
raise ValueError("You must download the track first.")
if not binaries.CCExtractor:
raise EnvironmentError("ccextractor executable was not found.")
# ccextractor often fails in weird ways unless we repack
self.repackage()
out_path = Path(out_path)
try:
subprocess.run(
[binaries.CCExtractor, "-trim", "-nobom", "-noru", "-ru1", "-o", out_path, self.path],
check=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
except subprocess.CalledProcessError as e:
out_path.unlink(missing_ok=True)
if e.returncode != 10:  # 10: no captions found
raise
if out_path.exists():
cc_track = Subtitle(
id_=track_id,
url="", # doesn't need to be downloaded
codec=Subtitle.Codec.SubRip,
language=language,
is_original_lang=original,
cc=True,
)
cc_track.path = out_path
return cc_track
return None
def extract_c608(self) -> list[Subtitle]:
"""
Extract Apple-Style c608 box (CEA-608) subtitle using ccextractor.
This isn't much more than a wrapper around the track.ccextractor function.
It simply checks whether a c608 box exists and, only if so, calls ccextractor.
Even though more than one c608 box may exist, only one can currently be
extracted. It is also very possible this needs to run before any decryption,
as decryption may destroy some of the metadata.
TODO: Need a test file with more than one c608 box to add support for
more than one CEA-608 extraction.
"""
if not self.path:
raise ValueError("You must download the track first.")
with self.path.open("rb") as f:
# assuming 20KB is enough to contain the c608 box.
# ffprobe will fail, so a c608 box check must be done.
c608_count = len(list(get_boxes(f.read(20000), b"c608")))
if c608_count > 0:
# TODO: Figure out the real language, it might be different
# CEA-608 boxes don't seem to carry language information :(
# TODO: Figure out if the CC language is the original language or not.
# Will need to figure out the above first to do so.
track_id = f"ccextractor-{self.id}"
cc_lang = self.language
cc_track = self.ccextractor(
track_id=track_id,
out_path=config.directories.temp / config.filenames.subtitle.format(id=track_id, language=cc_lang),
language=cc_lang,
original=False,
)
if not cc_track:
return []
return [cc_track]
return []
def remove_eia_cc(self) -> bool:
"""
Remove EIA-CC data from the bitstream while keeping SEI data.
This works by removing all NAL units of type 6 from the bitstream and
then re-adding the SEI data (effectively a new NAL unit with just the SEI data).
Only bitstreams with x264 encoding information are currently supported due to
the obscurity of the MDAT mp4 box structure. Therefore, we need to use hacky regex.
"""
if not self.path or not self.path.exists():
raise ValueError("Cannot clean a Track that has not been downloaded.")
if not binaries.FFMPEG:
raise EnvironmentError('FFmpeg executable "ffmpeg" was not found but is required for this call.')
log = logging.getLogger("x264-clean")
log.info("Removing EIA-CC from Video Track with FFMPEG")
with open(self.path, "rb") as f:
file = f.read(60000)
x264 = re.search(rb"(.{16})(x264)", file)
if not x264:
log.info(" - No x264 encode settings were found, unsupported...")
return False
uuid = x264.group(1).hex()
i = file.index(b"x264")
encoding_settings = file[i : i + file[i:].index(b"\x00")].replace(b":", rb"\\:").replace(b",", rb"\,").decode()
original_path = self.path
cleaned_path = original_path.with_suffix(f".cleaned{original_path.suffix}")
subprocess.run(
[
binaries.FFMPEG,
"-hide_banner",
"-loglevel",
"panic",
"-i",
original_path,
"-map_metadata",
"-1",
"-fflags",
"bitexact",
"-bsf:v",
f"filter_units=remove_types=6,h264_metadata=sei_user_data={uuid}+{encoding_settings}",
"-codec",
"copy",
str(cleaned_path),
],
check=True,
)
log.info(" + Removed")
self.path = cleaned_path
original_path.unlink()
return True
__all__ = ("Video",)

354
unshackle/core/utilities.py Normal file
View File

@@ -0,0 +1,354 @@
import ast
import contextlib
import importlib.util
import os
import re
import socket
import sys
import time
import unicodedata
from collections import defaultdict
from datetime import datetime
from pathlib import Path
from types import ModuleType
from typing import Optional, Sequence, Union
from urllib.parse import ParseResult, urlparse
import chardet
import requests
from construct import ValidationError
from langcodes import Language, closest_match
from pymp4.parser import Box
from unidecode import unidecode
from unshackle.core.config import config
from unshackle.core.constants import LANGUAGE_MAX_DISTANCE
def rotate_log_file(log_path: Path, keep: int = 20) -> Path:
"""
Update Log Filename and delete old log files.
It keeps only the 20 newest logs by default.
"""
if not log_path:
raise ValueError("A log path must be provided")
try:
log_path.relative_to(Path("")) # file name only
except ValueError:
pass
else:
log_path = config.directories.logs / log_path
log_path = log_path.parent / log_path.name.format_map(
defaultdict(str, name="root", time=datetime.now().strftime("%Y%m%d-%H%M%S"))
)
if log_path.parent.exists():
log_files = sorted(x for x in log_path.parent.iterdir() if x.suffix == log_path.suffix)
for log_file in log_files[::-1][keep - 1 :]:
    # keep the n newest files (by timestamped name) and delete the rest
    log_file.unlink()
log_path.parent.mkdir(parents=True, exist_ok=True)
return log_path
def import_module_by_path(path: Path) -> ModuleType:
"""Import a Python file by Path as a Module."""
if not path:
raise ValueError("Path must be provided")
if not isinstance(path, Path):
raise TypeError(f"Expected path to be a {Path}, not {path!r}")
if not path.exists():
raise ValueError("Path does not exist")
# compute package hierarchy for relative import support
if path.is_relative_to(config.directories.core_dir):
name = []
_path = path.parent
while _path.stem != config.directories.core_dir.stem:
name.append(_path.stem)
_path = _path.parent
name = ".".join([config.directories.core_dir.stem] + name[::-1])
else:
# is outside the src package
if str(path.parent.parent) not in sys.path:
sys.path.insert(1, str(path.parent.parent))
name = path.parent.stem
spec = importlib.util.spec_from_file_location(name, path)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
return module
def sanitize_filename(filename: str, spacer: str = ".") -> str:
"""
Sanitize a string to be filename safe.
The spacer is safer to be a '.' for older DDL and p2p sharing spaces.
This includes web-served content via direct links and such.
"""
# replace all non-ASCII characters with ASCII equivalents
filename = unidecode(filename)
# remove or replace further characters as needed
filename = "".join(c for c in filename if unicodedata.category(c) != "Mn") # hidden characters
filename = filename.replace("/", " & ").replace(";", " & ") # e.g. multi-episode filenames
filename = re.sub(r"[:; ]", spacer, filename) # structural chars to (spacer)
filename = re.sub(r"[\\*!?¿,'\"" "()<>|$#~]", "", filename) # not filename safe chars
filename = re.sub(rf"[{spacer}]{{2,}}", spacer, filename) # remove extra neighbouring (spacer)s
return filename
def is_close_match(language: Union[str, Language], languages: Sequence[Union[str, Language, None]]) -> bool:
"""Check if a language is a close match to any of the provided languages."""
languages = [x for x in languages if x]
if not languages:
return False
return closest_match(language, list(map(str, languages)))[1] <= LANGUAGE_MAX_DISTANCE
def get_boxes(data: bytes, box_type: bytes, as_bytes: bool = False) -> Box:
"""
Scan a byte array for a wanted MP4/ISOBMFF box, then parse and yield each find.
This function searches through binary MP4 data to find and parse specific box types.
The MP4/ISOBMFF box format consists of:
- 4 bytes: size of the box (including size and type fields)
- 4 bytes: box type identifier (e.g., 'moov', 'trak', 'pssh')
- Remaining bytes: box data
The function uses slicing to directly locate the requested box type in the data
rather than recursively traversing the box hierarchy. This is efficient when
looking for specific box types regardless of their position in the hierarchy.
Parameters:
data: Binary data containing MP4/ISOBMFF boxes
box_type: 4-byte identifier of the box type to find (e.g., b'pssh')
as_bytes: If True, returns the box as bytes, otherwise returns parsed box object
Yields:
Box objects of the requested type found in the data
Notes:
- For each box found, the function updates the search offset to skip past
the current box to avoid finding the same box multiple times
- The function handles validation errors for certain box types (e.g., tenc)
- The size field is located 4 bytes before the box type identifier
"""
# using slicing to get to the wanted box is done because parsing the entire box and recursively
# scanning through each box and its children often wouldn't scan far enough to reach the wanted box.
# since it doesn't care what child box the wanted box is from, this works fine.
if not isinstance(data, (bytes, bytearray)):
raise ValueError("data must be bytes")
offset = 0
while offset < len(data):
try:
index = data[offset:].index(box_type)
except ValueError:
break
pos = offset + index
if pos < 4:
offset = pos + len(box_type)
continue
box_start = pos - 4
try:
box = Box.parse(data[box_start:])
if as_bytes:
box = Box.build(box)
yield box
box_size = len(Box.build(box))
offset = box_start + box_size
except IOError:
break
except ValidationError as e:
if box_type == b"tenc":
offset = pos + len(box_type)
continue
raise e
def ap_case(text: str, keep_spaces: bool = False, stop_words: tuple[str] = None) -> str:
"""
Convert a string to title case using AP/APA style.
Based on https://github.com/words/ap-style-title-case
Parameters:
text: The text string to title case with AP/APA style.
keep_spaces: To keep the original whitespace, or to just use a normal space.
This would only be needed if you have special whitespace between words.
stop_words: Override the default stop words with your own ones.
"""
if not text:
return ""
if not stop_words:
stop_words = (
"a",
"an",
"and",
"at",
"but",
"by",
"for",
"in",
"nor",
"of",
"on",
"or",
"so",
"the",
"to",
"up",
"yet",
)
splitter = re.compile(r"(\s+|[-‑–—])")
words = splitter.split(text)
return "".join(
[
[" ", word][keep_spaces]
if re.match(r"\s+", word)
else word
if splitter.match(word)
else word.lower()
if i != 0 and i != len(words) - 1 and word.lower() in stop_words
else word.capitalize()
for i, word in enumerate(words)
]
)
def get_ip_info(session: Optional[requests.Session] = None) -> dict:
"""
Use ipinfo.io to get IP location information.
If you provide a Requests Session with a Proxy, that proxies IP information
is what will be returned.
"""
return (session or requests.Session()).get("https://ipinfo.io/json").json()
def time_elapsed_since(start: float) -> str:
"""
Get time elapsed since a timestamp as a string.
E.g., `1h56m2s`, `15m12s`, `0m55s`, e.t.c.
"""
elapsed = int(time.time() - start)
minutes, seconds = divmod(elapsed, 60)
hours, minutes = divmod(minutes, 60)
time_string = f"{minutes:d}m{seconds:d}s"
if hours:
time_string = f"{hours:d}h{time_string}"
return time_string
def try_ensure_utf8(data: bytes) -> bytes:
"""
Try to ensure that the given data is encoded in UTF-8.
Parameters:
data: Input data that may or may not yet be UTF-8 or another encoding.
Returns the input data encoded in UTF-8 if successful. If unable to detect the
encoding of the input data, then the original data is returned as-received.
"""
try:
data.decode("utf8")
return data
except UnicodeDecodeError:
try:
# CP-1252 is a superset of latin1 but has gaps. Replace unknown
# characters instead of failing on them.
return data.decode("cp1252", errors="replace").encode("utf8")
except UnicodeDecodeError:
try:
# last ditch effort to detect encoding
detection_result = chardet.detect(data)
if not detection_result["encoding"]:
return data
return data.decode(detection_result["encoding"]).encode("utf8")
except UnicodeDecodeError:
return data
def get_free_port() -> int:
"""
Get an available port from the OS to use.
The port is freed as soon as this has returned, therefore, it
is possible for the port to be taken before you try to use it.
"""
with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
    s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    s.bind(("", 0))
    return s.getsockname()[1]
def get_extension(value: Union[str, Path, ParseResult]) -> Optional[str]:
"""
Get a URL or Path file extension/suffix.
Note: The returned value will begin with `.`.
"""
if isinstance(value, ParseResult):
value_parsed = value
elif isinstance(value, (str, Path)):
value_parsed = urlparse(str(value))
else:
raise TypeError(f"Expected {str}, {Path}, or {ParseResult}, got {type(value)}")
if value_parsed.path:
ext = os.path.splitext(value_parsed.path)[1]
if ext and ext != ".":
return ext
def get_system_fonts() -> dict[str, Path]:
if sys.platform == "win32":
import winreg
with winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE) as reg:
key = winreg.OpenKey(reg, r"SOFTWARE\Microsoft\Windows NT\CurrentVersion\Fonts", 0, winreg.KEY_READ)
total_fonts = winreg.QueryInfoKey(key)[1]
return {
name.replace(" (TrueType)", ""): Path(r"C:\Windows\Fonts", filename)
for n in range(0, total_fonts)
for name, filename, _ in [winreg.EnumValue(key, n)]
}
else:
# TODO: Get system fonts for Linux and macOS
return {}
class FPS(ast.NodeVisitor):
def visit_BinOp(self, node: ast.BinOp) -> float:
if isinstance(node.op, ast.Div):
return self.visit(node.left) / self.visit(node.right)
raise ValueError(f"Invalid operation: {node.op}")
def visit_Num(self, node: ast.Num) -> complex:
return node.n
def visit_Expr(self, node: ast.Expr) -> float:
return self.visit(node.value)
@classmethod
def parse(cls, expr: str) -> float:
return cls().visit(ast.parse(expr).body[0])
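# --- Usage sketch (illustrative, not part of the original module) ---
# Quick self-checks for the pure helpers above; the input values are
# chosen only for illustration.
if __name__ == "__main__":
    print(FPS.parse("30000/1001"))  # ~29.97
    print(sanitize_filename("Name: The Movie (2024)"))  # Name.The.Movie.2024
    print(ap_case("the lord of the rings"))  # The Lord of the Rings
    print(time_elapsed_since(time.time() - 3725))  # 1h2m5s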

View File

@@ -0,0 +1,169 @@
import re
from typing import Any, Optional, Union
import click
from click.shell_completion import CompletionItem
from pywidevine.cdm import Cdm as WidevineCdm
class ContextData:
def __init__(self, config: dict, cdm: WidevineCdm, proxy_providers: list, profile: Optional[str] = None):
self.config = config
self.cdm = cdm
self.proxy_providers = proxy_providers
self.profile = profile
class SeasonRange(click.ParamType):
name = "ep_range"
MIN_EPISODE = 0
MAX_EPISODE = 999
def parse_tokens(self, *tokens: str) -> list[str]:
"""
Parse multiple tokens or ranged tokens as '{s}x{e}' strings.
Supports exclusion by putting a `-` before the token.
Example:
>>> sr = SeasonRange()
>>> sr.parse_tokens("S01E01")
["1x1"]
>>> sr.parse_tokens("S02E01", "S02E03-S02E05")
["2x1", "2x3", "2x4", "2x5"]
>>> sr.parse_tokens("S01-S05", "-S03", "-S02E01")
["1x0", "1x1", ..., "2x0", (...), "2x2", (...), "4x0", ..., "5x0", ...]
"""
if len(tokens) == 0:
return []
computed: list = []
exclusions: list = []
for token in tokens:
exclude = token.startswith("-")
if exclude:
token = token[1:]
parsed = [
re.match(r"^S(?P<season>\d+)(E(?P<episode>\d+))?$", x, re.IGNORECASE) for x in re.split(r"[:-]", token)
]
if len(parsed) > 2:
self.fail(f"Invalid token, only a left and right range is acceptable: {token}")
if len(parsed) == 1:
parsed.append(parsed[0])
if any(x is None for x in parsed):
self.fail(f"Invalid token, syntax error occurred: {token}")
from_season, from_episode = [
int(v) if v is not None else self.MIN_EPISODE
for k, v in parsed[0].groupdict().items()
if parsed[0] # type: ignore[union-attr]
]
to_season, to_episode = [
int(v) if v is not None else self.MAX_EPISODE
for k, v in parsed[1].groupdict().items()
if parsed[1] # type: ignore[union-attr]
]
if from_season > to_season:
self.fail(f"Invalid range, left side season cannot be bigger than right side season: {token}")
if from_season == to_season and from_episode > to_episode:
self.fail(f"Invalid range, left side episode cannot be bigger than right side episode: {token}")
for s in range(from_season, to_season + 1):
for e in range(
from_episode if s == from_season else 0, (self.MAX_EPISODE if s < to_season else to_episode) + 1
):
(computed if not exclude else exclusions).append(f"{s}x{e}")
for exclusion in exclusions:
if exclusion in computed:
computed.remove(exclusion)
return list(set(computed))
def convert(
self, value: str, param: Optional[click.Parameter] = None, ctx: Optional[click.Context] = None
) -> list[str]:
return self.parse_tokens(*re.split(r"\s*[,;]\s*", value))
class LanguageRange(click.ParamType):
name = "lang_range"
def convert(
self, value: Union[str, list], param: Optional[click.Parameter] = None, ctx: Optional[click.Context] = None
) -> list[str]:
if isinstance(value, list):
return value
if not value:
return []
return re.split(r"\s*[,;]\s*", value)
class QualityList(click.ParamType):
name = "quality_list"
def convert(
self, value: Union[str, list[str]], param: Optional[click.Parameter] = None, ctx: Optional[click.Context] = None
) -> list[int]:
if not value:
return []
if not isinstance(value, list):
value = value.split(",")
resolutions = []
for resolution in value:
try:
resolutions.append(int(resolution.lower().rstrip("p")))
except (AttributeError, TypeError):
self.fail(
f"Expected string for int() conversion, got {resolution!r} of type {type(resolution).__name__}",
param,
ctx,
)
except ValueError:
self.fail(f"{resolution!r} is not a valid integer", param, ctx)
return sorted(resolutions, reverse=True)
class MultipleChoice(click.Choice):
"""
The multiple choice type allows multiple values to be checked against
a fixed set of supported values.
It internally uses and is based off of click.Choice.
"""
name = "multiple_choice"
def __repr__(self) -> str:
return f"MultipleChoice({list(self.choices)})"
def convert(
self, value: Any, param: Optional[click.Parameter] = None, ctx: Optional[click.Context] = None
) -> list[Any]:
if not value:
return []
if isinstance(value, str):
values = value.split(",")
elif isinstance(value, list):
values = value
else:
self.fail(f"{value!r} is not a supported value.", param, ctx)
chosen_values: list[Any] = []
for value in values:
chosen_values.append(super().convert(value, param, ctx))
return chosen_values
def shell_complete(self, ctx: click.Context, param: click.Parameter, incomplete: str) -> list[CompletionItem]:
"""
Complete choices that start with the incomplete value.
Parameters:
ctx: Invocation context for this command.
param: The parameter that is requesting completion.
incomplete: Value being completed. May be empty.
"""
incomplete = incomplete.rsplit(",")[-1]
        return super().shell_complete(ctx, param, incomplete)
SEASON_RANGE = SeasonRange()
LANGUAGE_RANGE = LanguageRange()
QUALITY_LIST = QualityList()
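# Illustrative sketch (not part of the dl command itself): how these parameter
# types might be wired into a click command. The option names are assumptions.
@click.command()
@click.option("-w", "--wanted", type=SEASON_RANGE, default=None, help="e.g. S01E01-S01E03")
@click.option("-l", "--lang", type=LANGUAGE_RANGE, default="en", help="e.g. en,fr")
@click.option("-q", "--quality", type=QUALITY_LIST, default=None, help="e.g. 1080p,720p")
def _demo(wanted, lang, quality):
    # SEASON_RANGE expands "S01E01-S01E03" to ["1x1", "1x2", "1x3"], and so on.
    click.echo(f"wanted={wanted} lang={lang} quality={quality}")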

View File

@@ -0,0 +1,51 @@
import itertools
from typing import Any, Iterable, Iterator, Sequence, Tuple, Type, Union
def as_lists(*args: Any) -> Iterator[Any]:
"""Converts any input objects to list objects."""
for item in args:
yield item if isinstance(item, list) else [item]
def as_list(*args: Any) -> list:
"""
Convert any input objects to a single merged list object.
Example:
>>> as_list('foo', ['buzz', 'bizz'], 'bazz', 'bozz', ['bar'], ['bur'])
['foo', 'buzz', 'bizz', 'bazz', 'bozz', 'bar', 'bur']
"""
return list(itertools.chain.from_iterable(as_lists(*args)))
def flatten(items: Any, ignore_types: Union[Type, Tuple[Type, ...]] = str) -> Iterator:
"""
Flattens items recursively.
Example:
>>> list(flatten(["foo", [["bar", ["buzz", [""]], "bee"]]]))
['foo', 'bar', 'buzz', '', 'bee']
>>> list(flatten("foo"))
['foo']
>>> list(flatten({1}, set))
[{1}]
"""
if isinstance(items, (Iterable, Sequence)) and not isinstance(items, ignore_types):
for i in items:
yield from flatten(i, ignore_types)
else:
yield items
def merge_dict(source: dict, destination: dict) -> None:
"""Recursively merge Source into Destination in-place."""
if not source:
return
for key, value in source.items():
if isinstance(value, dict):
# get node or create one
node = destination.setdefault(key, {})
merge_dict(value, node)
else:
destination[key] = value
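# A minimal usage sketch of the helpers above; values are arbitrary examples.
if __name__ == "__main__":
    print(as_list("foo", ["bar", "baz"]))        # ['foo', 'bar', 'baz']
    print(list(flatten(["a", [["b", ["c"]]]])))  # ['a', 'b', 'c']
    dst = {"a": {"x": 1}}
    merge_dict({"a": {"y": 2}}, dst)             # merged in-place
    print(dst)                                   # {'a': {'x': 1, 'y': 2}}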

View File

@@ -0,0 +1,30 @@
import logging
import os
import random
from datetime import datetime, timedelta
log = logging.getLogger("NF-ESN")
def chrome_esn_generator():
ESN_GEN = "".join(random.choice("0123456789ABCDEF") for _ in range(30))
esn_file = ".esn"
def gen_file():
with open(esn_file, "w") as file:
file.write(f"NFCDIE-03-{ESN_GEN}")
if not os.path.isfile(esn_file):
log.warning("Generating a new Chrome ESN")
gen_file()
file_datetime = datetime.fromtimestamp(os.path.getmtime(esn_file))
time_diff = datetime.now() - file_datetime
if time_diff > timedelta(hours=6):
log.warning("Old ESN detected, Generating a new Chrome ESN")
gen_file()
with open(esn_file, "r") as f:
esn = f.read()
return esn

View File

@@ -0,0 +1,24 @@
import platform
def get_os_arch(name: str) -> str:
"""Builds a name-os-arch based on the input name, system, architecture."""
os_name = platform.system().lower()
os_arch = platform.machine().lower()
# Map platform.system() output to desired OS name
if os_name == "windows":
os_name = "win"
elif os_name == "darwin":
os_name = "osx"
else:
os_name = "linux"
# Map platform.machine() output to desired architecture
if os_arch in ["x86_64", "amd64"]:
os_arch = "x64"
elif os_arch == "arm64":
os_arch = "arm64"
# Construct the dependency name in the desired format using the input name
return f"{name}-{os_name}-{os_arch}"

View File

@@ -0,0 +1,77 @@
import ssl
from typing import Optional
from requests.adapters import HTTPAdapter
class SSLCiphers(HTTPAdapter):
"""
Custom HTTP Adapter to change the TLS Cipher set and security requirements.
Security Level may optionally be provided. A level above 0 must be used at all times.
A list of Security Levels and their security is listed below. Usually 2 is used by default.
Do not set the Security level via @SECLEVEL in the cipher list.
Level 0:
Everything is permitted. This retains compatibility with previous versions of OpenSSL.
Level 1:
The security level corresponds to a minimum of 80 bits of security. Any parameters
offering below 80 bits of security are excluded. As a result RSA, DSA and DH keys
shorter than 1024 bits and ECC keys shorter than 160 bits are prohibited. All export
cipher suites are prohibited since they all offer less than 80 bits of security. SSL
version 2 is prohibited. Any cipher suite using MD5 for the MAC is also prohibited.
Level 2:
Security level set to 112 bits of security. As a result RSA, DSA and DH keys shorter
than 2048 bits and ECC keys shorter than 224 bits are prohibited. In addition to the
level 1 exclusions any cipher suite using RC4 is also prohibited. SSL version 3 is
also not allowed. Compression is disabled.
Level 3:
Security level set to 128 bits of security. As a result RSA, DSA and DH keys shorter
than 3072 bits and ECC keys shorter than 256 bits are prohibited. In addition to the
level 2 exclusions cipher suites not offering forward secrecy are prohibited. TLS
versions below 1.1 are not permitted. Session tickets are disabled.
Level 4:
Security level set to 192 bits of security. As a result RSA, DSA and DH keys shorter
than 7680 bits and ECC keys shorter than 384 bits are prohibited. Cipher suites using
SHA1 for the MAC are prohibited. TLS versions below 1.2 are not permitted.
Level 5:
Security level set to 256 bits of security. As a result RSA, DSA and DH keys shorter
than 15360 bits and ECC keys shorter than 512 bits are prohibited.
"""
def __init__(self, cipher_list: Optional[str] = None, security_level: int = 0, *args, **kwargs):
if cipher_list:
if not isinstance(cipher_list, str):
raise TypeError(f"Expected cipher_list to be a str, not {cipher_list!r}")
if "@SECLEVEL" in cipher_list:
raise ValueError("You must not specify the Security Level manually in the cipher list.")
if not isinstance(security_level, int):
raise TypeError(f"Expected security_level to be an int, not {security_level!r}")
if security_level not in range(6):
raise ValueError(f"The security_level must be a value between 0 and 5, not {security_level}")
if not cipher_list:
            # CPython's default cipher list differs from the one requests ships with
cipher_list = "DEFAULT"
cipher_list += f":@SECLEVEL={security_level}"
ctx = ssl.create_default_context()
        ctx.check_hostname = False  # needed when handing a custom context to urllib3, otherwise hostname verification errors occur
ctx.set_ciphers(cipher_list)
self._ssl_context = ctx
super().__init__(*args, **kwargs)
def init_poolmanager(self, *args, **kwargs):
kwargs["ssl_context"] = self._ssl_context
return super().init_poolmanager(*args, **kwargs)
def proxy_manager_for(self, *args, **kwargs):
kwargs["ssl_context"] = self._ssl_context
return super().proxy_manager_for(*args, **kwargs)
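# Usage sketch: mount the adapter on a requests Session so all HTTPS traffic
# uses the custom TLS settings. The URL and security level are illustrative.
if __name__ == "__main__":
    import requests

    session = requests.Session()
    session.mount("https://", SSLCiphers(security_level=2))
    print(session.get("https://example.com").status_code)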

View File

@@ -0,0 +1,25 @@
import json
import subprocess
from pathlib import Path
from typing import Union
from unshackle.core import binaries
def ffprobe(uri: Union[bytes, Path]) -> dict:
"""Use ffprobe on the provided data to get stream information."""
if not binaries.FFProbe:
raise EnvironmentError('FFProbe executable "ffprobe" not found but is required.')
args = [binaries.FFProbe, "-v", "quiet", "-of", "json", "-show_streams"]
if isinstance(uri, Path):
args.extend(
["-f", "lavfi", "-i", "movie={}[out+subcc]".format(str(uri).replace("\\", "/").replace(":", "\\\\:"))]
)
elif isinstance(uri, bytes):
args.append("pipe:")
try:
ff = subprocess.run(args, input=uri if isinstance(uri, bytes) else None, check=True, capture_output=True)
except subprocess.CalledProcessError:
return {}
return json.loads(ff.stdout.decode("utf8"))
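# Usage sketch (the path is illustrative): list the codec of every probed stream.
if __name__ == "__main__":
    for stream in ffprobe(Path("video.mkv")).get("streams", []):
        print(stream.get("codec_type"), stream.get("codec_name"))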

View File

@@ -0,0 +1,279 @@
from __future__ import annotations
import logging
import os
import re
import shutil
import subprocess
import tempfile
from difflib import SequenceMatcher
from pathlib import Path
from typing import Optional, Tuple
from xml.sax.saxutils import escape
import requests
from unshackle.core.config import config
from unshackle.core.titles.episode import Episode
from unshackle.core.titles.movie import Movie
from unshackle.core.titles.title import Title
STRIP_RE = re.compile(r"[^a-z0-9]+", re.I)
YEAR_RE = re.compile(r"\s*\(?[12][0-9]{3}\)?$")
HEADERS = {"User-Agent": "unshackle-tags/1.0"}
log = logging.getLogger("TAGS")
def _api_key() -> Optional[str]:
return config.tmdb_api_key or os.getenv("TMDB_API_KEY")
def _clean(s: str) -> str:
return STRIP_RE.sub("", s).lower()
def _strip_year(s: str) -> str:
return YEAR_RE.sub("", s).strip()
def fuzzy_match(a: str, b: str, threshold: float = 0.8) -> bool:
"""Return True if ``a`` and ``b`` are a close match."""
ratio = SequenceMatcher(None, _clean(a), _clean(b)).ratio()
return ratio >= threshold
def search_tmdb(title: str, year: Optional[int], kind: str) -> Tuple[Optional[int], Optional[str]]:
api_key = _api_key()
if not api_key:
return None, None
search_title = _strip_year(title)
log.debug("Searching TMDB for %r (%s, %s)", search_title, kind, year)
params = {"api_key": api_key, "query": search_title}
if year is not None:
params["year" if kind == "movie" else "first_air_date_year"] = year
r = requests.get(
f"https://api.themoviedb.org/3/search/{kind}",
params=params,
headers=HEADERS,
timeout=30,
)
r.raise_for_status()
js = r.json()
results = js.get("results") or []
log.debug("TMDB returned %d results", len(results))
if not results:
return None, None
best_ratio = 0.0
best_id: Optional[int] = None
best_title: Optional[str] = None
for result in results:
candidates = [
result.get("title"),
result.get("name"),
result.get("original_title"),
result.get("original_name"),
]
candidates = [c for c in candidates if c] # Filter out None/empty values
if not candidates:
continue
# Find the best matching candidate from all available titles
for candidate in candidates:
ratio = SequenceMatcher(None, _clean(search_title), _clean(candidate)).ratio()
if ratio > best_ratio:
best_ratio = ratio
best_id = result.get("id")
best_title = candidate
log.debug(
"Best candidate ratio %.2f for %r (ID %s)",
best_ratio,
best_title,
best_id,
)
if best_id is not None:
return best_id, best_title
first = results[0]
return first.get("id"), first.get("title") or first.get("name")
def get_title(tmdb_id: int, kind: str) -> Optional[str]:
"""Fetch the name/title of a TMDB entry by ID."""
api_key = _api_key()
if not api_key:
return None
try:
r = requests.get(
f"https://api.themoviedb.org/3/{kind}/{tmdb_id}",
params={"api_key": api_key},
headers=HEADERS,
timeout=30,
)
r.raise_for_status()
except requests.RequestException as exc:
log.debug("Failed to fetch TMDB title: %s", exc)
return None
js = r.json()
return js.get("title") or js.get("name")
def get_year(tmdb_id: int, kind: str) -> Optional[int]:
"""Fetch the release year of a TMDB entry by ID."""
api_key = _api_key()
if not api_key:
return None
try:
r = requests.get(
f"https://api.themoviedb.org/3/{kind}/{tmdb_id}",
params={"api_key": api_key},
headers=HEADERS,
timeout=30,
)
r.raise_for_status()
except requests.RequestException as exc:
log.debug("Failed to fetch TMDB year: %s", exc)
return None
js = r.json()
date = js.get("release_date") or js.get("first_air_date")
if date and len(date) >= 4 and date[:4].isdigit():
return int(date[:4])
return None
def external_ids(tmdb_id: int, kind: str) -> dict:
api_key = _api_key()
if not api_key:
return {}
url = f"https://api.themoviedb.org/3/{kind}/{tmdb_id}/external_ids"
log.debug("Fetching external IDs for %s %s", kind, tmdb_id)
r = requests.get(
url,
params={"api_key": api_key},
headers=HEADERS,
timeout=30,
)
r.raise_for_status()
js = r.json()
log.debug("External IDs response: %s", js)
return js
def _apply_tags(path: Path, tags: dict[str, str]) -> None:
if not tags:
return
mkvpropedit = shutil.which("mkvpropedit")
if not mkvpropedit:
log.debug("mkvpropedit not found on PATH; skipping tags")
return
log.debug("Applying tags to %s: %s", path, tags)
xml_lines = ["<?xml version='1.0' encoding='UTF-8'?>", "<Tags>", " <Tag>", " <Targets/>"]
for name, value in tags.items():
xml_lines.append(f" <Simple><Name>{name}</Name><String>{value}</String></Simple>")
xml_lines.extend([" </Tag>", "</Tags>"])
with tempfile.NamedTemporaryFile("w", suffix=".xml", delete=False) as f:
f.write("\n".join(xml_lines))
tmp_path = Path(f.name)
try:
subprocess.run(
[mkvpropedit, str(path), "--tags", f"global:{tmp_path}"],
check=False,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
log.debug("Tags applied via mkvpropedit")
finally:
tmp_path.unlink(missing_ok=True)
def tag_file(path: Path, title: Title, tmdb_id: Optional[int] | None = None) -> None:
log.debug("Tagging file %s with title %r", path, title)
standard_tags: dict[str, str] = {}
custom_tags: dict[str, str] = {}
# To add custom information to the tags
# custom_tags["Text to the left side"] = "Text to the right side"
if config.tag:
custom_tags["Group"] = config.tag
description = getattr(title, "description", None)
if description:
if len(description) > 255:
truncated = description[:255]
if " " in truncated:
truncated = truncated.rsplit(" ", 1)[0]
description = truncated + "..."
custom_tags["Description"] = description
api_key = _api_key()
if not api_key:
log.debug("No TMDB API key set; applying basic tags only")
_apply_tags(path, custom_tags)
return
if isinstance(title, Movie):
kind = "movie"
name = title.name
year = title.year
elif isinstance(title, Episode):
kind = "tv"
name = title.title
year = title.year
else:
_apply_tags(path, custom_tags)
return
tmdb_title: Optional[str] = None
if tmdb_id is None:
tmdb_id, tmdb_title = search_tmdb(name, year, kind)
log.debug("Search result: %r (ID %s)", tmdb_title, tmdb_id)
if not tmdb_id or not tmdb_title or not fuzzy_match(tmdb_title, name):
log.debug("TMDB search did not match; skipping external ID lookup")
_apply_tags(path, custom_tags)
return
tmdb_url = f"https://www.themoviedb.org/{'movie' if kind == 'movie' else 'tv'}/{tmdb_id}"
standard_tags["TMDB"] = tmdb_url
try:
ids = external_ids(tmdb_id, kind)
except requests.RequestException as exc:
log.debug("Failed to fetch external IDs: %s", exc)
ids = {}
else:
log.debug("External IDs found: %s", ids)
imdb_id = ids.get("imdb_id")
if imdb_id:
standard_tags["IMDB"] = f"https://www.imdb.com/title/{imdb_id}"
tvdb_id = ids.get("tvdb_id")
if tvdb_id:
tvdb_prefix = "movies" if kind == "movie" else "series"
standard_tags["TVDB"] = f"https://thetvdb.com/dereferrer/{tvdb_prefix}/{tvdb_id}"
merged_tags = {
**custom_tags,
**standard_tags,
}
_apply_tags(path, merged_tags)
__all__ = [
"search_tmdb",
"get_title",
"get_year",
"external_ids",
"tag_file",
"fuzzy_match",
]
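# Usage sketch of the lookup helpers, assuming a TMDB API key is configured;
# the title and year are arbitrary examples.
if __name__ == "__main__":
    tmdb_id, matched = search_tmdb("Inception", 2010, "movie")
    if tmdb_id and matched and fuzzy_match(matched, "Inception"):
        print(matched, external_ids(tmdb_id, "movie").get("imdb_id"))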

View File

@@ -0,0 +1,192 @@
import re
import sys
import typing
from typing import Optional
from pycaption import Caption, CaptionList, CaptionNode, CaptionReadError, WebVTTReader, WebVTTWriter
class CaptionListExt(CaptionList):
@typing.no_type_check
def __init__(self, iterable=None, layout_info=None):
self.first_segment_mpegts = 0
super().__init__(iterable, layout_info)
class CaptionExt(Caption):
@typing.no_type_check
def __init__(self, start, end, nodes, style=None, layout_info=None, segment_index=0, mpegts=0, cue_time=0.0):
style = style or {}
self.segment_index: int = segment_index
self.mpegts: float = mpegts
self.cue_time: float = cue_time
super().__init__(start, end, nodes, style, layout_info)
class WebVTTReaderExt(WebVTTReader):
# HLS extension support <https://datatracker.ietf.org/doc/html/rfc8216#section-3.5>
RE_TIMESTAMP_MAP = re.compile(r"X-TIMESTAMP-MAP.*")
RE_MPEGTS = re.compile(r"MPEGTS:(\d+)")
RE_LOCAL = re.compile(r"LOCAL:((?:(\d{1,}):)?(\d{2}):(\d{2})\.(\d{3}))")
def _parse(self, lines: list[str]) -> CaptionList:
captions = CaptionListExt()
start = None
end = None
nodes: list[CaptionNode] = []
layout_info = None
found_timing = False
segment_index = -1
mpegts = 0
cue_time = 0.0
        # The first segment's MPEGTS is needed to calculate the rest. The first segment may
        # contain no cue and be ignored by pycaption, so this acts as a fallback.
captions.first_segment_mpegts = 0
for i, line in enumerate(lines):
if "-->" in line:
found_timing = True
timing_line = i
last_start_time = captions[-1].start if captions else 0
try:
start, end, layout_info = self._parse_timing_line(line, last_start_time)
except CaptionReadError as e:
new_msg = f"{e.args[0]} (line {timing_line})"
tb = sys.exc_info()[2]
raise type(e)(new_msg).with_traceback(tb) from None
elif "" == line:
if found_timing and nodes:
found_timing = False
caption = CaptionExt(
start,
end,
nodes,
layout_info=layout_info,
segment_index=segment_index,
mpegts=mpegts,
cue_time=cue_time,
)
captions.append(caption)
nodes = []
elif "WEBVTT" in line:
# Merged segmented VTT doesn't have index information, track manually.
segment_index += 1
mpegts = 0
cue_time = 0.0
elif m := self.RE_TIMESTAMP_MAP.match(line):
if r := self.RE_MPEGTS.search(m.group()):
mpegts = int(r.group(1))
cue_time = self._parse_local(m.group())
# Early assignment in case the first segment contains no cue.
if segment_index == 0:
captions.first_segment_mpegts = mpegts
else:
if found_timing:
if nodes:
nodes.append(CaptionNode.create_break())
nodes.append(CaptionNode.create_text(self._decode(line)))
else:
# it's a comment or some metadata; ignore it
pass
# Add a last caption if there are remaining nodes
if nodes:
caption = CaptionExt(start, end, nodes, layout_info=layout_info, segment_index=segment_index, mpegts=mpegts)
captions.append(caption)
return captions
@staticmethod
def _parse_local(string: str) -> float:
"""
Parse WebVTT LOCAL time and convert it to seconds.
"""
m = WebVTTReaderExt.RE_LOCAL.search(string)
if not m:
return 0
parsed = m.groups()
if not parsed:
return 0
        hours = int(parsed[1]) if parsed[1] else 0  # the hours group is optional in LOCAL timestamps
minutes = int(parsed[2])
seconds = int(parsed[3])
milliseconds = int(parsed[4])
return (milliseconds / 1000) + seconds + (minutes * 60) + (hours * 3600)
def merge_segmented_webvtt(vtt_raw: str, segment_durations: Optional[list[int]] = None, timescale: int = 1) -> str:
"""
Merge Segmented WebVTT data.
Parameters:
vtt_raw: The concatenated WebVTT files to merge. All WebVTT headers must be
appropriately spaced apart, or it may produce unwanted effects like
considering headers as captions, timestamp lines, etc.
segment_durations: A list of each segment's duration. If not provided it will try
to get it from the X-TIMESTAMP-MAP headers, specifically the MPEGTS number.
timescale: The number of time units per second.
This parses the X-TIMESTAMP-MAP data to compute new absolute timestamps, replacing
the old start and end timestamp values. All X-TIMESTAMP-MAP header information will
be removed from the output as they are no longer of concern. Consider this function
the opposite of a WebVTT Segmenter, a WebVTT Joiner of sorts.
Algorithm borrowed from N_m3u8DL-RE and shaka-player.
"""
MPEG_TIMESCALE = 90_000
vtt = WebVTTReaderExt().read(vtt_raw)
for lang in vtt.get_languages():
prev_caption = None
duplicate_index: list[int] = []
captions = vtt.get_captions(lang)
if captions[0].segment_index == 0:
first_segment_mpegts = captions[0].mpegts
else:
first_segment_mpegts = segment_durations[0] if segment_durations else captions.first_segment_mpegts
caption: CaptionExt
for i, caption in enumerate(captions):
# DASH WebVTT doesn't have MPEGTS timestamp like HLS. Instead,
# calculate the timestamp from SegmentTemplate/SegmentList duration.
likely_dash = first_segment_mpegts == 0 and caption.mpegts == 0
if likely_dash and segment_durations:
duration = segment_durations[caption.segment_index]
caption.mpegts = MPEG_TIMESCALE * (duration / timescale)
if caption.mpegts == 0:
continue
            # Commented out to fix DSNP subtitles being out of sync and mistimed.
# seconds = (caption.mpegts - first_segment_mpegts) / MPEG_TIMESCALE - caption.cue_time
# offset = seconds * 1_000_000 # pycaption use microseconds
# if caption.start < offset:
# caption.start += offset
# caption.end += offset
# If the difference between current and previous captions is <=1ms
# and the payload is equal then splice.
if (
prev_caption
and not caption.is_empty()
and (caption.start - prev_caption.end) <= 1000 # 1ms in microseconds
and caption.get_text() == prev_caption.get_text()
):
prev_caption.end = caption.end
duplicate_index.append(i)
prev_caption = caption
# Remove duplicate
captions[:] = [c for c_index, c in enumerate(captions) if c_index not in set(duplicate_index)]
return WebVTTWriter().write(vtt)
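# Usage sketch: concatenate the raw text of every downloaded VTT segment and
# merge it into one continuous track. File names are illustrative.
if __name__ == "__main__":
    segments = [open(f"seg_{i}.vtt", encoding="utf8").read() for i in range(3)]
    with open("merged.vtt", "w", encoding="utf8") as f:
        f.write(merge_segmented_webvtt("\n\n".join(segments)))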

View File

@@ -0,0 +1,24 @@
from typing import Union
from lxml import etree
from lxml.etree import ElementTree
def load_xml(xml: Union[str, bytes]) -> ElementTree:
"""Safely parse XML data to an ElementTree, without namespaces in tags."""
if not isinstance(xml, bytes):
xml = xml.encode("utf8")
root = etree.fromstring(xml)
    for elem in root.iter():  # getiterator() is deprecated in lxml, iter() is the modern equivalent
        if not hasattr(elem.tag, "find"):
            # e.g. comment elements
            continue
        elem.tag = etree.QName(elem).localname
        for name, value in list(elem.attrib.items()):  # copy, as attrib is mutated below
local_name = etree.QName(name).localname
if local_name == name:
continue
del elem.attrib[name]
elem.attrib[local_name] = value
etree.cleanup_namespaces(root)
return root
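# Usage sketch: namespaced tags become plain local names (the MPD sample is illustrative).
if __name__ == "__main__":
    mpd = load_xml('<MPD xmlns="urn:mpeg:dash:schema:mpd:2011"><Period/></MPD>')
    print(mpd.tag)             # "MPD"
    print(mpd.find("Period"))  # found without any namespace prefix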

48
unshackle/core/vault.py Normal file
View File

@@ -0,0 +1,48 @@
from abc import ABCMeta, abstractmethod
from typing import Iterator, Optional, Union
from uuid import UUID
class Vault(metaclass=ABCMeta):
def __init__(self, name: str):
self.name = name
def __str__(self) -> str:
return f"{self.name} {type(self).__name__}"
@abstractmethod
def get_key(self, kid: Union[UUID, str], service: str) -> Optional[str]:
"""
Get Key from Vault by KID (Key ID) and Service.
        It does not get the Key by PSSH, as a PSSH can differ depending on its implementation
        or even how it was crafted. Some PSSH values may also actually be a CENC header rather
        than a PSSH MP4 box, which makes the value even less consistent. The KID, however, only
        changes when the video file itself changes (in which case the old key would no longer
        work anyway), which makes the KID the more reliable value to match on.
"""
@abstractmethod
def get_keys(self, service: str) -> Iterator[tuple[str, str]]:
"""Get All Keys from Vault by Service."""
@abstractmethod
def add_key(self, service: str, kid: Union[UUID, str], key: str) -> bool:
"""Add KID:KEY to the Vault."""
@abstractmethod
def add_keys(self, service: str, kid_keys: dict[Union[UUID, str], str]) -> int:
"""
Add Multiple Content Keys with Key IDs for Service to the Vault.
Pre-existing Content Keys are ignored/skipped.
Raises PermissionError if the user has no permission to create the table.
"""
@abstractmethod
def get_services(self) -> Iterator[str]:
"""Get a list of Service Tags from Vault."""
__all__ = ("Vault",)

69
unshackle/core/vaults.py Normal file
View File

@@ -0,0 +1,69 @@
from typing import Any, Iterator, Optional, Union
from uuid import UUID
from unshackle.core.config import config
from unshackle.core.utilities import import_module_by_path
from unshackle.core.vault import Vault
_VAULTS = sorted(
(path for path in config.directories.vaults.glob("*.py") if path.stem.lower() != "__init__"), key=lambda x: x.stem
)
_MODULES = {path.stem: getattr(import_module_by_path(path), path.stem) for path in _VAULTS}
class Vaults:
"""Keeps hold of Key Vaults with convenience functions, e.g. searching all vaults."""
def __init__(self, service: Optional[str] = None):
self.service = service or ""
self.vaults = []
def __iter__(self) -> Iterator[Vault]:
return iter(self.vaults)
def __len__(self) -> int:
return len(self.vaults)
def load(self, type_: str, **kwargs: Any) -> None:
"""Load a Vault into the vaults list."""
module = _MODULES.get(type_)
if not module:
raise ValueError(f"Unable to find vault command by the name '{type_}'.")
vault = module(**kwargs)
self.vaults.append(vault)
def get_key(self, kid: Union[UUID, str]) -> tuple[Optional[str], Optional[Vault]]:
"""Get Key from the first Vault it can by KID (Key ID) and Service."""
for vault in self.vaults:
key = vault.get_key(kid, self.service)
if key and key.count("0") != len(key):
return key, vault
return None, None
def add_key(self, kid: Union[UUID, str], key: str, excluding: Optional[Vault] = None) -> int:
"""Add a KID:KEY to all Vaults, optionally with an exclusion."""
success = 0
for vault in self.vaults:
if vault != excluding:
try:
success += vault.add_key(self.service, kid, key)
except (PermissionError, NotImplementedError):
pass
return success
def add_keys(self, kid_keys: dict[Union[UUID, str], str]) -> int:
"""
Add multiple KID:KEYs to all Vaults. Duplicate Content Keys are skipped.
PermissionErrors when the user cannot create Tables are absorbed and ignored.
"""
success = 0
for vault in self.vaults:
try:
success += bool(vault.add_keys(self.service, kid_keys))
except (PermissionError, NotImplementedError):
pass
return success
__all__ = ("Vaults",)
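# Usage sketch: load a vault and query it. The vault type and path mirror the
# example config; the KID is an arbitrary hex value.
if __name__ == "__main__":
    vaults = Vaults(service="EXAMPLE")
    vaults.load("SQLite", name="Local", path="key_store.db")
    key, vault = vaults.get_key("9eb4050de44b4802961e8a573d31fe20")
    if key:
        # re-cache the key into every other loaded vault
        vaults.add_key("9eb4050de44b4802961e8a573d31fe20", key, excluding=vault)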

View File

@@ -0,0 +1,269 @@
import base64
import hashlib
import json
import re
from collections.abc import Generator
from datetime import datetime
from http.cookiejar import CookieJar
from typing import Optional, Union
import click
from langcodes import Language
from unshackle.core.constants import AnyTrack
from unshackle.core.credential import Credential
from unshackle.core.manifests import DASH
from unshackle.core.search_result import SearchResult
from unshackle.core.service import Service
from unshackle.core.titles import Episode, Movie, Movies, Series, Title_T, Titles_T
from unshackle.core.tracks import Chapter, Subtitle, Tracks
class EXAMPLE(Service):
"""
Service code for domain.com
Version: 1.0.0
Authorization: Cookies
Security: FHD@L3
Use full URL (for example - https://domain.com/details/20914) or title ID (for example - 20914).
"""
TITLE_RE = r"^(?:https?://?domain\.com/details/)?(?P<title_id>[^/]+)"
GEOFENCE = ("US", "UK")
@staticmethod
@click.command(name="EXAMPLE", short_help="https://domain.com")
@click.argument("title", type=str)
@click.option("-m", "--movie", is_flag=True, default=False, help="Specify if it's a movie")
@click.option("-d", "--device", type=str, default="android_tv", help="Select device from the config file")
@click.pass_context
def cli(ctx, **kwargs):
return EXAMPLE(ctx, **kwargs)
def __init__(self, ctx, title, movie, device):
super().__init__(ctx)
self.title = title
self.movie = movie
self.device = device
if self.config is None:
raise Exception("Config is missing!")
else:
profile_name = ctx.parent.params.get("profile")
if profile_name is None:
profile_name = "default"
self.profile = profile_name
def authenticate(self, cookies: Optional[CookieJar] = None, credential: Optional[Credential] = None) -> None:
super().authenticate(cookies, credential)
if not cookies:
raise EnvironmentError("Service requires Cookies for Authentication.")
        jwt_token = next((cookie.value for cookie in cookies if cookie.name == "streamco_token"), None)
        if not jwt_token:
            raise EnvironmentError("Required 'streamco_token' cookie was not found.")
        payload = json.loads(base64.urlsafe_b64decode(jwt_token.split(".")[1] + "==").decode("utf-8"))
profile_id = payload.get("profileId", None)
self.session.headers.update({"user-agent": self.config["client"][self.device]["user_agent"]})
cache = self.cache.get(f"tokens_{self.device}_{self.profile}")
if cache:
if cache.data["expires_in"] > int(datetime.now().timestamp()):
self.log.info("Using cached tokens")
else:
self.log.info("Refreshing tokens")
refresh = self.session.post(
url=self.config["endpoints"]["refresh"], data={"refresh_token": cache.data["refresh_data"]}
).json()
cache.set(data=refresh)
else:
self.log.info("Retrieving new tokens")
token = self.session.post(
url=self.config["endpoints"]["login"],
data={
"token": jwt_token,
"profileId": profile_id,
},
).json()
cache.set(data=token)
self.token = cache.data["token"]
self.user_id = cache.data["userId"]
def search(self) -> Generator[SearchResult, None, None]:
search = self.session.get(
url=self.config["endpoints"]["search"], params={"q": self.title, "token": self.token}
).json()
for result in search["entries"]:
yield SearchResult(
id_=result["id"],
title=result["title"],
label="SERIES" if result["programType"] == "series" else "MOVIE",
url=result["url"],
)
def get_titles(self) -> Titles_T:
        title_match = re.match(self.TITLE_RE, self.title)
        if not title_match:
            raise ValueError(f"Could not parse a title ID from {self.title!r}")
        self.title = title_match.group("title_id")
metadata = self.session.get(
url=self.config["endpoints"]["metadata"].format(title_id=self.title), params={"token": self.token}
).json()
if metadata["programType"] == "movie":
self.movie = True
if self.movie:
return Movies(
[
Movie(
id_=metadata["id"],
service=self.__class__,
name=metadata["title"],
description=metadata["description"],
year=metadata["releaseYear"] if metadata["releaseYear"] > 0 else None,
language=Language.find(metadata["languages"][0]),
data=metadata,
)
]
)
else:
episodes = []
for season in metadata["seasons"]:
if "Trailers" not in season["title"]:
season_data = self.session.get(url=season["url"], params={"token": self.token}).json()
for episode in season_data["entries"]:
episodes.append(
Episode(
id_=episode["id"],
service=self.__class__,
title=metadata["title"],
season=episode["season"],
number=episode["episode"],
name=episode["title"],
description=episode["description"],
year=metadata["releaseYear"] if metadata["releaseYear"] > 0 else None,
language=Language.find(metadata["languages"][0]),
data=episode,
)
)
return Series(episodes)
def get_tracks(self, title: Title_T) -> Tracks:
streams = self.session.post(
url=self.config["endpoints"]["streams"],
params={
"token": self.token,
"guid": title.id,
},
data={
"type": self.config["client"][self.device]["type"],
},
).json()["media"]
self.license = {
"url": streams["drm"]["url"],
"data": streams["drm"]["data"],
"session": streams["drm"]["session"],
}
manifest_url = streams["url"].split("?")[0]
self.log.debug(f"Manifest URL: {manifest_url}")
tracks = DASH.from_url(url=manifest_url, session=self.session).to_tracks(language=title.language)
# Remove DRM-free ("clear") audio tracks
tracks.audio = [
track for track in tracks.audio if "clear" not in track.data["dash"]["representation"].get("id")
]
for track in tracks.audio:
if track.channels == 6.0:
track.channels = 5.1
track_label = track.data["dash"]["adaptation_set"].get("label")
if track_label and "Audio Description" in track_label:
track.descriptive = True
tracks.subtitles.clear()
if streams.get("captions"):
for subtitle in streams["captions"]:
tracks.add(
Subtitle(
id_=hashlib.md5(subtitle["url"].encode()).hexdigest()[0:6],
url=subtitle["url"],
codec=Subtitle.Codec.from_mime("vtt"),
language=Language.get(subtitle["language"]),
# cc=True if '(cc)' in subtitle['name'] else False,
sdh=True,
)
)
if not self.movie:
title.data["chapters"] = self.session.get(
url=self.config["endpoints"]["metadata"].format(title_id=title.id), params={"token": self.token}
).json()["chapters"]
return tracks
def get_chapters(self, title: Title_T) -> list[Chapter]:
chapters = []
if title.data.get("chapters", []):
for chapter in title.data["chapters"]:
if chapter["name"] == "Intro":
chapters.append(Chapter(timestamp=chapter["start"], name="Opening"))
chapters.append(Chapter(timestamp=chapter["end"]))
if chapter["name"] == "Credits":
chapters.append(Chapter(timestamp=chapter["start"], name="Credits"))
return chapters
def get_playready_license(self, *, challenge: bytes, title: Title_T, track: AnyTrack) -> Optional[bytes]:
"""Retrieve a PlayReady license for a given track."""
license_url = self.config["endpoints"].get("playready_license")
if not license_url:
raise ValueError("PlayReady license endpoint not configured")
response = self.session.post(
url=license_url,
data=challenge,
headers={
"user-agent": self.config["client"][self.device]["license_user_agent"],
},
)
response.raise_for_status()
return response.content
def get_widevine_license(self, *, challenge: bytes, title: Title_T, track: AnyTrack) -> Optional[Union[bytes, str]]:
license_url = self.license.get("url") or self.config["endpoints"].get("widevine_license")
if not license_url:
raise ValueError("Widevine license endpoint not configured")
response = self.session.post(
url=license_url,
data=challenge,
params={
"session": self.license.get("session"),
"userId": self.user_id,
},
headers={
"dt-custom-data": self.license.get("data"),
"user-agent": self.config["client"][self.device]["license_user_agent"],
},
)
response.raise_for_status()
try:
return response.json().get("license")
except ValueError:
return response.content

View File

@@ -0,0 +1,12 @@
endpoints:
login: https://api.domain.com/v1/login
metadata: https://api.domain.com/v1/metadata/{title_id}.json
streams: https://api.domain.com/v1/streams
playready_license: https://api.domain.com/v1/license/playready
widevine_license: https://api.domain.com/v1/license/widevine
client:
android_tv:
user_agent: USER_AGENT
license_user_agent: LICENSE_USER_AGENT
type: DATA

168
unshackle/unshackle.yaml Normal file
View File

@@ -0,0 +1,168 @@
# Group or Username to postfix to the end of all download filenames following a dash
tag: user_tag
# Set terminal background color (custom option not in CONFIG.md)
set_terminal_bg: false
# Muxing configuration
muxing:
set_title: false
# Login credentials for each Service
credentials:
EXAMPLE: email@example.com:password
EXAMPLE2: username:password
# Override default directories used across unshackle
directories:
cache: Cache
cookies: Cookies
dcsl: DCSL # Device Certificate Status List
downloads: Downloads
logs: Logs
temp: Temp
wvds: WVDs
prds: PRDs
# Additional directories that can be configured:
# commands: Commands
# services: Services
# vaults: Vaults
# fonts: Fonts
# Pre-define which Widevine or PlayReady device to use for each Service
cdm:
default: WVD_1
EXAMPLE: PRD_1
# Use pywidevine Serve-compliant Remote CDMs
remote_cdm:
- name: "chrome"
device_name: chrome
device_type: CHROME
system_id: 27175
security_level: 3
host: https://domain.com/api
secret: secret_key
- name: "chrome-2"
device_name: chrome
device_type: CHROME
system_id: 26830
security_level: 3
host: https://domain-2.com/api
secret: secret_key
# Key Vaults store your obtained Content Encryption Keys (CEKs)
key_vaults:
- type: SQLite
name: Local
path: key_store.db
# Additional vault types:
# - type: API
# name: "Remote Vault"
# uri: "https://key-vault.example.com"
# token: "secret_token"
# - type: MySQL
# name: "MySQL Vault"
# host: "127.0.0.1"
# port: 3306
# database: vault
# username: user
# password: pass
# Choose what software to use to download data
downloader: aria2c
# Options: requests | aria2c | curl_impersonate | n_m3u8dl_re
# Can also be a mapping:
# downloader:
# NF: requests
# AMZN: n_m3u8dl_re
# DSNP: n_m3u8dl_re
# default: requests
# aria2c downloader configuration
aria2c:
max_concurrent_downloads: 4
max_connection_per_server: 3
split: 5
file_allocation: falloc # none | prealloc | falloc | trunc
# N_m3u8DL-RE downloader configuration
n_m3u8dl_re:
thread_count: 16
ad_keyword: "advertisement"
use_proxy: true
# curl_impersonate downloader configuration
curl_impersonate:
browser: chrome120
# Pre-define default options and switches of the dl command
dl:
best: true
sub_format: srt
downloads: 4
workers: 16
lang:
- en
- fr
EXAMPLE:
bitrate: CBR
# Chapter Name to use when exporting a Chapter without a Name
chapter_fallback_name: "Chapter {j:02}"
# Case-Insensitive dictionary of headers for all Services
headers:
Accept-Language: "en-US,en;q=0.8"
User-Agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36"
# Override default filenames used across unshackle
filenames:
log: "unshackle_{name}_{time}.log"
config: "config.yaml"
root_config: "unshackle.yaml"
chapters: "Chapters_{title}_{random}.txt"
subtitle: "Subtitle_{id}_{language}.srt"
# API key for The Movie Database (TMDB)
tmdb_api_key: ""
# Configuration for pywidevine's serve functionality
serve:
users:
secret_key_for_user:
devices:
- generic_nexus_4464_l3
username: user
# devices:
# - '/path/to/device.wvd'
# Configuration data for each Service
services:
# Service-specific configuration goes here
# EXAMPLE:
# api_key: "service_specific_key"
# Legacy NordVPN configuration (use proxy_providers instead)
nordvpn:
username: ""
password: ""
servers:
- us: 12
# External proxy provider services
proxy_providers:
nordvpn:
username: username_from_service_credentials
password: password_from_service_credentials
servers:
- us: 12 # force US server #12 for US proxies
basic:
GB:
- "socks5://username:password@bhx.socks.ipvanish.com:1080" # 1 (Birmingham)
- "socks5://username:password@gla.socks.ipvanish.com:1080" # 2 (Glasgow)
AU:
- "socks5://username:password@syd.socks.ipvanish.com:1080" # 1 (Sydney)
- "https://username:password@au-syd.prod.surfshark.com" # 2 (Sydney)
- "https://username:password@au-bne.prod.surfshark.com" # 3 (Brisbane)
BG: "https://username:password@bg-sof.prod.surfshark.com"

118
unshackle/utils/base62.py Normal file
View File

@@ -0,0 +1,118 @@
# -*- coding: utf-8 -*-
"""
base62
~~~~~~
Originated from http://blog.suminb.com/archives/558
"""
__title__ = "base62"
__author__ = "Sumin Byeon"
__email__ = "suminb@gmail.com"
__version__ = "1.0.0"
BASE = 62
CHARSET_DEFAULT = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
CHARSET_INVERTED = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
def encode(n, charset=CHARSET_DEFAULT):
"""Encodes a given integer ``n``."""
chs = []
while n > 0:
n, r = divmod(n, BASE)
chs.insert(0, charset[r])
if not chs:
return "0"
return "".join(chs)
def encodebytes(barray, charset=CHARSET_DEFAULT):
"""Encodes a bytestring into a base62 string.
:param barray: A byte array
:type barray: bytes
:rtype: str
"""
_check_type(barray, bytes)
# Count the number of leading zeros.
leading_zeros_count = 0
for i in range(len(barray)):
if barray[i] != 0:
break
leading_zeros_count += 1
# Encode the leading zeros as "0" followed by a character indicating the count.
# This pattern may occur several times if there are many leading zeros.
n, r = divmod(leading_zeros_count, len(charset) - 1)
zero_padding = f"0{charset[-1]}" * n
if r:
zero_padding += f"0{charset[r]}"
# Special case: the input is empty, or is entirely null bytes.
if leading_zeros_count == len(barray):
return zero_padding
value = encode(int.from_bytes(barray, "big"), charset=charset)
return zero_padding + value
def decode(encoded, charset=CHARSET_DEFAULT):
"""Decodes a base62 encoded value ``encoded``.
:type encoded: str
:rtype: int
"""
_check_type(encoded, str)
length, i, v = len(encoded), 0, 0
for x in encoded:
v += _value(x, charset=charset) * (BASE ** (length - (i + 1)))
i += 1
return v
def decodebytes(encoded, charset=CHARSET_DEFAULT):
"""Decodes a string of base62 data into a bytes object.
:param encoded: A string to be decoded in base62
:type encoded: str
:rtype: bytes
"""
leading_null_bytes = b""
while encoded.startswith("0") and len(encoded) >= 2:
leading_null_bytes += b"\x00" * _value(encoded[1], charset)
encoded = encoded[2:]
decoded = decode(encoded, charset=charset)
buf = bytearray()
while decoded > 0:
buf.append(decoded & 0xFF)
decoded //= 256
buf.reverse()
return leading_null_bytes + bytes(buf)
def _value(ch, charset):
"""Decodes an individual digit of a base62 encoded string."""
try:
return charset.index(ch)
except ValueError:
raise ValueError("base62: Invalid character (%s)" % ch)
def _check_type(value, expected_type):
"""Checks if the input is in an appropriate type."""
if not isinstance(value, expected_type):
msg = "Expected {} object, not {}".format(expected_type, value.__class__.__name__)
raise TypeError(msg)
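# Round-trip sketch with arbitrary values:
if __name__ == "__main__":
    assert decode(encode(1234567890)) == 1234567890
    data = b"\x00\x00hello"
    assert decodebytes(encodebytes(data)) == data  # leading null bytes survive the round trip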

View File

@@ -0,0 +1,24 @@
import platform
def get_os_arch(name: str) -> str:
"""Builds a name-os-arch based on the input name, system, architecture."""
os_name = platform.system().lower()
os_arch = platform.machine().lower()
# Map platform.system() output to desired OS name
if os_name == "windows":
os_name = "win"
elif os_name == "darwin":
os_name = "osx"
else:
os_name = "linux"
# Map platform.machine() output to desired architecture
if os_arch in ["x86_64", "amd64"]:
os_arch = "x64"
elif os_arch == "arm64":
os_arch = "arm64"
# Construct the dependency name in the desired format using the input name
return f"{name}-{os_name}-{os_arch}"

184
unshackle/vaults/API.py Normal file
View File

@@ -0,0 +1,184 @@
from typing import Iterator, Optional, Union
from uuid import UUID
from requests import Session
from unshackle.core import __version__
from unshackle.core.vault import Vault
class API(Vault):
"""Key Vault using a simple RESTful HTTP API call."""
def __init__(self, name: str, uri: str, token: str):
super().__init__(name)
self.uri = uri.rstrip("/")
self.session = Session()
self.session.headers.update({"User-Agent": f"unshackle v{__version__}"})
self.session.headers.update({"Authorization": f"Bearer {token}"})
def get_key(self, kid: Union[UUID, str], service: str) -> Optional[str]:
if isinstance(kid, UUID):
kid = kid.hex
data = self.session.get(
url=f"{self.uri}/{service.lower()}/{kid}", headers={"Accept": "application/json"}
).json()
code = int(data.get("code", 0))
message = data.get("message")
error = {
0: None,
1: Exceptions.AuthRejected,
2: Exceptions.TooManyRequests,
3: Exceptions.ServiceTagInvalid,
4: Exceptions.KeyIdInvalid,
}.get(code, ValueError)
if error:
raise error(f"{message} ({code})")
content_key = data.get("content_key")
if not content_key:
return None
if not isinstance(content_key, str):
raise ValueError(f"Expected {content_key} to be {str}, was {type(content_key)}")
return content_key
def get_keys(self, service: str) -> Iterator[tuple[str, str]]:
page = 1
while True:
data = self.session.get(
url=f"{self.uri}/{service.lower()}",
params={"page": page, "total": 10},
headers={"Accept": "application/json"},
).json()
code = int(data.get("code", 0))
message = data.get("message")
error = {
0: None,
1: Exceptions.AuthRejected,
2: Exceptions.TooManyRequests,
3: Exceptions.PageInvalid,
4: Exceptions.ServiceTagInvalid,
}.get(code, ValueError)
if error:
raise error(f"{message} ({code})")
content_keys = data.get("content_keys")
if content_keys:
if not isinstance(content_keys, dict):
raise ValueError(f"Expected {content_keys} to be {dict}, was {type(content_keys)}")
for key_id, key in content_keys.items():
yield key_id, key
pages = int(data["pages"])
if pages <= page:
break
page += 1
def add_key(self, service: str, kid: Union[UUID, str], key: str) -> bool:
if isinstance(kid, UUID):
kid = kid.hex
data = self.session.post(
url=f"{self.uri}/{service.lower()}/{kid}", json={"content_key": key}, headers={"Accept": "application/json"}
).json()
code = int(data.get("code", 0))
message = data.get("message")
error = {
0: None,
1: Exceptions.AuthRejected,
2: Exceptions.TooManyRequests,
3: Exceptions.ServiceTagInvalid,
4: Exceptions.KeyIdInvalid,
5: Exceptions.ContentKeyInvalid,
}.get(code, ValueError)
if error:
raise error(f"{message} ({code})")
# the kid:key was new to the vault (optional)
added = bool(data.get("added"))
# the key for kid was changed/updated (optional)
updated = bool(data.get("updated"))
return added or updated
def add_keys(self, service: str, kid_keys: dict[Union[UUID, str], str]) -> int:
data = self.session.post(
url=f"{self.uri}/{service.lower()}",
json={"content_keys": {str(kid).replace("-", ""): key for kid, key in kid_keys.items()}},
headers={"Accept": "application/json"},
).json()
code = int(data.get("code", 0))
message = data.get("message")
error = {
0: None,
1: Exceptions.AuthRejected,
2: Exceptions.TooManyRequests,
3: Exceptions.ServiceTagInvalid,
4: Exceptions.KeyIdInvalid,
5: Exceptions.ContentKeyInvalid,
}.get(code, ValueError)
if error:
raise error(f"{message} ({code})")
# each kid:key that was new to the vault (optional)
        added = int(data.get("added") or 0)
        # each key for a kid that was changed/updated (optional)
        updated = int(data.get("updated") or 0)
return added + updated
def get_services(self) -> Iterator[str]:
data = self.session.post(url=self.uri, headers={"Accept": "application/json"}).json()
code = int(data.get("code", 0))
message = data.get("message")
error = {
0: None,
1: Exceptions.AuthRejected,
2: Exceptions.TooManyRequests,
}.get(code, ValueError)
if error:
raise error(f"{message} ({code})")
service_list = data.get("service_list", [])
if not isinstance(service_list, list):
raise ValueError(f"Expected {service_list} to be {list}, was {type(service_list)}")
for service in service_list:
yield service
class Exceptions:
class AuthRejected(Exception):
"""Authentication Error Occurred, is your token valid? Do you have permission to make this call?"""
class TooManyRequests(Exception):
"""Rate Limited; Sent too many requests in a given amount of time."""
class PageInvalid(Exception):
"""Requested page does not exist."""
class ServiceTagInvalid(Exception):
"""The Service Tag is invalid."""
class KeyIdInvalid(Exception):
"""The Key ID is invalid."""
class ContentKeyInvalid(Exception):
"""The Content Key is invalid."""

326
unshackle/vaults/HTTP.py Normal file
View File

@@ -0,0 +1,326 @@
import json
from enum import Enum
from typing import Iterator, Optional, Union
from uuid import UUID
from requests import Session
from unshackle.core import __version__
from unshackle.core.vault import Vault
class InsertResult(Enum):
FAILURE = 0
SUCCESS = 1
ALREADY_EXISTS = 2
class HTTP(Vault):
"""Key Vault using HTTP API with support for both query parameters and JSON payloads."""
def __init__(self, name: str, host: str, password: str, username: Optional[str] = None, api_mode: str = "query"):
"""
Initialize HTTP Vault.
Args:
name: Vault name
host: Host URL
password: Password for query mode or API token for json mode
username: Username (required for query mode, ignored for json mode)
api_mode: "query" for query parameters or "json" for JSON API
"""
super().__init__(name)
self.url = host.rstrip("/")
self.password = password
self.username = username
self.api_mode = api_mode.lower()
self.current_title = None
self.session = Session()
self.session.headers.update({"User-Agent": f"unshackle v{__version__}"})
self.api_session_id = None
# Validate configuration based on mode
if self.api_mode == "query" and not self.username:
raise ValueError("Username is required for query mode")
elif self.api_mode not in ["query", "json"]:
raise ValueError("api_mode must be either 'query' or 'json'")
    def request(self, method: str, params: Optional[dict] = None) -> dict:
"""Make a request to the JSON API vault."""
if self.api_mode != "json":
raise ValueError("request method is only available in json mode")
request_payload = {
"method": method,
"params": {
**(params or {}),
"session_id": self.api_session_id,
},
"token": self.password,
}
r = self.session.post(self.url, json=request_payload)
if r.status_code == 404:
return {"status": "not_found"}
if not r.ok:
raise ValueError(f"API returned HTTP Error {r.status_code}: {r.reason.title()}")
try:
res = r.json()
except json.JSONDecodeError:
if r.status_code == 404:
return {"status": "not_found"}
raise ValueError(f"API returned an invalid response: {r.text}")
if res.get("status_code") != 200:
raise ValueError(f"API returned an error: {res['status_code']} - {res['message']}")
if session_id := res.get("message", {}).get("session_id"):
self.api_session_id = session_id
return res.get("message", res)
def get_key(self, kid: Union[UUID, str], service: str) -> Optional[str]:
if isinstance(kid, UUID):
kid = kid.hex
if self.api_mode == "json":
try:
title = getattr(self, "current_title", None)
response = self.request(
"GetKey",
{
"kid": kid,
"service": service.lower(),
"title": title,
},
)
if response.get("status") == "not_found":
return None
keys = response.get("keys", [])
for key_entry in keys:
if key_entry["kid"] == kid:
return key_entry["key"]
except Exception as e:
print(f"Failed to get key ({e.__class__.__name__}: {e})")
return None
return None
else: # query mode
response = self.session.get(
self.url,
params={"service": service.lower(), "username": self.username, "password": self.password, "kid": kid},
)
data = response.json()
if data.get("status_code") != 200 or not data.get("keys"):
return None
return data["keys"][0]["key"]
def get_keys(self, service: str) -> Iterator[tuple[str, str]]:
if self.api_mode == "json":
# JSON API doesn't support getting all keys, so return empty iterator
# This will cause the copy command to rely on the API's internal duplicate handling
            return
else: # query mode
response = self.session.get(
self.url, params={"service": service.lower(), "username": self.username, "password": self.password}
)
data = response.json()
if data.get("status_code") != 200 or not data.get("keys"):
return
for key_entry in data["keys"]:
yield key_entry["kid"], key_entry["key"]
def add_key(self, service: str, kid: Union[UUID, str], key: str) -> bool:
if not key or key.count("0") == len(key):
raise ValueError("You cannot add a NULL Content Key to a Vault.")
if isinstance(kid, UUID):
kid = kid.hex
title = getattr(self, "current_title", None)
if self.api_mode == "json":
try:
response = self.request(
"InsertKey",
{
"kid": kid,
"key": key,
"service": service.lower(),
"title": title,
},
)
if response.get("status") == "not_found":
return False
return response.get("inserted", False)
except Exception:
return False
else: # query mode
response = self.session.get(
self.url,
params={
"service": service.lower(),
"username": self.username,
"password": self.password,
"kid": kid,
"key": key,
"title": title,
},
)
data = response.json()
return data.get("status_code") == 200
def add_keys(self, service: str, kid_keys: dict[Union[UUID, str], str]) -> int:
for kid, key in kid_keys.items():
if not key or key.count("0") == len(key):
raise ValueError("You cannot add a NULL Content Key to a Vault.")
processed_kid_keys = {
str(kid).replace("-", "") if isinstance(kid, UUID) else kid: key for kid, key in kid_keys.items()
}
inserted_count = 0
title = getattr(self, "current_title", None)
if self.api_mode == "json":
for kid, key in processed_kid_keys.items():
try:
response = self.request(
"InsertKey",
{
"kid": kid,
"key": key,
"service": service.lower(),
"title": title,
},
)
if response.get("status") == "not_found":
continue
if response.get("inserted", False):
inserted_count += 1
except Exception:
continue
else: # query mode
for kid, key in processed_kid_keys.items():
response = self.session.get(
self.url,
params={
"service": service.lower(),
"username": self.username,
"password": self.password,
"kid": kid,
"key": key,
"title": title,
},
)
data = response.json()
if data.get("status_code") == 200 and data.get("inserted", True):
inserted_count += 1
return inserted_count
def get_services(self) -> Iterator[str]:
if self.api_mode == "json":
try:
response = self.request("GetServices")
services = response.get("services", [])
for service in services:
yield service
except Exception:
                return
else: # query mode
response = self.session.get(
self.url, params={"username": self.username, "password": self.password, "list_services": True}
)
data = response.json()
if data.get("status_code") != 200:
return
services = data.get("services", [])
for service in services:
yield service
def set_title(self, title: str):
"""
Set a title to be used for the next key insertions.
This is optional and will be sent with add_key requests if available.
"""
self.current_title = title
def insert_key_with_result(
self, service: str, kid: Union[UUID, str], key: str, title: Optional[str] = None
) -> InsertResult:
"""
Insert a key and return detailed result information.
This method provides more granular feedback than the standard add_key method.
Available in both API modes.
"""
if not key or key.count("0") == len(key):
raise ValueError("You cannot add a NULL Content Key to a Vault.")
if isinstance(kid, UUID):
kid = kid.hex
if title is None:
title = getattr(self, "current_title", None)
if self.api_mode == "json":
try:
response = self.request(
"InsertKey",
{
"kid": kid,
"key": key,
"service": service.lower(),
"title": title,
},
)
if response.get("status") == "not_found":
return InsertResult.FAILURE
if response.get("inserted", False):
return InsertResult.SUCCESS
else:
return InsertResult.ALREADY_EXISTS
except Exception:
return InsertResult.FAILURE
else: # query mode
response = self.session.get(
self.url,
params={
"service": service.lower(),
"username": self.username,
"password": self.password,
"kid": kid,
"key": key,
"title": title,
},
)
try:
data = response.json()
if data.get("status_code") == 200:
if data.get("inserted", True):
return InsertResult.SUCCESS
else:
return InsertResult.ALREADY_EXISTS
else:
return InsertResult.FAILURE
except Exception:
return InsertResult.FAILURE
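# Usage sketch for the JSON mode; host, token, KID, and key are all placeholders.
if __name__ == "__main__":
    vault = HTTP(name="Remote", host="https://vault.example.com", password="api_token", api_mode="json")
    vault.set_title("Example Show S01E01")  # optional; attached to subsequent inserts
    result = vault.insert_key_with_result("EXAMPLE", "9eb4050de44b4802961e8a573d31fe20", "f" * 32)
    print(result)  # InsertResult.SUCCESS, ALREADY_EXISTS, or FAILURE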

244
unshackle/vaults/MySQL.py Normal file
View File

@@ -0,0 +1,244 @@
import threading
from typing import Iterator, Optional, Union
from uuid import UUID
import pymysql
from pymysql.cursors import DictCursor
from unshackle.core.services import Services
from unshackle.core.vault import Vault
class MySQL(Vault):
"""Key Vault using a remotely-accessed mysql database connection."""
def __init__(self, name: str, host: str, database: str, username: str, **kwargs):
"""
All extra arguments provided via **kwargs will be sent to pymysql.connect.
This can be used to provide more specific connection information.
"""
super().__init__(name)
self.slug = f"{host}:{database}:{username}"
self.conn_factory = ConnectionFactory(
dict(host=host, db=database, user=username, cursorclass=DictCursor, **kwargs)
)
self.permissions = self.get_permissions()
if not self.has_permission("SELECT"):
raise PermissionError(f"MySQL vault {self.slug} has no SELECT permission.")
def get_key(self, kid: Union[UUID, str], service: str) -> Optional[str]:
if not self.has_table(service):
# no table, no key, simple
return None
if isinstance(kid, UUID):
kid = kid.hex
conn = self.conn_factory.get()
cursor = conn.cursor()
try:
cursor.execute(
# TODO: SQL injection risk
f"SELECT `id`, `key_` FROM `{service}` WHERE `kid`=%s AND `key_`!=%s",
(kid, "0" * 32),
)
cek = cursor.fetchone()
if not cek:
return None
return cek["key_"]
finally:
cursor.close()
def get_keys(self, service: str) -> Iterator[tuple[str, str]]:
if not self.has_table(service):
# no table, no keys, simple
return None
conn = self.conn_factory.get()
cursor = conn.cursor()
try:
cursor.execute(
# TODO: SQL injection risk
f"SELECT `kid`, `key_` FROM `{service}` WHERE `key_`!=%s",
("0" * 32,),
)
for row in cursor.fetchall():
yield row["kid"], row["key_"]
finally:
cursor.close()
def add_key(self, service: str, kid: Union[UUID, str], key: str) -> bool:
if not key or key.count("0") == len(key):
raise ValueError("You cannot add a NULL Content Key to a Vault.")
if not self.has_permission("INSERT", table=service):
raise PermissionError(f"MySQL vault {self.slug} has no INSERT permission.")
if not self.has_table(service):
try:
self.create_table(service)
except PermissionError:
return False
if isinstance(kid, UUID):
kid = kid.hex
conn = self.conn_factory.get()
cursor = conn.cursor()
try:
cursor.execute(
# TODO: SQL injection risk
f"SELECT `id` FROM `{service}` WHERE `kid`=%s AND `key_`=%s",
(kid, key),
)
if cursor.fetchone():
# table already has this exact KID:KEY stored
return True
cursor.execute(
# TODO: SQL injection risk
f"INSERT INTO `{service}` (kid, key_) VALUES (%s, %s)",
(kid, key),
)
finally:
conn.commit()
cursor.close()
return True
def add_keys(self, service: str, kid_keys: dict[Union[UUID, str], str]) -> int:
for kid, key in kid_keys.items():
if not key or key.count("0") == len(key):
raise ValueError("You cannot add a NULL Content Key to a Vault.")
if not self.has_permission("INSERT", table=service):
raise PermissionError(f"MySQL vault {self.slug} has no INSERT permission.")
if not self.has_table(service):
try:
self.create_table(service)
except PermissionError:
return 0
if not isinstance(kid_keys, dict):
raise ValueError(f"The kid_keys provided is not a dictionary, {kid_keys!r}")
if not all(isinstance(kid, (str, UUID)) and isinstance(key_, str) for kid, key_ in kid_keys.items()):
raise ValueError("Expecting dict with Key of str/UUID and value of str.")
if any(isinstance(kid, UUID) for kid, key_ in kid_keys.items()):
kid_keys = {kid.hex if isinstance(kid, UUID) else kid: key_ for kid, key_ in kid_keys.items()}
conn = self.conn_factory.get()
cursor = conn.cursor()
try:
cursor.executemany(
# TODO: SQL injection risk
f"INSERT IGNORE INTO `{service}` (kid, key_) VALUES (%s, %s)",
kid_keys.items(),
)
return cursor.rowcount
finally:
conn.commit()
cursor.close()
def get_services(self) -> Iterator[str]:
conn = self.conn_factory.get()
cursor = conn.cursor()
try:
cursor.execute("SHOW TABLES")
for table in cursor.fetchall():
# each entry has a key named `Tables_in_<db name>`
yield Services.get_tag(list(table.values())[0])
finally:
cursor.close()
def has_table(self, name: str) -> bool:
"""Check if the Vault has a Table with the specified name."""
conn = self.conn_factory.get()
cursor = conn.cursor()
try:
cursor.execute(
"SELECT count(TABLE_NAME) FROM information_schema.TABLES WHERE TABLE_SCHEMA=%s AND TABLE_NAME=%s",
(conn.db, name),
)
return list(cursor.fetchone().values())[0] == 1
finally:
cursor.close()
def create_table(self, name: str):
"""Create a Table with the specified name if not yet created."""
if self.has_table(name):
return
if not self.has_permission("CREATE"):
raise PermissionError(f"MySQL vault {self.slug} has no CREATE permission.")
conn = self.conn_factory.get()
cursor = conn.cursor()
try:
cursor.execute(
# TODO: SQL injection risk
f"""
CREATE TABLE IF NOT EXISTS {name} (
id int AUTO_INCREMENT PRIMARY KEY,
kid VARCHAR(64) NOT NULL,
key_ VARCHAR(64) NOT NULL,
UNIQUE(kid, key_)
);
"""
)
finally:
conn.commit()
cursor.close()
def get_permissions(self) -> list:
"""Get and parse Grants to a more easily usable list tuple array."""
conn = self.conn_factory.get()
cursor = conn.cursor()
try:
cursor.execute("SHOW GRANTS")
grants = cursor.fetchall()
grants = [next(iter(x.values())) for x in grants]
grants = [tuple(x[6:].split(" TO ")[0].split(" ON ")) for x in list(grants)]
grants = [
(
list(map(str.strip, perms.replace("ALL PRIVILEGES", "*").split(","))),
location.replace("`", "").split("."),
)
for perms, location in grants
]
return grants
finally:
conn.commit()
cursor.close()
def has_permission(self, operation: str, database: Optional[str] = None, table: Optional[str] = None) -> bool:
"""Check if the current connection has a specific permission."""
grants = [x for x in self.permissions if x[0] == ["*"] or operation.upper() in x[0]]
if grants and database:
grants = [x for x in grants if x[1][0] in (database, "*")]
if grants and table:
grants = [x for x in grants if x[1][1] in (table, "*")]
return bool(grants)
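    # A minimal sketch of how the parsing above digests a typical SHOW GRANTS
    # row (the grant string is illustrative):
    #
    #   row = "GRANT SELECT, INSERT ON `vault`.* TO 'user'@'%'"
    #   perms, location = row[6:].split(" TO ")[0].split(" ON ")
    #   # perms == "SELECT, INSERT", location == "`vault`.*"
    #   # after cleaning: (["SELECT", "INSERT"], ["vault", "*"])
    #   # has_permission("insert", database="vault", table="EXAMPLE") is then
    #   # True, since the "*" table part matches any table.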
class ConnectionFactory:
def __init__(self, con: dict):
self._con = con
self._store = threading.local()
def _create_connection(self) -> pymysql.Connection:
return pymysql.connect(**self._con)
def get(self) -> pymysql.Connection:
if not hasattr(self._store, "conn"):
self._store.conn = self._create_connection()
return self._store.conn
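
# Example usage (a sketch: the constructor is assumed to forward pymysql
# connection kwargs through ConnectionFactory, and every value below is
# hypothetical):
#
#   vault = MySQL("main", host="127.0.0.1", user="user", password="...", db="vault")
#   vault.add_key("EXAMPLE", "0123456789abcdef0123456789abcdef", "00112233445566778899aabbccddeeff")
#   print(vault.get_key("0123456789abcdef0123456789abcdef", "EXAMPLE"))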

179
unshackle/vaults/SQLite.py Normal file
View File

@@ -0,0 +1,179 @@
import sqlite3
import threading
from pathlib import Path
from sqlite3 import Connection
from typing import Iterator, Optional, Union
from uuid import UUID
from unshackle.core.services import Services
from unshackle.core.vault import Vault
class SQLite(Vault):
"""Key Vault using a locally-accessed sqlite DB file."""
def __init__(self, name: str, path: Union[str, Path]):
super().__init__(name)
self.path = Path(path).expanduser()
# TODO: Use a DictCursor or such to get fetches as dict?
self.conn_factory = ConnectionFactory(self.path)
def get_key(self, kid: Union[UUID, str], service: str) -> Optional[str]:
if not self.has_table(service):
# no table, no key, simple
return None
if isinstance(kid, UUID):
kid = kid.hex
conn = self.conn_factory.get()
cursor = conn.cursor()
try:
cursor.execute(f"SELECT `id`, `key_` FROM `{service}` WHERE `kid`=? AND `key_`!=?", (kid, "0" * 32))
cek = cursor.fetchone()
if not cek:
return None
return cek[1]
finally:
cursor.close()
def get_keys(self, service: str) -> Iterator[tuple[str, str]]:
if not self.has_table(service):
# no table, no keys, simple
            return
conn = self.conn_factory.get()
cursor = conn.cursor()
try:
cursor.execute(f"SELECT `kid`, `key_` FROM `{service}` WHERE `key_`!=?", ("0" * 32,))
for kid, key_ in cursor.fetchall():
yield kid, key_
finally:
cursor.close()
def add_key(self, service: str, kid: Union[UUID, str], key: str) -> bool:
if not key or key.count("0") == len(key):
raise ValueError("You cannot add a NULL Content Key to a Vault.")
if not self.has_table(service):
self.create_table(service)
if isinstance(kid, UUID):
kid = kid.hex
conn = self.conn_factory.get()
cursor = conn.cursor()
try:
cursor.execute(
# TODO: SQL injection risk
f"SELECT `id` FROM `{service}` WHERE `kid`=? AND `key_`=?",
(kid, key),
)
if cursor.fetchone():
# table already has this exact KID:KEY stored
return True
cursor.execute(
# TODO: SQL injection risk
f"INSERT INTO `{service}` (kid, key_) VALUES (?, ?)",
(kid, key),
)
finally:
conn.commit()
cursor.close()
return True
    def add_keys(self, service: str, kid_keys: dict[Union[UUID, str], str]) -> int:
        # validate the mapping up front, before iterating it, so a non-dict
        # argument raises the intended ValueError instead of an AttributeError
        if not isinstance(kid_keys, dict):
            raise ValueError(f"The kid_keys provided is not a dictionary, {kid_keys!r}")
        if not all(isinstance(kid, (str, UUID)) and isinstance(key_, str) for kid, key_ in kid_keys.items()):
            raise ValueError("Expecting dict with Key of str/UUID and value of str.")
        for kid, key in kid_keys.items():
            if not key or key.count("0") == len(key):
                raise ValueError("You cannot add a NULL Content Key to a Vault.")
        if not self.has_table(service):
            self.create_table(service)
        if any(isinstance(kid, UUID) for kid in kid_keys):
            kid_keys = {kid.hex if isinstance(kid, UUID) else kid: key_ for kid, key_ in kid_keys.items()}
conn = self.conn_factory.get()
cursor = conn.cursor()
try:
cursor.executemany(
# TODO: SQL injection risk
f"INSERT OR IGNORE INTO `{service}` (kid, key_) VALUES (?, ?)",
kid_keys.items(),
)
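            # INSERT OR IGNORE silently skips duplicate (kid, key_) pairs, so
            # rowcount only counts rows that were actually inserted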
return cursor.rowcount
finally:
conn.commit()
cursor.close()
def get_services(self) -> Iterator[str]:
conn = self.conn_factory.get()
cursor = conn.cursor()
try:
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
for (name,) in cursor.fetchall():
if name != "sqlite_sequence":
yield Services.get_tag(name)
finally:
cursor.close()
def has_table(self, name: str) -> bool:
"""Check if the Vault has a Table with the specified name."""
conn = self.conn_factory.get()
cursor = conn.cursor()
try:
cursor.execute("SELECT count(name) FROM sqlite_master WHERE type='table' AND name=?", (name,))
return cursor.fetchone()[0] == 1
finally:
cursor.close()
def create_table(self, name: str):
"""Create a Table with the specified name if not yet created."""
if self.has_table(name):
return
conn = self.conn_factory.get()
cursor = conn.cursor()
try:
cursor.execute(
# TODO: SQL injection risk
f"""
CREATE TABLE IF NOT EXISTS {name} (
"id" INTEGER NOT NULL UNIQUE,
"kid" TEXT NOT NULL COLLATE NOCASE,
"key_" TEXT NOT NULL COLLATE NOCASE,
PRIMARY KEY("id" AUTOINCREMENT),
UNIQUE("kid", "key_")
);
"""
)
finally:
conn.commit()
cursor.close()
class ConnectionFactory:
def __init__(self, path: Union[str, Path]):
self._path = path
self._store = threading.local()
def _create_connection(self) -> Connection:
return sqlite3.connect(self._path)
def get(self) -> Connection:
if not hasattr(self._store, "conn"):
self._store.conn = self._create_connection()
return self._store.conn
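
# Example usage (a sketch; the path and service tag are illustrative). The
# thread-local ConnectionFactory exists because sqlite3 connections refuse to
# be shared across threads by default (check_same_thread=True):
#
#   vault = SQLite("local", "~/.config/unshackle/vault.db")
#   vault.add_keys("EXAMPLE", {"0123456789abcdef0123456789abcdef": "00112233445566778899aabbccddeeff"})
#   for kid, key_ in vault.get_keys("EXAMPLE"):
#       print(kid, key_)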
