diff --git a/pyproject.toml b/pyproject.toml
index 4dc7f7f..a91199c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -58,6 +58,7 @@ dependencies = [
     "httpx>=0.28.1,<0.29",
     "cryptography>=45.0.0",
     "subby",
+    "pysubs2>=1.7.0,<2",
 ]
 
 [project.urls]
diff --git a/unshackle/core/tracks/subtitle.py b/unshackle/core/tracks/subtitle.py
index d2a5cad..aad937a 100644
--- a/unshackle/core/tracks/subtitle.py
+++ b/unshackle/core/tracks/subtitle.py
@@ -10,6 +10,7 @@ from pathlib import Path
 from typing import Any, Callable, Iterable, Optional, Union
 
 import pycaption
+import pysubs2
 import requests
 from construct import Container
 from pycaption import Caption, CaptionList, CaptionNode, WebVTTReader
@@ -33,6 +34,9 @@ class Subtitle(Track):
         TimedTextMarkupLang = "TTML"  # https://wikipedia.org/wiki/Timed_Text_Markup_Language
         WebVTT = "VTT"  # https://wikipedia.org/wiki/WebVTT
         SAMI = "SMI"  # https://wikipedia.org/wiki/SAMI
+        MicroDVD = "SUB"  # https://wikipedia.org/wiki/MicroDVD
+        MPL2 = "MPL2"  # MPL2 subtitle format
+        TMP = "TMP"  # TMP subtitle format
         # MPEG-DASH box-encapsulated subtitle formats
         fTTML = "STPP"  # https://www.w3.org/TR/2018/REC-ttml-imsc1.0.1-20180424
         fVTT = "WVTT"  # https://www.w3.org/TR/webvtt1
@@ -56,6 +60,12 @@ class Subtitle(Track):
                 return Subtitle.Codec.WebVTT
             elif mime in ("smi", "sami"):
                 return Subtitle.Codec.SAMI
+            elif mime in ("sub", "microdvd"):
+                return Subtitle.Codec.MicroDVD
+            elif mime == "mpl2":
+                return Subtitle.Codec.MPL2
+            elif mime == "tmp":
+                return Subtitle.Codec.TMP
             elif mime == "stpp":
                 return Subtitle.Codec.fTTML
             elif mime == "wvtt":
@@ -391,15 +401,77 @@ class Subtitle(Track):
             # Fall back to existing conversion method on any error
             return self._convert_standard(codec)
 
+    def convert_with_pysubs2(self, codec: Subtitle.Codec) -> Path:
+        """
+        Convert this Subtitle to another Format using the pysubs2 library.
+
+        Only the codecs listed in the format table below are handled by pysubs2.
+        Any other target codec, and any error raised while loading, saving, or
+        removing the original file, falls back to the standard conversion method.
+
+        Parameters:
+            codec: The Subtitle Codec to convert to.
+
+        Returns the path of the converted subtitle file. Raises a ValueError if
+        the subtitle track has not been downloaded yet.
+        """
+        if not self.path or not self.path.exists():
+            raise ValueError("You must download the subtitle track first.")
+
+        if self.codec == codec:
+            return self.path
+
+        # Target codec -> pysubs2 format identifier
+        codec_to_pysubs2_format = {
+            Subtitle.Codec.SubRip: "srt",
+            Subtitle.Codec.SubStationAlpha: "ssa",
+            Subtitle.Codec.SubStationAlphav4: "ass",
+            Subtitle.Codec.WebVTT: "vtt",
+            Subtitle.Codec.TimedTextMarkupLang: "ttml",
+            Subtitle.Codec.SAMI: "sami",
+            Subtitle.Codec.MicroDVD: "microdvd",
+            Subtitle.Codec.MPL2: "mpl2",
+            Subtitle.Codec.TMP: "tmp",
+        }
+
+        pysubs2_output_format = codec_to_pysubs2_format.get(codec)
+        if pysubs2_output_format is None:
+            return self._convert_standard(codec)
+
+        original_path = self.path
+        output_path = original_path.with_suffix(f".{codec.value.lower()}")
+
+        try:
+            subs = pysubs2.load(str(original_path), encoding="utf-8")
+            subs.save(str(output_path), format_=pysubs2_output_format, encoding="utf-8")
+
+            if original_path.exists() and original_path != output_path:
+                original_path.unlink()
+        except Exception:
+            # Fall back to existing conversion method on any error
+            return self._convert_standard(codec)
+
+        # Track state is updated and the callback fired outside the try block so
+        # an error raised by OnConverted is not mistaken for a conversion failure
+        # and does not trigger a second conversion after the original was removed.
+        self.path = output_path
+        self.codec = codec
+
+        if callable(self.OnConverted):
+            self.OnConverted(codec)
+
+        return output_path
+
     def convert(self, codec: Subtitle.Codec) -> Path:
         """
         Convert this Subtitle to another Format.
 
         The conversion method is determined by the 'conversion_method' setting in config:
-        - 'auto' (default): Uses subby for WebVTT/SAMI, standard for others
+        - 'auto' (default): Uses pysubs2 (supports SRT/SSA/ASS/WebVTT/TTML/SAMI/MicroDVD/MPL2/TMP)
         - 'subby': Always uses subby with CommonIssuesFixer
         - 'subtitleedit': Uses SubtitleEdit when available, falls back to pycaption
         - 'pycaption': Uses only pycaption library
+        - 'pysubs2': Uses pysubs2 library (same as auto)
         """
         # Check configuration for conversion method
         conversion_method = config.subtitle.get("conversion_method", "auto")
@@ -407,15 +479,13 @@ class Subtitle(Track):
         if conversion_method == "subby":
             return self.convert_with_subby(codec)
         elif conversion_method == "subtitleedit":
-            return self._convert_standard(codec)  # SubtitleEdit is used in standard conversion
+            return self._convert_standard(codec)
         elif conversion_method == "pycaption":
             return self._convert_pycaption_only(codec)
+        elif conversion_method == "pysubs2":
+            return self.convert_with_pysubs2(codec)
         elif conversion_method == "auto":
-            # Use subby for formats it handles better
-            if self.codec in (Subtitle.Codec.WebVTT, Subtitle.Codec.SAMI):
-                return self.convert_with_subby(codec)
-            else:
-                return self._convert_standard(codec)
+            return self.convert_with_pysubs2(codec)
         else:
             return self._convert_standard(codec)
 
diff --git a/uv.lock b/uv.lock
index 1cea081..f73edd4 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1174,6 +1174,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/8d/59/b4572118e098ac8e46e399a1dd0f2d85403ce8bbaad9ec79373ed6badaf9/PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5", size = 16725, upload-time = "2019-09-20T02:06:22.938Z" },
 ]
 
+[[package]]
+name = "pysubs2"
+version = "1.8.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/31/4a/becf78d9d3df56e6c4a9c50b83794e5436b6c5ab6dd8a3f934e94c89338c/pysubs2-1.8.0.tar.gz", hash = "sha256:3397bb58a4a15b1325ba2ae3fd4d7c214e2c0ddb9f33190d6280d783bb433b20", size = 1130048, upload-time = "2024-12-24T12:39:47.769Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/99/09/0fc0719162e5ad723f71d41cf336f18b6b5054d70dc0fe42ace6b4d2bdc9/pysubs2-1.8.0-py3-none-any.whl", hash = "sha256:05716f5039a9ebe32cd4d7673f923cf36204f3a3e99987f823ab83610b7035a0", size = 43516, upload-time = "2024-12-24T12:39:44.469Z" },
+]
+
 [[package]]
 name = "pywidevine"
 version = "1.8.0"
@@ -1523,6 +1532,7 @@ dependencies = [
     { name = "pymp4" },
     { name = "pymysql" },
     { name = "pyplayready" },
+    { name = "pysubs2" },
     { name = "pywidevine", extra = ["serve"] },
     { name = "pyyaml" },
     { name = "requests", extra = ["socks"] },
@@ -1562,7 +1572,7 @@ requires-dist = [
     { name = "httpx", specifier = ">=0.28.1,<0.29" },
     { name = "jsonpickle", specifier = ">=3.0.4,<4" },
     { name = "langcodes", specifier = ">=3.4.0,<4" },
-    { name = "lxml", specifier = ">=5.2.1,<6" },
+    { name = "lxml", specifier = ">=5.2.1,<7" },
     { name = "pproxy", specifier = ">=2.7.9,<3" },
     { name = "protobuf", specifier = ">=4.25.3,<5" },
     { name = "pycaption", specifier = ">=2.2.6,<3" },
@@ -1572,6 +1582,7 @@ requires-dist = [
     { name = "pymp4", specifier = ">=1.4.0,<2" },
     { name = "pymysql", specifier = ">=1.1.0,<2" },
     { name = "pyplayready", specifier = ">=0.6.0,<0.7" },
+    { name = "pysubs2", specifier = ">=1.7.0,<2" },
     { name = "pywidevine", extras = ["serve"], specifier = ">=1.8.0,<2" },
     { name = "pyyaml", specifier = ">=6.0.1,<7" },
     { name = "requests", extras = ["socks"], specifier = ">=2.31.0,<3" },