mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-03-23 18:22:09 +01:00
Compare commits
106 Commits
2022.08.08
...
2022.09.01
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
adba24d207 | ||
|
|
5d7c7d6569 | ||
|
|
d2c8aadf79 | ||
|
|
1ac7f46184 | ||
|
|
05deb747bb | ||
|
|
b505e8517a | ||
|
|
f2e9fa3ef7 | ||
|
|
50a399326f | ||
|
|
1ff88b7aec | ||
|
|
825d3ce386 | ||
|
|
92aa6d6883 | ||
|
|
b2a4db425b | ||
|
|
de49cdbe9d | ||
|
|
9f9c85dda4 | ||
|
|
11734714c2 | ||
|
|
b86ca447ce | ||
|
|
f8c7ba9984 | ||
|
|
76f2bb175d | ||
|
|
f26af78a8a | ||
|
|
bfbecd1174 | ||
|
|
9bd13fe5bb | ||
|
|
459262ac97 | ||
|
|
82ea226c61 | ||
|
|
da4db748fa | ||
|
|
e1eabd7beb | ||
|
|
d81ba7d491 | ||
|
|
5135ed3d4a | ||
|
|
c4b2df872d | ||
|
|
224b5a35f7 | ||
|
|
50ac0e5416 | ||
|
|
e0992d5558 | ||
|
|
5e01315aa1 | ||
|
|
4e4982ab5b | ||
|
|
89e4d86171 | ||
|
|
a1af516259 | ||
|
|
1d64a59547 | ||
|
|
ca7f8b8f31 | ||
|
|
164b03c486 | ||
|
|
e5458d1d88 | ||
|
|
b5e7a2e69d | ||
|
|
2516cafb28 | ||
|
|
fd404bec7e | ||
|
|
fe7866d0ed | ||
|
|
5314b52192 | ||
|
|
13db4e7b9e | ||
|
|
07275b708b | ||
|
|
b85703d11a | ||
|
|
992dc6b486 | ||
|
|
822d66e591 | ||
|
|
8d1ad6378f | ||
|
|
2d1019542a | ||
|
|
b25cac650f | ||
|
|
90a1df305b | ||
|
|
0a6b4b82e9 | ||
|
|
1704c47ba8 | ||
|
|
b76e9cedb3 | ||
|
|
48c88e088c | ||
|
|
a831c2ea90 | ||
|
|
be13a6e525 | ||
|
|
8a3da4c68c | ||
|
|
4d37d4a77c | ||
|
|
7d3b98be4c | ||
|
|
2b3e43e247 | ||
|
|
f60ef66371 | ||
|
|
25836db6be | ||
|
|
587021cd9f | ||
|
|
580ce00782 | ||
|
|
2f1a299c50 | ||
|
|
f6ca640b12 | ||
|
|
3ce2933693 | ||
|
|
c200096c03 | ||
|
|
6d3e7424bf | ||
|
|
5c6d2ef9d1 | ||
|
|
460eb9c50e | ||
|
|
9fd03a1696 | ||
|
|
55937202b7 | ||
|
|
1e4fca9a87 | ||
|
|
49b4ceaedf | ||
|
|
d711839760 | ||
|
|
48732becfe | ||
|
|
6440c45ff3 | ||
|
|
ef6342bd07 | ||
|
|
e183bb8c9b | ||
|
|
7695f5a0a7 | ||
|
|
cb7cc448c0 | ||
|
|
63be30e3e0 | ||
|
|
43cf982ac3 | ||
|
|
7e82397441 | ||
|
|
66c4afd828 | ||
|
|
0e0ce898f6 | ||
|
|
a6125983ab | ||
|
|
8f84770acd | ||
|
|
62b58c0936 | ||
|
|
8f53dc44a0 | ||
|
|
1cddfdc52b | ||
|
|
cea4b857f0 | ||
|
|
ffcd62c289 | ||
|
|
a1c5bd82ec | ||
|
|
5da42f2b9b | ||
|
|
1155ecef29 | ||
|
|
96623ab5c6 | ||
|
|
7e798d725e | ||
|
|
8420a4d063 | ||
|
|
b5e9a641f5 | ||
|
|
c220d9efc8 | ||
|
|
81e0195998 |
8
.github/ISSUE_TEMPLATE/1_broken_site.yml
vendored
8
.github/ISSUE_TEMPLATE/1_broken_site.yml
vendored
@@ -18,7 +18,7 @@ body:
|
||||
options:
|
||||
- label: I'm reporting a broken site
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **2022.08.08** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
- label: I've verified that I'm running yt-dlp version **2022.09.01** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
required: true
|
||||
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
|
||||
required: true
|
||||
@@ -62,7 +62,7 @@ body:
|
||||
[debug] Command-line config: ['-vU', 'test:youtube']
|
||||
[debug] Portable config "yt-dlp.conf": ['-i']
|
||||
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
|
||||
[debug] yt-dlp version 2022.08.08 [9d339c4] (win32_exe)
|
||||
[debug] yt-dlp version 2022.09.01 [9d339c4] (win32_exe)
|
||||
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
|
||||
[debug] Checking exe version: ffmpeg -bsfs
|
||||
[debug] Checking exe version: ffprobe -bsfs
|
||||
@@ -70,8 +70,8 @@ body:
|
||||
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
|
||||
[debug] Proxy map: {}
|
||||
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
|
||||
Latest version: 2022.08.08, Current version: 2022.08.08
|
||||
yt-dlp is up to date (2022.08.08)
|
||||
Latest version: 2022.09.01, Current version: 2022.09.01
|
||||
yt-dlp is up to date (2022.09.01)
|
||||
<more lines>
|
||||
render: shell
|
||||
validations:
|
||||
|
||||
@@ -18,7 +18,7 @@ body:
|
||||
options:
|
||||
- label: I'm reporting a new site support request
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **2022.08.08** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
- label: I've verified that I'm running yt-dlp version **2022.09.01** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
required: true
|
||||
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
|
||||
required: true
|
||||
@@ -74,7 +74,7 @@ body:
|
||||
[debug] Command-line config: ['-vU', 'test:youtube']
|
||||
[debug] Portable config "yt-dlp.conf": ['-i']
|
||||
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
|
||||
[debug] yt-dlp version 2022.08.08 [9d339c4] (win32_exe)
|
||||
[debug] yt-dlp version 2022.09.01 [9d339c4] (win32_exe)
|
||||
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
|
||||
[debug] Checking exe version: ffmpeg -bsfs
|
||||
[debug] Checking exe version: ffprobe -bsfs
|
||||
@@ -82,8 +82,8 @@ body:
|
||||
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
|
||||
[debug] Proxy map: {}
|
||||
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
|
||||
Latest version: 2022.08.08, Current version: 2022.08.08
|
||||
yt-dlp is up to date (2022.08.08)
|
||||
Latest version: 2022.09.01, Current version: 2022.09.01
|
||||
yt-dlp is up to date (2022.09.01)
|
||||
<more lines>
|
||||
render: shell
|
||||
validations:
|
||||
|
||||
@@ -18,7 +18,7 @@ body:
|
||||
options:
|
||||
- label: I'm requesting a site-specific feature
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **2022.08.08** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
- label: I've verified that I'm running yt-dlp version **2022.09.01** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
required: true
|
||||
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
|
||||
required: true
|
||||
@@ -70,7 +70,7 @@ body:
|
||||
[debug] Command-line config: ['-vU', 'test:youtube']
|
||||
[debug] Portable config "yt-dlp.conf": ['-i']
|
||||
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
|
||||
[debug] yt-dlp version 2022.08.08 [9d339c4] (win32_exe)
|
||||
[debug] yt-dlp version 2022.09.01 [9d339c4] (win32_exe)
|
||||
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
|
||||
[debug] Checking exe version: ffmpeg -bsfs
|
||||
[debug] Checking exe version: ffprobe -bsfs
|
||||
@@ -78,8 +78,8 @@ body:
|
||||
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
|
||||
[debug] Proxy map: {}
|
||||
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
|
||||
Latest version: 2022.08.08, Current version: 2022.08.08
|
||||
yt-dlp is up to date (2022.08.08)
|
||||
Latest version: 2022.09.01, Current version: 2022.09.01
|
||||
yt-dlp is up to date (2022.09.01)
|
||||
<more lines>
|
||||
render: shell
|
||||
validations:
|
||||
|
||||
8
.github/ISSUE_TEMPLATE/4_bug_report.yml
vendored
8
.github/ISSUE_TEMPLATE/4_bug_report.yml
vendored
@@ -18,7 +18,7 @@ body:
|
||||
options:
|
||||
- label: I'm reporting a bug unrelated to a specific site
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **2022.08.08** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
- label: I've verified that I'm running yt-dlp version **2022.09.01** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
required: true
|
||||
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
|
||||
required: true
|
||||
@@ -55,7 +55,7 @@ body:
|
||||
[debug] Command-line config: ['-vU', 'test:youtube']
|
||||
[debug] Portable config "yt-dlp.conf": ['-i']
|
||||
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
|
||||
[debug] yt-dlp version 2022.08.08 [9d339c4] (win32_exe)
|
||||
[debug] yt-dlp version 2022.09.01 [9d339c4] (win32_exe)
|
||||
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
|
||||
[debug] Checking exe version: ffmpeg -bsfs
|
||||
[debug] Checking exe version: ffprobe -bsfs
|
||||
@@ -63,8 +63,8 @@ body:
|
||||
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
|
||||
[debug] Proxy map: {}
|
||||
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
|
||||
Latest version: 2022.08.08, Current version: 2022.08.08
|
||||
yt-dlp is up to date (2022.08.08)
|
||||
Latest version: 2022.09.01, Current version: 2022.09.01
|
||||
yt-dlp is up to date (2022.09.01)
|
||||
<more lines>
|
||||
render: shell
|
||||
validations:
|
||||
|
||||
8
.github/ISSUE_TEMPLATE/5_feature_request.yml
vendored
8
.github/ISSUE_TEMPLATE/5_feature_request.yml
vendored
@@ -20,7 +20,7 @@ body:
|
||||
required: true
|
||||
- label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **2022.08.08** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
- label: I've verified that I'm running yt-dlp version **2022.09.01** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
required: true
|
||||
- label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
|
||||
required: true
|
||||
@@ -51,7 +51,7 @@ body:
|
||||
[debug] Command-line config: ['-vU', 'test:youtube']
|
||||
[debug] Portable config "yt-dlp.conf": ['-i']
|
||||
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
|
||||
[debug] yt-dlp version 2022.08.08 [9d339c4] (win32_exe)
|
||||
[debug] yt-dlp version 2022.09.01 [9d339c4] (win32_exe)
|
||||
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
|
||||
[debug] Checking exe version: ffmpeg -bsfs
|
||||
[debug] Checking exe version: ffprobe -bsfs
|
||||
@@ -59,7 +59,7 @@ body:
|
||||
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
|
||||
[debug] Proxy map: {}
|
||||
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
|
||||
Latest version: 2022.08.08, Current version: 2022.08.08
|
||||
yt-dlp is up to date (2022.08.08)
|
||||
Latest version: 2022.09.01, Current version: 2022.09.01
|
||||
yt-dlp is up to date (2022.09.01)
|
||||
<more lines>
|
||||
render: shell
|
||||
|
||||
8
.github/ISSUE_TEMPLATE/6_question.yml
vendored
8
.github/ISSUE_TEMPLATE/6_question.yml
vendored
@@ -26,7 +26,7 @@ body:
|
||||
required: true
|
||||
- label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **2022.08.08** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
- label: I've verified that I'm running yt-dlp version **2022.09.01** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
required: true
|
||||
- label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates
|
||||
required: true
|
||||
@@ -57,7 +57,7 @@ body:
|
||||
[debug] Command-line config: ['-vU', 'test:youtube']
|
||||
[debug] Portable config "yt-dlp.conf": ['-i']
|
||||
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
|
||||
[debug] yt-dlp version 2022.08.08 [9d339c4] (win32_exe)
|
||||
[debug] yt-dlp version 2022.09.01 [9d339c4] (win32_exe)
|
||||
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
|
||||
[debug] Checking exe version: ffmpeg -bsfs
|
||||
[debug] Checking exe version: ffprobe -bsfs
|
||||
@@ -65,7 +65,7 @@ body:
|
||||
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
|
||||
[debug] Proxy map: {}
|
||||
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
|
||||
Latest version: 2022.08.08, Current version: 2022.08.08
|
||||
yt-dlp is up to date (2022.08.08)
|
||||
Latest version: 2022.09.01, Current version: 2022.09.01
|
||||
yt-dlp is up to date (2022.09.01)
|
||||
<more lines>
|
||||
render: shell
|
||||
|
||||
6
.github/workflows/build.yml
vendored
6
.github/workflows/build.yml
vendored
@@ -194,7 +194,7 @@ jobs:
|
||||
- name: Install Requirements
|
||||
run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
|
||||
python -m pip install --upgrade pip setuptools wheel py2exe
|
||||
pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.2-py3-none-any.whl" -r requirements.txt
|
||||
pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.3-py3-none-any.whl" -r requirements.txt
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
@@ -230,7 +230,7 @@ jobs:
|
||||
- name: Install Requirements
|
||||
run: |
|
||||
python -m pip install --upgrade pip setuptools wheel
|
||||
pip install "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.2-py3-none-any.whl" -r requirements.txt
|
||||
pip install "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.3-py3-none-any.whl" -r requirements.txt
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
@@ -257,7 +257,7 @@ jobs:
|
||||
|
||||
- name: Get Changelog
|
||||
run: |
|
||||
changelog=$(grep -oPz '(?s)(?<=### ${{ steps.bump_version.outputs.ytdlp_version }}\n{2}).+?(?=\n{2,3}###)' Changelog.md) || true
|
||||
changelog=$(grep -oPz '(?s)(?<=### ${{ needs.prepare.outputs.ytdlp_version }}\n{2}).+?(?=\n{2,3}###)' Changelog.md) || true
|
||||
echo "changelog<<EOF" >> $GITHUB_ENV
|
||||
echo "$changelog" >> $GITHUB_ENV
|
||||
echo "EOF" >> $GITHUB_ENV
|
||||
|
||||
@@ -195,7 +195,7 @@ After you have ensured this site is distributing its content legally, you can fo
|
||||
# * A value
|
||||
# * MD5 checksum; start the string with md5:
|
||||
# * A regular expression; start the string with re:
|
||||
# * Any Python type (for example int or float)
|
||||
# * Any Python type, e.g. int or float
|
||||
}
|
||||
}]
|
||||
|
||||
@@ -261,7 +261,7 @@ The aforementioned metafields are the critical data that the extraction does not
|
||||
|
||||
For pornographic sites, appropriate `age_limit` must also be returned.
|
||||
|
||||
The extractor is allowed to return the info dict without url or formats in some special cases if it allows the user to extract usefull information with `--ignore-no-formats-error` - Eg: when the video is a live stream that has not started yet.
|
||||
The extractor is allowed to return the info dict without url or formats in some special cases if it allows the user to extract usefull information with `--ignore-no-formats-error` - e.g. when the video is a live stream that has not started yet.
|
||||
|
||||
[Any field](yt_dlp/extractor/common.py#219-L426) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
|
||||
|
||||
|
||||
14
CONTRIBUTORS
14
CONTRIBUTORS
@@ -294,3 +294,17 @@ haobinliang
|
||||
Mehavoid
|
||||
winterbird-code
|
||||
yashkc2025
|
||||
aldoridhoni
|
||||
bashonly
|
||||
jacobtruman
|
||||
masta79
|
||||
palewire
|
||||
cgrigis
|
||||
DavidH-2022
|
||||
dfaker
|
||||
jackyyf
|
||||
ohaiibuzzle
|
||||
SamantazFox
|
||||
shreyasminocha
|
||||
tejasa97
|
||||
xenov
|
||||
|
||||
112
Changelog.md
112
Changelog.md
@@ -11,6 +11,102 @@
|
||||
-->
|
||||
|
||||
|
||||
### 2022.09.01
|
||||
|
||||
* Add option `--use-extractors`
|
||||
* Merge youtube-dl: Upto [commit/ed5c44e](https://github.com/ytdl-org/youtube-dl/commit/ed5c44e7)
|
||||
* Add yt-dlp version to infojson
|
||||
* Fix `--break-per-url --max-downloads`
|
||||
* Fix bug in `--alias`
|
||||
* [cookies] Support firefox container in `--cookies-from-browser` by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
|
||||
* [downloader/external] Smarter detection of executable
|
||||
* [extractor/generic] Don't return JW player without formats
|
||||
* [FormatSort] Fix `aext` for `--prefer-free-formats`
|
||||
* [jsinterp] Various improvements by [pukkandan](https://github.com/pukkandan), [dirkf](https://github.com/dirkf), [elyse0](https://github.com/elyse0)
|
||||
* [cache] Mechanism to invalidate old cache
|
||||
* [utils] Add `deprecation_warning`
|
||||
* [utils] Add `orderedSet_from_options`
|
||||
* [utils] `Popen`: Restore `LD_LIBRARY_PATH` when using PyInstaller by [Lesmiscore](https://github.com/Lesmiscore)
|
||||
* [build] `make tar` should not follow `DESTDIR` by [satan1st](https://github.com/satan1st)
|
||||
* [build] Update pyinstaller by [shirt-dev](https://github.com/shirt-dev)
|
||||
* [test] Fix `test_youtube_signature`
|
||||
* [cleanup] Misc fixes and cleanup by [DavidH-2022](https://github.com/DavidH-2022), [MrRawes](https://github.com/MrRawes), [pukkandan](https://github.com/pukkandan)
|
||||
* [extractor/epoch] Add extractor by [tejasa97](https://github.com/tejasa97)
|
||||
* [extractor/eurosport] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
|
||||
* [extractor/IslamChannel] Add extractors by [Lesmiscore](https://github.com/Lesmiscore)
|
||||
* [extractor/newspicks] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
|
||||
* [extractor/triller] Add extractor by [bashonly](https://github.com/bashonly)
|
||||
* [extractor/VQQ] Add extractors by [elyse0](https://github.com/elyse0)
|
||||
* [extractor/youtube] Improvements to nsig extraction
|
||||
* [extractor/youtube] Fix bug in format sorting
|
||||
* [extractor/youtube] Update iOS Innertube clients by [SamantazFox](https://github.com/SamantazFox)
|
||||
* [extractor/youtube] Use device-specific user agent by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [extractor/youtube] Add `--compat-option no-youtube-prefer-utc-upload-date` by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [extractor/arte] Bug fix by [cgrigis](https://github.com/cgrigis)
|
||||
* [extractor/bilibili] Extract `flac` with premium account by [jackyyf](https://github.com/jackyyf)
|
||||
* [extractor/BiliBiliSearch] Don't sort by date
|
||||
* [extractor/BiliBiliSearch] Fix infinite loop
|
||||
* [extractor/bitchute] Mark errors as expected
|
||||
* [extractor/crunchyroll:beta] Use anonymous access by [tejing1](https://github.com/tejing1)
|
||||
* [extractor/huya] Fix stream extraction by [ohaiibuzzle](https://github.com/ohaiibuzzle)
|
||||
* [extractor/medaltv] Fix extraction by [xenova](https://github.com/xenova)
|
||||
* [extractor/mediaset] Fix embed extraction
|
||||
* [extractor/mixcloud] All formats are audio-only
|
||||
* [extractor/rtbf] Fix jwt extraction by [elyse0](https://github.com/elyse0)
|
||||
* [extractor/screencastomatic] Support `--video-password` by [shreyasminocha](https://github.com/shreyasminocha)
|
||||
* [extractor/stripchat] Don't modify input URL by [dfaker](https://github.com/dfaker)
|
||||
* [extractor/uktv] Improve `_VALID_URL` by [dirkf](https://github.com/dirkf)
|
||||
* [extractor/vimeo:user] Fix `_VALID_URL`
|
||||
|
||||
|
||||
### 2022.08.19
|
||||
|
||||
* Fix bug in `--download-archive`
|
||||
* [jsinterp] **Fix for new youtube players** and related improvements by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
|
||||
* [phantomjs] Add function to execute JS without a DOM by [MinePlayersPE](https://github.com/MinePlayersPE), [pukkandan](https://github.com/pukkandan)
|
||||
* [build] Exclude devscripts from installs by [Lesmiscore](https://github.com/Lesmiscore)
|
||||
* [cleanup] Misc fixes and cleanup
|
||||
* [extractor/youtube] **Add fallback to phantomjs** for nsig
|
||||
* [extractor/youtube] Fix error reporting of "Incomplete data"
|
||||
* [extractor/youtube] Improve format sorting for IOS formats
|
||||
* [extractor/youtube] Improve signature caching
|
||||
* [extractor/instagram] Fix extraction by [bashonly](https://github.com/bashonly), [pritam20ps05](https://github.com/pritam20ps05)
|
||||
* [extractor/rai] Minor fix by [nixxo](https://github.com/nixxo)
|
||||
* [extractor/rtbf] Fix stream extractor by [elyse0](https://github.com/elyse0)
|
||||
* [extractor/SovietsCloset] Fix extractor by [ChillingPepper](https://github.com/ChillingPepper)
|
||||
* [extractor/zattoo] Fix Zattoo resellers by [goggle](https://github.com/goggle)
|
||||
|
||||
### 2022.08.14
|
||||
|
||||
* Merge youtube-dl: Upto [commit/d231b56](https://github.com/ytdl-org/youtube-dl/commit/d231b56)
|
||||
* [jsinterp] Handle **new youtube signature functions**
|
||||
* [jsinterp] Truncate error messages
|
||||
* [extractor] Fix format sorting of `channels`
|
||||
* [ffmpeg] Disable avconv unless `--prefer-avconv`
|
||||
* [ffmpeg] Smarter detection of ffprobe filename
|
||||
* [embedthumbnail] Detect `libatomicparsley.so`
|
||||
* [ThumbnailsConvertor] Fix conversion after `fixup_webp`
|
||||
* [utils] Fix `get_compatible_ext`
|
||||
* [build] Fix changelog
|
||||
* [update] Set executable bit-mask by [pukkandan](https://github.com/pukkandan), [Lesmiscore](https://github.com/Lesmiscore)
|
||||
* [devscripts] Fix import
|
||||
* [docs] Consistent use of `e.g.` by [Lesmiscore](https://github.com/Lesmiscore)
|
||||
* [cleanup] Misc fixes and cleanup
|
||||
* [extractor/moview] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
|
||||
* [extractor/parler] Add extractor by [palewire](https://github.com/palewire)
|
||||
* [extractor/patreon] Ignore erroneous media attachments by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [extractor/truth] Add extractor by [palewire](https://github.com/palewire)
|
||||
* [extractor/aenetworks] Add formats parameter by [jacobtruman](https://github.com/jacobtruman)
|
||||
* [extractor/crunchyroll] Improve `_VALID_URL`s
|
||||
* [extractor/doodstream] Add `wf` domain by [aldoridhoni](https://github.com/aldoridhoni)
|
||||
* [extractor/facebook] Add reel support by [bashonly](https://github.com/bashonly)
|
||||
* [extractor/MLB] New extractor by [ischmidt20](https://github.com/ischmidt20)
|
||||
* [extractor/rai] Misc fixes by [nixxo](https://github.com/nixxo)
|
||||
* [extractor/toggo] Improve `_VALID_URL` by [masta79](https://github.com/masta79)
|
||||
* [extractor/tubitv] Extract additional formats by [shirt-dev](https://github.com/shirt-dev)
|
||||
* [extractor/zattoo] Potential fix for resellers
|
||||
|
||||
|
||||
### 2022.08.08
|
||||
|
||||
* **Remove Python 3.6 support**
|
||||
@@ -20,10 +116,10 @@
|
||||
* `--compat-option no-live-chat` should disable danmaku
|
||||
* Fix misleading DRM message
|
||||
* Import ctypes only when necessary
|
||||
* Minor bugfixes by [pukkandan](https://github.com/pukkandan)
|
||||
* Reject entire playlists faster with `--match-filter` by [pukkandan](https://github.com/pukkandan)
|
||||
* Minor bugfixes
|
||||
* Reject entire playlists faster with `--match-filter`
|
||||
* Remove filtered entries from `-J`
|
||||
* Standardize retry mechanism by [pukkandan](https://github.com/pukkandan)
|
||||
* Standardize retry mechanism
|
||||
* Validate `--merge-output-format`
|
||||
* [downloader] Add average speed to final progress line
|
||||
* [extractor] Add field `audio_channels`
|
||||
@@ -31,7 +127,7 @@
|
||||
* [ffmpeg] Set `ffmpeg_location` in a contextvar
|
||||
* [FFmpegThumbnailsConvertor] Fix conversion from GIF
|
||||
* [MetadataParser] Don't set `None` when the field didn't match
|
||||
* [outtmpl] Smarter replacing of unsupported characters by [pukkandan](https://github.com/pukkandan)
|
||||
* [outtmpl] Smarter replacing of unsupported characters
|
||||
* [outtmpl] Treat empty values as None in filenames
|
||||
* [utils] sanitize_open: Allow any IO stream as stdout
|
||||
* [build, devscripts] Add devscript to set a build variant
|
||||
@@ -64,7 +160,7 @@
|
||||
* [extractor/bbc] Fix news articles by [ajj8](https://github.com/ajj8)
|
||||
* [extractor/camtasia] Separate into own extractor by [coletdjnz](https://github.com/coletdjnz)
|
||||
* [extractor/cloudflarestream] Fix video_id padding by [haobinliang](https://github.com/haobinliang)
|
||||
* [extractor/crunchyroll] Fix conversion of thumbnail from GIF by [pukkandan](https://github.com/pukkandan)
|
||||
* [extractor/crunchyroll] Fix conversion of thumbnail from GIF
|
||||
* [extractor/crunchyroll] Handle missing metadata correctly by [Burve](https://github.com/Burve), [pukkandan](https://github.com/pukkandan)
|
||||
* [extractor/crunchyroll:beta] Extract timestamp and fix tests by [tejing1](https://github.com/tejing1)
|
||||
* [extractor/crunchyroll:beta] Use streams API by [tejing1](https://github.com/tejing1)
|
||||
@@ -211,7 +307,7 @@
|
||||
|
||||
* [**Deprecate support for Python 3.6**](https://github.com/yt-dlp/yt-dlp/issues/3764#issuecomment-1154051119)
|
||||
* **Add option `--download-sections` to download video partially**
|
||||
* Chapter regex and time ranges are accepted (Eg: `--download-sections *1:10-2:20`)
|
||||
* Chapter regex and time ranges are accepted, e.g. `--download-sections *1:10-2:20`
|
||||
* Add option `--alias`
|
||||
* Add option `--lazy-playlist` to process entries as they are received
|
||||
* Add option `--retry-sleep`
|
||||
@@ -1375,7 +1471,7 @@
|
||||
|
||||
* Add new option `--netrc-location`
|
||||
* [outtmpl] Allow alternate fields using `,`
|
||||
* [outtmpl] Add format type `B` to treat the value as bytes (eg: to limit the filename to a certain number of bytes)
|
||||
* [outtmpl] Add format type `B` to treat the value as bytes, e.g. to limit the filename to a certain number of bytes
|
||||
* Separate the options `--ignore-errors` and `--no-abort-on-error`
|
||||
* Basic framework for simultaneous download of multiple formats by [nao20010128nao](https://github.com/nao20010128nao)
|
||||
* [17live] Add 17.live extractor by [nao20010128nao](https://github.com/nao20010128nao)
|
||||
@@ -1765,7 +1861,7 @@
|
||||
|
||||
* Merge youtube-dl: Upto [commit/a803582](https://github.com/ytdl-org/youtube-dl/commit/a8035827177d6b59aca03bd717acb6a9bdd75ada)
|
||||
* Add `--extractor-args` to pass some extractor-specific arguments. See [readme](https://github.com/yt-dlp/yt-dlp#extractor-arguments)
|
||||
* Add extractor option `skip` for `youtube`. Eg: `--extractor-args youtube:skip=hls,dash`
|
||||
* Add extractor option `skip` for `youtube`, e.g. `--extractor-args youtube:skip=hls,dash`
|
||||
* Deprecates `--youtube-skip-dash-manifest`, `--youtube-skip-hls-manifest`, `--youtube-include-dash-manifest`, `--youtube-include-hls-manifest`
|
||||
* Allow `--list...` options to work with `--print`, `--quiet` and other `--list...` options
|
||||
* [youtube] Use `player` API for additional video extraction requests by [coletdjnz](https://github.com/coletdjnz)
|
||||
|
||||
@@ -28,12 +28,12 @@ You can also find lists of all [contributors of yt-dlp](CONTRIBUTORS) and [autho
|
||||
[](https://github.com/sponsors/coletdjnz)
|
||||
|
||||
* YouTube improvements including: age-gate bypass, private playlists, multiple-clients (to avoid throttling) and a lot of under-the-hood improvements
|
||||
* Added support for downloading YoutubeWebArchive videos
|
||||
* Added support for new websites MainStreaming, PRX, nzherald, etc
|
||||
* Added support for new websites YoutubeWebArchive, MainStreaming, PRX, nzherald, Mediaklikk, StarTV etc
|
||||
* Improved/fixed support for Patreon, panopto, gfycat, itv, pbs, SouthParkDE etc
|
||||
|
||||
|
||||
|
||||
## [Ashish0804](https://github.com/Ashish0804)
|
||||
## [Ashish0804](https://github.com/Ashish0804) <sub><sup>[Inactive]</sup></sub>
|
||||
|
||||
[](https://ko-fi.com/ashish0804)
|
||||
|
||||
@@ -48,4 +48,5 @@ You can also find lists of all [contributors of yt-dlp](CONTRIBUTORS) and [autho
|
||||
**Monacoin**: mona1q3tf7dzvshrhfe3md379xtvt2n22duhglv5dskr
|
||||
|
||||
* Download live from start to end for YouTube
|
||||
* Added support for new websites mildom, PixivSketch, skeb, radiko, voicy, mirrativ, openrec, whowatch, damtomo, 17.live, mixch etc
|
||||
* Added support for new websites AbemaTV, mildom, PixivSketch, skeb, radiko, voicy, mirrativ, openrec, whowatch, damtomo, 17.live, mixch etc
|
||||
* Improved/fixed support for fc2, YahooJapanNews, tver, iwara etc
|
||||
|
||||
3
Makefile
3
Makefile
@@ -33,7 +33,6 @@ completion-zsh: completions/zsh/_yt-dlp
|
||||
lazy-extractors: yt_dlp/extractor/lazy_extractors.py
|
||||
|
||||
PREFIX ?= /usr/local
|
||||
DESTDIR ?= .
|
||||
BINDIR ?= $(PREFIX)/bin
|
||||
MANDIR ?= $(PREFIX)/man
|
||||
SHAREDIR ?= $(PREFIX)/share
|
||||
@@ -134,7 +133,7 @@ yt_dlp/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscrip
|
||||
$(PYTHON) devscripts/make_lazy_extractors.py $@
|
||||
|
||||
yt-dlp.tar.gz: all
|
||||
@tar -czf $(DESTDIR)/yt-dlp.tar.gz --transform "s|^|yt-dlp/|" --owner 0 --group 0 \
|
||||
@tar -czf yt-dlp.tar.gz --transform "s|^|yt-dlp/|" --owner 0 --group 0 \
|
||||
--exclude '*.DS_Store' \
|
||||
--exclude '*.kate-swp' \
|
||||
--exclude '*.pyc' \
|
||||
|
||||
232
README.md
232
README.md
@@ -71,7 +71,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t
|
||||
|
||||
# NEW FEATURES
|
||||
|
||||
* Merged with **youtube-dl v2021.12.17+ [commit/adb5294](https://github.com/ytdl-org/youtube-dl/commit/adb5294177265ba35b45746dbb600965076ed150)**<!--([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))--> and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
|
||||
* Merged with **youtube-dl v2021.12.17+ [commit/ed5c44e](https://github.com/ytdl-org/youtube-dl/commit/ed5c44e7b74ac77f87ca5ed6cb5e964a0c6a0678)**<!--([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))--> and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
|
||||
|
||||
* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in youtube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
|
||||
|
||||
@@ -141,12 +141,13 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu
|
||||
* Live chats (if available) are considered as subtitles. Use `--sub-langs all,-live_chat` to download all subtitles except live chat. You can also use `--compat-options no-live-chat` to prevent any live chat/danmaku from downloading
|
||||
* Youtube channel URLs are automatically redirected to `/video`. Append a `/featured` to the URL to download only the videos in the home page. If the channel does not have a videos tab, we try to download the equivalent `UU` playlist instead. For all other tabs, if the channel does not show the requested tab, an error will be raised. Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections
|
||||
* Unavailable videos are also listed for youtube playlists. Use `--compat-options no-youtube-unavailable-videos` to remove this
|
||||
* The upload dates extracted from YouTube are in UTC [when available](https://github.com/yt-dlp/yt-dlp/blob/89e4d86171c7b7c997c77d4714542e0383bf0db0/yt_dlp/extractor/youtube.py#L3898-L3900). Use `--compat-options no-youtube-prefer-utc-upload-date` to prefer the non-UTC upload date.
|
||||
* If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this
|
||||
* Thumbnail embedding in `mp4` is done with mutagen if possible. Use `--compat-options embed-thumbnail-atomicparsley` to force the use of AtomicParsley instead
|
||||
* Some private fields such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this
|
||||
* When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this
|
||||
* `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi`
|
||||
* youtube-dl tries to remove some superfluous punctuations from filenames. While this can sometimes be helpful, it is often undesirable. So yt-dlp tries to keep the fields in the filenames as close to their original values as possible. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior
|
||||
* yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior
|
||||
|
||||
For ease of use, a few more compat options are available:
|
||||
|
||||
@@ -320,7 +321,7 @@ To build the standalone executable, you must have Python and `pyinstaller` (plus
|
||||
|
||||
On some systems, you may need to use `py` or `python` instead of `python3`.
|
||||
|
||||
Note that pyinstaller [does not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment.
|
||||
Note that pyinstaller with versions below 4.4 [do not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment.
|
||||
|
||||
**Important**: Running `pyinstaller` directly **without** using `pyinst.py` is **not** officially supported. This may or may not work correctly.
|
||||
|
||||
@@ -329,7 +330,7 @@ You will need the build tools `python` (3.6+), `zip`, `make` (GNU), `pandoc`\* a
|
||||
|
||||
After installing these, simply run `make`.
|
||||
|
||||
You can also run `make yt-dlp` instead to compile only the binary without updating any of the additional files. (The dependencies marked with **\*** are not needed for this)
|
||||
You can also run `make yt-dlp` instead to compile only the binary without updating any of the additional files. (The build tools marked with **\*** are not needed for this)
|
||||
|
||||
### Standalone Py2Exe Builds (Windows)
|
||||
|
||||
@@ -375,8 +376,14 @@ You can also fork the project on github and run your fork's [build workflow](.gi
|
||||
--list-extractors List all supported extractors and exit
|
||||
--extractor-descriptions Output descriptions of all supported
|
||||
extractors and exit
|
||||
--force-generic-extractor Force extraction to use the generic extractor
|
||||
--default-search PREFIX Use this prefix for unqualified URLs. Eg:
|
||||
--use-extractors NAMES Extractor names to use separated by commas.
|
||||
You can also use regexes, "all", "default"
|
||||
and "end" (end URL matching); e.g. --ies
|
||||
"holodex.*,end,youtube". Prefix the name
|
||||
with a "-" to exclude it, e.g. --ies
|
||||
default,-generic. Use --list-extractors for
|
||||
a list of extractor names. (Alias: --ies)
|
||||
--default-search PREFIX Use this prefix for unqualified URLs. E.g.
|
||||
"gvsearch2:python" downloads two videos from
|
||||
google videos for the search term "python".
|
||||
Use the value "auto" to let yt-dlp guess
|
||||
@@ -425,7 +432,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi
|
||||
an alias starts with a dash "-", it is
|
||||
prefixed with "--". Arguments are parsed
|
||||
according to the Python string formatting
|
||||
mini-language. Eg: --alias get-audio,-X
|
||||
mini-language. E.g. --alias get-audio,-X
|
||||
"-S=aext:{0},abr -x --audio-format {0}"
|
||||
creates options "--get-audio" and "-X" that
|
||||
takes an argument (ARG0) and expands to
|
||||
@@ -439,10 +446,10 @@ You can also fork the project on github and run your fork's [build workflow](.gi
|
||||
|
||||
## Network Options:
|
||||
--proxy URL Use the specified HTTP/HTTPS/SOCKS proxy. To
|
||||
enable SOCKS proxy, specify a proper scheme.
|
||||
Eg: socks5://user:pass@127.0.0.1:1080/. Pass
|
||||
in an empty string (--proxy "") for direct
|
||||
connection
|
||||
enable SOCKS proxy, specify a proper scheme,
|
||||
e.g. socks5://user:pass@127.0.0.1:1080/.
|
||||
Pass in an empty string (--proxy "") for
|
||||
direct connection
|
||||
--socket-timeout SECONDS Time to wait before giving up, in seconds
|
||||
--source-address IP Client-side IP address to bind to
|
||||
-4, --force-ipv4 Make all connections via IPv4
|
||||
@@ -471,17 +478,17 @@ You can also fork the project on github and run your fork's [build workflow](.gi
|
||||
compatibility, START-STOP is also supported.
|
||||
Use negative indices to count from the right
|
||||
and negative STEP to download in reverse
|
||||
order. Eg: "-I 1:3,7,-5::2" used on a
|
||||
order. E.g. "-I 1:3,7,-5::2" used on a
|
||||
playlist of size 15 will download the videos
|
||||
at index 1,2,3,7,11,13,15
|
||||
--min-filesize SIZE Do not download any videos smaller than SIZE
|
||||
(e.g. 50k or 44.6m)
|
||||
--max-filesize SIZE Do not download any videos larger than SIZE
|
||||
(e.g. 50k or 44.6m)
|
||||
--min-filesize SIZE Do not download any videos smaller than
|
||||
SIZE, e.g. 50k or 44.6M
|
||||
--max-filesize SIZE Do not download any videos larger than SIZE,
|
||||
e.g. 50k or 44.6M
|
||||
--date DATE Download only videos uploaded on this date.
|
||||
The date can be "YYYYMMDD" or in the format
|
||||
[now|today|yesterday][-N[day|week|month|year]].
|
||||
Eg: --date today-2weeks
|
||||
E.g. --date today-2weeks
|
||||
--datebefore DATE Download only videos uploaded on or before
|
||||
this date. The date formats accepted is the
|
||||
same as --date
|
||||
@@ -498,7 +505,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi
|
||||
conditions. Use a "\" to escape "&" or
|
||||
quotes if needed. If used multiple times,
|
||||
the filter matches if atleast one of the
|
||||
conditions are met. Eg: --match-filter
|
||||
conditions are met. E.g. --match-filter
|
||||
!is_live --match-filter "like_count>?100 &
|
||||
description~='(?i)\bcats \& dogs\b'" matches
|
||||
only videos that are not live OR those that
|
||||
@@ -524,8 +531,8 @@ You can also fork the project on github and run your fork's [build workflow](.gi
|
||||
a file that is in the archive
|
||||
--break-on-reject Stop the download process when encountering
|
||||
a file that has been filtered out
|
||||
--break-per-input Make --break-on-existing, --break-on-reject
|
||||
and --max-downloads act only on the current
|
||||
--break-per-input --break-on-existing, --break-on-reject,
|
||||
--max-downloads, and autonumber resets per
|
||||
input URL
|
||||
--no-break-per-input --break-on-existing and similar options
|
||||
terminates the entire download queue
|
||||
@@ -536,11 +543,11 @@ You can also fork the project on github and run your fork's [build workflow](.gi
|
||||
-N, --concurrent-fragments N Number of fragments of a dash/hlsnative
|
||||
video that should be downloaded concurrently
|
||||
(default is 1)
|
||||
-r, --limit-rate RATE Maximum download rate in bytes per second
|
||||
(e.g. 50K or 4.2M)
|
||||
-r, --limit-rate RATE Maximum download rate in bytes per second,
|
||||
e.g. 50K or 4.2M
|
||||
--throttled-rate RATE Minimum download rate in bytes per second
|
||||
below which throttling is assumed and the
|
||||
video data is re-extracted (e.g. 100K)
|
||||
video data is re-extracted, e.g. 100K
|
||||
-R, --retries RETRIES Number of retries (default is 10), or
|
||||
"infinite"
|
||||
--file-access-retries RETRIES Number of times to retry on file access
|
||||
@@ -554,7 +561,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi
|
||||
be a number, linear=START[:END[:STEP=1]] or
|
||||
exp=START[:END[:BASE=2]]. This option can be
|
||||
used multiple times to set the sleep for the
|
||||
different retry types. Eg: --retry-sleep
|
||||
different retry types, e.g. --retry-sleep
|
||||
linear=1::2 --retry-sleep fragment:exp=1:20
|
||||
--skip-unavailable-fragments Skip unavailable fragments for DASH,
|
||||
hlsnative and ISM downloads (default)
|
||||
@@ -566,14 +573,14 @@ You can also fork the project on github and run your fork's [build workflow](.gi
|
||||
downloading is finished
|
||||
--no-keep-fragments Delete downloaded fragments after
|
||||
downloading is finished (default)
|
||||
--buffer-size SIZE Size of download buffer (e.g. 1024 or 16K)
|
||||
--buffer-size SIZE Size of download buffer, e.g. 1024 or 16K
|
||||
(default is 1024)
|
||||
--resize-buffer The buffer size is automatically resized
|
||||
from an initial value of --buffer-size
|
||||
(default)
|
||||
--no-resize-buffer Do not automatically adjust the buffer size
|
||||
--http-chunk-size SIZE Size of a chunk for chunk-based HTTP
|
||||
downloading (e.g. 10485760 or 10M) (default
|
||||
downloading, e.g. 10485760 or 10M (default
|
||||
is disabled). May be useful for bypassing
|
||||
bandwidth throttling imposed by a webserver
|
||||
(experimental)
|
||||
@@ -598,10 +605,10 @@ You can also fork the project on github and run your fork's [build workflow](.gi
|
||||
the given regular expression. Time ranges
|
||||
prefixed by a "*" can also be used in place
|
||||
of chapters to download the specified range.
|
||||
Eg: --download-sections "*10:15-15:00"
|
||||
--download-sections "intro". Needs ffmpeg.
|
||||
This option can be used multiple times to
|
||||
download multiple sections
|
||||
Needs ffmpeg. This option can be used
|
||||
multiple times to download multiple
|
||||
sections, e.g. --download-sections
|
||||
"*10:15-15:00" --download-sections "intro"
|
||||
--downloader [PROTO:]NAME Name or path of the external downloader to
|
||||
use (optionally) prefixed by the protocols
|
||||
(http, ftp, m3u8, dash, rstp, rtmp, mms) to
|
||||
@@ -609,7 +616,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi
|
||||
aria2c, avconv, axel, curl, ffmpeg, httpie,
|
||||
wget. You can use this option multiple times
|
||||
to set different downloaders for different
|
||||
protocols. For example, --downloader aria2c
|
||||
protocols. E.g. --downloader aria2c
|
||||
--downloader "dash,m3u8:native" will use
|
||||
aria2c for http/ftp downloads, and the
|
||||
native downloader for dash/m3u8 downloads
|
||||
@@ -700,18 +707,20 @@ You can also fork the project on github and run your fork's [build workflow](.gi
|
||||
and dump cookie jar in
|
||||
--no-cookies Do not read/dump cookies from/to file
|
||||
(default)
|
||||
--cookies-from-browser BROWSER[+KEYRING][:PROFILE]
|
||||
The name of the browser and (optionally) the
|
||||
name/path of the profile to load cookies
|
||||
from, separated by a ":". Currently
|
||||
supported browsers are: brave, chrome,
|
||||
chromium, edge, firefox, opera, safari,
|
||||
vivaldi. By default, the most recently
|
||||
accessed profile is used. The keyring used
|
||||
for decrypting Chromium cookies on Linux can
|
||||
be (optionally) specified after the browser
|
||||
name separated by a "+". Currently supported
|
||||
keyrings are: basictext, gnomekeyring, kwallet
|
||||
--cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER]
|
||||
The name of the browser to load cookies
|
||||
from. Currently supported browsers are:
|
||||
brave, chrome, chromium, edge, firefox,
|
||||
opera, safari, vivaldi. Optionally, the
|
||||
KEYRING used for decrypting Chromium cookies
|
||||
on Linux, the name/path of the PROFILE to
|
||||
load cookies from, and the CONTAINER name
|
||||
(if Firefox) ("none" for no container) can
|
||||
be given with their respective seperators.
|
||||
By default, all containers of the most
|
||||
recently accessed profile are used.
|
||||
Currently supported keyrings are: basictext,
|
||||
gnomekeyring, kwallet
|
||||
--no-cookies-from-browser Do not load cookies from browser (default)
|
||||
--cache-dir DIR Location in the filesystem where youtube-dl
|
||||
can store some downloaded information (such
|
||||
@@ -791,7 +800,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi
|
||||
"postprocess:", or "postprocess-title:".
|
||||
The video's fields are accessible under the
|
||||
"info" key and the progress attributes are
|
||||
accessible under "progress" key. E.g.:
|
||||
accessible under "progress" key. E.g.
|
||||
--console-title --progress-template
|
||||
"download-title:%(info.id)s-%(progress.eta)s"
|
||||
-v, --verbose Print various debugging information
|
||||
@@ -860,7 +869,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi
|
||||
-F, --list-formats List available formats of each video.
|
||||
Simulate unless --no-simulate is used
|
||||
--merge-output-format FORMAT Containers that may be used when merging
|
||||
formats, separated by "/" (Eg: "mp4/mkv").
|
||||
formats, separated by "/", e.g. "mp4/mkv".
|
||||
Ignored if no merge is required. (currently
|
||||
supported: avi, flv, mkv, mov, mp4, webm)
|
||||
|
||||
@@ -874,13 +883,13 @@ You can also fork the project on github and run your fork's [build workflow](.gi
|
||||
--list-subs List available subtitles of each video.
|
||||
Simulate unless --no-simulate is used
|
||||
--sub-format FORMAT Subtitle format; accepts formats preference,
|
||||
Eg: "srt" or "ass/srt/best"
|
||||
e.g. "srt" or "ass/srt/best"
|
||||
--sub-langs LANGS Languages of the subtitles to download (can
|
||||
be regex) or "all" separated by commas. (Eg:
|
||||
--sub-langs "en.*,ja") You can prefix the
|
||||
be regex) or "all" separated by commas, e.g.
|
||||
--sub-langs "en.*,ja". You can prefix the
|
||||
language code with a "-" to exclude it from
|
||||
the requested languages. (Eg: --sub-langs
|
||||
all,-live_chat) Use --list-subs for a list
|
||||
the requested languages, e.g. --sub-langs
|
||||
all,-live_chat. Use --list-subs for a list
|
||||
of available language tags
|
||||
|
||||
## Authentication Options:
|
||||
@@ -929,7 +938,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi
|
||||
m4a, mka, mp3, ogg, opus, vorbis, wav). If
|
||||
target container does not support the
|
||||
video/audio codec, remuxing will fail. You
|
||||
can specify multiple rules; Eg.
|
||||
can specify multiple rules; e.g.
|
||||
"aac>m4a/mov>mp4/mkv" will remux aac to m4a,
|
||||
mov to mp4 and anything else to mkv
|
||||
--recode-video FORMAT Re-encode the video into another format if
|
||||
@@ -954,7 +963,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi
|
||||
for ffmpeg/ffprobe, "_i"/"_o" can be
|
||||
appended to the prefix optionally followed
|
||||
by a number to pass the argument before the
|
||||
specified input/output file. Eg: --ppa
|
||||
specified input/output file, e.g. --ppa
|
||||
"Merger+ffmpeg_i1:-v quiet". You can use
|
||||
this option multiple times to give different
|
||||
arguments to different postprocessors.
|
||||
@@ -1081,7 +1090,7 @@ Make chapter entries for, or remove various segments (sponsor,
|
||||
music_offtopic, poi_highlight, all and
|
||||
default (=all). You can prefix the category
|
||||
with a "-" to exclude it. See [1] for
|
||||
description of the categories. Eg:
|
||||
description of the categories. E.g.
|
||||
--sponsorblock-mark all,-preview
|
||||
[1] https://wiki.sponsor.ajay.app/w/Segment_Categories
|
||||
--sponsorblock-remove CATS SponsorBlock categories to be removed from
|
||||
@@ -1140,7 +1149,7 @@ You can configure yt-dlp by placing any supported command line option to a confi
|
||||
|
||||
1. **System Configuration**: `/etc/yt-dlp.conf`
|
||||
|
||||
For example, with the following configuration file yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory:
|
||||
E.g. with the following configuration file yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory:
|
||||
```
|
||||
# Lines starting with # are comments
|
||||
|
||||
@@ -1178,7 +1187,7 @@ After that you can add credentials for an extractor in the following format, whe
|
||||
```
|
||||
machine <extractor> login <username> password <password>
|
||||
```
|
||||
For example:
|
||||
E.g.
|
||||
```
|
||||
machine youtube login myaccount@gmail.com password my_youtube_password
|
||||
machine twitch login my_twitch_account_name password my_twitch_password
|
||||
@@ -1197,39 +1206,38 @@ The `-o` option is used to indicate a template for the output file names while `
|
||||
|
||||
The simplest usage of `-o` is not to set any template arguments when downloading a single file, like in `yt-dlp -o funny_video.flv "https://some/video"` (hard-coding file extension like this is _not_ recommended and could break some post-processing).
|
||||
|
||||
It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [Python string formatting operations](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations.
|
||||
It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [Python string formatting operations](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting), e.g. `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations.
|
||||
|
||||
The field names themselves (the part inside the parenthesis) can also have some special formatting:
|
||||
|
||||
1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. `%()s` refers to the entire infodict. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields
|
||||
1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. E.g. `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. `%()s` refers to the entire infodict. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields
|
||||
|
||||
1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. Eg: `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d`
|
||||
1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. E.g. `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d`
|
||||
|
||||
1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s`
|
||||
1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. E.g. `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s`
|
||||
|
||||
1. **Alternatives**: Alternate fields can be specified separated with a `,`. Eg: `%(release_date>%Y,upload_date>%Y|Unknown)s`
|
||||
1. **Alternatives**: Alternate fields can be specified separated with a `,`. E.g. `%(release_date>%Y,upload_date>%Y|Unknown)s`
|
||||
|
||||
1. **Replacement**: A replacement value can specified using a `&` separator. If the field is *not* empty, this replacement value will be used instead of the actual field content. This is done after alternate fields are considered; thus the replacement is used if *any* of the alternative fields is *not* empty.
|
||||
|
||||
1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s`
|
||||
1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-template`. E.g. `%(uploader|Unknown)s`
|
||||
|
||||
1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, yt-dlp additionally supports converting to `B` = **B**ytes, `j` = **j**son (flag `#` for pretty-printing), `h` = HTML escaping, `l` = a comma separated **l**ist (flag `#` for `\n` newline-separated), `q` = a string **q**uoted for the terminal (flag `#` to split a list into different arguments), `D` = add **D**ecimal suffixes (Eg: 10M) (flag `#` to use 1024 as factor), and `S` = **S**anitize as filename (flag `#` for restricted)
|
||||
1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, yt-dlp additionally supports converting to `B` = **B**ytes, `j` = **j**son (flag `#` for pretty-printing), `h` = HTML escaping, `l` = a comma separated **l**ist (flag `#` for `\n` newline-separated), `q` = a string **q**uoted for the terminal (flag `#` to split a list into different arguments), `D` = add **D**ecimal suffixes (e.g. 10M) (flag `#` to use 1024 as factor), and `S` = **S**anitize as filename (flag `#` for restricted)
|
||||
|
||||
1. **Unicode normalization**: The format type `U` can be used for NFC [unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. Eg: `%(title)+.100U` is NFKC
|
||||
1. **Unicode normalization**: The format type `U` can be used for NFC [unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. E.g. `%(title)+.100U` is NFKC
|
||||
|
||||
To summarize, the general syntax for a field is:
|
||||
```
|
||||
%(name[.keys][addition][>strf][,alternate][&replacement][|default])[flags][width][.precision][length]type
|
||||
```
|
||||
|
||||
Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `link`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`, `pl_video`. For example, `-o "%(title)s.%(ext)s" -o "thumbnail:%(title)s\%(title)s.%(ext)s"` will put the thumbnails in a folder with the same name as the video. If any of the templates is empty, that type of file will not be written. Eg: `--write-thumbnail -o "thumbnail:"` will write thumbnails only for playlists and not for video.
|
||||
Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `link`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`, `pl_video`. E.g. `-o "%(title)s.%(ext)s" -o "thumbnail:%(title)s\%(title)s.%(ext)s"` will put the thumbnails in a folder with the same name as the video. If any of the templates is empty, that type of file will not be written. E.g. `--write-thumbnail -o "thumbnail:"` will write thumbnails only for playlists and not for video.
|
||||
|
||||
The available fields are:
|
||||
|
||||
- `id` (string): Video identifier
|
||||
- `title` (string): Video title
|
||||
- `fulltitle` (string): Video title ignoring live timestamp and generic title
|
||||
- `url` (string): Video URL
|
||||
- `ext` (string): Video filename extension
|
||||
- `alt_title` (string): A secondary title of the video
|
||||
- `description` (string): The description of the video
|
||||
@@ -1264,26 +1272,6 @@ The available fields are:
|
||||
- `availability` (string): Whether the video is "private", "premium_only", "subscriber_only", "needs_auth", "unlisted" or "public"
|
||||
- `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL
|
||||
- `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL
|
||||
- `format` (string): A human-readable description of the format
|
||||
- `format_id` (string): Format code specified by `--format`
|
||||
- `format_note` (string): Additional info about the format
|
||||
- `width` (numeric): Width of the video
|
||||
- `height` (numeric): Height of the video
|
||||
- `resolution` (string): Textual description of width and height
|
||||
- `tbr` (numeric): Average bitrate of audio and video in KBit/s
|
||||
- `abr` (numeric): Average audio bitrate in KBit/s
|
||||
- `acodec` (string): Name of the audio codec in use
|
||||
- `asr` (numeric): Audio sampling rate in Hertz
|
||||
- `vbr` (numeric): Average video bitrate in KBit/s
|
||||
- `fps` (numeric): Frame rate
|
||||
- `dynamic_range` (string): The dynamic range of the video
|
||||
- `audio_channels` (numeric): The number of audio channels
|
||||
- `stretched_ratio` (float): `width:height` of the video's pixels, if not square
|
||||
- `vcodec` (string): Name of the video codec in use
|
||||
- `container` (string): Name of the container format
|
||||
- `filesize` (numeric): The number of bytes, if known in advance
|
||||
- `filesize_approx` (numeric): An estimate for the number of bytes
|
||||
- `protocol` (string): The protocol that will be used for the actual download
|
||||
- `extractor` (string): Name of the extractor
|
||||
- `extractor_key` (string): Key name of the extractor
|
||||
- `epoch` (numeric): Unix epoch of when the information extraction was completed
|
||||
@@ -1302,6 +1290,8 @@ The available fields are:
|
||||
- `webpage_url_basename` (string): The basename of the webpage URL
|
||||
- `webpage_url_domain` (string): The domain of the webpage URL
|
||||
- `original_url` (string): The URL given by the user (or same as `webpage_url` for playlist entries)
|
||||
|
||||
All the fields in [Filtering Formats](#filtering-formats) can also be used
|
||||
|
||||
Available for the video that belongs to some logical chapter or section:
|
||||
|
||||
@@ -1358,13 +1348,13 @@ Available only in `--sponsorblock-chapter-title`:
|
||||
- `category_names` (list): Friendly names of the categories
|
||||
- `name` (string): Friendly name of the smallest category
|
||||
|
||||
Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKc`, this will result in a `yt-dlp test video-BaW_jenozKc.mp4` file created in the current directory.
|
||||
Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. E.g. for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKc`, this will result in a `yt-dlp test video-BaW_jenozKc.mp4` file created in the current directory.
|
||||
|
||||
Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default).
|
||||
|
||||
**Tip**: Look at the `-j` output to identify which fields are available for the particular URL
|
||||
|
||||
For numeric sequences you can use [numeric related formatting](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting), for example, `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`.
|
||||
For numeric sequences you can use [numeric related formatting](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting); e.g. `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`.
|
||||
|
||||
Output templates can also contain arbitrary hierarchical path, e.g. `-o "%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s"` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.
|
||||
|
||||
@@ -1383,13 +1373,13 @@ If you are using an output template inside a Windows batch file then you must es
|
||||
#### Output template examples
|
||||
|
||||
```bash
|
||||
$ yt-dlp --get-filename -o "test video.%(ext)s" BaW_jenozKc
|
||||
$ yt-dlp --print filename -o "test video.%(ext)s" BaW_jenozKc
|
||||
test video.webm # Literal name with correct extension
|
||||
|
||||
$ yt-dlp --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
|
||||
$ yt-dlp --print filename -o "%(title)s.%(ext)s" BaW_jenozKc
|
||||
youtube-dl test video ''_ä↭𝕐.webm # All kinds of weird characters
|
||||
|
||||
$ yt-dlp --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
|
||||
$ yt-dlp --print filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
|
||||
youtube-dl_test_video_.webm # Restricted file name
|
||||
|
||||
# Download YouTube playlist videos in separate directory indexed by video order in a playlist
|
||||
@@ -1434,7 +1424,7 @@ The general syntax for format selection is `-f FORMAT` (or `--format FORMAT`) wh
|
||||
**tl;dr:** [navigate me to examples](#format-selection-examples).
|
||||
<!-- MANPAGE: END EXCLUDED SECTION -->
|
||||
|
||||
The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
|
||||
The simplest case is requesting a specific format; e.g. with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for a particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
|
||||
|
||||
You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file.
|
||||
|
||||
@@ -1461,15 +1451,15 @@ For example, to download the worst quality video-only format you can use `-f wor
|
||||
|
||||
You can select the n'th best format of a type by using `best<type>.<n>`. For example, `best.2` will select the 2nd best combined format. Similarly, `bv*.3` will select the 3rd best format that contains a video stream.
|
||||
|
||||
If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes. Note that formats on the left hand side are preferred, for example `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download.
|
||||
If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes. Note that formats on the left hand side are preferred; e.g. `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download.
|
||||
|
||||
If you want to download several formats of the same video use a comma as a separator, e.g. `-f 22,17,18` will download all these three formats, of course if they are available. Or a more sophisticated example combined with the precedence feature: `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`.
|
||||
|
||||
You can merge the video and audio of multiple formats into a single file using `-f <format1>+<format2>+...` (requires ffmpeg installed), for example `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg.
|
||||
You can merge the video and audio of multiple formats into a single file using `-f <format1>+<format2>+...` (requires ffmpeg installed); e.g. `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg.
|
||||
|
||||
**Deprecation warning**: Since the *below* described behavior is complex and counter-intuitive, this will be removed and multistreams will be enabled by default in the future. A new operator will be instead added to limit formats to single audio/video
|
||||
|
||||
Unless `--video-multistreams` is used, all formats with a video stream except the first one are ignored. Similarly, unless `--audio-multistreams` is used, all formats with an audio stream except the first one are ignored. For example, `-f bestvideo+best+bestaudio --video-multistreams --audio-multistreams` will download and merge all 3 given formats. The resulting file will have 2 video streams and 2 audio streams. But `-f bestvideo+best+bestaudio --no-video-multistreams` will download and merge only `bestvideo` and `bestaudio`. `best` is ignored since another format containing a video stream (`bestvideo`) has already been selected. The order of the formats is therefore important. `-f best+bestaudio --no-audio-multistreams` will download and merge both formats while `-f bestaudio+best --no-audio-multistreams` will ignore `best` and download only `bestaudio`.
|
||||
Unless `--video-multistreams` is used, all formats with a video stream except the first one are ignored. Similarly, unless `--audio-multistreams` is used, all formats with an audio stream except the first one are ignored. E.g. `-f bestvideo+best+bestaudio --video-multistreams --audio-multistreams` will download and merge all 3 given formats. The resulting file will have 2 video streams and 2 audio streams. But `-f bestvideo+best+bestaudio --no-video-multistreams` will download and merge only `bestvideo` and `bestaudio`. `best` is ignored since another format containing a video stream (`bestvideo`) has already been selected. The order of the formats is therefore important. `-f best+bestaudio --no-audio-multistreams` will download and merge both formats while `-f bestaudio+best --no-audio-multistreams` will ignore `best` and download only `bestaudio`.
|
||||
|
||||
## Filtering Formats
|
||||
|
||||
@@ -1478,6 +1468,7 @@ You can also filter the video formats by putting a condition in brackets, as in
|
||||
The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals):
|
||||
|
||||
- `filesize`: The number of bytes, if known in advance
|
||||
- `filesize_approx`: An estimate for the number of bytes
|
||||
- `width`: Width of the video, if known
|
||||
- `height`: Height of the video, if known
|
||||
- `tbr`: Average bitrate of audio and video in KBit/s
|
||||
@@ -1485,24 +1476,31 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `
|
||||
- `vbr`: Average video bitrate in KBit/s
|
||||
- `asr`: Audio sampling rate in Hertz
|
||||
- `fps`: Frame rate
|
||||
- `audio_channels`: The number of audio channels
|
||||
- `stretched_ratio`: `width:height` of the video's pixels, if not square
|
||||
|
||||
Filtering also works for the comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains), `~=` (matches regex) and the following string meta fields:
|
||||
|
||||
- `url`: Video URL
|
||||
- `ext`: File extension
|
||||
- `acodec`: Name of the audio codec in use
|
||||
- `vcodec`: Name of the video codec in use
|
||||
- `container`: Name of the container format
|
||||
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
|
||||
- `format_id`: A short description of the format
|
||||
- `language`: Language code
|
||||
- `dynamic_range`: The dynamic range of the video
|
||||
- `format_id`: A short description of the format
|
||||
- `format`: A human-readable description of the format
|
||||
- `format_note`: Additional info about the format
|
||||
- `resolution`: Textual description of width and height
|
||||
|
||||
Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). The comparand of a string comparison needs to be quoted with either double or single quotes if it contains spaces or special characters other than `._-`.
|
||||
|
||||
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by the particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering.
|
||||
|
||||
Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. You can also use the filters with `all` to download all formats that satisfy the filter. For example, `-f "all[vcodec=none]"` selects all audio-only formats.
|
||||
Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats.
|
||||
|
||||
Format selectors can also be grouped using parentheses, for example if you want to download the best pre-merged mp4 and webm formats with a height lower than 480 you can use `-f "(mp4,webm)[height<480]"`.
|
||||
Format selectors can also be grouped using parentheses; e.g. `-f "(mp4,webm)[height<480]"` will download the best pre-merged mp4 and webm formats with a height lower than 480.
|
||||
|
||||
## Sorting Formats
|
||||
|
||||
@@ -1521,7 +1519,7 @@ The available fields are:
|
||||
- `acodec`: Audio Codec (`flac`/`alac` > `wav`/`aiff` > `opus` > `vorbis` > `aac` > `mp4a` > `mp3` > `eac3` > `ac3` > `dts` > other)
|
||||
- `codec`: Equivalent to `vcodec,acodec`
|
||||
- `vext`: Video Extension (`mp4` > `webm` > `flv` > other). If `--prefer-free-formats` is used, `webm` is preferred.
|
||||
- `aext`: Audio Extension (`m4a` > `aac` > `mp3` > `ogg` > `opus` > `webm` > other). If `--prefer-free-formats` is used, the order changes to `opus` > `ogg` > `webm` > `m4a` > `mp3` > `aac`.
|
||||
- `aext`: Audio Extension (`m4a` > `aac` > `mp3` > `ogg` > `opus` > `webm` > other). If `--prefer-free-formats` is used, the order changes to `ogg` > `opus` > `webm` > `mp3` > `m4a` > `aac`
|
||||
- `ext`: Equivalent to `vext,aext`
|
||||
- `filesize`: Exact filesize, if known in advance
|
||||
- `fs_approx`: Approximate filesize calculated from the manifests
|
||||
@@ -1540,11 +1538,11 @@ The available fields are:
|
||||
|
||||
**Deprecation warning**: Many of these fields have (currently undocumented) aliases, that may be removed in a future version. It is recommended to use only the documented field names.
|
||||
|
||||
All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. Eg: `+res` prefers format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. Eg: `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. Eg: `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the nearest values to the provided by using `~` as the delimiter. Eg: `filesize~1G` prefers the format with filesize closest to 1 GiB.
|
||||
All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. E.g. `+res` prefers format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. E.g. `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. E.g. `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the nearest values to the provided by using `~` as the delimiter. E.g. `filesize~1G` prefers the format with filesize closest to 1 GiB.
|
||||
|
||||
The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,hdr:12,codec:vp9.2,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order.
|
||||
The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,hdr:12,vcodec:vp9.2,channels,acodec,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order.
|
||||
|
||||
Note that the default has `codec:vp9.2`; i.e. `av1` is not preferred. Similarly, the default for hdr is `hdr:12`; i.e. dolby vision is not preferred. These choices are made since DV and AV1 formats are not yet fully compatible with most devices. This may be changed in the future as more devices become capable of smoothly playing back these formats.
|
||||
Note that the default has `vcodec:vp9.2`; i.e. `av1` is not preferred. Similarly, the default for hdr is `hdr:12`; i.e. dolby vision is not preferred. These choices are made since DV and AV1 formats are not yet fully compatible with most devices. This may be changed in the future as more devices become capable of smoothly playing back these formats.
|
||||
|
||||
If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all respects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`.
|
||||
|
||||
@@ -1685,9 +1683,9 @@ Note that any field created by this can be used in the [output template](#output
|
||||
|
||||
This option also has a few special uses:
|
||||
|
||||
* You can download an additional URL based on the metadata of the currently downloaded video. To do this, set the field `additional_urls` to the URL that you want to download. Eg: `--parse-metadata "description:(?P<additional_urls>https?://www\.vimeo\.com/\d+)` will download the first vimeo video found in the description
|
||||
* You can download an additional URL based on the metadata of the currently downloaded video. To do this, set the field `additional_urls` to the URL that you want to download. E.g. `--parse-metadata "description:(?P<additional_urls>https?://www\.vimeo\.com/\d+)"` will download the first vimeo video found in the description
|
||||
|
||||
* You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to `meta_description` field will be added to the `description` field in the file. For example, you can use this to set a different "description" and "synopsis". To modify the metadata of individual streams, use the `meta<n>_` prefix (Eg: `meta1_language`). Any value set to the `meta_` field will overwrite all default values.
|
||||
* You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to `meta_description` field will be added to the `description` field in the file - you can use this to set a different "description" and "synopsis". To modify the metadata of individual streams, use the `meta<n>_` prefix (e.g. `meta1_language`). Any value set to the `meta_` field will overwrite all default values.
|
||||
|
||||
**Note**: Metadata modification happens before format selection, post-extraction and other post-processing operations. Some fields may be added or changed during these steps, overriding your changes.
|
||||
|
||||
@@ -1746,21 +1744,19 @@ $ yt-dlp --replace-in-metadata "title,uploader" "[ _]" "-"
|
||||
|
||||
# EXTRACTOR ARGUMENTS
|
||||
|
||||
Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. Eg: `--extractor-args "youtube:player-client=android_embedded,web;include_live_dash" --extractor-args "funimation:version=uncut"`
|
||||
Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=android_embedded,web;include_live_dash" --extractor-args "funimation:version=uncut"`
|
||||
|
||||
The following extractors use this feature:
|
||||
|
||||
#### youtube
|
||||
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
|
||||
* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (Eg: `web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
|
||||
* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` URLs. You can use `all` to use all the clients, and `default` for the default clients.
|
||||
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
|
||||
* `include_live_dash`: Include live dash formats even without `--live-from-start` (These formats don't download properly)
|
||||
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
|
||||
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`
|
||||
* E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
|
||||
* `innertube_host`: Innertube API host to use for all API requests
|
||||
* e.g. `studio.youtube.com`, `youtubei.googleapis.com`
|
||||
* Note: Cookies exported from `www.youtube.com` will not work with hosts other than `*.youtube.com`
|
||||
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
|
||||
* `innertube_key`: Innertube API key to use for all API requests
|
||||
|
||||
#### youtubetab (YouTube playlists, channels, feeds, etc.)
|
||||
@@ -1768,17 +1764,16 @@ The following extractors use this feature:
|
||||
* `approximate_date`: Extract approximate `upload_date` in flat-playlist. This may cause date-based filters to be slightly off
|
||||
|
||||
#### funimation
|
||||
* `language`: Languages to extract. Eg: `funimation:language=english,japanese`
|
||||
* `language`: Languages to extract, e.g. `funimation:language=english,japanese`
|
||||
* `version`: The video version to extract - `uncut` or `simulcast`
|
||||
|
||||
#### crunchyroll
|
||||
* `language`: Languages to extract. Eg: `crunchyroll:language=jaJp`
|
||||
* `hardsub`: Which hard-sub versions to extract. Eg: `crunchyroll:hardsub=None,enUS`
|
||||
* `language`: Languages to extract, e.g. `crunchyroll:language=jaJp`
|
||||
* `hardsub`: Which hard-sub versions to extract, e.g. `crunchyroll:hardsub=None,enUS`
|
||||
|
||||
#### crunchyrollbeta
|
||||
* `format`: Which stream type(s) to extract. Default is `adaptive_hls` Eg: `crunchyrollbeta:format=vo_adaptive_hls`
|
||||
* Potentially useful values include `adaptive_hls`, `adaptive_dash`, `vo_adaptive_hls`, `vo_adaptive_dash`, `download_hls`, `download_dash`, `multitrack_adaptive_hls_v2`
|
||||
* `hardsub`: Preference order for which hardsub versions to extract. Default is `None` (no hardsubs). Eg: `crunchyrollbeta:hardsub=en-US,None`
|
||||
* `format`: Which stream type(s) to extract (default: `adaptive_hls`). Potentially useful values include `adaptive_hls`, `adaptive_dash`, `vo_adaptive_hls`, `vo_adaptive_dash`, `download_hls`, `download_dash`, `multitrack_adaptive_hls_v2`
|
||||
* `hardsub`: Preference order for which hardsub versions to extract (default: `None` = no hardsubs), e.g. `crunchyrollbeta:hardsub=en-US,None`
|
||||
|
||||
#### vikichannel
|
||||
* `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers`
|
||||
@@ -1798,11 +1793,11 @@ The following extractors use this feature:
|
||||
* `dr`: dynamic range to ignore - one or more of `sdr`, `hdr10`, `dv`
|
||||
|
||||
#### tiktok
|
||||
* `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`. (e.g. `20.2.1`)
|
||||
* `manifest_app_version`: Numeric app version to call mobile APIs with. (e.g. `221`)
|
||||
* `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`, e.g. `20.2.1`
|
||||
* `manifest_app_version`: Numeric app version to call mobile APIs with, e.g. `221`
|
||||
|
||||
#### rokfinchannel
|
||||
* `tab`: Which tab to download. One of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`. (E.g. `rokfinchannel:tab=streams`)
|
||||
* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`
|
||||
|
||||
|
||||
NOTE: These options may be changed/removed in the future without concern for backward compatibility
|
||||
@@ -2061,12 +2056,13 @@ While these options are redundant, they are still expected to be used due to the
|
||||
#### Not recommended
|
||||
While these options still work, their use is not recommended since there are other alternatives to achieve the same
|
||||
|
||||
--force-generic-extractor --ies generic,default
|
||||
--exec-before-download CMD --exec "before_dl:CMD"
|
||||
--no-exec-before-download --no-exec
|
||||
--all-formats -f all
|
||||
--all-subs --sub-langs all --write-subs
|
||||
--print-json -j --no-simulate
|
||||
--autonumber-size NUMBER Use string formatting. Eg: %(autonumber)03d
|
||||
--autonumber-size NUMBER Use string formatting, e.g. %(autonumber)03d
|
||||
--autonumber-start NUMBER Use internal field formatting like %(autonumber+NUMBER)s
|
||||
--id -o "%(id)s.%(ext)s"
|
||||
--metadata-from-title FORMAT --parse-metadata "%(title)s:FORMAT"
|
||||
|
||||
1
devscripts/__init__.py
Normal file
1
devscripts/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Empty file needed to make devscripts.utils properly importable from outside
|
||||
@@ -11,14 +11,17 @@ from ..utils import (
|
||||
|
||||
# These bloat the lazy_extractors, so allow them to passthrough silently
|
||||
ALLOWED_CLASSMETHODS = {'get_testcases', 'extract_from_webpage'}
|
||||
_WARNED = False
|
||||
|
||||
|
||||
class LazyLoadMetaClass(type):
|
||||
def __getattr__(cls, name):
|
||||
if '_real_class' not in cls.__dict__ and name not in ALLOWED_CLASSMETHODS:
|
||||
write_string(
|
||||
'WARNING: Falling back to normal extractor since lazy extractor '
|
||||
f'{cls.__name__} does not have attribute {name}{bug_reports_message()}\n')
|
||||
global _WARNED
|
||||
if ('_real_class' not in cls.__dict__
|
||||
and name not in ALLOWED_CLASSMETHODS and not _WARNED):
|
||||
_WARNED = True
|
||||
write_string('WARNING: Falling back to normal extractor since lazy extractor '
|
||||
f'{cls.__name__} does not have attribute {name}{bug_reports_message()}\n')
|
||||
return getattr(cls.real_class, name)
|
||||
|
||||
|
||||
|
||||
@@ -12,7 +12,9 @@ from inspect import getsource
|
||||
from devscripts.utils import get_filename_args, read_file, write_file
|
||||
|
||||
NO_ATTR = object()
|
||||
STATIC_CLASS_PROPERTIES = ['IE_NAME', 'IE_DESC', 'SEARCH_KEY', '_VALID_URL', '_WORKING', '_NETRC_MACHINE', 'age_limit']
|
||||
STATIC_CLASS_PROPERTIES = [
|
||||
'IE_NAME', 'IE_DESC', 'SEARCH_KEY', '_VALID_URL', '_WORKING', '_ENABLED', '_NETRC_MACHINE', 'age_limit'
|
||||
]
|
||||
CLASS_METHODS = [
|
||||
'ie_key', 'working', 'description', 'suitable', '_match_valid_url', '_match_id', 'get_temp_id', 'is_suitable'
|
||||
]
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
#!/usr/bin/env sh
|
||||
|
||||
if [ -z $1 ]; then
|
||||
if [ -z "$1" ]; then
|
||||
test_set='test'
|
||||
elif [ $1 = 'core' ]; then
|
||||
elif [ "$1" = 'core' ]; then
|
||||
test_set="-m not download"
|
||||
elif [ $1 = 'download' ]; then
|
||||
elif [ "$1" = 'download' ]; then
|
||||
test_set="-m download"
|
||||
else
|
||||
echo 'Invalid test type "'$1'". Use "core" | "download"'
|
||||
echo 'Invalid test type "'"$1"'". Use "core" | "download"'
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
@@ -81,7 +81,7 @@ def version_to_list(version):
|
||||
def dependency_options():
|
||||
# Due to the current implementation, these are auto-detected, but explicitly add them just in case
|
||||
dependencies = [pycryptodome_module(), 'mutagen', 'brotli', 'certifi', 'websockets']
|
||||
excluded_modules = ['test', 'ytdlp_plugins', 'youtube_dl', 'youtube_dlc']
|
||||
excluded_modules = ('youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts')
|
||||
|
||||
yield from (f'--hidden-import={module}' for module in dependencies)
|
||||
yield '--collect-submodules=websockets'
|
||||
|
||||
2
setup.py
2
setup.py
@@ -28,7 +28,7 @@ REQUIREMENTS = read_file('requirements.txt').splitlines()
|
||||
|
||||
def packages():
|
||||
if setuptools_available:
|
||||
return find_packages(exclude=('youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins'))
|
||||
return find_packages(exclude=('youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts'))
|
||||
|
||||
return [
|
||||
'yt_dlp', 'yt_dlp.extractor', 'yt_dlp.downloader', 'yt_dlp.postprocessor', 'yt_dlp.compat',
|
||||
|
||||
@@ -128,6 +128,8 @@
|
||||
- **bbc.co.uk:iplayer:group**
|
||||
- **bbc.co.uk:playlist**
|
||||
- **BBVTV**: [<abbr title="netrc machine"><em>bbvtv</em></abbr>]
|
||||
- **BBVTVLive**: [<abbr title="netrc machine"><em>bbvtv</em></abbr>]
|
||||
- **BBVTVRecordings**: [<abbr title="netrc machine"><em>bbvtv</em></abbr>]
|
||||
- **Beatport**
|
||||
- **Beeg**
|
||||
- **BehindKink**
|
||||
@@ -348,6 +350,8 @@
|
||||
- **ehftv**
|
||||
- **eHow**
|
||||
- **EinsUndEinsTV**: [<abbr title="netrc machine"><em>1und1tv</em></abbr>]
|
||||
- **EinsUndEinsTVLive**: [<abbr title="netrc machine"><em>1und1tv</em></abbr>]
|
||||
- **EinsUndEinsTVRecordings**: [<abbr title="netrc machine"><em>1und1tv</em></abbr>]
|
||||
- **Einthusan**
|
||||
- **eitb.tv**
|
||||
- **EllenTube**
|
||||
@@ -360,6 +364,7 @@
|
||||
- **Engadget**
|
||||
- **Epicon**
|
||||
- **EpiconSeries**
|
||||
- **Epoch**
|
||||
- **Eporner**
|
||||
- **EroProfile**: [<abbr title="netrc machine"><em>eroprofile</em></abbr>]
|
||||
- **EroProfile:album**
|
||||
@@ -373,13 +378,17 @@
|
||||
- **EsriVideo**
|
||||
- **Europa**
|
||||
- **EuropeanTour**
|
||||
- **Eurosport**
|
||||
- **EUScreen**
|
||||
- **EWETV**: [<abbr title="netrc machine"><em>ewetv</em></abbr>]
|
||||
- **EWETVLive**: [<abbr title="netrc machine"><em>ewetv</em></abbr>]
|
||||
- **EWETVRecordings**: [<abbr title="netrc machine"><em>ewetv</em></abbr>]
|
||||
- **ExpoTV**
|
||||
- **Expressen**
|
||||
- **ExtremeTube**
|
||||
- **EyedoTV**
|
||||
- **facebook**: [<abbr title="netrc machine"><em>facebook</em></abbr>]
|
||||
- **facebook:reel**
|
||||
- **FacebookPluginsVideo**
|
||||
- **fancode:live**: [<abbr title="netrc machine"><em>fancode</em></abbr>]
|
||||
- **fancode:vod**: [<abbr title="netrc machine"><em>fancode</em></abbr>]
|
||||
@@ -453,6 +462,8 @@
|
||||
- **GiantBomb**
|
||||
- **Giga**
|
||||
- **GlattvisionTV**: [<abbr title="netrc machine"><em>glattvisiontv</em></abbr>]
|
||||
- **GlattvisionTVLive**: [<abbr title="netrc machine"><em>glattvisiontv</em></abbr>]
|
||||
- **GlattvisionTVRecordings**: [<abbr title="netrc machine"><em>glattvisiontv</em></abbr>]
|
||||
- **Glide**: Glide mobile video messages (glide.me)
|
||||
- **Globo**: [<abbr title="netrc machine"><em>globo</em></abbr>]
|
||||
- **GloboArticle**
|
||||
@@ -544,6 +555,8 @@
|
||||
- **iq.com**: International version of iQiyi
|
||||
- **iq.com:album**
|
||||
- **iqiyi**: [<abbr title="netrc machine"><em>iqiyi</em></abbr>] 爱奇艺
|
||||
- **IslamChannel**
|
||||
- **IslamChannelSeries**
|
||||
- **ITProTV**
|
||||
- **ITProTVCourse**
|
||||
- **ITTF**
|
||||
@@ -709,10 +722,13 @@
|
||||
- **mixcloud:playlist**
|
||||
- **mixcloud:user**
|
||||
- **MLB**
|
||||
- **MLBTV**: [<abbr title="netrc machine"><em>mlb</em></abbr>]
|
||||
- **MLBVideo**
|
||||
- **MLSSoccer**
|
||||
- **Mnet**
|
||||
- **MNetTV**: [<abbr title="netrc machine"><em>mnettv</em></abbr>]
|
||||
- **MNetTVLive**: [<abbr title="netrc machine"><em>mnettv</em></abbr>]
|
||||
- **MNetTVRecordings**: [<abbr title="netrc machine"><em>mnettv</em></abbr>]
|
||||
- **MochaVideo**
|
||||
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
||||
- **Mofosex**
|
||||
@@ -726,6 +742,7 @@
|
||||
- **MovieClips**
|
||||
- **MovieFap**
|
||||
- **Moviepilot**
|
||||
- **MoviewPlay**
|
||||
- **Moviezine**
|
||||
- **MovingImage**
|
||||
- **MSN**
|
||||
@@ -798,13 +815,16 @@
|
||||
- **netease:program**: 网易云音乐 - 电台节目
|
||||
- **netease:singer**: 网易云音乐 - 歌手
|
||||
- **netease:song**: 网易云音乐
|
||||
- **NetPlus**: [<abbr title="netrc machine"><em>netplus</em></abbr>]
|
||||
- **NetPlusTV**: [<abbr title="netrc machine"><em>netplus</em></abbr>]
|
||||
- **NetPlusTVLive**: [<abbr title="netrc machine"><em>netplus</em></abbr>]
|
||||
- **NetPlusTVRecordings**: [<abbr title="netrc machine"><em>netplus</em></abbr>]
|
||||
- **Netverse**
|
||||
- **NetversePlaylist**
|
||||
- **Netzkino**
|
||||
- **Newgrounds**
|
||||
- **Newgrounds:playlist**
|
||||
- **Newgrounds:user**
|
||||
- **NewsPicks**
|
||||
- **Newstube**
|
||||
- **Newsy**
|
||||
- **NextMedia**: 蘋果日報
|
||||
@@ -903,6 +923,8 @@
|
||||
- **orf:radio**
|
||||
- **orf:tvthek**: ORF TVthek
|
||||
- **OsnatelTV**: [<abbr title="netrc machine"><em>osnateltv</em></abbr>]
|
||||
- **OsnatelTVLive**: [<abbr title="netrc machine"><em>osnateltv</em></abbr>]
|
||||
- **OsnatelTVRecordings**: [<abbr title="netrc machine"><em>osnateltv</em></abbr>]
|
||||
- **OutsideTV**
|
||||
- **PacktPub**: [<abbr title="netrc machine"><em>packtpub</em></abbr>]
|
||||
- **PacktPubCourse**
|
||||
@@ -916,6 +938,7 @@
|
||||
- **ParamountNetwork**
|
||||
- **ParamountPlus**
|
||||
- **ParamountPlusSeries**
|
||||
- **Parler**: Posts on parler.com
|
||||
- **parliamentlive.tv**: UK parliament videos
|
||||
- **Parlview**
|
||||
- **Patreon**
|
||||
@@ -1009,6 +1032,8 @@
|
||||
- **qqmusic:singer**: QQ音乐 - 歌手
|
||||
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
||||
- **QuantumTV**: [<abbr title="netrc machine"><em>quantumtv</em></abbr>]
|
||||
- **QuantumTVLive**: [<abbr title="netrc machine"><em>quantumtv</em></abbr>]
|
||||
- **QuantumTVRecordings**: [<abbr title="netrc machine"><em>quantumtv</em></abbr>]
|
||||
- **Qub**
|
||||
- **R7**
|
||||
- **R7Article**
|
||||
@@ -1117,7 +1142,11 @@
|
||||
- **safari:course**: [<abbr title="netrc machine"><em>safari</em></abbr>] safaribooksonline.com online courses
|
||||
- **Saitosan**
|
||||
- **SAKTV**: [<abbr title="netrc machine"><em>saktv</em></abbr>]
|
||||
- **SAKTVLive**: [<abbr title="netrc machine"><em>saktv</em></abbr>]
|
||||
- **SAKTVRecordings**: [<abbr title="netrc machine"><em>saktv</em></abbr>]
|
||||
- **SaltTV**: [<abbr title="netrc machine"><em>salttv</em></abbr>]
|
||||
- **SaltTVLive**: [<abbr title="netrc machine"><em>salttv</em></abbr>]
|
||||
- **SaltTVRecordings**: [<abbr title="netrc machine"><em>salttv</em></abbr>]
|
||||
- **SampleFocus**
|
||||
- **Sapo**: SAPO Vídeos
|
||||
- **savefrom.net**
|
||||
@@ -1307,6 +1336,8 @@
|
||||
- **ToypicsUser**: Toypics user profile
|
||||
- **TrailerAddict**: (**Currently broken**)
|
||||
- **TravelChannel**
|
||||
- **Triller**: [<abbr title="netrc machine"><em>triller</em></abbr>]
|
||||
- **TrillerUser**: [<abbr title="netrc machine"><em>triller</em></abbr>]
|
||||
- **Trilulilu**
|
||||
- **Trovo**
|
||||
- **TrovoChannelClip**: All Clips of a trovo.live channel; "trovoclip:" prefix
|
||||
@@ -1314,6 +1345,7 @@
|
||||
- **TrovoVod**
|
||||
- **TrueID**
|
||||
- **TruNews**
|
||||
- **Truth**
|
||||
- **TruTV**
|
||||
- **Tube8**
|
||||
- **TubeTuGraz**: [<abbr title="netrc machine"><em>tubetugraz</em></abbr>] tube.tugraz.at
|
||||
@@ -1481,6 +1513,8 @@
|
||||
- **VoxMedia**
|
||||
- **VoxMediaVolume**
|
||||
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
- **vqq:series**
|
||||
- **vqq:video**
|
||||
- **Vrak**
|
||||
- **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza
|
||||
- **VrtNU**: [<abbr title="netrc machine"><em>vrtnu</em></abbr>] VrtNU.be
|
||||
@@ -1489,6 +1523,8 @@
|
||||
- **VShare**
|
||||
- **VTM**
|
||||
- **VTXTV**: [<abbr title="netrc machine"><em>vtxtv</em></abbr>]
|
||||
- **VTXTVLive**: [<abbr title="netrc machine"><em>vtxtv</em></abbr>]
|
||||
- **VTXTVRecordings**: [<abbr title="netrc machine"><em>vtxtv</em></abbr>]
|
||||
- **VuClip**
|
||||
- **Vupload**
|
||||
- **VVVVID**
|
||||
@@ -1498,6 +1534,8 @@
|
||||
- **Wakanim**
|
||||
- **Walla**
|
||||
- **WalyTV**: [<abbr title="netrc machine"><em>walytv</em></abbr>]
|
||||
- **WalyTVLive**: [<abbr title="netrc machine"><em>walytv</em></abbr>]
|
||||
- **WalyTVRecordings**: [<abbr title="netrc machine"><em>walytv</em></abbr>]
|
||||
- **wasdtv:clip**
|
||||
- **wasdtv:record**
|
||||
- **wasdtv:stream**
|
||||
@@ -1584,7 +1622,7 @@
|
||||
- **youtube:clip**
|
||||
- **youtube:favorites**: YouTube liked videos; ":ytfav" keyword (requires cookies)
|
||||
- **youtube:history**: Youtube watch history; ":ythis" keyword (requires cookies)
|
||||
- **youtube:music:search_url**: YouTube music search URLs with selectable sections (Eg: #songs)
|
||||
- **youtube:music:search_url**: YouTube music search URLs with selectable sections, e.g. #songs
|
||||
- **youtube:notif**: YouTube notifications; ":ytnotif" keyword (requires cookies)
|
||||
- **youtube:playlist**: YouTube playlists
|
||||
- **youtube:recommended**: YouTube recommended videos; ":ytrec" keyword
|
||||
|
||||
@@ -668,7 +668,7 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
def test_prepare_outtmpl_and_filename(self):
|
||||
def test(tmpl, expected, *, info=None, **params):
|
||||
params['outtmpl'] = tmpl
|
||||
ydl = YoutubeDL(params)
|
||||
ydl = FakeYDL(params)
|
||||
ydl._num_downloads = 1
|
||||
self.assertEqual(ydl.validate_outtmpl(tmpl), None)
|
||||
|
||||
|
||||
@@ -105,11 +105,11 @@ def generator(test_case, tname):
|
||||
info_dict = tc.get('info_dict', {})
|
||||
params = tc.get('params', {})
|
||||
if not info_dict.get('id'):
|
||||
raise Exception('Test definition incorrect. \'id\' key is not present')
|
||||
raise Exception(f'Test {tname} definition incorrect - "id" key is not present')
|
||||
elif not info_dict.get('ext'):
|
||||
if params.get('skip_download') and params.get('ignore_no_formats_error'):
|
||||
continue
|
||||
raise Exception('Test definition incorrect. The output file cannot be known. \'ext\' key is not present')
|
||||
raise Exception(f'Test {tname} definition incorrect - "ext" key must be present to define the output file')
|
||||
|
||||
if 'skip' in test_case:
|
||||
print_skipping(test_case['skip'])
|
||||
@@ -161,7 +161,9 @@ def generator(test_case, tname):
|
||||
force_generic_extractor=params.get('force_generic_extractor', False))
|
||||
except (DownloadError, ExtractorError) as err:
|
||||
# Check if the exception is not a network related one
|
||||
if not err.exc_info[0] in (urllib.error.URLError, socket.timeout, UnavailableVideoError, http.client.BadStatusLine) or (err.exc_info[0] == urllib.error.HTTPError and err.exc_info[1].code == 503):
|
||||
if (err.exc_info[0] not in (urllib.error.URLError, socket.timeout, UnavailableVideoError, http.client.BadStatusLine)
|
||||
or (err.exc_info[0] == urllib.error.HTTPError and err.exc_info[1].code == 503)):
|
||||
err.msg = f'{getattr(err, "msg", err)} ({tname})'
|
||||
raise
|
||||
|
||||
if try_num == RETRIES:
|
||||
|
||||
@@ -11,41 +11,46 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
import contextlib
|
||||
import subprocess
|
||||
|
||||
from yt_dlp.utils import encodeArgument
|
||||
from yt_dlp.utils import Popen
|
||||
|
||||
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
|
||||
try:
|
||||
_DEV_NULL = subprocess.DEVNULL
|
||||
except AttributeError:
|
||||
_DEV_NULL = open(os.devnull, 'wb')
|
||||
LAZY_EXTRACTORS = 'yt_dlp/extractor/lazy_extractors.py'
|
||||
|
||||
|
||||
class TestExecution(unittest.TestCase):
|
||||
def test_import(self):
|
||||
subprocess.check_call([sys.executable, '-c', 'import yt_dlp'], cwd=rootDir)
|
||||
|
||||
def test_module_exec(self):
|
||||
subprocess.check_call([sys.executable, '-m', 'yt_dlp', '--ignore-config', '--version'], cwd=rootDir, stdout=_DEV_NULL)
|
||||
def run_yt_dlp(self, exe=(sys.executable, 'yt_dlp/__main__.py'), opts=('--version', )):
|
||||
stdout, stderr, returncode = Popen.run(
|
||||
[*exe, '--ignore-config', *opts], cwd=rootDir, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
print(stderr, file=sys.stderr)
|
||||
self.assertEqual(returncode, 0)
|
||||
return stdout.strip(), stderr.strip()
|
||||
|
||||
def test_main_exec(self):
|
||||
subprocess.check_call([sys.executable, 'yt_dlp/__main__.py', '--ignore-config', '--version'], cwd=rootDir, stdout=_DEV_NULL)
|
||||
self.run_yt_dlp()
|
||||
|
||||
def test_import(self):
|
||||
self.run_yt_dlp(exe=(sys.executable, '-c', 'import yt_dlp'))
|
||||
|
||||
def test_module_exec(self):
|
||||
self.run_yt_dlp(exe=(sys.executable, '-m', 'yt_dlp'))
|
||||
|
||||
def test_cmdline_umlauts(self):
|
||||
p = subprocess.Popen(
|
||||
[sys.executable, 'yt_dlp/__main__.py', '--ignore-config', encodeArgument('ä'), '--version'],
|
||||
cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
|
||||
_, stderr = p.communicate()
|
||||
_, stderr = self.run_yt_dlp(opts=('ä', '--version'))
|
||||
self.assertFalse(stderr)
|
||||
|
||||
def test_lazy_extractors(self):
|
||||
try:
|
||||
subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'yt_dlp/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL)
|
||||
subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
|
||||
subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', LAZY_EXTRACTORS],
|
||||
cwd=rootDir, stdout=subprocess.DEVNULL)
|
||||
self.assertTrue(os.path.exists(LAZY_EXTRACTORS))
|
||||
|
||||
_, stderr = self.run_yt_dlp(opts=('-s', 'test:'))
|
||||
self.assertFalse(stderr)
|
||||
|
||||
subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=subprocess.DEVNULL)
|
||||
finally:
|
||||
with contextlib.suppress(OSError):
|
||||
os.remove('yt_dlp/extractor/lazy_extractors.py')
|
||||
os.remove(LAZY_EXTRACTORS)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -7,8 +7,10 @@ import unittest
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import math
|
||||
import re
|
||||
|
||||
from yt_dlp.jsinterp import JSInterpreter
|
||||
from yt_dlp.jsinterp import JS_Undefined, JSInterpreter
|
||||
|
||||
|
||||
class TestJSInterpreter(unittest.TestCase):
|
||||
@@ -19,6 +21,9 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
jsi = JSInterpreter('function x3(){return 42;}')
|
||||
self.assertEqual(jsi.call_function('x3'), 42)
|
||||
|
||||
jsi = JSInterpreter('function x3(){42}')
|
||||
self.assertEqual(jsi.call_function('x3'), None)
|
||||
|
||||
jsi = JSInterpreter('var x5 = function(){return 42;}')
|
||||
self.assertEqual(jsi.call_function('x5'), 42)
|
||||
|
||||
@@ -45,14 +50,32 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
jsi = JSInterpreter('function f(){return 1 << 5;}')
|
||||
self.assertEqual(jsi.call_function('f'), 32)
|
||||
|
||||
jsi = JSInterpreter('function f(){return 2 ** 5}')
|
||||
self.assertEqual(jsi.call_function('f'), 32)
|
||||
|
||||
jsi = JSInterpreter('function f(){return 19 & 21;}')
|
||||
self.assertEqual(jsi.call_function('f'), 17)
|
||||
|
||||
jsi = JSInterpreter('function f(){return 11 >> 2;}')
|
||||
self.assertEqual(jsi.call_function('f'), 2)
|
||||
|
||||
jsi = JSInterpreter('function f(){return []? 2+3: 4;}')
|
||||
self.assertEqual(jsi.call_function('f'), 5)
|
||||
|
||||
jsi = JSInterpreter('function f(){return 1 == 2}')
|
||||
self.assertEqual(jsi.call_function('f'), False)
|
||||
|
||||
jsi = JSInterpreter('function f(){return 0 && 1 || 2;}')
|
||||
self.assertEqual(jsi.call_function('f'), 2)
|
||||
|
||||
jsi = JSInterpreter('function f(){return 0 ?? 42;}')
|
||||
self.assertEqual(jsi.call_function('f'), 0)
|
||||
|
||||
jsi = JSInterpreter('function f(){return "life, the universe and everything" < 42;}')
|
||||
self.assertFalse(jsi.call_function('f'))
|
||||
|
||||
def test_array_access(self):
|
||||
jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2] = 7; return x;}')
|
||||
jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}')
|
||||
self.assertEqual(jsi.call_function('f'), [5, 2, 7])
|
||||
|
||||
def test_parens(self):
|
||||
@@ -62,6 +85,10 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
jsi = JSInterpreter('function f(){return (1 + 2) * 3;}')
|
||||
self.assertEqual(jsi.call_function('f'), 9)
|
||||
|
||||
def test_quotes(self):
|
||||
jsi = JSInterpreter(R'function f(){return "a\"\\("}')
|
||||
self.assertEqual(jsi.call_function('f'), R'a"\(')
|
||||
|
||||
def test_assignments(self):
|
||||
jsi = JSInterpreter('function f(){var x = 20; x = 30 + 1; return x;}')
|
||||
self.assertEqual(jsi.call_function('f'), 31)
|
||||
@@ -104,17 +131,33 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
}''')
|
||||
self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])
|
||||
|
||||
def test_builtins(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return NaN }
|
||||
''')
|
||||
self.assertTrue(math.isnan(jsi.call_function('x')))
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 86000)
|
||||
jsi = JSInterpreter('''
|
||||
function x(dt) { return new Date(dt) - 0; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000)
|
||||
|
||||
def test_call(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return 2; }
|
||||
function y(a) { return x() + a; }
|
||||
function y(a) { return x() + (a?a:0); }
|
||||
function z() { return y(3); }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('z'), 5)
|
||||
self.assertEqual(jsi.call_function('y'), 2)
|
||||
|
||||
def test_for_loop(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { a=0; for (i=0; i-10; i++) {a++} a }
|
||||
function x() { a=0; for (i=0; i-10; i++) {a++} return a }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 10)
|
||||
|
||||
@@ -153,21 +196,53 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 10)
|
||||
|
||||
def test_catch(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { try{throw 10} catch(e){return 5} }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 5)
|
||||
|
||||
def test_finally(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { try{throw 10} finally {return 42} }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 42)
|
||||
jsi = JSInterpreter('''
|
||||
function x() { try{throw 10} catch(e){return 5} finally {return 42} }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 42)
|
||||
|
||||
def test_nested_try(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() {try {
|
||||
try{throw 10} finally {throw 42}
|
||||
} catch(e){return 5} }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 5)
|
||||
|
||||
def test_for_loop_continue(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { a=0; for (i=0; i-10; i++) { continue; a++ } a }
|
||||
function x() { a=0; for (i=0; i-10; i++) { continue; a++ } return a }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 0)
|
||||
|
||||
def test_for_loop_break(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { a=0; for (i=0; i-10; i++) { break; a++ } a }
|
||||
function x() { a=0; for (i=0; i-10; i++) { break; a++ } return a }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 0)
|
||||
|
||||
def test_for_loop_try(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() {
|
||||
for (i=0; i-10; i++) { try { if (i == 5) throw i} catch {return 10} finally {break} };
|
||||
return 42 }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 42)
|
||||
|
||||
def test_literal_list(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { [1, 2, "asdf", [5, 6, 7]][3] }
|
||||
function x() { return [1, 2, "asdf", [5, 6, 7]][3] }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), [5, 6, 7])
|
||||
|
||||
@@ -177,6 +252,162 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 7)
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { a=5; return (a -= 1, a+=3, a); }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 7)
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 5)
|
||||
|
||||
def test_void(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return void 42; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), None)
|
||||
|
||||
def test_return_function(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return [1, function(){return 1}][1] }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x')([]), 1)
|
||||
|
||||
def test_null(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return null; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), None)
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return [null > 0, null < 0, null == 0, null === 0]; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), [False, False, False, False])
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return [null >= 0, null <= 0]; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), [True, True])
|
||||
|
||||
def test_undefined(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return undefined === undefined; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), True)
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return undefined; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), JS_Undefined)
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { let v; return v; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), JS_Undefined)
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return [undefined === undefined, undefined == undefined, undefined < undefined, undefined > undefined]; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), [True, True, False, False])
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return [undefined === 0, undefined == 0, undefined < 0, undefined > 0]; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), [False, False, False, False])
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return [undefined >= 0, undefined <= 0]; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), [False, False])
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return [undefined > null, undefined < null, undefined == null, undefined === null]; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), [False, False, True, False])
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return [undefined === null, undefined == null, undefined < null, undefined > null]; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), [False, True, False, False])
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { let v; return [42+v, v+42, v**42, 42**v, 0**v]; }
|
||||
''')
|
||||
for y in jsi.call_function('x'):
|
||||
self.assertTrue(math.isnan(y))
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { let v; return v**0; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), 1)
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { let v; return [v>42, v<=42, v&&42, 42&&v]; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), [False, False, JS_Undefined, JS_Undefined])
|
||||
|
||||
jsi = JSInterpreter('function x(){return undefined ?? 42; }')
|
||||
self.assertEqual(jsi.call_function('x'), 42)
|
||||
|
||||
def test_object(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { return {}; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), {})
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { let a = {m1: 42, m2: 0 }; return [a["m1"], a.m2]; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), [42, 0])
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { let a; return a?.qq; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), JS_Undefined)
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { let a = {m1: 42, m2: 0 }; return a?.qq; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), JS_Undefined)
|
||||
|
||||
def test_regex(self):
|
||||
jsi = JSInterpreter('''
|
||||
function x() { let a=/,,[/,913,/](,)}/; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x'), None)
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { let a=/,,[/,913,/](,)}/; return a; }
|
||||
''')
|
||||
self.assertIsInstance(jsi.call_function('x'), re.Pattern)
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { let a=/,,[/,913,/](,)}/i; return a; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x').flags & re.I, re.I)
|
||||
|
||||
jsi = JSInterpreter(R'''
|
||||
function x() { let a=/,][}",],()}(\[)/; return a; }
|
||||
''')
|
||||
self.assertEqual(jsi.call_function('x').pattern, r',][}",],()}(\[)')
|
||||
|
||||
def test_char_code_at(self):
|
||||
jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}')
|
||||
self.assertEqual(jsi.call_function('x', 0), 116)
|
||||
self.assertEqual(jsi.call_function('x', 1), 101)
|
||||
self.assertEqual(jsi.call_function('x', 2), 115)
|
||||
self.assertEqual(jsi.call_function('x', 3), 116)
|
||||
self.assertEqual(jsi.call_function('x', 4), None)
|
||||
self.assertEqual(jsi.call_function('x', 'not_a_number'), 116)
|
||||
|
||||
def test_bitwise_operators_overflow(self):
|
||||
jsi = JSInterpreter('function x(){return -524999584 << 5}')
|
||||
self.assertEqual(jsi.call_function('x'), 379882496)
|
||||
|
||||
jsi = JSInterpreter('function x(){return 1236566549 << 5}')
|
||||
self.assertEqual(jsi.call_function('x'), 915423904)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -413,6 +413,10 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
|
||||
self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363)
|
||||
|
||||
self.assertEqual(unified_timestamp('December 31 1969 20:00:01 EDT'), 1)
|
||||
self.assertEqual(unified_timestamp('Wednesday 31 December 1969 18:01:26 MDT'), 86)
|
||||
self.assertEqual(unified_timestamp('12/31/1969 20:01:18 EDT', False), 78)
|
||||
|
||||
def test_determine_ext(self):
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None)
|
||||
|
||||
@@ -94,6 +94,38 @@ _NSIG_TESTS = [
|
||||
'https://www.youtube.com/s/player/5dd88d1d/player-plasma-ias-phone-en_US.vflset/base.js',
|
||||
'kSxKFLeqzv_ZyHSAt', 'n8gS8oRlHOxPFA',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/324f67b9/player_ias.vflset/en_US/base.js',
|
||||
'xdftNy7dh9QGnhW', '22qLGxrmX8F1rA',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js',
|
||||
'TDCstCG66tEAO5pR9o', 'dbxNtZ14c-yWyw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/c81bbb4a/player_ias.vflset/en_US/base.js',
|
||||
'gre3EcLurNY2vqp94', 'Z9DfGxWP115WTg',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js',
|
||||
'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/009f1d77/player_ias.vflset/en_US/base.js',
|
||||
'5dwFHw8aFWQUQtffRq', 'audescmLUzI3jw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/en_US/base.js',
|
||||
'5EHDMgYLV6HPGk_Mu-kk', 'n9lUJLHbxUI0GQ',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/113ca41c/player_ias.vflset/en_US/base.js',
|
||||
'cgYl-tlYkhjT7A', 'hI7BBr2zUgcmMg',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js',
|
||||
'M92UUMHa8PdvPd3wyM', '3hPqLJsiNZx7yA',
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
@@ -101,6 +133,7 @@ _NSIG_TESTS = [
|
||||
class TestPlayerInfo(unittest.TestCase):
|
||||
def test_youtube_extract_player_info(self):
|
||||
PLAYER_URLS = (
|
||||
('https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js', '4c3f79c5'),
|
||||
('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'),
|
||||
('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/fr_FR/base.js', '64dddad9'),
|
||||
('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'),
|
||||
|
||||
@@ -29,6 +29,7 @@ from .cookies import load_cookies
|
||||
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
|
||||
from .downloader.rtmp import rtmpdump_version
|
||||
from .extractor import gen_extractor_classes, get_info_extractor
|
||||
from .extractor.common import UnsupportedURLIE
|
||||
from .extractor.openload import PhantomJSwrapper
|
||||
from .minicurses import format_text
|
||||
from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
|
||||
@@ -47,7 +48,7 @@ from .postprocessor import (
|
||||
get_postprocessor,
|
||||
)
|
||||
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
|
||||
from .update import detect_variant
|
||||
from .update import REPOSITORY, current_git_head, detect_variant
|
||||
from .utils import (
|
||||
DEFAULT_OUTTMPL,
|
||||
IDENTITY,
|
||||
@@ -89,6 +90,7 @@ from .utils import (
|
||||
args_to_str,
|
||||
bug_reports_message,
|
||||
date_from_str,
|
||||
deprecation_warning,
|
||||
determine_ext,
|
||||
determine_protocol,
|
||||
encode_compat_str,
|
||||
@@ -115,6 +117,7 @@ from .utils import (
|
||||
network_exceptions,
|
||||
number_of_digits,
|
||||
orderedSet,
|
||||
orderedSet_from_options,
|
||||
parse_filesize,
|
||||
preferredencoding,
|
||||
prepend_extension,
|
||||
@@ -236,7 +239,7 @@ class YoutubeDL:
|
||||
Default is 'only_download' for CLI, but False for API
|
||||
skip_playlist_after_errors: Number of allowed failures until the rest of
|
||||
the playlist is skipped
|
||||
force_generic_extractor: Force downloader to use the generic extractor
|
||||
allowed_extractors: List of regexes to match against extractor names that are allowed
|
||||
overwrites: Overwrite all video and metadata files if True,
|
||||
overwrite only non-video files if None
|
||||
and don't overwrite any file if False
|
||||
@@ -272,7 +275,7 @@ class YoutubeDL:
|
||||
subtitleslangs: List of languages of the subtitles to download (can be regex).
|
||||
The list may contain "all" to refer to all the available
|
||||
subtitles. The language can be prefixed with a "-" to
|
||||
exclude it from the requested languages. Eg: ['all', '-live_chat']
|
||||
exclude it from the requested languages, e.g. ['all', '-live_chat']
|
||||
keepvideo: Keep the video file after post-processing
|
||||
daterange: A DateRange object, download only if the upload_date is in the range.
|
||||
skip_download: Skip the actual download of the video file
|
||||
@@ -301,8 +304,9 @@ class YoutubeDL:
|
||||
should act on each input URL as opposed to for the entire queue
|
||||
cookiefile: File name or text stream from where cookies should be read and dumped to
|
||||
cookiesfrombrowser: A tuple containing the name of the browser, the profile
|
||||
name/pathfrom where cookies are loaded, and the name of the
|
||||
keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
|
||||
name/path from where cookies are loaded, the name of the keyring,
|
||||
and the container name, e.g. ('chrome', ) or
|
||||
('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
|
||||
legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
|
||||
support RFC 5746 secure renegotiation
|
||||
nocheckcertificate: Do not verify SSL certificates
|
||||
@@ -444,6 +448,7 @@ class YoutubeDL:
|
||||
* index: Section number (Optional)
|
||||
force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
|
||||
noprogress: Do not print the progress bar
|
||||
live_from_start: Whether to download livestreams videos from the start
|
||||
|
||||
The following parameters are not used by YoutubeDL itself, they are used by
|
||||
the downloader (see yt_dlp/downloader/common.py):
|
||||
@@ -470,11 +475,13 @@ class YoutubeDL:
|
||||
discontinuities such as ad breaks (default: False)
|
||||
extractor_args: A dictionary of arguments to be passed to the extractors.
|
||||
See "EXTRACTOR ARGUMENTS" for details.
|
||||
Eg: {'youtube': {'skip': ['dash', 'hls']}}
|
||||
E.g. {'youtube': {'skip': ['dash', 'hls']}}
|
||||
mark_watched: Mark videos watched (even with --simulate). Only for YouTube
|
||||
|
||||
The following options are deprecated and may be removed in the future:
|
||||
|
||||
force_generic_extractor: Force downloader to use the generic extractor
|
||||
- Use allowed_extractors = ['generic', 'default']
|
||||
playliststart: - Use playlist_items
|
||||
Playlist item to start at.
|
||||
playlistend: - Use playlist_items
|
||||
@@ -626,7 +633,7 @@ class YoutubeDL:
|
||||
for msg in self.params.get('_warnings', []):
|
||||
self.report_warning(msg)
|
||||
for msg in self.params.get('_deprecation_warnings', []):
|
||||
self.deprecation_warning(msg)
|
||||
self.deprecated_feature(msg)
|
||||
|
||||
self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
|
||||
if 'list-formats' in self.params['compat_opts']:
|
||||
@@ -756,13 +763,6 @@ class YoutubeDL:
|
||||
self._ies_instances[ie_key] = ie
|
||||
ie.set_downloader(self)
|
||||
|
||||
def _get_info_extractor_class(self, ie_key):
|
||||
ie = self._ies.get(ie_key)
|
||||
if ie is None:
|
||||
ie = get_info_extractor(ie_key)
|
||||
self.add_info_extractor(ie)
|
||||
return ie
|
||||
|
||||
def get_info_extractor(self, ie_key):
|
||||
"""
|
||||
Get an instance of an IE with name ie_key, it will try to get one from
|
||||
@@ -779,8 +779,19 @@ class YoutubeDL:
|
||||
"""
|
||||
Add the InfoExtractors returned by gen_extractors to the end of the list
|
||||
"""
|
||||
for ie in gen_extractor_classes():
|
||||
self.add_info_extractor(ie)
|
||||
all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
|
||||
all_ies['end'] = UnsupportedURLIE()
|
||||
try:
|
||||
ie_names = orderedSet_from_options(
|
||||
self.params.get('allowed_extractors', ['default']), {
|
||||
'all': list(all_ies),
|
||||
'default': [name for name, ie in all_ies.items() if ie._ENABLED],
|
||||
}, use_regex=True)
|
||||
except re.error as e:
|
||||
raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
|
||||
for name in ie_names:
|
||||
self.add_info_extractor(all_ies[name])
|
||||
self.write_debug(f'Loaded {len(ie_names)} extractors')
|
||||
|
||||
def add_post_processor(self, pp, when='post_process'):
|
||||
"""Add a PostProcessor object to the end of the chain."""
|
||||
@@ -826,9 +837,11 @@ class YoutubeDL:
|
||||
def to_stdout(self, message, skip_eol=False, quiet=None):
|
||||
"""Print message to stdout"""
|
||||
if quiet is not None:
|
||||
self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
|
||||
self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
|
||||
'Use "YoutubeDL.to_screen" instead')
|
||||
if skip_eol is not False:
|
||||
self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. Use "YoutubeDL.to_screen" instead')
|
||||
self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
|
||||
'Use "YoutubeDL.to_screen" instead')
|
||||
self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)
|
||||
|
||||
def to_screen(self, message, skip_eol=False, quiet=None):
|
||||
@@ -964,11 +977,14 @@ class YoutubeDL:
|
||||
return
|
||||
self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
|
||||
|
||||
def deprecation_warning(self, message):
|
||||
def deprecation_warning(self, message, *, stacklevel=0):
|
||||
deprecation_warning(
|
||||
message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)
|
||||
|
||||
def deprecated_feature(self, message):
|
||||
if self.params.get('logger') is not None:
|
||||
self.params['logger'].warning(f'DeprecationWarning: {message}')
|
||||
else:
|
||||
self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
|
||||
self.params['logger'].warning(f'Deprecated Feature: {message}')
|
||||
self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)
|
||||
|
||||
def report_error(self, message, *args, **kwargs):
|
||||
'''
|
||||
@@ -1028,7 +1044,7 @@ class YoutubeDL:
|
||||
|
||||
def get_output_path(self, dir_type='', filename=None):
|
||||
paths = self.params.get('paths', {})
|
||||
assert isinstance(paths, dict)
|
||||
assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
|
||||
path = os.path.join(
|
||||
expand_path(paths.get('home', '').strip()),
|
||||
expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
|
||||
@@ -1046,7 +1062,7 @@ class YoutubeDL:
|
||||
|
||||
# outtmpl should be expand_path'ed before template dict substitution
|
||||
# because meta fields may contain env variables we don't want to
|
||||
# be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
|
||||
# be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
|
||||
# title "Hello $PATH", we don't want `$PATH` to be expanded.
|
||||
return expand_path(outtmpl).replace(sep, '')
|
||||
|
||||
@@ -1411,11 +1427,11 @@ class YoutubeDL:
|
||||
ie_key = 'Generic'
|
||||
|
||||
if ie_key:
|
||||
ies = {ie_key: self._get_info_extractor_class(ie_key)}
|
||||
ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
|
||||
else:
|
||||
ies = self._ies
|
||||
|
||||
for ie_key, ie in ies.items():
|
||||
for key, ie in ies.items():
|
||||
if not ie.suitable(url):
|
||||
continue
|
||||
|
||||
@@ -1424,14 +1440,16 @@ class YoutubeDL:
|
||||
'and will probably not work.')
|
||||
|
||||
temp_id = ie.get_temp_id(url)
|
||||
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
|
||||
self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
|
||||
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
|
||||
self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
|
||||
if self.params.get('break_on_existing', False):
|
||||
raise ExistingVideoReached()
|
||||
break
|
||||
return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
|
||||
return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
|
||||
else:
|
||||
self.report_error('no suitable InfoExtractor for URL %s' % url)
|
||||
extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
|
||||
self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
|
||||
tb=False if extractors_restricted else None)
|
||||
|
||||
def _handle_extraction_exceptions(func):
|
||||
@functools.wraps(func)
|
||||
@@ -1977,8 +1995,8 @@ class YoutubeDL:
|
||||
filter_parts.append(string)
|
||||
|
||||
def _remove_unused_ops(tokens):
|
||||
# Remove operators that we don't use and join them with the surrounding strings
|
||||
# for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
|
||||
# Remove operators that we don't use and join them with the surrounding strings.
|
||||
# E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
|
||||
ALLOWED_OPS = ('/', '+', ',', '(', ')')
|
||||
last_string, last_start, last_end, last_line = None, None, None, None
|
||||
for type, string, start, end, line in tokens:
|
||||
@@ -2510,9 +2528,6 @@ class YoutubeDL:
|
||||
'--live-from-start is passed, but there are no formats that can be downloaded from the start. '
|
||||
'If you want to download from the current time, use --no-live-from-start'))
|
||||
|
||||
if not formats:
|
||||
self.raise_no_formats(info_dict)
|
||||
|
||||
def is_wellformed(f):
|
||||
url = f.get('url')
|
||||
if not url:
|
||||
@@ -2525,7 +2540,10 @@ class YoutubeDL:
|
||||
return True
|
||||
|
||||
# Filter out malformed formats for better extraction robustness
|
||||
formats = list(filter(is_wellformed, formats))
|
||||
formats = list(filter(is_wellformed, formats or []))
|
||||
|
||||
if not formats:
|
||||
self.raise_no_formats(info_dict)
|
||||
|
||||
formats_dict = {}
|
||||
|
||||
@@ -2727,42 +2745,26 @@ class YoutubeDL:
|
||||
if lang not in available_subs:
|
||||
available_subs[lang] = cap_info
|
||||
|
||||
if (not self.params.get('writesubtitles') and not
|
||||
self.params.get('writeautomaticsub') or not
|
||||
available_subs):
|
||||
if not available_subs or (
|
||||
not self.params.get('writesubtitles')
|
||||
and not self.params.get('writeautomaticsub')):
|
||||
return None
|
||||
|
||||
all_sub_langs = tuple(available_subs.keys())
|
||||
if self.params.get('allsubtitles', False):
|
||||
requested_langs = all_sub_langs
|
||||
elif self.params.get('subtitleslangs', False):
|
||||
# A list is used so that the order of languages will be the same as
|
||||
# given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
|
||||
requested_langs = []
|
||||
for lang_re in self.params.get('subtitleslangs'):
|
||||
discard = lang_re[0] == '-'
|
||||
if discard:
|
||||
lang_re = lang_re[1:]
|
||||
if lang_re == 'all':
|
||||
if discard:
|
||||
requested_langs = []
|
||||
else:
|
||||
requested_langs.extend(all_sub_langs)
|
||||
continue
|
||||
current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
|
||||
if discard:
|
||||
for lang in current_langs:
|
||||
while lang in requested_langs:
|
||||
requested_langs.remove(lang)
|
||||
else:
|
||||
requested_langs.extend(current_langs)
|
||||
requested_langs = orderedSet(requested_langs)
|
||||
try:
|
||||
requested_langs = orderedSet_from_options(
|
||||
self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
|
||||
except re.error as e:
|
||||
raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}')
|
||||
elif normal_sub_langs:
|
||||
requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
|
||||
else:
|
||||
requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
|
||||
if requested_langs:
|
||||
self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
|
||||
self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
|
||||
|
||||
formats_query = self.params.get('subtitlesformat', 'best')
|
||||
formats_preference = formats_query.split('/') if formats_query else []
|
||||
@@ -3270,6 +3272,7 @@ class YoutubeDL:
|
||||
self.to_screen(f'[info] {e}')
|
||||
if not self.params.get('break_per_url'):
|
||||
raise
|
||||
self._num_downloads = 0
|
||||
else:
|
||||
if self.params.get('dump_single_json', False):
|
||||
self.post_extract(res)
|
||||
@@ -3318,6 +3321,12 @@ class YoutubeDL:
|
||||
return info_dict
|
||||
info_dict.setdefault('epoch', int(time.time()))
|
||||
info_dict.setdefault('_type', 'video')
|
||||
info_dict.setdefault('_version', {
|
||||
'version': __version__,
|
||||
'current_git_head': current_git_head(),
|
||||
'release_git_head': RELEASE_GIT_HEAD,
|
||||
'repository': REPOSITORY,
|
||||
})
|
||||
|
||||
if remove_private_keys:
|
||||
reject = lambda k, v: v is None or k.startswith('__') or k in {
|
||||
@@ -3443,7 +3452,7 @@ class YoutubeDL:
|
||||
return False
|
||||
|
||||
vid_ids = [self._make_archive_id(info_dict)]
|
||||
vid_ids.extend(info_dict.get('_old_archive_ids', []))
|
||||
vid_ids.extend(info_dict.get('_old_archive_ids') or [])
|
||||
return any(id_ in self.archive for id_ in vid_ids)
|
||||
|
||||
def record_download_archive(self, info_dict):
|
||||
@@ -3682,7 +3691,8 @@ class YoutubeDL:
|
||||
if VARIANT not in (None, 'pip'):
|
||||
source += '*'
|
||||
write_debug(join_nonempty(
|
||||
'yt-dlp version', __version__,
|
||||
f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
|
||||
__version__,
|
||||
f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
|
||||
'' if source == 'unknown' else f'({source})',
|
||||
delim=' '))
|
||||
@@ -3698,18 +3708,8 @@ class YoutubeDL:
|
||||
if self.params['compat_opts']:
|
||||
write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
|
||||
|
||||
if source == 'source':
|
||||
try:
|
||||
stdout, _, _ = Popen.run(
|
||||
['git', 'rev-parse', '--short', 'HEAD'],
|
||||
text=True, cwd=os.path.dirname(os.path.abspath(__file__)),
|
||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
if re.fullmatch('[0-9a-f]+', stdout.strip()):
|
||||
write_debug(f'Git HEAD: {stdout.strip()}')
|
||||
except Exception:
|
||||
with contextlib.suppress(Exception):
|
||||
sys.exc_clear()
|
||||
|
||||
if current_git_head():
|
||||
write_debug(f'Git HEAD: {current_git_head()}')
|
||||
write_debug(system_identifier())
|
||||
|
||||
exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
|
||||
|
||||
@@ -63,6 +63,8 @@ from .utils import (
|
||||
)
|
||||
from .YoutubeDL import YoutubeDL
|
||||
|
||||
_IN_CLI = False
|
||||
|
||||
|
||||
def _exit(status=0, *args):
|
||||
for msg in args:
|
||||
@@ -344,10 +346,16 @@ def validate_options(opts):
|
||||
|
||||
# Cookies from browser
|
||||
if opts.cookiesfrombrowser:
|
||||
mobj = re.match(r'(?P<name>[^+:]+)(\s*\+\s*(?P<keyring>[^:]+))?(\s*:(?P<profile>.+))?', opts.cookiesfrombrowser)
|
||||
container = None
|
||||
mobj = re.fullmatch(r'''(?x)
|
||||
(?P<name>[^+:]+)
|
||||
(?:\s*\+\s*(?P<keyring>[^:]+))?
|
||||
(?:\s*:\s*(?P<profile>.+?))?
|
||||
(?:\s*::\s*(?P<container>.+))?
|
||||
''', opts.cookiesfrombrowser)
|
||||
if mobj is None:
|
||||
raise ValueError(f'invalid cookies from browser arguments: {opts.cookiesfrombrowser}')
|
||||
browser_name, keyring, profile = mobj.group('name', 'keyring', 'profile')
|
||||
browser_name, keyring, profile, container = mobj.group('name', 'keyring', 'profile', 'container')
|
||||
browser_name = browser_name.lower()
|
||||
if browser_name not in SUPPORTED_BROWSERS:
|
||||
raise ValueError(f'unsupported browser specified for cookies: "{browser_name}". '
|
||||
@@ -357,7 +365,7 @@ def validate_options(opts):
|
||||
if keyring not in SUPPORTED_KEYRINGS:
|
||||
raise ValueError(f'unsupported keyring specified for cookies: "{keyring}". '
|
||||
f'Supported keyrings are: {", ".join(sorted(SUPPORTED_KEYRINGS))}')
|
||||
opts.cookiesfrombrowser = (browser_name, profile, keyring)
|
||||
opts.cookiesfrombrowser = (browser_name, profile, keyring, container)
|
||||
|
||||
# MetadataParser
|
||||
def metadataparser_actions(f):
|
||||
@@ -766,6 +774,7 @@ def parse_options(argv=None):
|
||||
'windowsfilenames': opts.windowsfilenames,
|
||||
'ignoreerrors': opts.ignoreerrors,
|
||||
'force_generic_extractor': opts.force_generic_extractor,
|
||||
'allowed_extractors': opts.allowed_extractors or ['default'],
|
||||
'ratelimit': opts.ratelimit,
|
||||
'throttledratelimit': opts.throttledratelimit,
|
||||
'overwrites': opts.overwrites,
|
||||
|
||||
@@ -14,4 +14,5 @@ if __package__ is None and not hasattr(sys, 'frozen'):
|
||||
import yt_dlp
|
||||
|
||||
if __name__ == '__main__':
|
||||
yt_dlp._IN_CLI = True
|
||||
yt_dlp.main()
|
||||
|
||||
@@ -6,7 +6,8 @@ import re
|
||||
import shutil
|
||||
import traceback
|
||||
|
||||
from .utils import expand_path, write_json_file
|
||||
from .utils import expand_path, traverse_obj, version_tuple, write_json_file
|
||||
from .version import __version__
|
||||
|
||||
|
||||
class Cache:
|
||||
@@ -45,12 +46,20 @@ class Cache:
|
||||
if ose.errno != errno.EEXIST:
|
||||
raise
|
||||
self._ydl.write_debug(f'Saving {section}.{key} to cache')
|
||||
write_json_file(data, fn)
|
||||
write_json_file({'yt-dlp_version': __version__, 'data': data}, fn)
|
||||
except Exception:
|
||||
tb = traceback.format_exc()
|
||||
self._ydl.report_warning(f'Writing cache to {fn!r} failed: {tb}')
|
||||
|
||||
def load(self, section, key, dtype='json', default=None):
|
||||
def _validate(self, data, min_ver):
|
||||
version = traverse_obj(data, 'yt-dlp_version')
|
||||
if not version: # Backward compatibility
|
||||
data, version = {'data': data}, '2022.08.19'
|
||||
if not min_ver or version_tuple(version) >= version_tuple(min_ver):
|
||||
return data['data']
|
||||
self._ydl.write_debug(f'Discarding old cache from version {version} (needs {min_ver})')
|
||||
|
||||
def load(self, section, key, dtype='json', default=None, *, min_ver=None):
|
||||
assert dtype in ('json',)
|
||||
|
||||
if not self.enabled:
|
||||
@@ -61,8 +70,8 @@ class Cache:
|
||||
try:
|
||||
with open(cache_fn, encoding='utf-8') as cachef:
|
||||
self._ydl.write_debug(f'Loading {section}.{key} from cache')
|
||||
return json.load(cachef)
|
||||
except ValueError:
|
||||
return self._validate(json.load(cachef), min_ver)
|
||||
except (ValueError, KeyError):
|
||||
try:
|
||||
file_size = os.path.getsize(cache_fn)
|
||||
except OSError as oe:
|
||||
|
||||
@@ -3,6 +3,7 @@ import contextlib
|
||||
import http.cookiejar
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import struct
|
||||
import subprocess
|
||||
@@ -24,7 +25,13 @@ from .dependencies import (
|
||||
sqlite3,
|
||||
)
|
||||
from .minicurses import MultilinePrinter, QuietMultilinePrinter
|
||||
from .utils import Popen, YoutubeDLCookieJar, error_to_str, expand_path
|
||||
from .utils import (
|
||||
Popen,
|
||||
YoutubeDLCookieJar,
|
||||
error_to_str,
|
||||
expand_path,
|
||||
try_call,
|
||||
)
|
||||
|
||||
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
|
||||
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
|
||||
@@ -85,8 +92,9 @@ def _create_progress_bar(logger):
|
||||
def load_cookies(cookie_file, browser_specification, ydl):
|
||||
cookie_jars = []
|
||||
if browser_specification is not None:
|
||||
browser_name, profile, keyring = _parse_browser_specification(*browser_specification)
|
||||
cookie_jars.append(extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring))
|
||||
browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
|
||||
cookie_jars.append(
|
||||
extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container))
|
||||
|
||||
if cookie_file is not None:
|
||||
is_filename = YoutubeDLCookieJar.is_path(cookie_file)
|
||||
@@ -101,9 +109,9 @@ def load_cookies(cookie_file, browser_specification, ydl):
|
||||
return _merge_cookie_jars(cookie_jars)
|
||||
|
||||
|
||||
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
|
||||
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
|
||||
if browser_name == 'firefox':
|
||||
return _extract_firefox_cookies(profile, logger)
|
||||
return _extract_firefox_cookies(profile, container, logger)
|
||||
elif browser_name == 'safari':
|
||||
return _extract_safari_cookies(profile, logger)
|
||||
elif browser_name in CHROMIUM_BASED_BROWSERS:
|
||||
@@ -112,7 +120,7 @@ def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(),
|
||||
raise ValueError(f'unknown browser: {browser_name}')
|
||||
|
||||
|
||||
def _extract_firefox_cookies(profile, logger):
|
||||
def _extract_firefox_cookies(profile, container, logger):
|
||||
logger.info('Extracting cookies from firefox')
|
||||
if not sqlite3:
|
||||
logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
|
||||
@@ -131,11 +139,36 @@ def _extract_firefox_cookies(profile, logger):
|
||||
raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
|
||||
logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
|
||||
|
||||
container_id = None
|
||||
if container not in (None, 'none'):
|
||||
containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
|
||||
if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
|
||||
raise FileNotFoundError(f'could not read containers.json in {search_root}')
|
||||
with open(containers_path) as containers:
|
||||
identities = json.load(containers).get('identities', [])
|
||||
container_id = next((context.get('userContextId') for context in identities if container in (
|
||||
context.get('name'),
|
||||
try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group())
|
||||
)), None)
|
||||
if not isinstance(container_id, int):
|
||||
raise ValueError(f'could not find firefox container "{container}" in containers.json')
|
||||
|
||||
with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
|
||||
cursor = None
|
||||
try:
|
||||
cursor = _open_database_copy(cookie_database_path, tmpdir)
|
||||
cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
|
||||
if isinstance(container_id, int):
|
||||
logger.debug(
|
||||
f'Only loading cookies from firefox container "{container}", ID {container_id}')
|
||||
cursor.execute(
|
||||
'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
|
||||
(f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
|
||||
elif container == 'none':
|
||||
logger.debug('Only loading cookies not belonging to any container')
|
||||
cursor.execute(
|
||||
'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,"userContextId=")')
|
||||
else:
|
||||
cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
|
||||
jar = YoutubeDLCookieJar()
|
||||
with _create_progress_bar(logger) as progress_bar:
|
||||
table = cursor.fetchall()
|
||||
@@ -948,11 +981,11 @@ def _is_path(value):
|
||||
return os.path.sep in value
|
||||
|
||||
|
||||
def _parse_browser_specification(browser_name, profile=None, keyring=None):
|
||||
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
|
||||
if browser_name not in SUPPORTED_BROWSERS:
|
||||
raise ValueError(f'unsupported browser: "{browser_name}"')
|
||||
if keyring not in (None, *SUPPORTED_KEYRINGS):
|
||||
raise ValueError(f'unsupported keyring: "{keyring}"')
|
||||
if profile is not None and _is_path(profile):
|
||||
profile = os.path.expanduser(profile)
|
||||
return browser_name, profile, keyring
|
||||
return browser_name, profile, keyring, container
|
||||
|
||||
@@ -92,6 +92,7 @@ class FileDownloader:
|
||||
|
||||
for func in (
|
||||
'deprecation_warning',
|
||||
'deprecated_feature',
|
||||
'report_error',
|
||||
'report_file_already_downloaded',
|
||||
'report_warning',
|
||||
|
||||
@@ -515,16 +515,14 @@ _BY_NAME = {
|
||||
if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD')
|
||||
}
|
||||
|
||||
_BY_EXE = {klass.EXE_NAME: klass for klass in _BY_NAME.values()}
|
||||
|
||||
|
||||
def list_external_downloaders():
|
||||
return sorted(_BY_NAME.keys())
|
||||
|
||||
|
||||
def get_external_downloader(external_downloader):
|
||||
""" Given the name of the executable, see whether we support the given
|
||||
downloader . """
|
||||
# Drop .exe extension on Windows
|
||||
""" Given the name of the executable, see whether we support the given downloader """
|
||||
bn = os.path.splitext(os.path.basename(external_downloader))[0]
|
||||
return _BY_NAME.get(bn, _BY_EXE.get(bn))
|
||||
return _BY_NAME.get(bn) or next((
|
||||
klass for klass in _BY_NAME.values() if klass.EXE_NAME in bn
|
||||
), None)
|
||||
|
||||
@@ -184,7 +184,7 @@ def build_fragments_list(boot_info):
|
||||
first_frag_number = fragment_run_entry_table[0]['first']
|
||||
fragments_counter = itertools.count(first_frag_number)
|
||||
for segment, fragments_count in segment_run_table['segment_run']:
|
||||
# In some live HDS streams (for example Rai), `fragments_count` is
|
||||
# In some live HDS streams (e.g. Rai), `fragments_count` is
|
||||
# abnormal and causing out-of-memory errors. It's OK to change the
|
||||
# number of fragments for live streams as they are updated periodically
|
||||
if fragments_count == 4294967295 and boot_info['live']:
|
||||
|
||||
@@ -65,8 +65,8 @@ class FragmentFD(FileDownloader):
|
||||
"""
|
||||
|
||||
def report_retry_fragment(self, err, frag_index, count, retries):
|
||||
self.deprecation_warning(
|
||||
'yt_dlp.downloader.FragmentFD.report_retry_fragment is deprecated. Use yt_dlp.downloader.FileDownloader.report_retry instead')
|
||||
self.deprecation_warning('yt_dlp.downloader.FragmentFD.report_retry_fragment is deprecated. '
|
||||
'Use yt_dlp.downloader.FileDownloader.report_retry instead')
|
||||
return self.report_retry(err, count, retries, frag_index)
|
||||
|
||||
def report_skip_fragment(self, frag_index, err=None):
|
||||
|
||||
@@ -1,5 +1,28 @@
|
||||
# flake8: noqa: F401
|
||||
|
||||
from .youtube import ( # Youtube is moved to the top to improve performance
|
||||
YoutubeIE,
|
||||
YoutubeClipIE,
|
||||
YoutubeFavouritesIE,
|
||||
YoutubeNotificationsIE,
|
||||
YoutubeHistoryIE,
|
||||
YoutubeTabIE,
|
||||
YoutubeLivestreamEmbedIE,
|
||||
YoutubePlaylistIE,
|
||||
YoutubeRecommendedIE,
|
||||
YoutubeSearchDateIE,
|
||||
YoutubeSearchIE,
|
||||
YoutubeSearchURLIE,
|
||||
YoutubeMusicSearchURLIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeStoriesIE,
|
||||
YoutubeTruncatedIDIE,
|
||||
YoutubeTruncatedURLIE,
|
||||
YoutubeYtBeIE,
|
||||
YoutubeYtUserIE,
|
||||
YoutubeWatchLaterIE,
|
||||
)
|
||||
|
||||
from .abc import (
|
||||
ABCIE,
|
||||
ABCIViewIE,
|
||||
@@ -470,6 +493,7 @@ from .epicon import (
|
||||
EpiconIE,
|
||||
EpiconSeriesIE,
|
||||
)
|
||||
from .epoch import EpochIE
|
||||
from .eporner import EpornerIE
|
||||
from .eroprofile import (
|
||||
EroProfileIE,
|
||||
@@ -491,6 +515,7 @@ from .espn import (
|
||||
from .esri import EsriVideoIE
|
||||
from .europa import EuropaIE
|
||||
from .europeantour import EuropeanTourIE
|
||||
from .eurosport import EurosportIE
|
||||
from .euscreen import EUScreenIE
|
||||
from .expotv import ExpoTVIE
|
||||
from .expressen import ExpressenIE
|
||||
@@ -500,6 +525,7 @@ from .facebook import (
|
||||
FacebookIE,
|
||||
FacebookPluginsVideoIE,
|
||||
FacebookRedirectURLIE,
|
||||
FacebookReelIE,
|
||||
)
|
||||
from .fancode import (
|
||||
FancodeVodIE,
|
||||
@@ -719,6 +745,10 @@ from .iqiyi import (
|
||||
IqIE,
|
||||
IqAlbumIE
|
||||
)
|
||||
from .islamchannel import (
|
||||
IslamChannelIE,
|
||||
IslamChannelSeriesIE,
|
||||
)
|
||||
from .itprotv import (
|
||||
ITProTVIE,
|
||||
ITProTVCourseIE
|
||||
@@ -956,6 +986,7 @@ from .mixcloud import (
|
||||
from .mlb import (
|
||||
MLBIE,
|
||||
MLBVideoIE,
|
||||
MLBTVIE,
|
||||
)
|
||||
from .mlssoccer import MLSSoccerIE
|
||||
from .mnet import MnetIE
|
||||
@@ -974,6 +1005,7 @@ from .motherless import (
|
||||
from .motorsport import MotorsportIE
|
||||
from .movieclips import MovieClipsIE
|
||||
from .moviepilot import MoviepilotIE
|
||||
from .moview import MoviewPlayIE
|
||||
from .moviezine import MoviezineIE
|
||||
from .movingimage import MovingImageIE
|
||||
from .msn import MSNIE
|
||||
@@ -1076,6 +1108,7 @@ from .newgrounds import (
|
||||
NewgroundsPlaylistIE,
|
||||
NewgroundsUserIE,
|
||||
)
|
||||
from .newspicks import NewsPicksIE
|
||||
from .newstube import NewstubeIE
|
||||
from .newsy import NewsyIE
|
||||
from .nextmedia import (
|
||||
@@ -1236,6 +1269,7 @@ from .paramountplus import (
|
||||
ParamountPlusIE,
|
||||
ParamountPlusSeriesIE,
|
||||
)
|
||||
from .parler import ParlerIE
|
||||
from .parlview import ParlviewIE
|
||||
from .patreon import (
|
||||
PatreonIE,
|
||||
@@ -1724,6 +1758,12 @@ from .telequebec import (
|
||||
from .teletask import TeleTaskIE
|
||||
from .telewebion import TelewebionIE
|
||||
from .tempo import TempoIE
|
||||
from .tencent import (
|
||||
VQQSeriesIE,
|
||||
VQQVideoIE,
|
||||
WeTvEpisodeIE,
|
||||
WeTvSeriesIE,
|
||||
)
|
||||
from .tennistv import TennisTVIE
|
||||
from .tenplay import TenPlayIE
|
||||
from .testurl import TestURLIE
|
||||
@@ -1783,6 +1823,10 @@ from .toongoggles import ToonGogglesIE
|
||||
from .toutv import TouTvIE
|
||||
from .toypics import ToypicsUserIE, ToypicsIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
from .triller import (
|
||||
TrillerIE,
|
||||
TrillerUserIE,
|
||||
)
|
||||
from .trilulilu import TriluliluIE
|
||||
from .trovo import (
|
||||
TrovoIE,
|
||||
@@ -1792,6 +1836,7 @@ from .trovo import (
|
||||
)
|
||||
from .trueid import TrueIDIE
|
||||
from .trunews import TruNewsIE
|
||||
from .truth import TruthIE
|
||||
from .trutv import TruTVIE
|
||||
from .tube8 import Tube8IE
|
||||
from .tubetugraz import TubeTuGrazIE, TubeTuGrazSeriesIE
|
||||
@@ -2087,7 +2132,6 @@ from .weibo import (
|
||||
WeiboMobileIE
|
||||
)
|
||||
from .weiqitv import WeiqiTVIE
|
||||
from .wetv import WeTvEpisodeIE, WeTvSeriesIE
|
||||
from .wikimedia import WikimediaIE
|
||||
from .willow import WillowIE
|
||||
from .wimtv import WimTVIE
|
||||
@@ -2170,42 +2214,44 @@ from .younow import (
|
||||
from .youporn import YouPornIE
|
||||
from .yourporn import YourPornIE
|
||||
from .yourupload import YourUploadIE
|
||||
from .youtube import (
|
||||
YoutubeIE,
|
||||
YoutubeClipIE,
|
||||
YoutubeFavouritesIE,
|
||||
YoutubeNotificationsIE,
|
||||
YoutubeHistoryIE,
|
||||
YoutubeTabIE,
|
||||
YoutubeLivestreamEmbedIE,
|
||||
YoutubePlaylistIE,
|
||||
YoutubeRecommendedIE,
|
||||
YoutubeSearchDateIE,
|
||||
YoutubeSearchIE,
|
||||
YoutubeSearchURLIE,
|
||||
YoutubeMusicSearchURLIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeStoriesIE,
|
||||
YoutubeTruncatedIDIE,
|
||||
YoutubeTruncatedURLIE,
|
||||
YoutubeYtBeIE,
|
||||
YoutubeYtUserIE,
|
||||
YoutubeWatchLaterIE,
|
||||
)
|
||||
from .zapiks import ZapiksIE
|
||||
from .zattoo import (
|
||||
BBVTVIE,
|
||||
BBVTVLiveIE,
|
||||
BBVTVRecordingsIE,
|
||||
EinsUndEinsTVIE,
|
||||
EinsUndEinsTVLiveIE,
|
||||
EinsUndEinsTVRecordingsIE,
|
||||
EWETVIE,
|
||||
EWETVLiveIE,
|
||||
EWETVRecordingsIE,
|
||||
GlattvisionTVIE,
|
||||
GlattvisionTVLiveIE,
|
||||
GlattvisionTVRecordingsIE,
|
||||
MNetTVIE,
|
||||
NetPlusIE,
|
||||
MNetTVLiveIE,
|
||||
MNetTVRecordingsIE,
|
||||
NetPlusTVIE,
|
||||
NetPlusTVLiveIE,
|
||||
NetPlusTVRecordingsIE,
|
||||
OsnatelTVIE,
|
||||
OsnatelTVLiveIE,
|
||||
OsnatelTVRecordingsIE,
|
||||
QuantumTVIE,
|
||||
QuantumTVLiveIE,
|
||||
QuantumTVRecordingsIE,
|
||||
SaltTVIE,
|
||||
SaltTVLiveIE,
|
||||
SaltTVRecordingsIE,
|
||||
SAKTVIE,
|
||||
SAKTVLiveIE,
|
||||
SAKTVRecordingsIE,
|
||||
VTXTVIE,
|
||||
VTXTVLiveIE,
|
||||
VTXTVRecordingsIE,
|
||||
WalyTVIE,
|
||||
WalyTVLiveIE,
|
||||
WalyTVRecordingsIE,
|
||||
ZattooIE,
|
||||
ZattooLiveIE,
|
||||
ZattooMoviesIE,
|
||||
|
||||
@@ -365,7 +365,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
# read breadcrumb on top of page
|
||||
breadcrumb = self._extract_breadcrumb_list(webpage, video_id)
|
||||
if breadcrumb:
|
||||
# breadcrumb list translates to: (example is 1st test for this IE)
|
||||
# breadcrumb list translates to: (e.g. 1st test for this IE)
|
||||
# Home > Anime (genre) > Isekai Shokudo 2 (series name) > Episode 1 "Cheese cakes" "Morning again" (episode title)
|
||||
# hence this works
|
||||
info['series'] = breadcrumb[-2]
|
||||
|
||||
@@ -28,14 +28,17 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
}
|
||||
|
||||
def _extract_aen_smil(self, smil_url, video_id, auth=None):
|
||||
query = {'mbr': 'true'}
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'formats': 'M3U+none,MPEG-DASH+none,MPEG4,MP3',
|
||||
}
|
||||
if auth:
|
||||
query['auth'] = auth
|
||||
TP_SMIL_QUERY = [{
|
||||
'assetTypes': 'high_video_ak',
|
||||
'switch': 'hls_high_ak'
|
||||
'switch': 'hls_high_ak',
|
||||
}, {
|
||||
'assetTypes': 'high_video_s3'
|
||||
'assetTypes': 'high_video_s3',
|
||||
}, {
|
||||
'assetTypes': 'high_video_s3',
|
||||
'switch': 'hls_high_fastly',
|
||||
|
||||
@@ -95,24 +95,24 @@ class ArteTVIE(ArteTVBaseIE):
|
||||
|
||||
# all obtained by exhaustive testing
|
||||
_COUNTRIES_MAP = {
|
||||
'DE_FR': {
|
||||
'DE_FR': (
|
||||
'BL', 'DE', 'FR', 'GF', 'GP', 'MF', 'MQ', 'NC',
|
||||
'PF', 'PM', 'RE', 'WF', 'YT',
|
||||
},
|
||||
),
|
||||
# with both of the below 'BE' sometimes works, sometimes doesn't
|
||||
'EUR_DE_FR': {
|
||||
'EUR_DE_FR': (
|
||||
'AT', 'BL', 'CH', 'DE', 'FR', 'GF', 'GP', 'LI',
|
||||
'MC', 'MF', 'MQ', 'NC', 'PF', 'PM', 'RE', 'WF',
|
||||
'YT',
|
||||
},
|
||||
'SAT': {
|
||||
),
|
||||
'SAT': (
|
||||
'AD', 'AT', 'AX', 'BG', 'BL', 'CH', 'CY', 'CZ',
|
||||
'DE', 'DK', 'EE', 'ES', 'FI', 'FR', 'GB', 'GF',
|
||||
'GR', 'HR', 'HU', 'IE', 'IS', 'IT', 'KN', 'LI',
|
||||
'LT', 'LU', 'LV', 'MC', 'MF', 'MQ', 'MT', 'NC',
|
||||
'NL', 'NO', 'PF', 'PL', 'PM', 'PT', 'RE', 'RO',
|
||||
'SE', 'SI', 'SK', 'SM', 'VA', 'WF', 'YT',
|
||||
},
|
||||
),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -218,6 +218,9 @@ class BiliBiliIE(InfoExtractor):
|
||||
|
||||
durl = traverse_obj(video_info, ('dash', 'video'))
|
||||
audios = traverse_obj(video_info, ('dash', 'audio')) or []
|
||||
flac_audio = traverse_obj(video_info, ('dash', 'flac', 'audio'))
|
||||
if flac_audio:
|
||||
audios.append(flac_audio)
|
||||
entries = []
|
||||
|
||||
RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
|
||||
@@ -620,14 +623,15 @@ class BiliBiliSearchIE(SearchInfoExtractor):
|
||||
'keyword': query,
|
||||
'page': page_num,
|
||||
'context': '',
|
||||
'order': 'pubdate',
|
||||
'duration': 0,
|
||||
'tids_2': '',
|
||||
'__refresh__': 'true',
|
||||
'search_type': 'video',
|
||||
'tids': 0,
|
||||
'highlight': 1,
|
||||
})['data'].get('result') or []
|
||||
})['data'].get('result')
|
||||
if not videos:
|
||||
break
|
||||
for video in videos:
|
||||
yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
|
||||
|
||||
|
||||
@@ -65,10 +65,12 @@ class BitChuteIE(InfoExtractor):
|
||||
error = self._html_search_regex(r'<h1 class="page-title">([^<]+)</h1>', webpage, 'error', default='Cannot find video')
|
||||
if error == 'Video Unavailable':
|
||||
raise GeoRestrictedError(error)
|
||||
raise ExtractorError(error)
|
||||
raise ExtractorError(error, expected=True)
|
||||
formats = entries[0]['formats']
|
||||
|
||||
self._check_formats(formats, video_id)
|
||||
if not formats:
|
||||
raise self.raise_no_formats('Video is unavailable', expected=True, video_id=video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = self._html_search_regex(
|
||||
|
||||
@@ -331,7 +331,7 @@ class InfoExtractor:
|
||||
playable_in_embed: Whether this video is allowed to play in embedded
|
||||
players on other sites. Can be True (=always allowed),
|
||||
False (=never allowed), None (=unknown), or a string
|
||||
specifying the criteria for embedability (Eg: 'whitelist')
|
||||
specifying the criteria for embedability; e.g. 'whitelist'
|
||||
availability: Under what condition the video is available. One of
|
||||
'private', 'premium_only', 'subscriber_only', 'needs_auth',
|
||||
'unlisted' or 'public'. Use 'InfoExtractor._availability'
|
||||
@@ -452,8 +452,8 @@ class InfoExtractor:
|
||||
|
||||
_extract_from_webpage may raise self.StopExtraction() to stop further
|
||||
processing of the webpage and obtain exclusive rights to it. This is useful
|
||||
when the extractor cannot reliably be matched using just the URL.
|
||||
Eg: invidious/peertube instances
|
||||
when the extractor cannot reliably be matched using just the URL,
|
||||
e.g. invidious/peertube instances
|
||||
|
||||
Embed-only extractors can be defined by setting _VALID_URL = False.
|
||||
|
||||
@@ -480,6 +480,9 @@ class InfoExtractor:
|
||||
will be used by geo restriction bypass mechanism similarly
|
||||
to _GEO_COUNTRIES.
|
||||
|
||||
The _ENABLED attribute should be set to False for IEs that
|
||||
are disabled by default and must be explicitly enabled.
|
||||
|
||||
The _WORKING attribute should be set to False for broken IEs
|
||||
in order to warn the users and skip the tests.
|
||||
"""
|
||||
@@ -491,6 +494,7 @@ class InfoExtractor:
|
||||
_GEO_COUNTRIES = None
|
||||
_GEO_IP_BLOCKS = None
|
||||
_WORKING = True
|
||||
_ENABLED = True
|
||||
_NETRC_MACHINE = None
|
||||
IE_DESC = None
|
||||
SEARCH_KEY = None
|
||||
@@ -1669,8 +1673,8 @@ class InfoExtractor:
|
||||
regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'
|
||||
|
||||
default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
|
||||
'res', 'fps', 'hdr:12', 'channels', 'codec:vp9.2', 'size', 'br', 'asr',
|
||||
'proto', 'ext', 'hasaud', 'source', 'id') # These must not be aliases
|
||||
'res', 'fps', 'hdr:12', 'vcodec:vp9.2', 'channels', 'acodec',
|
||||
'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id') # These must not be aliases
|
||||
ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
|
||||
'height', 'width', 'proto', 'vext', 'abr', 'aext',
|
||||
'fps', 'fs_approx', 'source', 'id')
|
||||
@@ -1689,7 +1693,7 @@ class InfoExtractor:
|
||||
'order_free': ('webm', 'mp4', 'flv', '', 'none')},
|
||||
'aext': {'type': 'ordered', 'field': 'audio_ext',
|
||||
'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'webm', '', 'none'),
|
||||
'order_free': ('opus', 'ogg', 'webm', 'm4a', 'mp3', 'aac', '', 'none')},
|
||||
'order_free': ('ogg', 'opus', 'webm', 'mp3', 'm4a', 'aac', '', 'none')},
|
||||
'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000},
|
||||
'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple',
|
||||
'field': ('vcodec', 'acodec'),
|
||||
@@ -1762,9 +1766,8 @@ class InfoExtractor:
|
||||
if field not in self.settings:
|
||||
if key in ('forced', 'priority'):
|
||||
return False
|
||||
self.ydl.deprecation_warning(
|
||||
f'Using arbitrary fields ({field}) for format sorting is deprecated '
|
||||
'and may be removed in a future version')
|
||||
self.ydl.deprecated_feature(f'Using arbitrary fields ({field}) for format sorting is '
|
||||
'deprecated and may be removed in a future version')
|
||||
self.settings[field] = {}
|
||||
propObj = self.settings[field]
|
||||
if key not in propObj:
|
||||
@@ -1849,9 +1852,8 @@ class InfoExtractor:
|
||||
if self._get_field_setting(field, 'type') == 'alias':
|
||||
alias, field = field, self._get_field_setting(field, 'field')
|
||||
if self._get_field_setting(alias, 'deprecated'):
|
||||
self.ydl.deprecation_warning(
|
||||
f'Format sorting alias {alias} is deprecated '
|
||||
f'and may be removed in a future version. Please use {field} instead')
|
||||
self.ydl.deprecated_feature(f'Format sorting alias {alias} is deprecated and may '
|
||||
'be removed in a future version. Please use {field} instead')
|
||||
reverse = match.group('reverse') is not None
|
||||
closest = match.group('separator') == '~'
|
||||
limit_text = match.group('limit')
|
||||
@@ -2367,7 +2369,7 @@ class InfoExtractor:
|
||||
audio_group_id = last_stream_inf.get('AUDIO')
|
||||
# As per [1, 4.3.4.1.1] any EXT-X-STREAM-INF tag which
|
||||
# references a rendition group MUST have a CODECS attribute.
|
||||
# However, this is not always respected, for example, [2]
|
||||
# However, this is not always respected. E.g. [2]
|
||||
# contains EXT-X-STREAM-INF tag which references AUDIO
|
||||
# rendition group but does not have CODECS and despite
|
||||
# referencing an audio group it represents a complete
|
||||
@@ -3003,8 +3005,8 @@ class InfoExtractor:
|
||||
segment_number += 1
|
||||
segment_time += segment_d
|
||||
elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
|
||||
# No media template
|
||||
# Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
|
||||
# No media template,
|
||||
# e.g. https://www.youtube.com/watch?v=iXZV5uAYMJI
|
||||
# or any YouTube dashsegments video
|
||||
fragments = []
|
||||
segment_index = 0
|
||||
@@ -3021,7 +3023,7 @@ class InfoExtractor:
|
||||
representation_ms_info['fragments'] = fragments
|
||||
elif 'segment_urls' in representation_ms_info:
|
||||
# Segment URLs with no SegmentTimeline
|
||||
# Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
|
||||
# E.g. https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
|
||||
# https://github.com/ytdl-org/youtube-dl/pull/14844
|
||||
fragments = []
|
||||
segment_duration = float_or_none(
|
||||
@@ -3249,8 +3251,8 @@ class InfoExtractor:
|
||||
media_tags.extend(re.findall(
|
||||
# We only allow video|audio followed by a whitespace or '>'.
|
||||
# Allowing more characters may end up in significant slow down (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/11979, example URL:
|
||||
# http://www.porntrex.com/maps/videositemap.xml).
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/11979,
|
||||
# e.g. http://www.porntrex.com/maps/videositemap.xml).
|
||||
r'(?s)(<(?P<tag>%s)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>' % _MEDIA_TAG_NAME_RE, webpage))
|
||||
for media_tag, _, media_type, media_content in media_tags:
|
||||
media_info = {
|
||||
@@ -3258,7 +3260,7 @@ class InfoExtractor:
|
||||
'subtitles': {},
|
||||
}
|
||||
media_attributes = extract_attributes(media_tag)
|
||||
src = strip_or_none(media_attributes.get('src'))
|
||||
src = strip_or_none(dict_get(media_attributes, ('src', 'data-video-src', 'data-src', 'data-source')))
|
||||
if src:
|
||||
f = parse_content_type(media_attributes.get('type'))
|
||||
_, formats = _media_formats(src, media_type, f)
|
||||
@@ -3269,7 +3271,7 @@ class InfoExtractor:
|
||||
s_attr = extract_attributes(source_tag)
|
||||
# data-video-src and data-src are non standard but seen
|
||||
# several times in the wild
|
||||
src = strip_or_none(dict_get(s_attr, ('src', 'data-video-src', 'data-src')))
|
||||
src = strip_or_none(dict_get(s_attr, ('src', 'data-video-src', 'data-src', 'data-source')))
|
||||
if not src:
|
||||
continue
|
||||
f = parse_content_type(s_attr.get('type'))
|
||||
@@ -3706,7 +3708,7 @@ class InfoExtractor:
|
||||
desc += f'; "{cls.SEARCH_KEY}:" prefix'
|
||||
if search_examples:
|
||||
_COUNTS = ('', '5', '10', 'all')
|
||||
desc += f' (Example: "{cls.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")'
|
||||
desc += f' (e.g. "{cls.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")'
|
||||
if not cls.working():
|
||||
desc += ' (**Currently broken**)' if markdown else ' (Currently broken)'
|
||||
|
||||
@@ -3872,7 +3874,7 @@ class InfoExtractor:
|
||||
def _extract_from_webpage(cls, url, webpage):
|
||||
for embed_url in orderedSet(
|
||||
cls._extract_embed_urls(url, webpage) or [], lazy=True):
|
||||
yield cls.url_result(embed_url, cls)
|
||||
yield cls.url_result(embed_url, None if cls._VALID_URL is False else cls)
|
||||
|
||||
@classmethod
|
||||
def _extract_embed_urls(cls, url, webpage):
|
||||
@@ -3941,3 +3943,12 @@ class SearchInfoExtractor(InfoExtractor):
|
||||
@classproperty
|
||||
def SEARCH_KEY(cls):
|
||||
return cls._SEARCH_KEY
|
||||
|
||||
|
||||
class UnsupportedURLIE(InfoExtractor):
|
||||
_VALID_URL = '.*'
|
||||
_ENABLED = False
|
||||
IE_DESC = False
|
||||
|
||||
def _real_extract(self, url):
|
||||
raise UnsupportedError(url)
|
||||
|
||||
@@ -114,7 +114,14 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
|
||||
class CrunchyrollIE(CrunchyrollBaseIE, VRVBaseIE):
|
||||
IE_NAME = 'crunchyroll'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?!series/|watch/)(?:[^/]+/){1,2}[^/?&]*?)(?P<id>[0-9]+))(?:[/?&]|$)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:(?P<prefix>www|m)\.)?(?P<url>
|
||||
crunchyroll\.(?:com|fr)/(?:
|
||||
media(?:-|/\?id=)|
|
||||
(?!series/|watch/)(?:[^/]+/){1,2}[^/?&#]*?
|
||||
)(?P<id>[0-9]+)
|
||||
)(?:[/?&#]|$)'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||
'info_dict': {
|
||||
@@ -713,15 +720,20 @@ class CrunchyrollBetaBaseIE(CrunchyrollBaseIE):
|
||||
|
||||
def _get_params(self, lang):
|
||||
if not CrunchyrollBetaBaseIE.params:
|
||||
if self._get_cookies(f'https://beta.crunchyroll.com/{lang}').get('etp_rt'):
|
||||
grant_type, key = 'etp_rt_cookie', 'accountAuthClientId'
|
||||
else:
|
||||
grant_type, key = 'client_id', 'anonClientId'
|
||||
|
||||
initial_state, app_config = self._get_beta_embedded_json(self._download_webpage(
|
||||
f'https://beta.crunchyroll.com/{lang}', None, note='Retrieving main page'), None)
|
||||
api_domain = app_config['cxApiParams']['apiDomain']
|
||||
basic_token = str(base64.b64encode(('%s:' % app_config['cxApiParams']['accountAuthClientId']).encode('ascii')), 'ascii')
|
||||
|
||||
auth_response = self._download_json(
|
||||
f'{api_domain}/auth/v1/token', None, note='Authenticating with cookie',
|
||||
f'{api_domain}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
|
||||
headers={
|
||||
'Authorization': 'Basic ' + basic_token
|
||||
}, data='grant_type=etp_rt_cookie'.encode('ascii'))
|
||||
'Authorization': 'Basic ' + str(base64.b64encode(('%s:' % app_config['cxApiParams'][key]).encode('ascii')), 'ascii')
|
||||
}, data=f'grant_type={grant_type}'.encode('ascii'))
|
||||
policy_response = self._download_json(
|
||||
f'{api_domain}/index/v2', None, note='Retrieving signed policy',
|
||||
headers={
|
||||
@@ -740,25 +752,14 @@ class CrunchyrollBetaBaseIE(CrunchyrollBaseIE):
|
||||
CrunchyrollBetaBaseIE.params = (api_domain, bucket, params)
|
||||
return CrunchyrollBetaBaseIE.params
|
||||
|
||||
def _redirect_from_beta(self, url, lang, internal_id, display_id, is_episode, iekey):
|
||||
initial_state, app_config = self._get_beta_embedded_json(self._download_webpage(url, display_id), display_id)
|
||||
content_data = initial_state['content']['byId'][internal_id]
|
||||
if is_episode:
|
||||
video_id = content_data['external_id'].split('.')[1]
|
||||
series_id = content_data['episode_metadata']['series_slug_title']
|
||||
else:
|
||||
series_id = content_data['slug_title']
|
||||
series_id = re.sub(r'-{2,}', '-', series_id)
|
||||
url = f'https://www.crunchyroll.com/{lang}{series_id}'
|
||||
if is_episode:
|
||||
url = url + f'/{display_id}-{video_id}'
|
||||
self.to_screen(f'{display_id}: Not logged in. Redirecting to non-beta site - {url}')
|
||||
return self.url_result(url, iekey, display_id)
|
||||
|
||||
|
||||
class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):
|
||||
IE_NAME = 'crunchyroll:beta'
|
||||
_VALID_URL = r'https?://beta\.crunchyroll\.com/(?P<lang>(?:\w{2}(?:-\w{2})?/)?)watch/(?P<id>\w+)/(?P<display_id>[\w\-]*)/?(?:\?|$)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://beta\.crunchyroll\.com/
|
||||
(?P<lang>(?:\w{2}(?:-\w{2})?/)?)
|
||||
watch/(?P<id>\w+)
|
||||
(?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://beta.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
|
||||
'info_dict': {
|
||||
@@ -780,7 +781,7 @@ class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://beta.crunchyroll.com/watch/GY2P1Q98Y/',
|
||||
'url': 'https://beta.crunchyroll.com/watch/GY2P1Q98Y',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
|
||||
@@ -789,10 +790,6 @@ class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id')
|
||||
|
||||
if not self._get_cookies(url).get('etp_rt'):
|
||||
return self._redirect_from_beta(url, lang, internal_id, display_id, True, CrunchyrollIE.ie_key())
|
||||
|
||||
api_domain, bucket, params = self._get_params(lang)
|
||||
|
||||
episode_response = self._download_json(
|
||||
@@ -867,7 +864,11 @@ class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):
|
||||
|
||||
class CrunchyrollBetaShowIE(CrunchyrollBetaBaseIE):
|
||||
IE_NAME = 'crunchyroll:playlist:beta'
|
||||
_VALID_URL = r'https?://beta\.crunchyroll\.com/(?P<lang>(?:\w{2}(?:-\w{2})?/)?)series/(?P<id>\w+)/(?P<display_id>[\w\-]*)/?(?:\?|$)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://beta\.crunchyroll\.com/
|
||||
(?P<lang>(?:\w{2}(?:-\w{2})?/)?)
|
||||
series/(?P<id>\w+)
|
||||
(?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://beta.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA',
|
||||
'info_dict': {
|
||||
@@ -876,16 +877,12 @@ class CrunchyrollBetaShowIE(CrunchyrollBetaBaseIE):
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
}, {
|
||||
'url': 'https://beta.crunchyroll.com/it/series/GY19NQ2QR/Girl-Friend-BETA',
|
||||
'url': 'https://beta.crunchyroll.com/it/series/GY19NQ2QR',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id')
|
||||
|
||||
if not self._get_cookies(url).get('etp_rt'):
|
||||
return self._redirect_from_beta(url, lang, internal_id, display_id, False, CrunchyrollShowPlaylistIE.ie_key())
|
||||
|
||||
api_domain, bucket, params = self._get_params(lang)
|
||||
|
||||
series_response = self._download_json(
|
||||
|
||||
@@ -6,7 +6,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class DoodStreamIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dood\.(?:to|watch|so|pm)/[ed]/(?P<id>[a-z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?dood\.(?:to|watch|so|pm|wf)/[ed]/(?P<id>[a-z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://dood.to/e/5s1wmbdacezb',
|
||||
'md5': '4568b83b31e13242b3f1ff96c55f0595',
|
||||
|
||||
46
yt_dlp/extractor/epoch.py
Normal file
46
yt_dlp/extractor/epoch.py
Normal file
@@ -0,0 +1,46 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class EpochIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www.theepochtimes\.com/[\w-]+_(?P<id>\d+).html'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.theepochtimes.com/they-can-do-audio-video-physical-surveillance-on-you-24h-365d-a-year-rex-lee-on-intrusive-apps_4661688.html',
|
||||
'info_dict': {
|
||||
'id': 'a3dd732c-4750-4bc8-8156-69180668bda1',
|
||||
'ext': 'mp4',
|
||||
'title': '‘They Can Do Audio, Video, Physical Surveillance on You 24H/365D a Year’: Rex Lee on Intrusive Apps',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://www.theepochtimes.com/the-communist-partys-cyberattacks-on-america-explained-rex-lee-talks-tech-hybrid-warfare_4342413.html',
|
||||
'info_dict': {
|
||||
'id': '276c7f46-3bbf-475d-9934-b9bbe827cf0a',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Communist Party’s Cyberattacks on America Explained; Rex Lee Talks Tech Hybrid Warfare',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://www.theepochtimes.com/kash-patel-a-6-year-saga-of-government-corruption-from-russiagate-to-mar-a-lago_4690250.html',
|
||||
'info_dict': {
|
||||
'id': 'aa9ceecd-a127-453d-a2de-7153d6fd69b6',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kash Patel: A ‘6-Year-Saga’ of Government Corruption, From Russiagate to Mar-a-Lago',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
youmaker_video_id = self._search_regex(r'data-trailer="[\w-]+" data-id="([\w-]+)"', webpage, 'url')
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
f'http://vs1.youmaker.com/assets/{youmaker_video_id}/playlist.m3u8', video_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
return {
|
||||
'id': youmaker_video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'title': self._html_extract_title(webpage)
|
||||
}
|
||||
99
yt_dlp/extractor/eurosport.py
Normal file
99
yt_dlp/extractor/eurosport.py
Normal file
@@ -0,0 +1,99 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import traverse_obj
|
||||
|
||||
|
||||
class EurosportIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.eurosport\.com/\w+/[\w-]+/\d+/[\w-]+_(?P<id>vid\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.eurosport.com/tennis/roland-garros/2022/highlights-rafael-nadal-brushes-aside-caper-ruud-to-win-record-extending-14th-french-open-title_vid1694147/video.shtml',
|
||||
'info_dict': {
|
||||
'id': '2480939',
|
||||
'ext': 'mp4',
|
||||
'title': 'Highlights: Rafael Nadal brushes aside Caper Ruud to win record-extending 14th French Open title',
|
||||
'description': 'md5:b564db73ecfe4b14ebbd8e62a3692c76',
|
||||
'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2022/06/05/3388285-69245968-2560-1440.png',
|
||||
'duration': 195.0,
|
||||
'display_id': 'vid1694147',
|
||||
'timestamp': 1654446698,
|
||||
'upload_date': '20220605',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.eurosport.com/tennis/roland-garros/2022/watch-the-top-five-shots-from-men-s-final-as-rafael-nadal-beats-casper-ruud-to-seal-14th-french-open_vid1694283/video.shtml',
|
||||
'info_dict': {
|
||||
'id': '2481254',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:149dcc5dfb38ab7352acc008cc9fb071',
|
||||
'duration': 130.0,
|
||||
'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2022/06/05/3388422-69248708-2560-1440.png',
|
||||
'description': 'md5:a0c8a7f6b285e48ae8ddbe7aa85cfee6',
|
||||
'display_id': 'vid1694283',
|
||||
'timestamp': 1654456090,
|
||||
'upload_date': '20220605',
|
||||
}
|
||||
}, {
|
||||
# geo-fence but can bypassed by xff
|
||||
'url': 'https://www.eurosport.com/cycling/tour-de-france-femmes/2022/incredible-ride-marlen-reusser-storms-to-stage-4-win-at-tour-de-france-femmes_vid1722221/video.shtml',
|
||||
'info_dict': {
|
||||
'id': '2582552',
|
||||
'ext': 'mp4',
|
||||
'title': '‘Incredible ride!’ - Marlen Reusser storms to Stage 4 win at Tour de France Femmes',
|
||||
'duration': 188.0,
|
||||
'display_id': 'vid1722221',
|
||||
'timestamp': 1658936167,
|
||||
'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2022/07/27/3423347-69852108-2560-1440.jpg',
|
||||
'description': 'md5:32bbe3a773ac132c57fb1e8cca4b7c71',
|
||||
'upload_date': '20220727',
|
||||
}
|
||||
}]
|
||||
|
||||
_TOKEN = None
|
||||
|
||||
# actually defined in https://netsport.eurosport.io/?variables={"databaseId":<databaseId>,"playoutType":"VDP"}&extensions={"persistedQuery":{"version":1 ..
|
||||
# but this method require to get sha256 hash
|
||||
_GEO_COUNTRIES = ['DE', 'NL', 'EU', 'IT', 'FR'] # Not complete list but it should work
|
||||
|
||||
def _real_initialize(self):
|
||||
if EurosportIE._TOKEN is None:
|
||||
EurosportIE._TOKEN = self._download_json(
|
||||
'https://eu3-prod-direct.eurosport.com/token?realm=eurosport', None,
|
||||
'Trying to get token')['data']['attributes']['token']
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
json_data = self._download_json(
|
||||
f'https://eu3-prod-direct.eurosport.com/playback/v2/videoPlaybackInfo/sourceSystemId/eurosport-{display_id}',
|
||||
display_id, query={'usePreAuth': True}, headers={'Authorization': f'Bearer {EurosportIE._TOKEN}'})['data']
|
||||
|
||||
json_ld_data = self._search_json_ld(webpage, display_id)
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for stream_type in json_data['attributes']['streaming']:
|
||||
if stream_type == 'hls':
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4')
|
||||
elif stream_type == 'dash':
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
|
||||
elif stream_type == 'mss':
|
||||
fmts, subs = self._extract_ism_formats_and_subtitles(
|
||||
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id)
|
||||
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': json_data['id'],
|
||||
'title': json_ld_data.get('title') or self._og_search_title(webpage),
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnails': json_ld_data.get('thumbnails'),
|
||||
'description': (json_ld_data.get('description')
|
||||
or self._html_search_meta(['og:description', 'description'], webpage)),
|
||||
'duration': json_ld_data.get('duration'),
|
||||
'timestamp': json_ld_data.get('timestamp'),
|
||||
}
|
||||
@@ -772,3 +772,30 @@ class FacebookRedirectURLIE(InfoExtractor):
|
||||
if not redirect_url:
|
||||
raise ExtractorError('Invalid facebook redirect URL', expected=True)
|
||||
return self.url_result(redirect_url)
|
||||
|
||||
|
||||
class FacebookReelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/reel/(?P<id>\d+)'
|
||||
IE_NAME = 'facebook:reel'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.facebook.com/reel/1195289147628387',
|
||||
'md5': 'c4ff9a7182ff9ff7d6f7a83603bae831',
|
||||
'info_dict': {
|
||||
'id': '1195289147628387',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:9f5b142921b2dc57004fa13f76005f87',
|
||||
'description': 'md5:24ea7ef062215d295bdde64e778f5474',
|
||||
'uploader': 'Beast Camp Training',
|
||||
'uploader_id': '1738535909799870',
|
||||
'duration': 9.536,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'upload_date': '20211121',
|
||||
'timestamp': 1637502604,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
f'https://m.facebook.com/watch/?v={video_id}&_rdr', FacebookIE, video_id)
|
||||
|
||||
@@ -3,7 +3,6 @@ import re
|
||||
import urllib.parse
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from . import gen_extractor_classes
|
||||
from .common import InfoExtractor # isort: split
|
||||
from .brightcove import BrightcoveLegacyIE, BrightcoveNewIE
|
||||
from .commonprotocols import RtmpIE
|
||||
@@ -26,6 +25,7 @@ from ..utils import (
|
||||
parse_resolution,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
@@ -2805,7 +2805,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
self._downloader.write_debug('Looking for embeds')
|
||||
embeds = []
|
||||
for ie in gen_extractor_classes():
|
||||
for ie in self._downloader._ies.values():
|
||||
gen = ie.extract_from_webpage(self._downloader, url, webpage)
|
||||
current_embeds = []
|
||||
try:
|
||||
@@ -2840,8 +2840,9 @@ class GenericIE(InfoExtractor):
|
||||
try:
|
||||
info = self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, require_title=False, base_url=url)
|
||||
self.report_detected('JW Player data')
|
||||
return merge_dicts(info, info_dict)
|
||||
if traverse_obj(info, 'formats', ('entries', ..., 'formats')):
|
||||
self.report_detected('JW Player data')
|
||||
return merge_dicts(info, info_dict)
|
||||
except ExtractorError:
|
||||
# See https://github.com/ytdl-org/youtube-dl/pull/16735
|
||||
pass
|
||||
@@ -3035,7 +3036,7 @@ class GenericIE(InfoExtractor):
|
||||
self.report_detected('Twitter card')
|
||||
if not found:
|
||||
# We look for Open Graph info:
|
||||
# We have to match any number spaces between elements, some sites try to align them (eg.: statigr.am)
|
||||
# We have to match any number spaces between elements, some sites try to align them, e.g.: statigr.am
|
||||
m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
|
||||
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
|
||||
if m_video_type is not None:
|
||||
|
||||
@@ -6,7 +6,6 @@ from ..compat import compat_urlparse, compat_b64decode
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
@@ -55,11 +54,7 @@ class HuyaLiveIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id=video_id)
|
||||
json_stream = self._search_regex(r'"stream":\s+"([a-zA-Z0-9+=/]+)"', webpage, 'stream', default=None)
|
||||
if not json_stream:
|
||||
raise ExtractorError('Video is offline', expected=True)
|
||||
stream_data = self._parse_json(compat_b64decode(json_stream).decode(), video_id=video_id,
|
||||
transform_source=js_to_json)
|
||||
stream_data = self._search_json(r'stream:\s+', webpage, 'stream', video_id=video_id, default=None)
|
||||
room_info = try_get(stream_data, lambda x: x['data'][0]['gameLiveInfo'])
|
||||
if not room_info:
|
||||
raise ExtractorError('Can not extract the room info', expected=True)
|
||||
@@ -67,6 +62,8 @@ class HuyaLiveIE(InfoExtractor):
|
||||
screen_type = room_info.get('screenType')
|
||||
live_source_type = room_info.get('liveSourceType')
|
||||
stream_info_list = stream_data['data'][0]['gameStreamInfoList']
|
||||
if not stream_info_list:
|
||||
raise ExtractorError('Video is offline', expected=True)
|
||||
formats = []
|
||||
for stream_info in stream_info_list:
|
||||
stream_url = stream_info.get('sFlvUrl')
|
||||
|
||||
@@ -39,37 +39,42 @@ class InstagramBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'instagram'
|
||||
_IS_LOGGED_IN = False
|
||||
|
||||
_API_BASE_URL = 'https://i.instagram.com/api/v1'
|
||||
_LOGIN_URL = 'https://www.instagram.com/accounts/login'
|
||||
_API_HEADERS = {
|
||||
'X-IG-App-ID': '936619743392459',
|
||||
'X-ASBD-ID': '198387',
|
||||
'X-IG-WWW-Claim': '0',
|
||||
'Origin': 'https://www.instagram.com',
|
||||
'Accept': '*/*',
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
|
||||
}
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
if self._IS_LOGGED_IN:
|
||||
return
|
||||
|
||||
login_webpage = self._download_webpage(
|
||||
'https://www.instagram.com/accounts/login/', None,
|
||||
note='Downloading login webpage', errnote='Failed to download login webpage')
|
||||
self._LOGIN_URL, None, note='Downloading login webpage', errnote='Failed to download login webpage')
|
||||
|
||||
shared_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\._sharedData\s*=\s*({.+?});',
|
||||
login_webpage, 'shared data', default='{}'),
|
||||
None)
|
||||
shared_data = self._parse_json(self._search_regex(
|
||||
r'window\._sharedData\s*=\s*({.+?});', login_webpage, 'shared data', default='{}'), None)
|
||||
|
||||
login = self._download_json('https://www.instagram.com/accounts/login/ajax/', None, note='Logging in', headers={
|
||||
'Accept': '*/*',
|
||||
'X-IG-App-ID': '936619743392459',
|
||||
'X-ASBD-ID': '198387',
|
||||
'X-IG-WWW-Claim': '0',
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'X-CSRFToken': shared_data['config']['csrf_token'],
|
||||
'X-Instagram-AJAX': shared_data['rollout_hash'],
|
||||
'Referer': 'https://www.instagram.com/',
|
||||
}, data=urlencode_postdata({
|
||||
'enc_password': f'#PWD_INSTAGRAM_BROWSER:0:{int(time.time())}:{password}',
|
||||
'username': username,
|
||||
'queryParams': '{}',
|
||||
'optIntoOneTap': 'false',
|
||||
'stopDeletionNonce': '',
|
||||
'trustedDeviceRecords': '{}',
|
||||
}))
|
||||
login = self._download_json(
|
||||
f'{self._LOGIN_URL}/ajax/', None, note='Logging in', headers={
|
||||
**self._API_HEADERS,
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'X-CSRFToken': shared_data['config']['csrf_token'],
|
||||
'X-Instagram-AJAX': shared_data['rollout_hash'],
|
||||
'Referer': 'https://www.instagram.com/',
|
||||
}, data=urlencode_postdata({
|
||||
'enc_password': f'#PWD_INSTAGRAM_BROWSER:0:{int(time.time())}:{password}',
|
||||
'username': username,
|
||||
'queryParams': '{}',
|
||||
'optIntoOneTap': 'false',
|
||||
'stopDeletionNonce': '',
|
||||
'trustedDeviceRecords': '{}',
|
||||
}))
|
||||
|
||||
if not login.get('authenticated'):
|
||||
if login.get('message'):
|
||||
@@ -134,7 +139,7 @@ class InstagramBaseIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _extract_product_media(self, product_media):
|
||||
media_id = product_media.get('code') or product_media.get('id')
|
||||
media_id = product_media.get('code') or _pk_to_id(product_media.get('pk'))
|
||||
vcodec = product_media.get('video_codec')
|
||||
dash_manifest_raw = product_media.get('video_dash_manifest')
|
||||
videos_list = product_media.get('video_versions')
|
||||
@@ -179,7 +184,7 @@ class InstagramBaseIE(InfoExtractor):
|
||||
|
||||
user_info = product_info.get('user') or {}
|
||||
info_dict = {
|
||||
'id': product_info.get('code') or product_info.get('id'),
|
||||
'id': product_info.get('code') or _pk_to_id(product_info.get('pk')),
|
||||
'title': product_info.get('title') or f'Video by {user_info.get("username")}',
|
||||
'description': traverse_obj(product_info, ('caption', 'text'), expected_type=str_or_none),
|
||||
'timestamp': int_or_none(product_info.get('taken_at')),
|
||||
@@ -360,49 +365,74 @@ class InstagramIE(InstagramBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, url = self._match_valid_url(url).group('id', 'url')
|
||||
general_info = self._download_json(
|
||||
f'https://www.instagram.com/graphql/query/?query_hash=9f8827793ef34641b2fb195d4d41151c'
|
||||
f'&variables=%7B"shortcode":"{video_id}",'
|
||||
'"parent_comment_count":10,"has_threaded_comments":true}', video_id, fatal=False, errnote=False,
|
||||
headers={
|
||||
'Accept': '*',
|
||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
|
||||
'Authority': 'www.instagram.com',
|
||||
'Referer': 'https://www.instagram.com',
|
||||
'x-ig-app-id': '936619743392459',
|
||||
})
|
||||
media = traverse_obj(general_info, ('data', 'shortcode_media')) or {}
|
||||
media, webpage = {}, ''
|
||||
|
||||
api_check = self._download_json(
|
||||
f'{self._API_BASE_URL}/web/get_ruling_for_content/?content_type=MEDIA&target_id={_id_to_pk(video_id)}',
|
||||
video_id, headers=self._API_HEADERS, fatal=False, note='Setting up session', errnote=False) or {}
|
||||
csrf_token = self._get_cookies('https://www.instagram.com').get('csrftoken')
|
||||
|
||||
if not csrf_token:
|
||||
self.report_warning('No csrf token set by Instagram API', video_id)
|
||||
elif api_check.get('status') != 'ok':
|
||||
self.report_warning('Instagram API is not granting access', video_id)
|
||||
else:
|
||||
if self._get_cookies(url).get('sessionid'):
|
||||
media.update(traverse_obj(self._download_json(
|
||||
f'{self._API_BASE_URL}/media/{_id_to_pk(video_id)}/info/', video_id,
|
||||
fatal=False, note='Downloading video info', headers={
|
||||
**self._API_HEADERS,
|
||||
'X-CSRFToken': csrf_token.value,
|
||||
}), ('items', 0)) or {})
|
||||
if media:
|
||||
return self._extract_product(media)
|
||||
|
||||
variables = {
|
||||
'shortcode': video_id,
|
||||
'child_comment_count': 3,
|
||||
'fetch_comment_count': 40,
|
||||
'parent_comment_count': 24,
|
||||
'has_threaded_comments': True,
|
||||
}
|
||||
general_info = self._download_json(
|
||||
'https://www.instagram.com/graphql/query/', video_id, fatal=False,
|
||||
headers={
|
||||
**self._API_HEADERS,
|
||||
'X-CSRFToken': csrf_token.value,
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'Referer': url,
|
||||
}, query={
|
||||
'query_hash': '9f8827793ef34641b2fb195d4d41151c',
|
||||
'variables': json.dumps(variables, separators=(',', ':')),
|
||||
})
|
||||
media.update(traverse_obj(general_info, ('data', 'shortcode_media')) or {})
|
||||
|
||||
if not media:
|
||||
self.report_warning('General metadata extraction failed', video_id)
|
||||
self.report_warning('General metadata extraction failed (some metadata might be missing).', video_id)
|
||||
webpage, urlh = self._download_webpage_handle(url, video_id)
|
||||
shared_data = self._search_json(
|
||||
r'window\._sharedData\s*=', webpage, 'shared data', video_id, fatal=False) or {}
|
||||
|
||||
info = self._download_json(
|
||||
f'https://i.instagram.com/api/v1/media/{_id_to_pk(video_id)}/info/', video_id,
|
||||
fatal=False, note='Downloading video info', errnote=False, headers={
|
||||
'Accept': '*',
|
||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
|
||||
'Authority': 'www.instagram.com',
|
||||
'Referer': 'https://www.instagram.com',
|
||||
'x-ig-app-id': '936619743392459',
|
||||
})
|
||||
if info:
|
||||
media.update(info['items'][0])
|
||||
return self._extract_product(media)
|
||||
if shared_data and self._LOGIN_URL not in urlh.geturl():
|
||||
media.update(traverse_obj(
|
||||
shared_data, ('entry_data', 'PostPage', 0, 'graphql', 'shortcode_media'),
|
||||
('entry_data', 'PostPage', 0, 'media'), expected_type=dict) or {})
|
||||
else:
|
||||
self.report_warning('Main webpage is locked behind the login page. Retrying with embed webpage')
|
||||
webpage = self._download_webpage(
|
||||
f'{url}/embed/', video_id, note='Downloading embed webpage', fatal=False)
|
||||
additional_data = self._search_json(
|
||||
r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*', webpage, 'additional data', video_id, fatal=False)
|
||||
if not additional_data:
|
||||
self.raise_login_required('Requested content is not available, rate-limit reached or login required')
|
||||
|
||||
webpage = self._download_webpage(
|
||||
f'https://www.instagram.com/p/{video_id}/embed/', video_id,
|
||||
note='Downloading embed webpage', fatal=False)
|
||||
if not webpage:
|
||||
self.raise_login_required('Requested content was not found, the content might be private')
|
||||
product_item = traverse_obj(additional_data, ('items', 0), expected_type=dict)
|
||||
if product_item:
|
||||
media.update(product_item)
|
||||
return self._extract_product(media)
|
||||
|
||||
additional_data = self._search_json(
|
||||
r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*', webpage, 'additional data', video_id, fatal=False)
|
||||
product_item = traverse_obj(additional_data, ('items', 0), expected_type=dict)
|
||||
if product_item:
|
||||
media.update(product_item)
|
||||
return self._extract_product(media)
|
||||
|
||||
media.update(traverse_obj(
|
||||
additional_data, ('graphql', 'shortcode_media'), 'shortcode_media', expected_type=dict) or {})
|
||||
media.update(traverse_obj(
|
||||
additional_data, ('graphql', 'shortcode_media'), 'shortcode_media', expected_type=dict) or {})
|
||||
|
||||
username = traverse_obj(media, ('owner', 'username')) or self._search_regex(
|
||||
r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', webpage, 'username', fatal=False)
|
||||
@@ -649,12 +679,8 @@ class InstagramStoryIE(InstagramBaseIE):
|
||||
|
||||
story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}'
|
||||
videos = traverse_obj(self._download_json(
|
||||
f'https://i.instagram.com/api/v1/feed/reels_media/?reel_ids={story_info_url}',
|
||||
story_id, errnote=False, fatal=False, headers={
|
||||
'X-IG-App-ID': 936619743392459,
|
||||
'X-ASBD-ID': 198387,
|
||||
'X-IG-WWW-Claim': 0,
|
||||
}), 'reels')
|
||||
f'{self._API_BASE_URL}/feed/reels_media/?reel_ids={story_info_url}',
|
||||
story_id, errnote=False, fatal=False, headers=self._API_HEADERS), 'reels')
|
||||
if not videos:
|
||||
self.raise_login_required('You need to log in to access this content')
|
||||
|
||||
|
||||
82
yt_dlp/extractor/islamchannel.py
Normal file
82
yt_dlp/extractor/islamchannel.py
Normal file
@@ -0,0 +1,82 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import traverse_obj, urljoin
|
||||
|
||||
|
||||
class IslamChannelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://watch\.islamchannel\.tv/watch/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.islamchannel.tv/watch/38604310',
|
||||
'info_dict': {
|
||||
'id': '38604310',
|
||||
'title': 'Omar - Young Omar',
|
||||
'description': 'md5:5cc7ddecef064ea7afe52eb5e0e33b55',
|
||||
'thumbnail': r're:https?://.+',
|
||||
'ext': 'mp4',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'data-poster="([^"]+)"', webpage, 'data poster', fatal=False) or \
|
||||
self._html_search_meta(('og:image', 'twitter:image'), webpage)
|
||||
|
||||
headers = {
|
||||
'Token': self._search_regex(r'data-token="([^"]+)"', webpage, 'data token'),
|
||||
'Token-Expiry': self._search_regex(r'data-expiry="([^"]+)"', webpage, 'data expiry'),
|
||||
'Uvid': video_id,
|
||||
}
|
||||
show_stream = self._download_json(
|
||||
f'https://v2-streams-elb.simplestreamcdn.com/api/show/stream/{video_id}', video_id,
|
||||
query={
|
||||
'key': self._search_regex(r'data-key="([^"]+)"', webpage, 'data key'),
|
||||
'platform': 'chrome',
|
||||
}, headers=headers)
|
||||
# TODO: show_stream['stream'] and show_stream['drm'] may contain something interesting
|
||||
streams = self._download_json(
|
||||
traverse_obj(show_stream, ('response', 'tokenization', 'url')), video_id,
|
||||
headers=headers)
|
||||
formats, subs = self._extract_m3u8_formats_and_subtitles(traverse_obj(streams, ('Streams', 'Adaptive')), video_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._html_search_meta(('og:title', 'twitter:title'), webpage),
|
||||
'description': self._html_search_meta(('og:description', 'twitter:description', 'description'), webpage),
|
||||
'formats': formats,
|
||||
'subtitles': subs,
|
||||
'thumbnails': [{
|
||||
'id': 'unscaled',
|
||||
'url': thumbnail.split('?')[0],
|
||||
'ext': 'jpg',
|
||||
'preference': 2,
|
||||
}, {
|
||||
'id': 'orig',
|
||||
'url': thumbnail,
|
||||
'ext': 'jpg',
|
||||
'preference': 1,
|
||||
}] if thumbnail else None,
|
||||
}
|
||||
|
||||
|
||||
class IslamChannelSeriesIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://watch\.islamchannel\.tv/series/(?P<id>[a-f\d-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.islamchannel.tv/series/a6cccef3-3ef1-11eb-bc19-06b69c2357cd',
|
||||
'info_dict': {
|
||||
'id': 'a6cccef3-3ef1-11eb-bc19-06b69c2357cd',
|
||||
},
|
||||
'playlist_mincount': 31,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
pl_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, pl_id)
|
||||
|
||||
return self.playlist_from_matches(
|
||||
re.finditer(r'<a\s+href="(/watch/\d+)"[^>]+?data-video-type="show">', webpage),
|
||||
pl_id, getter=lambda x: urljoin(url, x.group(1)), ie=IslamChannelIE)
|
||||
48
yt_dlp/extractor/jixie.py
Normal file
48
yt_dlp/extractor/jixie.py
Normal file
@@ -0,0 +1,48 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import clean_html, float_or_none, traverse_obj, try_call
|
||||
|
||||
|
||||
class JixieBaseIE(InfoExtractor):
|
||||
"""
|
||||
API Reference:
|
||||
https://jixie.atlassian.net/servicedesk/customer/portal/2/article/1339654214?src=-1456335525,
|
||||
https://scripts.jixie.media/jxvideo.3.1.min.js
|
||||
"""
|
||||
|
||||
def _extract_data_from_jixie_id(self, display_id, video_id, webpage):
|
||||
json_data = self._download_json(
|
||||
'https://apidam.jixie.io/api/public/stream', display_id,
|
||||
query={'metadata': 'full', 'video_id': video_id})['data']
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for stream in json_data['streams']:
|
||||
if stream.get('type') == 'HLS':
|
||||
fmt, sub = self._extract_m3u8_formats_and_subtitles(stream.get('url'), display_id, ext='mp4')
|
||||
if json_data.get('drm'):
|
||||
for f in fmt:
|
||||
f['has_drm'] = True
|
||||
formats.extend(fmt)
|
||||
self._merge_subtitles(sub, target=subtitles)
|
||||
else:
|
||||
formats.append({
|
||||
'url': stream.get('url'),
|
||||
'width': stream.get('width'),
|
||||
'height': stream.get('height'),
|
||||
'ext': 'mp4',
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'title': json_data.get('title') or self._html_search_meta(['og:title', 'twitter:title'], webpage),
|
||||
'description': (clean_html(traverse_obj(json_data, ('metadata', 'description')))
|
||||
or self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage)),
|
||||
'thumbnails': traverse_obj(json_data, ('metadata', 'thumbnails')),
|
||||
'duration': float_or_none(traverse_obj(json_data, ('metadata', 'duration'))),
|
||||
'tags': try_call(lambda: (json_data['metadata']['keywords'] or None).split(',')),
|
||||
'categories': try_call(lambda: (json_data['metadata']['categories'] or None).split(',')),
|
||||
'uploader_id': json_data.get('owner_id'),
|
||||
}
|
||||
@@ -1,17 +1,7 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
float_or_none,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
)
|
||||
|
||||
# Video from www.kompas.tv and video.kompas.com seems use jixie player
|
||||
# see [1] https://jixie.atlassian.net/servicedesk/customer/portal/2/article/1339654214?src=-1456335525,
|
||||
# [2] https://scripts.jixie.media/jxvideo.3.1.min.js for more info
|
||||
from .jixie import JixieBaseIE
|
||||
|
||||
|
||||
class KompasVideoIE(InfoExtractor):
|
||||
class KompasVideoIE(JixieBaseIE):
|
||||
_VALID_URL = r'https?://video\.kompas\.com/\w+/(?P<id>\d+)/(?P<slug>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://video.kompas.com/watch/164474/kim-jong-un-siap-kirim-nuklir-lawan-as-dan-korsel',
|
||||
@@ -33,36 +23,4 @@ class KompasVideoIE(InfoExtractor):
|
||||
video_id, display_id = self._match_valid_url(url).group('id', 'slug')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
json_data = self._download_json(
|
||||
'https://apidam.jixie.io/api/public/stream', display_id,
|
||||
query={'metadata': 'full', 'video_id': video_id})['data']
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for stream in json_data['streams']:
|
||||
if stream.get('type') == 'HLS':
|
||||
fmt, sub = self._extract_m3u8_formats_and_subtitles(stream.get('url'), display_id, ext='mp4')
|
||||
formats.extend(fmt)
|
||||
self._merge_subtitles(sub, target=subtitles)
|
||||
else:
|
||||
formats.append({
|
||||
'url': stream.get('url'),
|
||||
'width': stream.get('width'),
|
||||
'height': stream.get('height'),
|
||||
'ext': 'mp4',
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'title': json_data.get('title') or self._html_search_meta(['og:title', 'twitter:title'], webpage),
|
||||
'description': (clean_html(traverse_obj(json_data, ('metadata', 'description')))
|
||||
or self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage)),
|
||||
'thumbnails': traverse_obj(json_data, ('metadata', 'thumbnails')),
|
||||
'duration': float_or_none(traverse_obj(json_data, ('metadata', 'duration'))),
|
||||
'tags': try_call(lambda: json_data['metadata']['keywords'].split(',')),
|
||||
'categories': try_call(lambda: json_data['metadata']['categories'].split(',')),
|
||||
'uploader_id': json_data.get('owner_id'),
|
||||
}
|
||||
return self._extract_data_from_jixie_id(display_id, video_id, webpage)
|
||||
|
||||
@@ -8,15 +8,33 @@ from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class MedalTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?medal\.tv/(?P<path>games/[^/?#&]+/clips)/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://medal.tv/clips/2mA60jWAGQCBH',
|
||||
'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
|
||||
'url': 'https://medal.tv/games/valorant/clips/jTBFnLKdLy15K',
|
||||
'md5': '6930f8972914b6b9fdc2bb3918098ba0',
|
||||
'info_dict': {
|
||||
'id': 'jTBFnLKdLy15K',
|
||||
'ext': 'mp4',
|
||||
'title': "Mornu's clutch",
|
||||
'description': '',
|
||||
'uploader': 'Aciel',
|
||||
'timestamp': 1651628243,
|
||||
'upload_date': '20220504',
|
||||
'uploader_id': '19335460',
|
||||
'uploader_url': 'https://medal.tv/users/19335460',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 13,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://medal.tv/games/cod%20cold%20war/clips/2mA60jWAGQCBH',
|
||||
'md5': '3d19d426fe0b2d91c26e412684e66a06',
|
||||
'info_dict': {
|
||||
'id': '2mA60jWAGQCBH',
|
||||
'ext': 'mp4',
|
||||
@@ -26,9 +44,15 @@ class MedalTVIE(InfoExtractor):
|
||||
'timestamp': 1603165266,
|
||||
'upload_date': '20201020',
|
||||
'uploader_id': '10619174',
|
||||
'thumbnail': 'https://cdn.medal.tv/10619174/thumbnail-34934644-720p.jpg?t=1080p&c=202042&missing',
|
||||
'uploader_url': 'https://medal.tv/users/10619174',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 23,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://medal.tv/clips/2um24TWdty0NA',
|
||||
'url': 'https://medal.tv/games/cod%20cold%20war/clips/2um24TWdty0NA',
|
||||
'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
|
||||
'info_dict': {
|
||||
'id': '2um24TWdty0NA',
|
||||
@@ -39,25 +63,42 @@ class MedalTVIE(InfoExtractor):
|
||||
'timestamp': 1605580939,
|
||||
'upload_date': '20201117',
|
||||
'uploader_id': '5156321',
|
||||
'thumbnail': 'https://cdn.medal.tv/5156321/thumbnail-36787208-360p.jpg?t=1080p&c=202046&missing',
|
||||
'uploader_url': 'https://medal.tv/users/5156321',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 9,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://medal.tv/clips/37rMeFpryCC-9',
|
||||
'url': 'https://medal.tv/games/valorant/clips/37rMeFpryCC-9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://medal.tv/clips/2WRj40tpY_EU9',
|
||||
'url': 'https://medal.tv/games/valorant/clips/2WRj40tpY_EU9',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
path = self._match_valid_url(url).group('path')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
hydration_data = self._parse_json(self._search_regex(
|
||||
r'<script[^>]*>\s*(?:var\s*)?hydrationData\s*=\s*({.+?})\s*</script>',
|
||||
webpage, 'hydration data', default='{}'), video_id)
|
||||
next_data = self._search_json(
|
||||
'<script[^>]*__NEXT_DATA__[^>]*>', webpage,
|
||||
'next data', video_id, end_pattern='</script>', fatal=False)
|
||||
|
||||
clip = try_get(
|
||||
hydration_data, lambda x: x['clips'][video_id], dict) or {}
|
||||
build_id = next_data.get('buildId')
|
||||
if not build_id:
|
||||
raise ExtractorError(
|
||||
'Could not find build ID.', video_id=video_id)
|
||||
|
||||
locale = next_data.get('locale', 'en')
|
||||
|
||||
api_response = self._download_json(
|
||||
f'https://medal.tv/_next/data/{build_id}/{locale}/{path}/{video_id}.json', video_id)
|
||||
|
||||
clip = traverse_obj(api_response, ('pageProps', 'clip')) or {}
|
||||
if not clip:
|
||||
raise ExtractorError(
|
||||
'Could not find video information.', video_id=video_id)
|
||||
@@ -113,9 +154,8 @@ class MedalTVIE(InfoExtractor):
|
||||
|
||||
# Necessary because the id of the author is not known in advance.
|
||||
# Won't raise an issue if no profile can be found as this is optional.
|
||||
author = try_get(
|
||||
hydration_data, lambda x: list(x['profiles'].values())[0], dict) or {}
|
||||
author_id = str_or_none(author.get('id'))
|
||||
author = traverse_obj(api_response, ('pageProps', 'profile')) or {}
|
||||
author_id = str_or_none(author.get('userId'))
|
||||
author_url = format_field(author_id, None, 'https://medal.tv/users/%s')
|
||||
|
||||
return {
|
||||
|
||||
@@ -172,31 +172,27 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
}]
|
||||
|
||||
def _extract_from_webpage(self, url, webpage):
|
||||
def _qs(url):
|
||||
return parse_qs(url)
|
||||
|
||||
def _program_guid(qs):
|
||||
return qs.get('programGuid', [None])[0]
|
||||
|
||||
entries = []
|
||||
for mobj in re.finditer(
|
||||
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml.*?)\1',
|
||||
webpage):
|
||||
embed_url = mobj.group('url')
|
||||
embed_qs = _qs(embed_url)
|
||||
embed_qs = parse_qs(embed_url)
|
||||
program_guid = _program_guid(embed_qs)
|
||||
if program_guid:
|
||||
entries.append(embed_url)
|
||||
yield self.url_result(embed_url)
|
||||
continue
|
||||
|
||||
video_id = embed_qs.get('id', [None])[0]
|
||||
if not video_id:
|
||||
continue
|
||||
urlh = self._request_webpage(embed_url, video_id, note='Following embed URL redirect')
|
||||
embed_url = urlh.geturl()
|
||||
program_guid = _program_guid(_qs(embed_url))
|
||||
program_guid = _program_guid(parse_qs(embed_url))
|
||||
if program_guid:
|
||||
entries.append(embed_url)
|
||||
return entries
|
||||
yield self.url_result(embed_url)
|
||||
|
||||
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
||||
for video in smil.findall(self._xpath_ns('.//video', namespace)):
|
||||
|
||||
@@ -159,6 +159,7 @@ class MixcloudIE(MixcloudBaseIE):
|
||||
formats.append({
|
||||
'format_id': 'http',
|
||||
'url': decrypted,
|
||||
'vcodec': 'none',
|
||||
'downloader_options': {
|
||||
# Mixcloud starts throttling at >~5M
|
||||
'http_chunk_size': 5242880,
|
||||
|
||||
@@ -1,11 +1,15 @@
|
||||
import re
|
||||
import urllib.parse
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
)
|
||||
|
||||
@@ -267,3 +271,79 @@ class MLBVideoIE(MLBBaseIE):
|
||||
}
|
||||
}''' % display_id,
|
||||
})['data']['mediaPlayback'][0]
|
||||
|
||||
|
||||
class MLBTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?mlb\.com/tv/g(?P<id>\d{6})'
|
||||
_NETRC_MACHINE = 'mlb'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.mlb.com/tv/g661581/vee2eff5f-a7df-4c20-bdb4-7b926fa12638',
|
||||
'info_dict': {
|
||||
'id': '661581',
|
||||
'ext': 'mp4',
|
||||
'title': '2022-07-02 - St. Louis Cardinals @ Philadelphia Phillies',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
_access_token = None
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._access_token:
|
||||
self.raise_login_required(
|
||||
'All videos are only available to registered users', method='password')
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
data = f'grant_type=password&username={urllib.parse.quote(username)}&password={urllib.parse.quote(password)}&scope=openid offline_access&client_id=0oa3e1nutA1HLzAKG356'
|
||||
access_token = self._download_json(
|
||||
'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None,
|
||||
headers={
|
||||
'User-Agent': 'okhttp/3.12.1',
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
}, data=data.encode())['access_token']
|
||||
|
||||
entitlement = self._download_webpage(
|
||||
f'https://media-entitlement.mlb.com/api/v3/jwt?os=Android&appname=AtBat&did={str(uuid.uuid4())}', None,
|
||||
headers={
|
||||
'User-Agent': 'okhttp/3.12.1',
|
||||
'Authorization': f'Bearer {access_token}'
|
||||
})
|
||||
|
||||
data = f'grant_type=urn:ietf:params:oauth:grant-type:token-exchange&subject_token={entitlement}&subject_token_type=urn:ietf:params:oauth:token-type:jwt&platform=android-tv'
|
||||
self._access_token = self._download_json(
|
||||
'https://us.edge.bamgrid.com/token', None,
|
||||
headers={
|
||||
'Accept': 'application/json',
|
||||
'Authorization': 'Bearer bWxidHYmYW5kcm9pZCYxLjAuMA.6LZMbH2r--rbXcgEabaDdIslpo4RyZrlVfWZhsAgXIk',
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
}, data=data.encode())['access_token']
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
airings = self._download_json(
|
||||
f'https://search-api-mlbtv.mlb.com/svc/search/v2/graphql/persisted/query/core/Airings?variables=%7B%22partnerProgramIds%22%3A%5B%22{video_id}%22%5D%2C%22applyEsniMediaRightsLabels%22%3Atrue%7D',
|
||||
video_id)['data']['Airings']
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for airing in airings:
|
||||
m3u8_url = self._download_json(
|
||||
airing['playbackUrls'][0]['href'].format(scenario='browser~csai'), video_id,
|
||||
headers={
|
||||
'Authorization': self._access_token,
|
||||
'Accept': 'application/vnd.media-service+json; version=2'
|
||||
})['stream']['complete']
|
||||
f, s = self._extract_m3u8_formats_and_subtitles(
|
||||
m3u8_url, video_id, 'mp4', m3u8_id=join_nonempty(airing.get('feedType'), airing.get('feedLanguage')))
|
||||
formats.extend(f)
|
||||
self._merge_subtitles(s, target=subtitles)
|
||||
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': traverse_obj(airings, (..., 'titles', 0, 'episodeName'), get_all=False),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'http_headers': {'Authorization': f'Bearer {self._access_token}'},
|
||||
}
|
||||
|
||||
43
yt_dlp/extractor/moview.py
Normal file
43
yt_dlp/extractor/moview.py
Normal file
@@ -0,0 +1,43 @@
|
||||
from .jixie import JixieBaseIE
|
||||
|
||||
|
||||
class MoviewPlayIE(JixieBaseIE):
|
||||
_VALID_URL = r'https?://www\.moview\.id/play/\d+/(?P<id>[\w-]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
# drm hls, only use direct link
|
||||
'url': 'https://www.moview.id/play/174/Candy-Monster',
|
||||
'info_dict': {
|
||||
'id': '146182',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'Candy-Monster',
|
||||
'uploader_id': 'Mo165qXUUf',
|
||||
'duration': 528.2,
|
||||
'title': 'Candy Monster',
|
||||
'description': 'Mengapa Candy Monster ingin mengambil permen Chloe?',
|
||||
'thumbnail': 'https://video.jixie.media/1034/146182/146182_1280x720.jpg',
|
||||
}
|
||||
}, {
|
||||
# non-drm hls
|
||||
'url': 'https://www.moview.id/play/75/Paris-Van-Java-Episode-16',
|
||||
'info_dict': {
|
||||
'id': '28210',
|
||||
'ext': 'mp4',
|
||||
'duration': 2595.666667,
|
||||
'display_id': 'Paris-Van-Java-Episode-16',
|
||||
'uploader_id': 'Mo165qXUUf',
|
||||
'thumbnail': 'https://video.jixie.media/1003/28210/28210_1280x720.jpg',
|
||||
'description': 'md5:2a5e18d98eef9b39d7895029cac96c63',
|
||||
'title': 'Paris Van Java Episode 16',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'video_id\s*=\s*"(?P<video_id>[^"]+)', webpage, 'video_id')
|
||||
|
||||
return self._extract_data_from_jixie_id(display_id, video_id, webpage)
|
||||
54
yt_dlp/extractor/newspicks.py
Normal file
54
yt_dlp/extractor/newspicks.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class NewsPicksIE(InfoExtractor):
|
||||
_VALID_URL = r'https://newspicks\.com/movie-series/(?P<channel_id>\d+)\?movieId=(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://newspicks.com/movie-series/11?movieId=1813',
|
||||
'info_dict': {
|
||||
'id': '1813',
|
||||
'title': '日本の課題を破壊せよ【ゲスト:成田悠輔】',
|
||||
'description': 'md5:09397aad46d6ded6487ff13f138acadf',
|
||||
'channel': 'HORIE ONE',
|
||||
'channel_id': '11',
|
||||
'release_date': '20220117',
|
||||
'thumbnail': r're:https://.+jpg',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, channel_id = self._match_valid_url(url).group('id', 'channel_id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
entries = self._parse_html5_media_entries(
|
||||
url, webpage.replace('movie-for-pc', 'movie'), video_id, 'hls')
|
||||
if not entries:
|
||||
raise ExtractorError('No HTML5 media elements found')
|
||||
info = entries[0]
|
||||
self._sort_formats(info['formats'])
|
||||
|
||||
title = self._html_search_meta('og:title', webpage, fatal=False)
|
||||
description = self._html_search_meta(
|
||||
('og:description', 'twitter:title'), webpage, fatal=False)
|
||||
channel = self._html_search_regex(
|
||||
r'value="11".+?<div\s+class="title">(.+?)</div', webpage, 'channel name', fatal=False)
|
||||
if not title or not channel:
|
||||
title, channel = re.split(r'\s*|\s*', self._html_extract_title(webpage))
|
||||
|
||||
release_date = self._search_regex(
|
||||
r'<span\s+class="on-air-date">\s*(\d+)年(\d+)月(\d+)日\s*</span>',
|
||||
webpage, 'release date', fatal=False, group=(1, 2, 3))
|
||||
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'channel': channel,
|
||||
'channel_id': channel_id,
|
||||
'release_date': ('%04d%02d%02d' % tuple(map(int, release_date))) if release_date else None,
|
||||
})
|
||||
return info
|
||||
@@ -1,3 +1,4 @@
|
||||
import collections
|
||||
import contextlib
|
||||
import json
|
||||
import os
|
||||
@@ -9,8 +10,10 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
Popen,
|
||||
check_executable,
|
||||
format_field,
|
||||
get_exe_version,
|
||||
is_outdated_version,
|
||||
shell_quote,
|
||||
)
|
||||
|
||||
|
||||
@@ -49,7 +52,9 @@ class PhantomJSwrapper:
|
||||
This class is experimental.
|
||||
"""
|
||||
|
||||
_TEMPLATE = r'''
|
||||
INSTALL_HINT = 'Please download it from https://phantomjs.org/download.html'
|
||||
|
||||
_BASE_JS = R'''
|
||||
phantom.onError = function(msg, trace) {{
|
||||
var msgStack = ['PHANTOM ERROR: ' + msg];
|
||||
if(trace && trace.length) {{
|
||||
@@ -62,6 +67,9 @@ class PhantomJSwrapper:
|
||||
console.error(msgStack.join('\n'));
|
||||
phantom.exit(1);
|
||||
}};
|
||||
'''
|
||||
|
||||
_TEMPLATE = R'''
|
||||
var page = require('webpage').create();
|
||||
var fs = require('fs');
|
||||
var read = {{ mode: 'r', charset: 'utf-8' }};
|
||||
@@ -104,8 +112,7 @@ class PhantomJSwrapper:
|
||||
|
||||
self.exe = check_executable('phantomjs', ['-v'])
|
||||
if not self.exe:
|
||||
raise ExtractorError(
|
||||
'PhantomJS not found, Please download it from https://phantomjs.org/download.html', expected=True)
|
||||
raise ExtractorError(f'PhantomJS not found, {self.INSTALL_HINT}', expected=True)
|
||||
|
||||
self.extractor = extractor
|
||||
|
||||
@@ -116,14 +123,18 @@ class PhantomJSwrapper:
|
||||
'Your copy of PhantomJS is outdated, update it to version '
|
||||
'%s or newer if you encounter any errors.' % required_version)
|
||||
|
||||
self.options = {
|
||||
'timeout': timeout,
|
||||
}
|
||||
for name in self._TMP_FILE_NAMES:
|
||||
tmp = tempfile.NamedTemporaryFile(delete=False)
|
||||
tmp.close()
|
||||
self._TMP_FILES[name] = tmp
|
||||
|
||||
self.options = collections.ChainMap({
|
||||
'timeout': timeout,
|
||||
}, {
|
||||
x: self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"')
|
||||
for x in self._TMP_FILE_NAMES
|
||||
})
|
||||
|
||||
def __del__(self):
|
||||
for name in self._TMP_FILE_NAMES:
|
||||
with contextlib.suppress(OSError, KeyError):
|
||||
@@ -169,7 +180,7 @@ class PhantomJSwrapper:
|
||||
In most cases you don't need to add any `jscode`.
|
||||
It is executed in `page.onLoadFinished`.
|
||||
`saveAndExit();` is mandatory, use it instead of `phantom.exit()`
|
||||
It is possible to wait for some element on the webpage, for example:
|
||||
It is possible to wait for some element on the webpage, e.g.
|
||||
var check = function() {
|
||||
var elementFound = page.evaluate(function() {
|
||||
return document.querySelector('#b.done') !== null;
|
||||
@@ -194,31 +205,39 @@ class PhantomJSwrapper:
|
||||
|
||||
self._save_cookies(url)
|
||||
|
||||
replaces = self.options
|
||||
replaces['url'] = url
|
||||
user_agent = headers.get('User-Agent') or self.extractor.get_param('http_headers')['User-Agent']
|
||||
replaces['ua'] = user_agent.replace('"', '\\"')
|
||||
replaces['jscode'] = jscode
|
||||
jscode = self._TEMPLATE.format_map(self.options.new_child({
|
||||
'url': url,
|
||||
'ua': user_agent.replace('"', '\\"'),
|
||||
'jscode': jscode,
|
||||
}))
|
||||
|
||||
for x in self._TMP_FILE_NAMES:
|
||||
replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"')
|
||||
stdout = self.execute(jscode, video_id, note2)
|
||||
|
||||
with open(self._TMP_FILES['script'].name, 'wb') as f:
|
||||
f.write(self._TEMPLATE.format(**replaces).encode('utf-8'))
|
||||
|
||||
if video_id is None:
|
||||
self.extractor.to_screen(f'{note2}')
|
||||
else:
|
||||
self.extractor.to_screen(f'{video_id}: {note2}')
|
||||
|
||||
stdout, stderr, returncode = Popen.run(
|
||||
[self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name],
|
||||
text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
if returncode:
|
||||
raise ExtractorError(f'Executing JS failed:\n{stderr}')
|
||||
with open(self._TMP_FILES['html'].name, 'rb') as f:
|
||||
html = f.read().decode('utf-8')
|
||||
|
||||
self._load_cookies()
|
||||
|
||||
return html, stdout
|
||||
|
||||
def execute(self, jscode, video_id=None, *, note='Executing JS'):
|
||||
"""Execute JS and return stdout"""
|
||||
if 'phantom.exit();' not in jscode:
|
||||
jscode += ';\nphantom.exit();'
|
||||
jscode = self._BASE_JS + jscode
|
||||
|
||||
with open(self._TMP_FILES['script'].name, 'w', encoding='utf-8') as f:
|
||||
f.write(jscode)
|
||||
self.extractor.to_screen(f'{format_field(video_id, None, "%s: ")}{note}')
|
||||
|
||||
cmd = [self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name]
|
||||
self.extractor.write_debug(f'PhantomJS command line: {shell_quote(cmd)}')
|
||||
try:
|
||||
stdout, stderr, returncode = Popen.run(cmd, timeout=self.options['timeout'] / 1000,
|
||||
text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
except Exception as e:
|
||||
raise ExtractorError(f'{note} failed: Unable to run PhantomJS binary', cause=e)
|
||||
if returncode:
|
||||
raise ExtractorError(f'{note} failed with returncode {returncode}:\n{stderr.strip()}')
|
||||
|
||||
return stdout
|
||||
|
||||
111
yt_dlp/extractor/parler.py
Normal file
111
yt_dlp/extractor/parler.py
Normal file
@@ -0,0 +1,111 @@
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
format_field,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class ParlerIE(InfoExtractor):
|
||||
IE_DESC = 'Posts on parler.com'
|
||||
_VALID_URL = r'https://parler\.com/feed/(?P<id>[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://parler.com/feed/df79fdba-07cc-48fe-b085-3293897520d7',
|
||||
'md5': '16e0f447bf186bb3cf64de5bbbf4d22d',
|
||||
'info_dict': {
|
||||
'id': 'df79fdba-07cc-48fe-b085-3293897520d7',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'https://bl-images.parler.com/videos/6ce7cdf3-a27a-4d72-bf9c-d3e17ce39a66/thumbnail.jpeg',
|
||||
'title': 'Parler video #df79fdba-07cc-48fe-b085-3293897520d7',
|
||||
'description': 'md5:6f220bde2df4a97cbb89ac11f1fd8197',
|
||||
'timestamp': 1659744000,
|
||||
'upload_date': '20220806',
|
||||
'uploader': 'Tulsi Gabbard',
|
||||
'uploader_id': 'TulsiGabbard',
|
||||
'uploader_url': 'https://parler.com/TulsiGabbard',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://parler.com/feed/a7406eb4-91e5-4793-b5e3-ade57a24e287',
|
||||
'md5': '11687e2f5bb353682cee338d181422ed',
|
||||
'info_dict': {
|
||||
'id': 'a7406eb4-91e5-4793-b5e3-ade57a24e287',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'https://bl-images.parler.com/videos/317827a8-1e48-4cbc-981f-7dd17d4c1183/thumbnail.jpeg',
|
||||
'title': 'Parler video #a7406eb4-91e5-4793-b5e3-ade57a24e287',
|
||||
'description': 'This man should run for office',
|
||||
'timestamp': 1659657600,
|
||||
'upload_date': '20220805',
|
||||
'uploader': 'Benny Johnson',
|
||||
'uploader_id': 'BennyJohnson',
|
||||
'uploader_url': 'https://parler.com/BennyJohnson',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://parler.com/feed/f23b85c1-6558-470f-b9ff-02c145f28da5',
|
||||
'md5': 'eaba1ff4a10fe281f5ce74e930ab2cb4',
|
||||
'info_dict': {
|
||||
'id': 'r5vkSaz8PxQ',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'https://i.ytimg.com/vi_webp/r5vkSaz8PxQ/maxresdefault.webp',
|
||||
'title': 'Tom MacDonald Names Reaction',
|
||||
'description': 'md5:33c21f0d35ae6dc2edf3007d6696baea',
|
||||
'upload_date': '20220716',
|
||||
'duration': 1267,
|
||||
'uploader': 'Mahesh Chookolingo',
|
||||
'uploader_id': 'maheshchookolingo',
|
||||
'uploader_url': 'http://www.youtube.com/user/maheshchookolingo',
|
||||
'channel': 'Mahesh Chookolingo',
|
||||
'channel_id': 'UCox6YeMSY1PQInbCtTaZj_w',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCox6YeMSY1PQInbCtTaZj_w',
|
||||
'categories': ['Entertainment'],
|
||||
'tags': list,
|
||||
'availability': 'public',
|
||||
'live_status': 'not_live',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'channel_follower_count': int,
|
||||
'age_limit': 0,
|
||||
'playable_in_embed': True,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._download_json(
|
||||
'https://parler.com/open-api/ParleyDetailEndpoint.php', video_id,
|
||||
data=urlencode_postdata({'uuid': video_id}))['data'][0]
|
||||
primary = data['primary']
|
||||
|
||||
embed = self._parse_json(primary.get('V2LINKLONG') or '', video_id, fatal=False)
|
||||
if embed:
|
||||
return self.url_result(embed[0], YoutubeIE)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': traverse_obj(primary, ('video_data', 'videoSrc')),
|
||||
'thumbnail': traverse_obj(primary, ('video_data', 'thumbnailUrl')),
|
||||
'title': '',
|
||||
'description': strip_or_none(clean_html(primary.get('full_body'))) or None,
|
||||
'timestamp': unified_timestamp(primary.get('date_created')),
|
||||
'uploader': strip_or_none(primary.get('name')),
|
||||
'uploader_id': strip_or_none(primary.get('username')),
|
||||
'uploader_url': format_field(strip_or_none(primary.get('username')), None, 'https://parler.com/%s'),
|
||||
'view_count': int_or_none(primary.get('view_count')),
|
||||
'comment_count': int_or_none(traverse_obj(data, ('engagement', 'commentCount'))),
|
||||
'repost_count': int_or_none(traverse_obj(data, ('engagement', 'echoCount'))),
|
||||
}
|
||||
@@ -154,6 +154,28 @@ class PatreonIE(PatreonBaseIE):
|
||||
'channel_url': 'https://www.patreon.com/loish',
|
||||
'channel_follower_count': int,
|
||||
}
|
||||
}, {
|
||||
# bad videos under media (if media is included). Real one is under post_file
|
||||
'url': 'https://www.patreon.com/posts/premium-access-70282931',
|
||||
'info_dict': {
|
||||
'id': '70282931',
|
||||
'ext': 'mp4',
|
||||
'title': '[Premium Access + Uncut] The Office - 2x6 The Fight - Group Reaction',
|
||||
'channel_url': 'https://www.patreon.com/thenormies',
|
||||
'channel_id': '573397',
|
||||
'uploader_id': '2929435',
|
||||
'uploader': 'The Normies',
|
||||
'description': 'md5:79c9fd8778e2cef84049a94c058a5e23',
|
||||
'comment_count': int,
|
||||
'upload_date': '20220809',
|
||||
'thumbnail': r're:^https?://.*$',
|
||||
'channel_follower_count': int,
|
||||
'like_count': int,
|
||||
'timestamp': 1660052820,
|
||||
'tags': ['The Office', 'early access', 'uncut'],
|
||||
'uploader_url': 'https://www.patreon.com/thenormies',
|
||||
},
|
||||
'skip': 'Patron-only content',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -166,7 +188,7 @@ class PatreonIE(PatreonBaseIE):
|
||||
'fields[post_tag]': 'value',
|
||||
'fields[campaign]': 'url,name,patron_count',
|
||||
'json-api-use-default-includes': 'false',
|
||||
'include': 'media,user,user_defined_tags,campaign',
|
||||
'include': 'audio,user,user_defined_tags,campaign,attachments_media',
|
||||
})
|
||||
attributes = post['data']['attributes']
|
||||
title = attributes['title'].strip()
|
||||
@@ -190,11 +212,16 @@ class PatreonIE(PatreonBaseIE):
|
||||
media_attributes = i.get('attributes') or {}
|
||||
download_url = media_attributes.get('download_url')
|
||||
ext = mimetype2ext(media_attributes.get('mimetype'))
|
||||
if download_url and ext in KNOWN_EXTENSIONS:
|
||||
|
||||
# if size_bytes is None, this media file is likely unavailable
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/4608
|
||||
size_bytes = int_or_none(media_attributes.get('size_bytes'))
|
||||
if download_url and ext in KNOWN_EXTENSIONS and size_bytes is not None:
|
||||
# XXX: what happens if there are multiple attachments?
|
||||
return {
|
||||
**info,
|
||||
'ext': ext,
|
||||
'filesize': int_or_none(media_attributes.get('size_bytes')),
|
||||
'filesize': size_bytes,
|
||||
'url': download_url,
|
||||
}
|
||||
elif i_type == 'user':
|
||||
|
||||
@@ -51,6 +51,9 @@ class RaiBaseIE(InfoExtractor):
|
||||
query={'output': 45, 'pl': platform},
|
||||
headers=self.geo_verification_headers())
|
||||
|
||||
if xpath_text(relinker, './license_url', default='{}') != '{}':
|
||||
self.report_drm(video_id)
|
||||
|
||||
if not geoprotection:
|
||||
geoprotection = xpath_text(
|
||||
relinker, './geoprotection', default=None) == 'Y'
|
||||
@@ -153,7 +156,7 @@ class RaiBaseIE(InfoExtractor):
|
||||
br = int_or_none(tbr)
|
||||
if len(fmts) == 1 and not br:
|
||||
br = fmts[0].get('tbr')
|
||||
if br or 0 > 300:
|
||||
if br and br > 300:
|
||||
tbr = compat_str(math.floor(br / 100) * 100)
|
||||
else:
|
||||
tbr = '250'
|
||||
@@ -251,6 +254,8 @@ class RaiPlayIE(RaiBaseIE):
|
||||
},
|
||||
'release_year': 2022,
|
||||
'episode': 'Espresso nel caffè - 07/04/2014',
|
||||
'timestamp': 1396919880,
|
||||
'upload_date': '20140408',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -274,6 +279,8 @@ class RaiPlayIE(RaiBaseIE):
|
||||
'release_year': 2021,
|
||||
'season_number': 1,
|
||||
'episode': 'Senza occhi',
|
||||
'timestamp': 1637318940,
|
||||
'upload_date': '20211119',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
|
||||
@@ -284,7 +291,7 @@ class RaiPlayIE(RaiBaseIE):
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# DRM protected
|
||||
'url': 'https://www.raiplay.it/video/2020/09/Lo-straordinario-mondo-di-Zoey-S1E1-Lo-straordinario-potere-di-Zoey-ed493918-1d32-44b7-8454-862e473d00ff.html',
|
||||
'url': 'https://www.raiplay.it/video/2021/06/Lo-straordinario-mondo-di-Zoey-S2E1-Lo-straordinario-ritorno-di-Zoey-3ba992de-2332-41ad-9214-73e32ab209f4.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@@ -363,6 +370,8 @@ class RaiPlayLiveIE(RaiPlayIE):
|
||||
'creator': 'Rai News 24',
|
||||
'is_live': True,
|
||||
'live_status': 'is_live',
|
||||
'upload_date': '20090502',
|
||||
'timestamp': 1241276220,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -448,6 +457,8 @@ class RaiPlaySoundIE(RaiBaseIE):
|
||||
'series': 'Il Ruggito del Coniglio',
|
||||
'episode': 'Il Ruggito del Coniglio del 10/12/2021',
|
||||
'creator': 'rai radio 2',
|
||||
'timestamp': 1638346620,
|
||||
'upload_date': '20211201',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -707,7 +718,8 @@ class RaiIE(RaiBaseIE):
|
||||
|
||||
|
||||
class RaiNewsIE(RaiIE):
|
||||
_VALID_URL = rf'https?://(www\.)?rainews\.it/[^?#]+-(?P<id>{RaiBaseIE._UUID_RE})(?:-[^/?#]+)?\.html'
|
||||
_VALID_URL = rf'https?://(www\.)?rainews\.it/(?!articoli)[^?#]+-(?P<id>{RaiBaseIE._UUID_RE})(?:-[^/?#]+)?\.html'
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+data-src="(?P<url>/iframe/[^?#]+?{RaiBaseIE._UUID_RE}\.html)']
|
||||
_TESTS = [{
|
||||
# new rainews player (#3911)
|
||||
'url': 'https://www.rainews.it/rubriche/24mm/video/2022/05/24mm-del-29052022-12cf645d-1ffd-4220-b27c-07c226dbdecf.html',
|
||||
@@ -732,6 +744,10 @@ class RaiNewsIE(RaiIE):
|
||||
'upload_date': '20161103'
|
||||
},
|
||||
'expected_warnings': ['unable to extract player_data'],
|
||||
}, {
|
||||
# iframe + drm
|
||||
'url': 'https://www.rainews.it/iframe/video/2022/07/euro2022-europei-calcio-femminile-italia-belgio-gol-0-1-video-4de06a69-de75-4e32-a657-02f0885f8118.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -755,6 +771,7 @@ class RaiNewsIE(RaiIE):
|
||||
raise ExtractorError('Relinker URL not found', cause=e)
|
||||
|
||||
relinker_info = self._extract_relinker_info(urljoin(url, relinker_url), video_id)
|
||||
|
||||
self._sort_formats(relinker_info['formats'])
|
||||
|
||||
return {
|
||||
@@ -769,13 +786,13 @@ class RaiNewsIE(RaiIE):
|
||||
class RaiSudtirolIE(RaiBaseIE):
|
||||
_VALID_URL = r'https?://raisudtirol\.rai\.it/.+?media=(?P<id>[TP]tv\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://raisudtirol.rai.it/de/index.php?media=Ttv1656281400',
|
||||
'url': 'https://raisudtirol.rai.it/la/index.php?media=Ptv1619729460',
|
||||
'info_dict': {
|
||||
'id': 'Ttv1656281400',
|
||||
'id': 'Ptv1619729460',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tagesschau + Sport am Sonntag - 31-07-2022 20:00',
|
||||
'series': 'Tagesschau + Sport am Sonntag',
|
||||
'upload_date': '20220731',
|
||||
'title': 'Euro: trasmisciun d\'economia - 29-04-2021 20:51',
|
||||
'series': 'Euro: trasmisciun d\'economia',
|
||||
'upload_date': '20210429',
|
||||
'thumbnail': r're:https://raisudtirol\.rai\.it/img/.+?\.jpg',
|
||||
'uploader': 'raisudtirol',
|
||||
}
|
||||
@@ -796,6 +813,14 @@ class RaiSudtirolIE(RaiBaseIE):
|
||||
'series': video_title,
|
||||
'upload_date': unified_strdate(video_date),
|
||||
'thumbnail': urljoin('https://raisudtirol.rai.it/', video_thumb),
|
||||
'url': self._proto_relative_url(video_url),
|
||||
'uploader': 'raisudtirol',
|
||||
'formats': [{
|
||||
'format_id': 'https-mp4',
|
||||
'url': self._proto_relative_url(video_url),
|
||||
'width': 1024,
|
||||
'height': 576,
|
||||
'fps': 25,
|
||||
'vcodec': 'h264',
|
||||
'acodec': 'aac',
|
||||
}],
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
@@ -69,6 +70,10 @@ class RedBeeBaseIE(InfoExtractor):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
format['mediaLocator'], asset_id, fatal=False)
|
||||
|
||||
if format.get('drm'):
|
||||
for f in fmts:
|
||||
f['has_drm'] = True
|
||||
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
@@ -251,7 +256,7 @@ class RTBFIE(RedBeeBaseIE):
|
||||
if not login_token:
|
||||
self.raise_login_required()
|
||||
|
||||
session_jwt = self._download_json(
|
||||
session_jwt = try_call(lambda: self._get_cookies(url)['rtbf_jwt'].value) or self._download_json(
|
||||
'https://login.rtbf.be/accounts.getJWT', media_id, query={
|
||||
'login_token': login_token.value,
|
||||
'APIKey': self._GIGYA_API_KEY,
|
||||
@@ -269,8 +274,17 @@ class RTBFIE(RedBeeBaseIE):
|
||||
embed_page = self._download_webpage(
|
||||
'https://www.rtbf.be/auvio/embed/' + ('direct' if live else 'media'),
|
||||
media_id, query={'id': media_id})
|
||||
data = self._parse_json(self._html_search_regex(
|
||||
r'data-media="([^"]+)"', embed_page, 'media data'), media_id)
|
||||
|
||||
media_data = self._html_search_regex(r'data-media="([^"]+)"', embed_page, 'media data', fatal=False)
|
||||
if not media_data:
|
||||
if re.search(r'<div[^>]+id="js-error-expired"[^>]+class="(?![^"]*hidden)', embed_page):
|
||||
raise ExtractorError('Livestream has ended.', expected=True)
|
||||
if re.search(r'<div[^>]+id="js-sso-connect"[^>]+class="(?![^"]*hidden)', embed_page):
|
||||
self.raise_login_required()
|
||||
|
||||
raise ExtractorError('Could not find media data')
|
||||
|
||||
data = self._parse_json(media_data, media_id)
|
||||
|
||||
error = data.get('error')
|
||||
if error:
|
||||
@@ -280,15 +294,20 @@ class RTBFIE(RedBeeBaseIE):
|
||||
if provider in self._PROVIDERS:
|
||||
return self.url_result(data['url'], self._PROVIDERS[provider])
|
||||
|
||||
title = data['subtitle']
|
||||
title = traverse_obj(data, 'subtitle', 'title')
|
||||
is_live = data.get('isLive')
|
||||
height_re = r'-(\d+)p\.'
|
||||
formats = []
|
||||
formats, subtitles = [], {}
|
||||
|
||||
m3u8_url = data.get('urlHlsAes128') or data.get('urlHls')
|
||||
# The old api still returns m3u8 and mpd manifest for livestreams, but these are 'fake'
|
||||
# since all they contain is a 20s video that is completely unrelated.
|
||||
# https://github.com/yt-dlp/yt-dlp/issues/4656#issuecomment-1214461092
|
||||
m3u8_url = None if data.get('isLive') else traverse_obj(data, 'urlHlsAes128', 'urlHls')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
fix_url = lambda x: x.replace('//rtbf-vod.', '//rtbf.') if '/geo/drm/' in x else x
|
||||
http_url = data.get('url')
|
||||
@@ -319,10 +338,12 @@ class RTBFIE(RedBeeBaseIE):
|
||||
'height': height,
|
||||
})
|
||||
|
||||
mpd_url = data.get('urlDash')
|
||||
mpd_url = None if data.get('isLive') else data.get('urlDash')
|
||||
if mpd_url and (self.get_param('allow_unplayable_formats') or not data.get('drm')):
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
mpd_url, media_id, mpd_id='dash', fatal=False))
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
mpd_url, media_id, mpd_id='dash', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
audio_url = data.get('urlAudio')
|
||||
if audio_url:
|
||||
@@ -332,7 +353,6 @@ class RTBFIE(RedBeeBaseIE):
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
for track in (data.get('tracks') or {}).values():
|
||||
sub_url = track.get('url')
|
||||
if not sub_url:
|
||||
@@ -342,7 +362,7 @@ class RTBFIE(RedBeeBaseIE):
|
||||
})
|
||||
|
||||
if not formats:
|
||||
fmts, subs = self._get_formats_and_subtitles(url, media_id)
|
||||
fmts, subs = self._get_formats_and_subtitles(url, f'live_{media_id}' if is_live else media_id)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
remove_start,
|
||||
strip_or_none,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@@ -34,6 +36,28 @@ class ScreencastOMaticIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
'https://screencast-o-matic.com/player/' + video_id, video_id)
|
||||
|
||||
if (self._html_extract_title(webpage) == 'Protected Content'
|
||||
or 'This video is private and requires a password' in webpage):
|
||||
password = self.get_param('videopassword')
|
||||
|
||||
if not password:
|
||||
raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
|
||||
|
||||
form = self._search_regex(
|
||||
r'(?is)<form[^>]*>(?P<form>.+?)</form>', webpage, 'login form', group='form')
|
||||
form_data = self._hidden_inputs(form)
|
||||
form_data.update({
|
||||
'scPassword': password,
|
||||
})
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'https://screencast-o-matic.com/player/password', video_id, 'Logging in',
|
||||
data=urlencode_postdata(form_data))
|
||||
|
||||
if '<small class="text-danger">Invalid password</small>' in webpage:
|
||||
raise ExtractorError('Unable to login: Invalid password', expected=True)
|
||||
|
||||
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||
info.update({
|
||||
'id': video_id,
|
||||
|
||||
@@ -44,7 +44,7 @@ class SovietsClosetIE(SovietsClosetBaseIE):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://sovietscloset.com/video/1337',
|
||||
'md5': '11e58781c4ca5b283307aa54db5b3f93',
|
||||
'md5': 'bd012b04b261725510ca5383074cdd55',
|
||||
'info_dict': {
|
||||
'id': '1337',
|
||||
'ext': 'mp4',
|
||||
@@ -69,11 +69,11 @@ class SovietsClosetIE(SovietsClosetBaseIE):
|
||||
},
|
||||
{
|
||||
'url': 'https://sovietscloset.com/video/1105',
|
||||
'md5': '578b1958a379e7110ba38697042e9efb',
|
||||
'md5': '89fa928f183893cb65a0b7be846d8a90',
|
||||
'info_dict': {
|
||||
'id': '1105',
|
||||
'ext': 'mp4',
|
||||
'title': 'Arma 3 - Zeus Games #3',
|
||||
'title': 'Arma 3 - Zeus Games #5',
|
||||
'uploader': 'SovietWomble',
|
||||
'thumbnail': r're:^https?://.*\.b-cdn\.net/c0e5e76f-3a93-40b4-bf01-12343c2eec5d/thumbnail\.jpg$',
|
||||
'uploader': 'SovietWomble',
|
||||
@@ -89,8 +89,8 @@ class SovietsClosetIE(SovietsClosetBaseIE):
|
||||
'availability': 'public',
|
||||
'series': 'Arma 3',
|
||||
'season': 'Zeus Games',
|
||||
'episode_number': 3,
|
||||
'episode': 'Episode 3',
|
||||
'episode_number': 5,
|
||||
'episode': 'Episode 5',
|
||||
},
|
||||
},
|
||||
]
|
||||
@@ -122,7 +122,7 @@ class SovietsClosetIE(SovietsClosetBaseIE):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
static_assets_base = self._search_regex(r'staticAssetsBase:\"(.*?)\"', webpage, 'staticAssetsBase')
|
||||
static_assets_base = self._search_regex(r'(/_nuxt/static/\d+)', webpage, 'staticAssetsBase')
|
||||
static_assets_base = f'https://sovietscloset.com{static_assets_base}'
|
||||
|
||||
stream = self.parse_nuxt_jsonp(f'{static_assets_base}/video/{video_id}/payload.js', video_id, 'video')['stream']
|
||||
@@ -181,7 +181,7 @@ class SovietsClosetPlaylistIE(SovietsClosetBaseIE):
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
static_assets_base = self._search_regex(r'staticAssetsBase:\"(.*?)\"', webpage, 'staticAssetsBase')
|
||||
static_assets_base = self._search_regex(r'(/_nuxt/static/\d+)', webpage, 'staticAssetsBase')
|
||||
static_assets_base = f'https://sovietscloset.com{static_assets_base}'
|
||||
|
||||
sovietscloset = self.parse_nuxt_jsonp(f'{static_assets_base}/payload.js', playlist_id, 'global')['games']
|
||||
|
||||
@@ -29,9 +29,7 @@ class StripchatIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
'https://stripchat.com/%s/' % video_id, video_id,
|
||||
headers=self.geo_verification_headers())
|
||||
webpage = self._download_webpage(url, video_id, headers=self.geo_verification_headers())
|
||||
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
|
||||
369
yt_dlp/extractor/tencent.py
Normal file
369
yt_dlp/extractor/tencent.py
Normal file
@@ -0,0 +1,369 @@
|
||||
import functools
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_encrypt_bytes
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
traverse_obj,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class TencentBaseIE(InfoExtractor):
|
||||
"""Subclasses must set _API_URL, _APP_VERSION, _PLATFORM, _HOST, _REFERER"""
|
||||
|
||||
def _get_ckey(self, video_id, url, guid):
|
||||
ua = self.get_param('http_headers')['User-Agent']
|
||||
|
||||
payload = (f'{video_id}|{int(time.time())}|mg3c3b04ba|{self._APP_VERSION}|{guid}|'
|
||||
f'{self._PLATFORM}|{url[:48]}|{ua.lower()[:48]}||Mozilla|Netscape|Windows x86_64|00|')
|
||||
|
||||
return aes_cbc_encrypt_bytes(
|
||||
bytes(f'|{sum(map(ord, payload))}|{payload}', 'utf-8'),
|
||||
b'Ok\xda\xa3\x9e/\x8c\xb0\x7f^r-\x9e\xde\xf3\x14',
|
||||
b'\x01PJ\xf3V\xe6\x19\xcf.B\xbb\xa6\x8c?p\xf9',
|
||||
padding_mode='whitespace').hex().upper()
|
||||
|
||||
def _get_video_api_response(self, video_url, video_id, series_id, subtitle_format, video_format, video_quality):
|
||||
guid = ''.join([random.choice(string.digits + string.ascii_lowercase) for _ in range(16)])
|
||||
ckey = self._get_ckey(video_id, video_url, guid)
|
||||
query = {
|
||||
'vid': video_id,
|
||||
'cid': series_id,
|
||||
'cKey': ckey,
|
||||
'encryptVer': '8.1',
|
||||
'spcaptiontype': '1' if subtitle_format == 'vtt' else '0',
|
||||
'sphls': '2' if video_format == 'hls' else '0',
|
||||
'dtype': '3' if video_format == 'hls' else '0',
|
||||
'defn': video_quality,
|
||||
'spsrt': '2', # Enable subtitles
|
||||
'sphttps': '1', # Enable HTTPS
|
||||
'otype': 'json',
|
||||
'spwm': '1',
|
||||
# For SHD
|
||||
'host': self._HOST,
|
||||
'referer': self._REFERER,
|
||||
'ehost': video_url,
|
||||
'appVer': self._APP_VERSION,
|
||||
'platform': self._PLATFORM,
|
||||
# For VQQ
|
||||
'guid': guid,
|
||||
'flowid': ''.join(random.choice(string.digits + string.ascii_lowercase) for _ in range(32)),
|
||||
}
|
||||
|
||||
return self._search_json(r'QZOutputJson=', self._download_webpage(
|
||||
self._API_URL, video_id, query=query), 'api_response', video_id)
|
||||
|
||||
def _extract_video_formats_and_subtitles(self, api_response, video_id):
|
||||
video_response = api_response['vl']['vi'][0]
|
||||
video_width, video_height = video_response.get('vw'), video_response.get('vh')
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for video_format in video_response['ul']['ui']:
|
||||
if video_format.get('hls'):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
video_format['url'] + video_format['hls']['pt'], video_id, 'mp4', fatal=False)
|
||||
for f in fmts:
|
||||
f.update({'width': video_width, 'height': video_height})
|
||||
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
else:
|
||||
formats.append({
|
||||
'url': f'{video_format["url"]}{video_response["fn"]}?vkey={video_response["fvkey"]}',
|
||||
'width': video_width,
|
||||
'height': video_height,
|
||||
'ext': 'mp4',
|
||||
})
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
def _extract_video_native_subtitles(self, api_response, subtitles_format):
|
||||
subtitles = {}
|
||||
for subtitle in traverse_obj(api_response, ('sfl', 'fi')) or ():
|
||||
subtitles.setdefault(subtitle['lang'].lower(), []).append({
|
||||
'url': subtitle['url'],
|
||||
'ext': subtitles_format,
|
||||
'protocol': 'm3u8_native' if determine_ext(subtitle['url']) == 'm3u8' else 'http',
|
||||
})
|
||||
|
||||
return subtitles
|
||||
|
||||
def _extract_all_video_formats_and_subtitles(self, url, video_id, series_id):
|
||||
formats, subtitles = [], {}
|
||||
for video_format, subtitle_format, video_quality in (
|
||||
# '': 480p, 'shd': 720p, 'fhd': 1080p
|
||||
('mp4', 'srt', ''), ('hls', 'vtt', 'shd'), ('hls', 'vtt', 'fhd')):
|
||||
api_response = self._get_video_api_response(
|
||||
url, video_id, series_id, subtitle_format, video_format, video_quality)
|
||||
|
||||
if api_response.get('em') != 0 and api_response.get('exem') != 0:
|
||||
if '您所在区域暂无此内容版权' in api_response.get('msg'):
|
||||
self.raise_geo_restricted()
|
||||
raise ExtractorError(f'Tencent said: {api_response.get("msg")}')
|
||||
|
||||
fmts, subs = self._extract_video_formats_and_subtitles(api_response, video_id)
|
||||
native_subtitles = self._extract_video_native_subtitles(api_response, subtitle_format)
|
||||
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, native_subtitles, target=subtitles)
|
||||
|
||||
self._sort_formats(formats)
|
||||
return formats, subtitles
|
||||
|
||||
def _get_clean_title(self, title):
|
||||
return re.sub(
|
||||
r'\s*[_\-]\s*(?:Watch online|腾讯视频|(?:高清)?1080P在线观看平台).*?$',
|
||||
'', title or '').strip() or None
|
||||
|
||||
|
||||
class VQQBaseIE(TencentBaseIE):
|
||||
_VALID_URL_BASE = r'https?://v\.qq\.com'
|
||||
|
||||
_API_URL = 'https://h5vv6.video.qq.com/getvinfo'
|
||||
_APP_VERSION = '3.5.57'
|
||||
_PLATFORM = '10901'
|
||||
_HOST = 'v.qq.com'
|
||||
_REFERER = 'v.qq.com'
|
||||
|
||||
def _get_webpage_metadata(self, webpage, video_id):
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)<script[^>]*>[^<]*window\.__pinia\s*=\s*([^<]+)</script>',
|
||||
webpage, 'pinia data', fatal=False),
|
||||
video_id, transform_source=js_to_json, fatal=False)
|
||||
|
||||
|
||||
class VQQVideoIE(VQQBaseIE):
|
||||
IE_NAME = 'vqq:video'
|
||||
_VALID_URL = VQQBaseIE._VALID_URL_BASE + r'/x/(?:page|cover/(?P<series_id>\w+))/(?P<id>\w+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://v.qq.com/x/page/q326831cny0.html',
|
||||
'md5': '826ef93682df09e3deac4a6e6e8cdb6e',
|
||||
'info_dict': {
|
||||
'id': 'q326831cny0',
|
||||
'ext': 'mp4',
|
||||
'title': '我是选手:雷霆裂阵,终极时刻',
|
||||
'description': 'md5:e7ed70be89244017dac2a835a10aeb1e',
|
||||
'thumbnail': r're:^https?://[^?#]+q326831cny0',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://v.qq.com/x/page/o3013za7cse.html',
|
||||
'md5': 'b91cbbeada22ef8cc4b06df53e36fa21',
|
||||
'info_dict': {
|
||||
'id': 'o3013za7cse',
|
||||
'ext': 'mp4',
|
||||
'title': '欧阳娜娜VLOG',
|
||||
'description': 'md5:29fe847497a98e04a8c3826e499edd2e',
|
||||
'thumbnail': r're:^https?://[^?#]+o3013za7cse',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://v.qq.com/x/cover/7ce5noezvafma27/a00269ix3l8.html',
|
||||
'md5': '71459c5375c617c265a22f083facce67',
|
||||
'info_dict': {
|
||||
'id': 'a00269ix3l8',
|
||||
'ext': 'mp4',
|
||||
'title': '鸡毛飞上天 第01集',
|
||||
'description': 'md5:8cae3534327315b3872fbef5e51b5c5b',
|
||||
'thumbnail': r're:^https?://[^?#]+7ce5noezvafma27',
|
||||
'series': '鸡毛飞上天',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://v.qq.com/x/cover/mzc00200p29k31e/s0043cwsgj0.html',
|
||||
'md5': '96b9fd4a189fdd4078c111f21d7ac1bc',
|
||||
'info_dict': {
|
||||
'id': 's0043cwsgj0',
|
||||
'ext': 'mp4',
|
||||
'title': '第1集:如何快乐吃糖?',
|
||||
'description': 'md5:1d8c3a0b8729ae3827fa5b2d3ebd5213',
|
||||
'thumbnail': r're:^https?://[^?#]+s0043cwsgj0',
|
||||
'series': '青年理工工作者生活研究所',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, series_id = self._match_valid_url(url).group('id', 'series_id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage_metadata = self._get_webpage_metadata(webpage, video_id)
|
||||
|
||||
formats, subtitles = self._extract_all_video_formats_and_subtitles(url, video_id, series_id)
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._get_clean_title(self._og_search_title(webpage)
|
||||
or traverse_obj(webpage_metadata, ('global', 'videoInfo', 'title'))),
|
||||
'description': (self._og_search_description(webpage)
|
||||
or traverse_obj(webpage_metadata, ('global', 'videoInfo', 'desc'))),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnail': (self._og_search_thumbnail(webpage)
|
||||
or traverse_obj(webpage_metadata, ('global', 'videoInfo', 'pic160x90'))),
|
||||
'series': traverse_obj(webpage_metadata, ('global', 'coverInfo', 'title')),
|
||||
}
|
||||
|
||||
|
||||
class VQQSeriesIE(VQQBaseIE):
|
||||
IE_NAME = 'vqq:series'
|
||||
_VALID_URL = VQQBaseIE._VALID_URL_BASE + r'/x/cover/(?P<id>\w+)\.html/?(?:[?#]|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://v.qq.com/x/cover/7ce5noezvafma27.html',
|
||||
'info_dict': {
|
||||
'id': '7ce5noezvafma27',
|
||||
'title': '鸡毛飞上天',
|
||||
'description': 'md5:8cae3534327315b3872fbef5e51b5c5b',
|
||||
},
|
||||
'playlist_count': 55,
|
||||
}, {
|
||||
'url': 'https://v.qq.com/x/cover/oshd7r0vy9sfq8e.html',
|
||||
'info_dict': {
|
||||
'id': 'oshd7r0vy9sfq8e',
|
||||
'title': '恋爱细胞2',
|
||||
'description': 'md5:9d8a2245679f71ca828534b0f95d2a03',
|
||||
},
|
||||
'playlist_count': 12,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, series_id)
|
||||
webpage_metadata = self._get_webpage_metadata(webpage, series_id)
|
||||
|
||||
episode_paths = [f'/x/cover/{series_id}/{video_id}.html' for video_id in re.findall(
|
||||
r'<div[^>]+data-vid="(?P<video_id>[^"]+)"[^>]+class="[^"]+episode-item-rect--number',
|
||||
webpage)]
|
||||
|
||||
return self.playlist_from_matches(
|
||||
episode_paths, series_id, ie=VQQVideoIE, getter=functools.partial(urljoin, url),
|
||||
title=self._get_clean_title(traverse_obj(webpage_metadata, ('coverInfo', 'title'))
|
||||
or self._og_search_title(webpage)),
|
||||
description=(traverse_obj(webpage_metadata, ('coverInfo', 'description'))
|
||||
or self._og_search_description(webpage)))
|
||||
|
||||
|
||||
class WeTvBaseIE(TencentBaseIE):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?wetv\.vip/(?:[^?#]+/)?play'
|
||||
|
||||
_API_URL = 'https://play.wetv.vip/getvinfo'
|
||||
_APP_VERSION = '3.5.57'
|
||||
_PLATFORM = '4830201'
|
||||
_HOST = 'wetv.vip'
|
||||
_REFERER = 'wetv.vip'
|
||||
|
||||
def _get_webpage_metadata(self, webpage, video_id):
|
||||
return self._parse_json(
|
||||
traverse_obj(self._search_nextjs_data(webpage, video_id), ('props', 'pageProps', 'data')),
|
||||
video_id, fatal=False)
|
||||
|
||||
|
||||
class WeTvEpisodeIE(WeTvBaseIE):
|
||||
IE_NAME = 'wetv:episode'
|
||||
_VALID_URL = WeTvBaseIE._VALID_URL_BASE + r'/(?P<series_id>\w+)(?:-[^?#]+)?/(?P<id>\w+)(?:-[^?#]+)?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://wetv.vip/en/play/air11ooo2rdsdi3-Cute-Programmer/v0040pr89t9-EP1-Cute-Programmer',
|
||||
'md5': '0c70fdfaa5011ab022eebc598e64bbbe',
|
||||
'info_dict': {
|
||||
'id': 'v0040pr89t9',
|
||||
'ext': 'mp4',
|
||||
'title': 'EP1: Cute Programmer',
|
||||
'description': 'md5:e87beab3bf9f392d6b9e541a63286343',
|
||||
'thumbnail': r're:^https?://[^?#]+air11ooo2rdsdi3',
|
||||
'series': 'Cute Programmer',
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'duration': 2835,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://wetv.vip/en/play/u37kgfnfzs73kiu/p0039b9nvik',
|
||||
'md5': '3b3c15ca4b9a158d8d28d5aa9d7c0a49',
|
||||
'info_dict': {
|
||||
'id': 'p0039b9nvik',
|
||||
'ext': 'mp4',
|
||||
'title': 'EP1: You Are My Glory',
|
||||
'description': 'md5:831363a4c3b4d7615e1f3854be3a123b',
|
||||
'thumbnail': r're:^https?://[^?#]+u37kgfnfzs73kiu',
|
||||
'series': 'You Are My Glory',
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'duration': 2454,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://wetv.vip/en/play/lcxgwod5hapghvw-WeTV-PICK-A-BOO/i0042y00lxp-Zhao-Lusi-Describes-The-First-Experiences-She-Had-In-Who-Rules-The-World-%7C-WeTV-PICK-A-BOO',
|
||||
'md5': '71133f5c2d5d6cad3427e1b010488280',
|
||||
'info_dict': {
|
||||
'id': 'i0042y00lxp',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:f7a0857dbe5fbbe2e7ad630b92b54e6a',
|
||||
'description': 'md5:76260cb9cdc0ef76826d7ca9d92fadfa',
|
||||
'thumbnail': r're:^https?://[^?#]+lcxgwod5hapghvw',
|
||||
'series': 'WeTV PICK-A-BOO',
|
||||
'episode': 'Episode 0',
|
||||
'episode_number': 0,
|
||||
'duration': 442,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, series_id = self._match_valid_url(url).group('id', 'series_id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage_metadata = self._get_webpage_metadata(webpage, video_id)
|
||||
|
||||
formats, subtitles = self._extract_all_video_formats_and_subtitles(url, video_id, series_id)
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._get_clean_title(self._og_search_title(webpage)
|
||||
or traverse_obj(webpage_metadata, ('coverInfo', 'title'))),
|
||||
'description': (traverse_obj(webpage_metadata, ('coverInfo', 'description'))
|
||||
or self._og_search_description(webpage)),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'duration': int_or_none(traverse_obj(webpage_metadata, ('videoInfo', 'duration'))),
|
||||
'series': traverse_obj(webpage_metadata, ('coverInfo', 'title')),
|
||||
'episode_number': int_or_none(traverse_obj(webpage_metadata, ('videoInfo', 'episode'))),
|
||||
}
|
||||
|
||||
|
||||
class WeTvSeriesIE(WeTvBaseIE):
|
||||
_VALID_URL = WeTvBaseIE._VALID_URL_BASE + r'/(?P<id>\w+)(?:-[^/?#]+)?/?(?:[?#]|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://wetv.vip/play/air11ooo2rdsdi3-Cute-Programmer',
|
||||
'info_dict': {
|
||||
'id': 'air11ooo2rdsdi3',
|
||||
'title': 'Cute Programmer',
|
||||
'description': 'md5:e87beab3bf9f392d6b9e541a63286343',
|
||||
},
|
||||
'playlist_count': 30,
|
||||
}, {
|
||||
'url': 'https://wetv.vip/en/play/u37kgfnfzs73kiu-You-Are-My-Glory',
|
||||
'info_dict': {
|
||||
'id': 'u37kgfnfzs73kiu',
|
||||
'title': 'You Are My Glory',
|
||||
'description': 'md5:831363a4c3b4d7615e1f3854be3a123b',
|
||||
},
|
||||
'playlist_count': 32,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, series_id)
|
||||
webpage_metadata = self._get_webpage_metadata(webpage, series_id)
|
||||
|
||||
episode_paths = ([f'/play/{series_id}/{episode["vid"]}' for episode in webpage_metadata.get('videoList')]
|
||||
or re.findall(r'<a[^>]+class="play-video__link"[^>]+href="(?P<path>[^"]+)', webpage))
|
||||
|
||||
return self.playlist_from_matches(
|
||||
episode_paths, series_id, ie=WeTvEpisodeIE, getter=functools.partial(urljoin, url),
|
||||
title=self._get_clean_title(traverse_obj(webpage_metadata, ('coverInfo', 'title'))
|
||||
or self._og_search_title(webpage)),
|
||||
description=(traverse_obj(webpage_metadata, ('coverInfo', 'description'))
|
||||
or self._og_search_description(webpage)))
|
||||
@@ -8,12 +8,14 @@ class TestURLIE(InfoExtractor):
|
||||
""" Allows addressing of the test cases as test:yout.*be_1 """
|
||||
|
||||
IE_DESC = False # Do not list
|
||||
_VALID_URL = r'test(?:url)?:(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?$'
|
||||
_VALID_URL = r'test(?:url)?:(?P<extractor>.*?)(?:_(?P<num>[0-9]+))?$'
|
||||
|
||||
def _real_extract(self, url):
|
||||
from . import gen_extractor_classes
|
||||
|
||||
extractor_id, num = self._match_valid_url(url).group('extractor', 'num')
|
||||
if not extractor_id:
|
||||
return {'id': ':test', 'title': '', 'url': url}
|
||||
|
||||
rex = re.compile(extractor_id, flags=re.IGNORECASE)
|
||||
matching_extractors = [e for e in gen_extractor_classes() if rex.search(e.IE_NAME)]
|
||||
|
||||
@@ -4,7 +4,7 @@ from ..utils import int_or_none, parse_qs
|
||||
|
||||
class ToggoIE(InfoExtractor):
|
||||
IE_NAME = 'toggo'
|
||||
_VALID_URL = r'https?://(?:www\.)?toggo\.de/(?:toggolino/)?[^/?#]+/folge/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?toggo\.de/(?:toggolino/)?[^/?#]+/(?:folge|video)/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.toggo.de/weihnachtsmann--co-kg/folge/ein-geschenk-fuer-zwei',
|
||||
'info_dict': {
|
||||
@@ -33,6 +33,9 @@ class ToggoIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.toggo.de/toggolino/paw-patrol/folge/der-wetter-zeppelin-der-chili-kochwettbewerb',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.toggo.de/toggolino/paw-patrol/video/paw-patrol-rettung-im-anflug',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
304
yt_dlp/extractor/triller.py
Normal file
304
yt_dlp/extractor/triller.py
Normal file
@@ -0,0 +1,304 @@
|
||||
import itertools
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
class TrillerBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'triller'
|
||||
_AUTH_TOKEN = None
|
||||
_API_BASE_URL = 'https://social.triller.co/v1.5'
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
if self._AUTH_TOKEN:
|
||||
return
|
||||
|
||||
user_check = self._download_json(
|
||||
f'{self._API_BASE_URL}/api/user/is-valid-username', None, note='Checking username',
|
||||
fatal=False, expected_status=400, headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Origin': 'https://triller.co',
|
||||
}, data=json.dumps({'username': username}, separators=(',', ':')).encode('utf-8'))
|
||||
if user_check.get('status'): # endpoint returns "status":false if username exists
|
||||
raise ExtractorError('Unable to login: Invalid username', expected=True)
|
||||
|
||||
credentials = {
|
||||
'username': username,
|
||||
'password': password,
|
||||
}
|
||||
login = self._download_json(
|
||||
f'{self._API_BASE_URL}/user/auth', None, note='Logging in',
|
||||
fatal=False, expected_status=400, headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Origin': 'https://triller.co',
|
||||
}, data=json.dumps(credentials, separators=(',', ':')).encode('utf-8'))
|
||||
if not login.get('auth_token'):
|
||||
if login.get('error') == 1008:
|
||||
raise ExtractorError('Unable to login: Incorrect password', expected=True)
|
||||
raise ExtractorError('Unable to login')
|
||||
|
||||
self._AUTH_TOKEN = login['auth_token']
|
||||
|
||||
def _get_comments(self, video_id, limit=15):
|
||||
comment_info = self._download_json(
|
||||
f'{self._API_BASE_URL}/api/videos/{video_id}/comments_v2',
|
||||
video_id, fatal=False, note='Downloading comments API JSON',
|
||||
headers={'Origin': 'https://triller.co'}, query={'limit': limit}) or {}
|
||||
if not comment_info.get('comments'):
|
||||
return
|
||||
for comment_dict in comment_info['comments']:
|
||||
yield {
|
||||
'author': traverse_obj(comment_dict, ('author', 'username')),
|
||||
'author_id': traverse_obj(comment_dict, ('author', 'user_id')),
|
||||
'id': comment_dict.get('id'),
|
||||
'text': comment_dict.get('body'),
|
||||
'timestamp': unified_timestamp(comment_dict.get('timestamp')),
|
||||
}
|
||||
|
||||
def _check_user_info(self, user_info):
|
||||
if not user_info:
|
||||
self.report_warning('Unable to extract user info')
|
||||
elif user_info.get('private') and not user_info.get('followed_by_me'):
|
||||
raise ExtractorError('This video is private', expected=True)
|
||||
elif traverse_obj(user_info, 'blocked_by_user', 'blocking_user'):
|
||||
raise ExtractorError('The author of the video is blocked', expected=True)
|
||||
return user_info
|
||||
|
||||
def _parse_video_info(self, video_info, username, user_info=None):
|
||||
video_uuid = video_info.get('video_uuid')
|
||||
video_id = video_info.get('id')
|
||||
|
||||
formats = []
|
||||
video_url = traverse_obj(video_info, 'video_url', 'stream_url')
|
||||
if video_url:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'vcodec': 'h264',
|
||||
'width': video_info.get('width'),
|
||||
'height': video_info.get('height'),
|
||||
'format_id': url_basename(video_url).split('.')[0],
|
||||
'filesize': video_info.get('filesize'),
|
||||
})
|
||||
video_set = video_info.get('video_set') or []
|
||||
for video in video_set:
|
||||
resolution = video.get('resolution') or ''
|
||||
formats.append({
|
||||
'url': video['url'],
|
||||
'ext': 'mp4',
|
||||
'vcodec': video.get('codec'),
|
||||
'vbr': int_or_none(video.get('bitrate'), 1000),
|
||||
'width': int_or_none(resolution.split('x')[0]),
|
||||
'height': int_or_none(resolution.split('x')[1]),
|
||||
'format_id': url_basename(video['url']).split('.')[0],
|
||||
})
|
||||
audio_url = video_info.get('audio_url')
|
||||
if audio_url:
|
||||
formats.append({
|
||||
'url': audio_url,
|
||||
'ext': 'm4a',
|
||||
'format_id': url_basename(audio_url).split('.')[0],
|
||||
})
|
||||
|
||||
manifest_url = video_info.get('transcoded_url')
|
||||
if manifest_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
comment_count = int_or_none(video_info.get('comment_count'))
|
||||
|
||||
user_info = user_info or traverse_obj(video_info, 'user', default={})
|
||||
|
||||
return {
|
||||
'id': str_or_none(video_id) or video_uuid,
|
||||
'title': video_info.get('description') or f'Video by {username}',
|
||||
'thumbnail': video_info.get('thumbnail_url'),
|
||||
'description': video_info.get('description'),
|
||||
'uploader': str_or_none(username),
|
||||
'uploader_id': str_or_none(user_info.get('user_id')),
|
||||
'creator': str_or_none(user_info.get('name')),
|
||||
'timestamp': unified_timestamp(video_info.get('timestamp')),
|
||||
'upload_date': unified_strdate(video_info.get('timestamp')),
|
||||
'duration': int_or_none(video_info.get('duration')),
|
||||
'view_count': int_or_none(video_info.get('play_count')),
|
||||
'like_count': int_or_none(video_info.get('likes_count')),
|
||||
'artist': str_or_none(video_info.get('song_artist')),
|
||||
'track': str_or_none(video_info.get('song_title')),
|
||||
'webpage_url': f'https://triller.co/@{username}/video/{video_uuid}',
|
||||
'uploader_url': f'https://triller.co/@{username}',
|
||||
'extractor_key': TrillerIE.ie_key(),
|
||||
'extractor': TrillerIE.IE_NAME,
|
||||
'formats': formats,
|
||||
'comment_count': comment_count,
|
||||
'__post_extractor': self.extract_comments(video_id, comment_count),
|
||||
}
|
||||
|
||||
|
||||
class TrillerIE(TrillerBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?triller\.co/
|
||||
@(?P<username>[\w\._]+)/video/
|
||||
(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://triller.co/@theestallion/video/2358fcd7-3df2-4c77-84c8-1d091610a6cf',
|
||||
'md5': '228662d783923b60d78395fedddc0a20',
|
||||
'info_dict': {
|
||||
'id': '71595734',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:9a2bf9435c5c4292678996a464669416',
|
||||
'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
|
||||
'description': 'md5:9a2bf9435c5c4292678996a464669416',
|
||||
'uploader': 'theestallion',
|
||||
'uploader_id': '18992236',
|
||||
'creator': 'Megan Thee Stallion',
|
||||
'timestamp': 1660598222,
|
||||
'upload_date': '20220815',
|
||||
'duration': 47,
|
||||
'height': 3840,
|
||||
'width': 2160,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'artist': 'Megan Thee Stallion',
|
||||
'track': 'Her',
|
||||
'webpage_url': 'https://triller.co/@theestallion/video/2358fcd7-3df2-4c77-84c8-1d091610a6cf',
|
||||
'uploader_url': 'https://triller.co/@theestallion',
|
||||
'comment_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://triller.co/@charlidamelio/video/46c6fcfa-aa9e-4503-a50c-68444f44cddc',
|
||||
'md5': '874055f462af5b0699b9dbb527a505a0',
|
||||
'info_dict': {
|
||||
'id': '71621339',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:4c91ea82760fe0fffb71b8c3aa7295fc',
|
||||
'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
|
||||
'description': 'md5:4c91ea82760fe0fffb71b8c3aa7295fc',
|
||||
'uploader': 'charlidamelio',
|
||||
'uploader_id': '1875551',
|
||||
'creator': 'charli damelio',
|
||||
'timestamp': 1660773354,
|
||||
'upload_date': '20220817',
|
||||
'duration': 16,
|
||||
'height': 1920,
|
||||
'width': 1080,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'artist': 'Dixie',
|
||||
'track': 'Someone to Blame',
|
||||
'webpage_url': 'https://triller.co/@charlidamelio/video/46c6fcfa-aa9e-4503-a50c-68444f44cddc',
|
||||
'uploader_url': 'https://triller.co/@charlidamelio',
|
||||
'comment_count': int,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
username, video_uuid = self._match_valid_url(url).group('username', 'id')
|
||||
|
||||
video_info = traverse_obj(self._download_json(
|
||||
f'{self._API_BASE_URL}/api/videos/{video_uuid}',
|
||||
video_uuid, note='Downloading video info API JSON',
|
||||
errnote='Unable to download video info API JSON',
|
||||
headers={
|
||||
'Origin': 'https://triller.co',
|
||||
}), ('videos', 0))
|
||||
if not video_info:
|
||||
raise ExtractorError('No video info found in API response')
|
||||
|
||||
user_info = self._check_user_info(video_info.get('user') or {})
|
||||
return self._parse_video_info(video_info, username, user_info)
|
||||
|
||||
|
||||
class TrillerUserIE(TrillerBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?triller\.co/@(?P<id>[\w\._]+)/?(?:$|[#?])'
|
||||
_TESTS = [{
|
||||
# first videos request only returns 2 videos
|
||||
'url': 'https://triller.co/@theestallion',
|
||||
'playlist_mincount': 9,
|
||||
'info_dict': {
|
||||
'id': '18992236',
|
||||
'title': 'theestallion',
|
||||
'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://triller.co/@charlidamelio',
|
||||
'playlist_mincount': 25,
|
||||
'info_dict': {
|
||||
'id': '1875551',
|
||||
'title': 'charlidamelio',
|
||||
'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._AUTH_TOKEN:
|
||||
guest = self._download_json(
|
||||
f'{self._API_BASE_URL}/user/create_guest',
|
||||
None, note='Creating guest session', data=b'', headers={
|
||||
'Origin': 'https://triller.co',
|
||||
}, query={
|
||||
'platform': 'Web',
|
||||
'app_version': '',
|
||||
})
|
||||
if not guest.get('auth_token'):
|
||||
raise ExtractorError('Unable to fetch required auth token for user extraction')
|
||||
|
||||
self._AUTH_TOKEN = guest['auth_token']
|
||||
|
||||
def _extract_video_list(self, username, user_id, limit=6):
|
||||
query = {
|
||||
'limit': limit,
|
||||
}
|
||||
for page in itertools.count(1):
|
||||
for retry in self.RetryManager():
|
||||
try:
|
||||
video_list = self._download_json(
|
||||
f'{self._API_BASE_URL}/api/users/{user_id}/videos',
|
||||
username, note=f'Downloading user video list page {page}',
|
||||
errnote='Unable to download user video list', headers={
|
||||
'Authorization': f'Bearer {self._AUTH_TOKEN}',
|
||||
'Origin': 'https://triller.co',
|
||||
}, query=query)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
|
||||
retry.error = e
|
||||
continue
|
||||
raise
|
||||
if not video_list.get('videos'):
|
||||
break
|
||||
yield from video_list['videos']
|
||||
query['before_time'] = traverse_obj(video_list, ('videos', -1, 'timestamp'))
|
||||
if not query['before_time']:
|
||||
break
|
||||
|
||||
def _entries(self, videos, username, user_info):
|
||||
for video in videos:
|
||||
yield self._parse_video_info(video, username, user_info)
|
||||
|
||||
def _real_extract(self, url):
|
||||
username = self._match_id(url)
|
||||
user_info = self._check_user_info(self._download_json(
|
||||
f'{self._API_BASE_URL}/api/users/by_username/{username}',
|
||||
username, note='Downloading user info',
|
||||
errnote='Failed to download user info', headers={
|
||||
'Authorization': f'Bearer {self._AUTH_TOKEN}',
|
||||
'Origin': 'https://triller.co',
|
||||
}).get('user', {}))
|
||||
|
||||
user_id = str_or_none(user_info.get('user_id'))
|
||||
videos = self._extract_video_list(username, user_id)
|
||||
thumbnail = user_info.get('avatar_url')
|
||||
|
||||
return self.playlist_result(
|
||||
self._entries(videos, username, user_info), user_id, username, thumbnail=thumbnail)
|
||||
69
yt_dlp/extractor/truth.py
Normal file
69
yt_dlp/extractor/truth.py
Normal file
@@ -0,0 +1,69 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
format_field,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class TruthIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://truthsocial\.com/@[^/]+/posts/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://truthsocial.com/@realDonaldTrump/posts/108779000807761862',
|
||||
'md5': '4a5fb1470c192e493d9efd6f19e514d3',
|
||||
'info_dict': {
|
||||
'id': '108779000807761862',
|
||||
'ext': 'qt',
|
||||
'title': 'Truth video #108779000807761862',
|
||||
'description': None,
|
||||
'timestamp': 1659835827,
|
||||
'upload_date': '20220807',
|
||||
'uploader': 'Donald J. Trump',
|
||||
'uploader_id': 'realDonaldTrump',
|
||||
'uploader_url': 'https://truthsocial.com/@realDonaldTrump',
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://truthsocial.com/@ProjectVeritasAction/posts/108618228543962049',
|
||||
'md5': 'fd47ba68933f9dce27accc52275be9c3',
|
||||
'info_dict': {
|
||||
'id': '108618228543962049',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:debde7186cf83f60ff7b44dbb9444e35',
|
||||
'description': 'md5:de2fc49045bf92bb8dc97e56503b150f',
|
||||
'timestamp': 1657382637,
|
||||
'upload_date': '20220709',
|
||||
'uploader': 'Project Veritas Action',
|
||||
'uploader_id': 'ProjectVeritasAction',
|
||||
'uploader_url': 'https://truthsocial.com/@ProjectVeritasAction',
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
status = self._download_json(f'https://truthsocial.com/api/v1/statuses/{video_id}', video_id)
|
||||
uploader_id = strip_or_none(traverse_obj(status, ('account', 'username')))
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': status['media_attachments'][0]['url'],
|
||||
'title': '',
|
||||
'description': strip_or_none(clean_html(status.get('content'))) or None,
|
||||
'timestamp': unified_timestamp(status.get('created_at')),
|
||||
'uploader': strip_or_none(traverse_obj(status, ('account', 'display_name'))),
|
||||
'uploader_id': uploader_id,
|
||||
'uploader_url': format_field(uploader_id, None, 'https://truthsocial.com/@%s'),
|
||||
'repost_count': int_or_none(status.get('reblogs_count')),
|
||||
'like_count': int_or_none(status.get('favourites_count')),
|
||||
'comment_count': int_or_none(status.get('replies_count')),
|
||||
}
|
||||
@@ -70,16 +70,17 @@ class TubiTvIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'http://tubitv.com/oz/videos/%s/content' % video_id, video_id)
|
||||
'https://tubitv.com/oz/videos/%s/content?video_resources=dash&video_resources=hlsv3&video_resources=hlsv6' % video_id, video_id)
|
||||
title = video_data['title']
|
||||
|
||||
formats = []
|
||||
url = video_data['url']
|
||||
# URL can be sometimes empty. Does this only happen when there is DRM?
|
||||
if url:
|
||||
formats = self._extract_m3u8_formats(
|
||||
self._proto_relative_url(url),
|
||||
video_id, 'mp4', 'm3u8_native')
|
||||
|
||||
for resource in video_data['video_resources']:
|
||||
if resource['type'] in ('dash', ):
|
||||
formats += self._extract_mpd_formats(resource['manifest']['url'], video_id, mpd_id=resource['type'], fatal=False)
|
||||
elif resource['type'] in ('hlsv3', 'hlsv6'):
|
||||
formats += self._extract_m3u8_formats(resource['manifest']['url'], video_id, 'mp4', m3u8_id=resource['type'], fatal=False)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
|
||||
@@ -1169,7 +1169,7 @@ class TwitchClipsIE(TwitchBaseIE):
|
||||
'id': clip.get('id') or video_id,
|
||||
'_old_archive_ids': [make_archive_id(self, old_id)] if old_id else None,
|
||||
'display_id': video_id,
|
||||
'title': clip.get('title') or video_id,
|
||||
'title': clip.get('title'),
|
||||
'formats': formats,
|
||||
'duration': int_or_none(clip.get('durationSeconds')),
|
||||
'view_count': int_or_none(clip.get('viewCount')),
|
||||
|
||||
@@ -2,7 +2,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class UKTVPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://uktvplay\.uktv\.co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*watch-online/)(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://uktvplay\.(?:uktv\.)?co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*watch-online/)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://uktvplay.uktv.co.uk/shows/world-at-war/c/200/watch-online/?video=2117008346001',
|
||||
'info_dict': {
|
||||
|
||||
@@ -1131,7 +1131,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
|
||||
|
||||
class VimeoUserIE(VimeoChannelIE):
|
||||
IE_NAME = 'vimeo:user'
|
||||
_VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P<id>[^/]+)(?:/videos|[#?]|$)'
|
||||
_VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P<id>[^/]+)(?:/videos)?/?(?:$|[?#])'
|
||||
_TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
|
||||
_TESTS = [{
|
||||
'url': 'https://vimeo.com/nkistudio/videos',
|
||||
@@ -1140,6 +1140,9 @@ class VimeoUserIE(VimeoChannelIE):
|
||||
'id': 'nkistudio',
|
||||
},
|
||||
'playlist_mincount': 66,
|
||||
}, {
|
||||
'url': 'https://vimeo.com/nkistudio/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_BASE_URL_TEMPL = 'https://vimeo.com/%s'
|
||||
|
||||
|
||||
@@ -1,208 +0,0 @@
|
||||
import functools
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_encrypt_bytes
|
||||
from ..utils import determine_ext, int_or_none, traverse_obj, urljoin
|
||||
|
||||
|
||||
class WeTvBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?wetv\.vip/(?:[^?#]+/)?play'
|
||||
|
||||
def _get_ckey(self, video_id, url, app_version, platform):
|
||||
ua = self.get_param('http_headers')['User-Agent']
|
||||
|
||||
payload = (f'{video_id}|{int(time.time())}|mg3c3b04ba|{app_version}|0000000000000000|'
|
||||
f'{platform}|{url[:48]}|{ua.lower()[:48]}||Mozilla|Netscape|Win32|00|')
|
||||
|
||||
return aes_cbc_encrypt_bytes(
|
||||
bytes(f'|{sum(map(ord, payload))}|{payload}', 'utf-8'),
|
||||
b'Ok\xda\xa3\x9e/\x8c\xb0\x7f^r-\x9e\xde\xf3\x14',
|
||||
b'\x01PJ\xf3V\xe6\x19\xcf.B\xbb\xa6\x8c?p\xf9',
|
||||
padding_mode='whitespace').hex()
|
||||
|
||||
def _get_video_api_response(self, video_url, video_id, series_id, subtitle_format, video_format, video_quality):
|
||||
app_version = '3.5.57'
|
||||
platform = '4830201'
|
||||
|
||||
ckey = self._get_ckey(video_id, video_url, app_version, platform)
|
||||
query = {
|
||||
'vid': video_id,
|
||||
'cid': series_id,
|
||||
'cKey': ckey,
|
||||
'encryptVer': '8.1',
|
||||
'spcaptiontype': '1' if subtitle_format == 'vtt' else '0', # 0 - SRT, 1 - VTT
|
||||
'sphls': '1' if video_format == 'hls' else '0', # 0 - MP4, 1 - HLS
|
||||
'defn': video_quality, # '': 480p, 'shd': 720p, 'fhd': 1080p
|
||||
'spsrt': '1', # Enable subtitles
|
||||
'sphttps': '1', # Enable HTTPS
|
||||
'otype': 'json', # Response format: xml, json,
|
||||
'dtype': '1',
|
||||
'spwm': '1',
|
||||
'host': 'wetv.vip', # These three values are needed for SHD
|
||||
'referer': 'wetv.vip',
|
||||
'ehost': video_url,
|
||||
'appVer': app_version,
|
||||
'platform': platform,
|
||||
}
|
||||
|
||||
return self._search_json(r'QZOutputJson=', self._download_webpage(
|
||||
'https://play.wetv.vip/getvinfo', video_id, query=query), 'api_response', video_id)
|
||||
|
||||
def _get_webpage_metadata(self, webpage, video_id):
|
||||
return self._parse_json(
|
||||
traverse_obj(self._search_nextjs_data(webpage, video_id), ('props', 'pageProps', 'data')),
|
||||
video_id, fatal=False)
|
||||
|
||||
|
||||
class WeTvEpisodeIE(WeTvBaseIE):
|
||||
IE_NAME = 'wetv:episode'
|
||||
_VALID_URL = WeTvBaseIE._VALID_URL_BASE + r'/(?P<series_id>\w+)(?:-[^?#]+)?/(?P<id>\w+)(?:-[^?#]+)?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://wetv.vip/en/play/air11ooo2rdsdi3-Cute-Programmer/v0040pr89t9-EP1-Cute-Programmer',
|
||||
'md5': 'a046f565c9dce9b263a0465a422cd7bf',
|
||||
'info_dict': {
|
||||
'id': 'v0040pr89t9',
|
||||
'ext': 'mp4',
|
||||
'title': 'EP1: Cute Programmer',
|
||||
'description': 'md5:e87beab3bf9f392d6b9e541a63286343',
|
||||
'thumbnail': r're:^https?://[^?#]+air11ooo2rdsdi3',
|
||||
'series': 'Cute Programmer',
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'duration': 2835,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://wetv.vip/en/play/u37kgfnfzs73kiu/p0039b9nvik',
|
||||
'md5': '4d9d69bcfd11da61f4aae64fc6b316b3',
|
||||
'info_dict': {
|
||||
'id': 'p0039b9nvik',
|
||||
'ext': 'mp4',
|
||||
'title': 'EP1: You Are My Glory',
|
||||
'description': 'md5:831363a4c3b4d7615e1f3854be3a123b',
|
||||
'thumbnail': r're:^https?://[^?#]+u37kgfnfzs73kiu',
|
||||
'series': 'You Are My Glory',
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'duration': 2454,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://wetv.vip/en/play/lcxgwod5hapghvw-WeTV-PICK-A-BOO/i0042y00lxp-Zhao-Lusi-Describes-The-First-Experiences-She-Had-In-Who-Rules-The-World-%7C-WeTV-PICK-A-BOO',
|
||||
'md5': '71133f5c2d5d6cad3427e1b010488280',
|
||||
'info_dict': {
|
||||
'id': 'i0042y00lxp',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:f7a0857dbe5fbbe2e7ad630b92b54e6a',
|
||||
'description': 'md5:76260cb9cdc0ef76826d7ca9d92fadfa',
|
||||
'thumbnail': r're:^https?://[^?#]+lcxgwod5hapghvw',
|
||||
'series': 'WeTV PICK-A-BOO',
|
||||
'episode': 'Episode 0',
|
||||
'episode_number': 0,
|
||||
'duration': 442,
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_video_formats_and_subtitles(self, api_response, video_id, video_quality):
|
||||
video_response = api_response['vl']['vi'][0]
|
||||
video_width = video_response.get('vw')
|
||||
video_height = video_response.get('vh')
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for video_format in video_response['ul']['ui']:
|
||||
if video_format.get('hls'):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
video_format['url'] + video_format['hls']['pname'], video_id, 'mp4', fatal=False)
|
||||
for f in fmts:
|
||||
f['width'] = video_width
|
||||
f['height'] = video_height
|
||||
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
else:
|
||||
formats.append({
|
||||
'url': f'{video_format["url"]}{video_response["fn"]}?vkey={video_response["fvkey"]}',
|
||||
'width': video_width,
|
||||
'height': video_height,
|
||||
'ext': 'mp4',
|
||||
})
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
def _extract_video_subtitles(self, api_response, subtitles_format):
|
||||
subtitles = {}
|
||||
for subtitle in traverse_obj(api_response, ('sfl', 'fi')):
|
||||
subtitles.setdefault(subtitle['lang'].lower(), []).append({
|
||||
'url': subtitle['url'],
|
||||
'ext': subtitles_format,
|
||||
'protocol': 'm3u8_native' if determine_ext(subtitle['url']) == 'm3u8' else 'http',
|
||||
})
|
||||
|
||||
return subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, series_id = self._match_valid_url(url).group('id', 'series_id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for video_format, subtitle_format, video_quality in (('mp4', 'srt', ''), ('hls', 'vtt', 'shd'), ('hls', 'vtt', 'fhd')):
|
||||
api_response = self._get_video_api_response(url, video_id, series_id, subtitle_format, video_format, video_quality)
|
||||
|
||||
fmts, subs = self._extract_video_formats_and_subtitles(api_response, video_id, video_quality)
|
||||
native_subtitles = self._extract_video_subtitles(api_response, subtitle_format)
|
||||
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, native_subtitles, target=subtitles)
|
||||
|
||||
self._sort_formats(formats)
|
||||
webpage_metadata = self._get_webpage_metadata(webpage, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': (self._og_search_title(webpage)
|
||||
or traverse_obj(webpage_metadata, ('coverInfo', 'description'))),
|
||||
'description': (self._og_search_description(webpage)
|
||||
or traverse_obj(webpage_metadata, ('coverInfo', 'description'))),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'duration': int_or_none(traverse_obj(webpage_metadata, ('videoInfo', 'duration'))),
|
||||
'series': traverse_obj(webpage_metadata, ('coverInfo', 'title')),
|
||||
'episode_number': int_or_none(traverse_obj(webpage_metadata, ('videoInfo', 'episode'))),
|
||||
}
|
||||
|
||||
|
||||
class WeTvSeriesIE(WeTvBaseIE):
|
||||
_VALID_URL = WeTvBaseIE._VALID_URL_BASE + r'/(?P<id>\w+)(?:-[^/?#]+)?/?(?:[?#]|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://wetv.vip/play/air11ooo2rdsdi3-Cute-Programmer',
|
||||
'info_dict': {
|
||||
'id': 'air11ooo2rdsdi3',
|
||||
'title': 'Cute Programmer',
|
||||
'description': 'md5:e87beab3bf9f392d6b9e541a63286343',
|
||||
},
|
||||
'playlist_count': 30,
|
||||
}, {
|
||||
'url': 'https://wetv.vip/en/play/u37kgfnfzs73kiu-You-Are-My-Glory',
|
||||
'info_dict': {
|
||||
'id': 'u37kgfnfzs73kiu',
|
||||
'title': 'You Are My Glory',
|
||||
'description': 'md5:831363a4c3b4d7615e1f3854be3a123b',
|
||||
},
|
||||
'playlist_count': 32,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, series_id)
|
||||
webpage_metadata = self._get_webpage_metadata(webpage, series_id)
|
||||
|
||||
episode_paths = (re.findall(r'<a[^>]+class="play-video__link"[^>]+href="(?P<path>[^"]+)', webpage)
|
||||
or [f'/{series_id}/{episode["vid"]}' for episode in webpage_metadata.get('videoList')])
|
||||
|
||||
return self.playlist_from_matches(
|
||||
episode_paths, series_id, ie=WeTvEpisodeIE, getter=functools.partial(urljoin, url),
|
||||
title=traverse_obj(webpage_metadata, ('coverInfo', 'title')) or self._og_search_title(webpage),
|
||||
description=traverse_obj(webpage_metadata, ('coverInfo', 'description')) or self._og_search_description(webpage))
|
||||
@@ -17,6 +17,7 @@ import urllib.error
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
from .openload import PhantomJSwrapper
|
||||
from ..compat import functools
|
||||
from ..jsinterp import JSInterpreter
|
||||
from ..utils import (
|
||||
@@ -109,8 +110,9 @@ INNERTUBE_CLIENTS = {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'ANDROID',
|
||||
'clientVersion': '17.29.34',
|
||||
'androidSdkVersion': 30
|
||||
'clientVersion': '17.31.35',
|
||||
'androidSdkVersion': 30,
|
||||
'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
|
||||
}
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
|
||||
@@ -121,8 +123,9 @@ INNERTUBE_CLIENTS = {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'ANDROID_EMBEDDED_PLAYER',
|
||||
'clientVersion': '17.29.34',
|
||||
'androidSdkVersion': 30
|
||||
'clientVersion': '17.31.35',
|
||||
'androidSdkVersion': 30,
|
||||
'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
|
||||
@@ -134,7 +137,8 @@ INNERTUBE_CLIENTS = {
|
||||
'client': {
|
||||
'clientName': 'ANDROID_MUSIC',
|
||||
'clientVersion': '5.16.51',
|
||||
'androidSdkVersion': 30
|
||||
'androidSdkVersion': 30,
|
||||
'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
|
||||
}
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
|
||||
@@ -145,8 +149,9 @@ INNERTUBE_CLIENTS = {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'ANDROID_CREATOR',
|
||||
'clientVersion': '22.28.100',
|
||||
'androidSdkVersion': 30
|
||||
'clientVersion': '22.30.100',
|
||||
'androidSdkVersion': 30,
|
||||
'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
|
||||
@@ -159,8 +164,9 @@ INNERTUBE_CLIENTS = {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'IOS',
|
||||
'clientVersion': '17.30.1',
|
||||
'clientVersion': '17.33.2',
|
||||
'deviceModel': 'iPhone14,3',
|
||||
'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
|
||||
}
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
|
||||
@@ -170,8 +176,9 @@ INNERTUBE_CLIENTS = {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'IOS_MESSAGES_EXTENSION',
|
||||
'clientVersion': '17.30.1',
|
||||
'clientVersion': '17.33.2',
|
||||
'deviceModel': 'iPhone14,3',
|
||||
'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
|
||||
@@ -182,7 +189,9 @@ INNERTUBE_CLIENTS = {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'IOS_MUSIC',
|
||||
'clientVersion': '5.18',
|
||||
'clientVersion': '5.21',
|
||||
'deviceModel': 'iPhone14,3',
|
||||
'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
|
||||
@@ -192,7 +201,9 @@ INNERTUBE_CLIENTS = {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'IOS_CREATOR',
|
||||
'clientVersion': '22.29.101',
|
||||
'clientVersion': '22.33.101',
|
||||
'deviceModel': 'iPhone14,3',
|
||||
'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
|
||||
@@ -554,7 +565,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
'Origin': origin,
|
||||
'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
|
||||
'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
|
||||
'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
|
||||
'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
|
||||
'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)
|
||||
}
|
||||
if session_index is None:
|
||||
session_index = self._extract_session_index(ytcfg)
|
||||
@@ -809,7 +821,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
# Youtube sometimes sends incomplete data
|
||||
# See: https://github.com/ytdl-org/youtube-dl/issues/28194
|
||||
if not traverse_obj(response, *variadic(check_get_keys)):
|
||||
retry.error = ExtractorError('Incomplete data received')
|
||||
retry.error = ExtractorError('Incomplete data received', expected=True)
|
||||
continue
|
||||
|
||||
return response
|
||||
@@ -867,7 +879,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
else None),
|
||||
'live_status': ('is_upcoming' if scheduled_timestamp is not None
|
||||
else 'was_live' if 'streamed' in time_text.lower()
|
||||
else 'is_live' if overlay_style is not None and overlay_style == 'LIVE' or 'live now' in badges
|
||||
else 'is_live' if overlay_style == 'LIVE' or 'live now' in badges
|
||||
else None),
|
||||
'release_timestamp': scheduled_timestamp,
|
||||
'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
|
||||
@@ -2147,6 +2159,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'comment_count': int,
|
||||
'channel_follower_count': int
|
||||
}
|
||||
}, {
|
||||
# Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
|
||||
'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
|
||||
'info_dict': {
|
||||
'id': '2NUZ8W2llS4',
|
||||
'ext': 'mp4',
|
||||
'title': 'The NP that test your phone performance 🙂',
|
||||
'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
|
||||
'uploader': 'Leon Nguyen',
|
||||
'uploader_id': 'VNSXIII',
|
||||
'uploader_url': 'http://www.youtube.com/user/VNSXIII',
|
||||
'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
|
||||
'duration': 21,
|
||||
'view_count': int,
|
||||
'age_limit': 0,
|
||||
'categories': ['Gaming'],
|
||||
'tags': 'count:23',
|
||||
'playable_in_embed': True,
|
||||
'live_status': 'not_live',
|
||||
'upload_date': '20220102',
|
||||
'like_count': int,
|
||||
'availability': 'public',
|
||||
'channel': 'Leon Nguyen',
|
||||
'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
|
||||
'comment_count': int,
|
||||
'channel_follower_count': int
|
||||
},
|
||||
'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
|
||||
}, {
|
||||
# date text is premiered video, ensure upload date in UTC (published 1641172509)
|
||||
'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
|
||||
@@ -2512,20 +2553,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
assert os.path.basename(func_id) == func_id
|
||||
|
||||
self.write_debug(f'Extracting signature function {func_id}')
|
||||
cache_spec = self.cache.load('youtube-sigfuncs', func_id)
|
||||
if cache_spec is not None:
|
||||
return lambda s: ''.join(s[i] for i in cache_spec)
|
||||
cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
|
||||
|
||||
code = self._load_player(video_id, player_url)
|
||||
if not cache_spec:
|
||||
code = self._load_player(video_id, player_url)
|
||||
if code:
|
||||
res = self._parse_sig_js(code)
|
||||
|
||||
test_string = ''.join(map(chr, range(len(example_sig))))
|
||||
cache_res = res(test_string)
|
||||
cache_spec = [ord(c) for c in cache_res]
|
||||
|
||||
cache_spec = [ord(c) for c in res(test_string)]
|
||||
self.cache.store('youtube-sigfuncs', func_id, cache_spec)
|
||||
return res
|
||||
|
||||
return lambda s: ''.join(s[i] for i in cache_spec)
|
||||
|
||||
def _print_sig_code(self, func, example_sig):
|
||||
if not self.get_param('youtube_print_sig_code'):
|
||||
@@ -2593,18 +2631,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
initial_function = jsi.extract_function(funcname)
|
||||
return lambda s: initial_function([s])
|
||||
|
||||
def _cached(self, func, *cache_id):
|
||||
def inner(*args, **kwargs):
|
||||
if cache_id not in self._player_cache:
|
||||
try:
|
||||
self._player_cache[cache_id] = func(*args, **kwargs)
|
||||
except ExtractorError as e:
|
||||
self._player_cache[cache_id] = e
|
||||
except Exception as e:
|
||||
self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
|
||||
|
||||
ret = self._player_cache[cache_id]
|
||||
if isinstance(ret, Exception):
|
||||
raise ret
|
||||
return ret
|
||||
return inner
|
||||
|
||||
def _decrypt_signature(self, s, video_id, player_url):
|
||||
"""Turn the encrypted s field into a working signature"""
|
||||
try:
|
||||
player_id = (player_url, self._signature_cache_id(s))
|
||||
if player_id not in self._player_cache:
|
||||
func = self._extract_signature_function(video_id, player_url, s)
|
||||
self._player_cache[player_id] = func
|
||||
func = self._player_cache[player_id]
|
||||
self._print_sig_code(func, s)
|
||||
return func(s)
|
||||
except Exception as e:
|
||||
raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
|
||||
extract_sig = self._cached(
|
||||
self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
|
||||
func = extract_sig(video_id, player_url, s)
|
||||
self._print_sig_code(func, s)
|
||||
return func(s)
|
||||
|
||||
def _decrypt_nsig(self, s, video_id, player_url):
|
||||
"""Turn the encrypted n field into a working signature"""
|
||||
@@ -2612,48 +2661,87 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
raise ExtractorError('Cannot decrypt nsig without player_url')
|
||||
player_url = urljoin('https://www.youtube.com', player_url)
|
||||
|
||||
sig_id = ('nsig_value', s)
|
||||
if sig_id in self._player_cache:
|
||||
return self._player_cache[sig_id]
|
||||
|
||||
try:
|
||||
player_id = ('nsig', player_url)
|
||||
if player_id not in self._player_cache:
|
||||
self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
|
||||
func = self._player_cache[player_id]
|
||||
self._player_cache[sig_id] = func(s)
|
||||
self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')
|
||||
return self._player_cache[sig_id]
|
||||
except Exception as e:
|
||||
raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
|
||||
|
||||
def _extract_n_function_name(self, jscode):
|
||||
nfunc, idx = self._search_regex(
|
||||
r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
|
||||
jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
|
||||
if not idx:
|
||||
return nfunc
|
||||
return json.loads(js_to_json(self._search_regex(
|
||||
rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,
|
||||
f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]
|
||||
|
||||
def _extract_n_function(self, video_id, player_url):
|
||||
player_id = self._extract_player_info(player_url)
|
||||
func_code = self.cache.load('youtube-nsig', player_id)
|
||||
|
||||
if func_code:
|
||||
jsi = JSInterpreter(func_code)
|
||||
else:
|
||||
jscode = self._load_player(video_id, player_url)
|
||||
funcname = self._extract_n_function_name(jscode)
|
||||
jsi = JSInterpreter(jscode)
|
||||
func_code = jsi.extract_function_code(funcname)
|
||||
self.cache.store('youtube-nsig', player_id, func_code)
|
||||
|
||||
jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
|
||||
except ExtractorError as e:
|
||||
raise ExtractorError('Unable to extract nsig function code', cause=e)
|
||||
if self.get_param('youtube_print_sig_code'):
|
||||
self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
|
||||
|
||||
return lambda s: jsi.extract_function_from_code(*func_code)([s])
|
||||
try:
|
||||
extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
|
||||
ret = extract_nsig(jsi, func_code)(s)
|
||||
except JSInterpreter.Exception as e:
|
||||
try:
|
||||
jsi = PhantomJSwrapper(self, timeout=5000)
|
||||
except ExtractorError:
|
||||
raise e
|
||||
self.report_warning(
|
||||
f'Native nsig extraction failed: Trying with PhantomJS\n'
|
||||
f' n = {s} ; player = {player_url}', video_id)
|
||||
self.write_debug(e)
|
||||
|
||||
args, func_body = func_code
|
||||
ret = jsi.execute(
|
||||
f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));',
|
||||
video_id=video_id, note='Executing signature code').strip()
|
||||
|
||||
self.write_debug(f'Decrypted nsig {s} => {ret}')
|
||||
return ret
|
||||
|
||||
def _extract_n_function_name(self, jscode):
|
||||
funcname, idx = self._search_regex(
|
||||
r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
|
||||
jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
|
||||
if not idx:
|
||||
return funcname
|
||||
|
||||
return json.loads(js_to_json(self._search_regex(
|
||||
rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
|
||||
f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
|
||||
|
||||
def _extract_n_function_code(self, video_id, player_url):
|
||||
player_id = self._extract_player_info(player_url)
|
||||
func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
|
||||
jscode = func_code or self._load_player(video_id, player_url)
|
||||
jsi = JSInterpreter(jscode)
|
||||
|
||||
if func_code:
|
||||
return jsi, player_id, func_code
|
||||
|
||||
func_name = self._extract_n_function_name(jscode)
|
||||
|
||||
# For redundancy
|
||||
func_code = self._search_regex(
|
||||
r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
|
||||
# NB: The end of the regex is intentionally kept strict
|
||||
{(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % func_name,
|
||||
jscode, 'nsig function', group=('var', 'code'), default=None)
|
||||
if func_code:
|
||||
func_code = ([func_code[0]], func_code[1])
|
||||
else:
|
||||
self.write_debug('Extracting nsig function with jsinterp')
|
||||
func_code = jsi.extract_function_code(func_name)
|
||||
|
||||
self.cache.store('youtube-nsig', player_id, func_code)
|
||||
return jsi, player_id, func_code
|
||||
|
||||
def _extract_n_function_from_code(self, jsi, func_code):
|
||||
func = jsi.extract_function_from_code(*func_code)
|
||||
|
||||
def extract_nsig(s):
|
||||
try:
|
||||
ret = func([s])
|
||||
except JSInterpreter.Exception:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
|
||||
|
||||
if ret.startswith('enhanced_except_'):
|
||||
raise JSInterpreter.Exception('Signature function returned an exception')
|
||||
return ret
|
||||
|
||||
return extract_nsig
|
||||
|
||||
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
|
||||
"""
|
||||
@@ -2916,8 +3004,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# YouTube comments have a max depth of 2
|
||||
max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
|
||||
if max_depth:
|
||||
self._downloader.deprecation_warning(
|
||||
'[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
|
||||
self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
|
||||
'Set max replies in the max-comments extractor argument instead')
|
||||
if max_depth == 1 and parent:
|
||||
return
|
||||
|
||||
@@ -3039,7 +3127,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
def _is_unplayable(player_response):
|
||||
return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
|
||||
|
||||
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
|
||||
_STORY_PLAYER_PARAMS = '8AEB'
|
||||
|
||||
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
|
||||
|
||||
session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
|
||||
syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
|
||||
@@ -3049,8 +3139,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
yt_query = {
|
||||
'videoId': video_id,
|
||||
'params': '8AEB' # enable stories
|
||||
}
|
||||
if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':
|
||||
yt_query['params'] = self._STORY_PLAYER_PARAMS
|
||||
|
||||
yt_query.update(self._generate_player_context(sts))
|
||||
return self._extract_response(
|
||||
item_id=video_id, ep='player', query=yt_query,
|
||||
@@ -3083,7 +3175,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
return orderedSet(requested_clients)
|
||||
|
||||
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
|
||||
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
|
||||
initial_pr = None
|
||||
if webpage:
|
||||
initial_pr = self._search_json(
|
||||
@@ -3133,7 +3225,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
try:
|
||||
pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
|
||||
client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
|
||||
client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
|
||||
except ExtractorError as e:
|
||||
if last_error:
|
||||
self.report_warning(last_error)
|
||||
@@ -3167,7 +3259,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration):
|
||||
itags, stream_ids = {}, []
|
||||
itag_qualities, res_qualities = {}, {}
|
||||
itag_qualities, res_qualities = {}, {0: None}
|
||||
q = qualities([
|
||||
# Normally tiny is the smallest video-only formats. But
|
||||
# audio-only formats with unknown quality may get tagged as tiny
|
||||
@@ -3219,7 +3311,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
self._decrypt_signature(encrypted_sig, video_id, player_url)
|
||||
)
|
||||
except ExtractorError as e:
|
||||
self.report_warning('Signature extraction failed: Some formats may be missing', only_once=True)
|
||||
self.report_warning('Signature extraction failed: Some formats may be missing',
|
||||
video_id=video_id, only_once=True)
|
||||
self.write_debug(e, only_once=True)
|
||||
continue
|
||||
|
||||
@@ -3227,12 +3320,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
throttled = False
|
||||
if query.get('n'):
|
||||
try:
|
||||
decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
|
||||
fmt_url = update_url_query(fmt_url, {
|
||||
'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
|
||||
'n': decrypt_nsig(query['n'][0], video_id, player_url)
|
||||
})
|
||||
except ExtractorError as e:
|
||||
phantomjs_hint = ''
|
||||
if isinstance(e, JSInterpreter.Exception):
|
||||
phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
|
||||
f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
|
||||
self.report_warning(
|
||||
'nsig extraction failed: You may experience throttling for some formats\n'
|
||||
f'n = {query["n"][0]} ; player = {player_url}', only_once=True)
|
||||
f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
|
||||
f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
|
||||
self.write_debug(e, only_once=True)
|
||||
throttled = True
|
||||
|
||||
@@ -3246,9 +3345,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
|
||||
else -1)
|
||||
# Some formats may have much smaller duration than others (possibly damaged during encoding)
|
||||
# Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
|
||||
# E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
|
||||
# Make sure to avoid false positives with small duration differences.
|
||||
# Eg: __2ABJjxzNo, ySuUZEjARPY
|
||||
# E.g. __2ABJjxzNo, ySuUZEjARPY
|
||||
is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
|
||||
if is_damaged:
|
||||
self.report_warning(
|
||||
@@ -3319,10 +3418,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
f['format_id'] = itag
|
||||
itags[itag] = proto
|
||||
|
||||
f['quality'] = next((
|
||||
q(qdict[val])
|
||||
for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
|
||||
if val in qdict), -1)
|
||||
f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
|
||||
if f['quality'] == -1 and f.get('height'):
|
||||
f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
|
||||
return True
|
||||
|
||||
subtitles = {}
|
||||
@@ -3391,14 +3489,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
|
||||
webpage = None
|
||||
if 'webpage' not in self._configuration_arg('player_skip'):
|
||||
query = {'bpctr': '9999999999', 'has_verified': '1'}
|
||||
if smuggled_data.get('is_story'):
|
||||
query['pp'] = self._STORY_PLAYER_PARAMS
|
||||
webpage = self._download_webpage(
|
||||
webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)
|
||||
webpage_url, video_id, fatal=False, query=query)
|
||||
|
||||
master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
|
||||
|
||||
player_responses, player_url = self._extract_player_responses(
|
||||
self._get_requested_clients(url, smuggled_data),
|
||||
video_id, webpage, master_ytcfg)
|
||||
video_id, webpage, master_ytcfg, smuggled_data)
|
||||
|
||||
return webpage, master_ytcfg, player_responses, player_url
|
||||
|
||||
@@ -3588,7 +3689,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
formats.extend(self._extract_storyboard(player_responses, duration))
|
||||
|
||||
# source_preference is lower for throttled/potentially damaged formats
|
||||
self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'channels', 'source', 'codec:vp9.2', 'lang', 'proto'))
|
||||
self._sort_formats(formats, (
|
||||
'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
@@ -3863,7 +3965,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
upload_date = (
|
||||
unified_strdate(get_first(microformats, 'uploadDate'))
|
||||
or unified_strdate(search_meta('uploadDate')))
|
||||
if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):
|
||||
if not upload_date or (
|
||||
not info.get('is_live')
|
||||
and not info.get('was_live')
|
||||
and info.get('live_status') != 'is_upcoming'
|
||||
and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
|
||||
):
|
||||
upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date
|
||||
info['upload_date'] = upload_date
|
||||
|
||||
@@ -5832,7 +5939,7 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
|
||||
|
||||
|
||||
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
|
||||
IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
|
||||
IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
|
||||
IE_NAME = 'youtube:music:search_url'
|
||||
_VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
|
||||
_TESTS = [{
|
||||
@@ -5970,7 +6077,7 @@ class YoutubeStoriesIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
playlist_id = f'RLTD{self._match_id(url)}'
|
||||
return self.url_result(
|
||||
f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1',
|
||||
smuggle_url(f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', {'is_story': True}),
|
||||
ie=YoutubeTabIE, video_id=playlist_id)
|
||||
|
||||
|
||||
|
||||
@@ -2,10 +2,7 @@ import re
|
||||
from uuid import uuid4
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..compat import compat_HTTPError, compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@@ -237,30 +234,26 @@ class ZattooPlatformBaseIE(InfoExtractor):
|
||||
ondemand_termtoken=ondemand_termtoken, ondemand_type=ondemand_type)
|
||||
return info_dict
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, record_id = self._match_valid_url(url).groups()
|
||||
return getattr(self, f'_extract_{self._TYPE}')(video_id or record_id)
|
||||
|
||||
def _make_valid_url(host):
|
||||
return rf'https?://(?:www\.)?{re.escape(host)}/watch/[^/]+?/(?P<id>[0-9]+)[^/]+(?:/(?P<recid>[0-9]+))?'
|
||||
|
||||
def _create_valid_url(host, match, qs, base_re=None):
|
||||
match_base = fr'|{base_re}/(?P<vid1>{match})' if base_re else '(?P<vid1>)'
|
||||
return rf'''(?x)https?://(?:www\.)?{re.escape(host)}/(?:
|
||||
[^?#]+\?(?:[^#]+&)?{qs}=(?P<vid2>{match})
|
||||
{match_base}
|
||||
)'''
|
||||
|
||||
|
||||
class ZattooBaseIE(ZattooPlatformBaseIE):
|
||||
_NETRC_MACHINE = 'zattoo'
|
||||
_HOST = 'zattoo.com'
|
||||
|
||||
@staticmethod
|
||||
def _create_valid_url(match, qs, base_re=None):
|
||||
match_base = fr'|{base_re}/(?P<vid1>{match})' if base_re else '(?P<vid1>)'
|
||||
return rf'''(?x)https?://(?:www\.)?zattoo\.com/(?:
|
||||
[^?#]+\?(?:[^#]+&)?{qs}=(?P<vid2>{match})
|
||||
{match_base}
|
||||
)'''
|
||||
|
||||
def _real_extract(self, url):
|
||||
vid1, vid2 = self._match_valid_url(url).group('vid1', 'vid2')
|
||||
return getattr(self, f'_extract_{self._TYPE}')(vid1 or vid2)
|
||||
|
||||
|
||||
class ZattooIE(ZattooBaseIE):
|
||||
_VALID_URL = ZattooBaseIE._create_valid_url(r'\d+', 'program', '(?:program|watch)/[^/]+')
|
||||
_VALID_URL = _create_valid_url(ZattooBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
|
||||
_TYPE = 'video'
|
||||
_TESTS = [{
|
||||
'url': 'https://zattoo.com/program/zdf/250170418',
|
||||
@@ -287,7 +280,7 @@ class ZattooIE(ZattooBaseIE):
|
||||
|
||||
|
||||
class ZattooLiveIE(ZattooBaseIE):
|
||||
_VALID_URL = ZattooBaseIE._create_valid_url(r'[^/?&#]+', 'channel', 'live')
|
||||
_VALID_URL = _create_valid_url(ZattooBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
|
||||
_TYPE = 'live'
|
||||
_TESTS = [{
|
||||
'url': 'https://zattoo.com/channels/german?channel=srf_zwei',
|
||||
@@ -303,7 +296,7 @@ class ZattooLiveIE(ZattooBaseIE):
|
||||
|
||||
|
||||
class ZattooMoviesIE(ZattooBaseIE):
|
||||
_VALID_URL = ZattooBaseIE._create_valid_url(r'\w+', 'movie_id', 'vod/movies')
|
||||
_VALID_URL = _create_valid_url(ZattooBaseIE._HOST, r'\w+', 'movie_id', 'vod/movies')
|
||||
_TYPE = 'ondemand'
|
||||
_TESTS = [{
|
||||
'url': 'https://zattoo.com/vod/movies/7521',
|
||||
@@ -315,7 +308,7 @@ class ZattooMoviesIE(ZattooBaseIE):
|
||||
|
||||
|
||||
class ZattooRecordingsIE(ZattooBaseIE):
|
||||
_VALID_URL = ZattooBaseIE._create_valid_url(r'\d+', 'recording')
|
||||
_VALID_URL = _create_valid_url('zattoo.com', r'\d+', 'recording')
|
||||
_TYPE = 'record'
|
||||
_TESTS = [{
|
||||
'url': 'https://zattoo.com/recordings?recording=193615508',
|
||||
@@ -326,139 +319,547 @@ class ZattooRecordingsIE(ZattooBaseIE):
|
||||
}]
|
||||
|
||||
|
||||
class NetPlusIE(ZattooPlatformBaseIE):
|
||||
class NetPlusTVBaseIE(ZattooPlatformBaseIE):
|
||||
_NETRC_MACHINE = 'netplus'
|
||||
_HOST = 'netplus.tv'
|
||||
_API_HOST = 'www.%s' % _HOST
|
||||
_VALID_URL = _make_valid_url(_HOST)
|
||||
|
||||
|
||||
class NetPlusTVIE(NetPlusTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(NetPlusTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
|
||||
_TYPE = 'video'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.netplus.tv/watch/abc/123-abc',
|
||||
'url': 'https://netplus.tv/program/daserste/210177916',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://netplus.tv/guide/german?channel=srf1&program=169860555',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class MNetTVIE(ZattooPlatformBaseIE):
|
||||
class NetPlusTVLiveIE(NetPlusTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(NetPlusTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
|
||||
_TYPE = 'live'
|
||||
_TESTS = [{
|
||||
'url': 'https://netplus.tv/channels/german?channel=srf_zwei',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://netplus.tv/live/srf1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if NetPlusTVIE.suitable(url) else super().suitable(url)
|
||||
|
||||
|
||||
class NetPlusTVRecordingsIE(NetPlusTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(NetPlusTVBaseIE._HOST, r'\d+', 'recording')
|
||||
_TYPE = 'record'
|
||||
_TESTS = [{
|
||||
'url': 'https://netplus.tv/recordings?recording=193615508',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://netplus.tv/tc/ptc_recordings_all_recordings?recording=193615420',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class MNetTVBaseIE(ZattooPlatformBaseIE):
|
||||
_NETRC_MACHINE = 'mnettv'
|
||||
_HOST = 'tvplus.m-net.de'
|
||||
_VALID_URL = _make_valid_url(_HOST)
|
||||
|
||||
|
||||
class MNetTVIE(MNetTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(MNetTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
|
||||
_TYPE = 'video'
|
||||
_TESTS = [{
|
||||
'url': 'https://tvplus.m-net.de/watch/abc/123-abc',
|
||||
'url': 'https://tvplus.m-net.de/program/daserste/210177916',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tvplus.m-net.de/guide/german?channel=srf1&program=169860555',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class WalyTVIE(ZattooPlatformBaseIE):
|
||||
class MNetTVLiveIE(MNetTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(MNetTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
|
||||
_TYPE = 'live'
|
||||
_TESTS = [{
|
||||
'url': 'https://tvplus.m-net.de/channels/german?channel=srf_zwei',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tvplus.m-net.de/live/srf1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if MNetTVIE.suitable(url) else super().suitable(url)
|
||||
|
||||
|
||||
class MNetTVRecordingsIE(MNetTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(MNetTVBaseIE._HOST, r'\d+', 'recording')
|
||||
_TYPE = 'record'
|
||||
_TESTS = [{
|
||||
'url': 'https://tvplus.m-net.de/recordings?recording=193615508',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tvplus.m-net.de/tc/ptc_recordings_all_recordings?recording=193615420',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class WalyTVBaseIE(ZattooPlatformBaseIE):
|
||||
_NETRC_MACHINE = 'walytv'
|
||||
_HOST = 'player.waly.tv'
|
||||
_VALID_URL = _make_valid_url(_HOST)
|
||||
|
||||
|
||||
class WalyTVIE(WalyTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(WalyTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
|
||||
_TYPE = 'video'
|
||||
_TESTS = [{
|
||||
'url': 'https://player.waly.tv/watch/abc/123-abc',
|
||||
'url': 'https://player.waly.tv/program/daserste/210177916',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://player.waly.tv/guide/german?channel=srf1&program=169860555',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class BBVTVIE(ZattooPlatformBaseIE):
|
||||
class WalyTVLiveIE(WalyTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(WalyTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
|
||||
_TYPE = 'live'
|
||||
_TESTS = [{
|
||||
'url': 'https://player.waly.tv/channels/german?channel=srf_zwei',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://player.waly.tv/live/srf1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if WalyTVIE.suitable(url) else super().suitable(url)
|
||||
|
||||
|
||||
class WalyTVRecordingsIE(WalyTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(WalyTVBaseIE._HOST, r'\d+', 'recording')
|
||||
_TYPE = 'record'
|
||||
_TESTS = [{
|
||||
'url': 'https://player.waly.tv/recordings?recording=193615508',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://player.waly.tv/tc/ptc_recordings_all_recordings?recording=193615420',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class BBVTVBaseIE(ZattooPlatformBaseIE):
|
||||
_NETRC_MACHINE = 'bbvtv'
|
||||
_HOST = 'bbv-tv.net'
|
||||
_API_HOST = 'www.%s' % _HOST
|
||||
_VALID_URL = _make_valid_url(_HOST)
|
||||
|
||||
|
||||
class BBVTVIE(BBVTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(BBVTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
|
||||
_TYPE = 'video'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bbv-tv.net/watch/abc/123-abc',
|
||||
'url': 'https://bbv-tv.net/program/daserste/210177916',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://bbv-tv.net/guide/german?channel=srf1&program=169860555',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class VTXTVIE(ZattooPlatformBaseIE):
|
||||
class BBVTVLiveIE(BBVTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(BBVTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
|
||||
_TYPE = 'live'
|
||||
_TESTS = [{
|
||||
'url': 'https://bbv-tv.net/channels/german?channel=srf_zwei',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://bbv-tv.net/live/srf1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if BBVTVIE.suitable(url) else super().suitable(url)
|
||||
|
||||
|
||||
class BBVTVRecordingsIE(BBVTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(BBVTVBaseIE._HOST, r'\d+', 'recording')
|
||||
_TYPE = 'record'
|
||||
_TESTS = [{
|
||||
'url': 'https://bbv-tv.net/recordings?recording=193615508',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://bbv-tv.net/tc/ptc_recordings_all_recordings?recording=193615420',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class VTXTVBaseIE(ZattooPlatformBaseIE):
|
||||
_NETRC_MACHINE = 'vtxtv'
|
||||
_HOST = 'vtxtv.ch'
|
||||
_API_HOST = 'www.%s' % _HOST
|
||||
_VALID_URL = _make_valid_url(_HOST)
|
||||
|
||||
|
||||
class VTXTVIE(VTXTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(VTXTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
|
||||
_TYPE = 'video'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.vtxtv.ch/watch/abc/123-abc',
|
||||
'url': 'https://vtxtv.ch/program/daserste/210177916',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vtxtv.ch/guide/german?channel=srf1&program=169860555',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class GlattvisionTVIE(ZattooPlatformBaseIE):
|
||||
class VTXTVLiveIE(VTXTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(VTXTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
|
||||
_TYPE = 'live'
|
||||
_TESTS = [{
|
||||
'url': 'https://vtxtv.ch/channels/german?channel=srf_zwei',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vtxtv.ch/live/srf1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if VTXTVIE.suitable(url) else super().suitable(url)
|
||||
|
||||
|
||||
class VTXTVRecordingsIE(VTXTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(VTXTVBaseIE._HOST, r'\d+', 'recording')
|
||||
_TYPE = 'record'
|
||||
_TESTS = [{
|
||||
'url': 'https://vtxtv.ch/recordings?recording=193615508',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vtxtv.ch/tc/ptc_recordings_all_recordings?recording=193615420',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class GlattvisionTVBaseIE(ZattooPlatformBaseIE):
|
||||
_NETRC_MACHINE = 'glattvisiontv'
|
||||
_HOST = 'iptv.glattvision.ch'
|
||||
_VALID_URL = _make_valid_url(_HOST)
|
||||
|
||||
|
||||
class GlattvisionTVIE(GlattvisionTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(GlattvisionTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
|
||||
_TYPE = 'video'
|
||||
_TESTS = [{
|
||||
'url': 'https://iptv.glattvision.ch/watch/abc/123-abc',
|
||||
'url': 'https://iptv.glattvision.ch/program/daserste/210177916',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://iptv.glattvision.ch/guide/german?channel=srf1&program=169860555',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class SAKTVIE(ZattooPlatformBaseIE):
|
||||
class GlattvisionTVLiveIE(GlattvisionTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(GlattvisionTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
|
||||
_TYPE = 'live'
|
||||
_TESTS = [{
|
||||
'url': 'https://iptv.glattvision.ch/channels/german?channel=srf_zwei',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://iptv.glattvision.ch/live/srf1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if GlattvisionTVIE.suitable(url) else super().suitable(url)
|
||||
|
||||
|
||||
class GlattvisionTVRecordingsIE(GlattvisionTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(GlattvisionTVBaseIE._HOST, r'\d+', 'recording')
|
||||
_TYPE = 'record'
|
||||
_TESTS = [{
|
||||
'url': 'https://iptv.glattvision.ch/recordings?recording=193615508',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://iptv.glattvision.ch/tc/ptc_recordings_all_recordings?recording=193615420',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class SAKTVBaseIE(ZattooPlatformBaseIE):
|
||||
_NETRC_MACHINE = 'saktv'
|
||||
_HOST = 'saktv.ch'
|
||||
_API_HOST = 'www.%s' % _HOST
|
||||
_VALID_URL = _make_valid_url(_HOST)
|
||||
|
||||
|
||||
class SAKTVIE(SAKTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(SAKTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
|
||||
_TYPE = 'video'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.saktv.ch/watch/abc/123-abc',
|
||||
'url': 'https://saktv.ch/program/daserste/210177916',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://saktv.ch/guide/german?channel=srf1&program=169860555',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class EWETVIE(ZattooPlatformBaseIE):
|
||||
class SAKTVLiveIE(SAKTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(SAKTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
|
||||
_TYPE = 'live'
|
||||
_TESTS = [{
|
||||
'url': 'https://saktv.ch/channels/german?channel=srf_zwei',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://saktv.ch/live/srf1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if SAKTVIE.suitable(url) else super().suitable(url)
|
||||
|
||||
|
||||
class SAKTVRecordingsIE(SAKTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(SAKTVBaseIE._HOST, r'\d+', 'recording')
|
||||
_TYPE = 'record'
|
||||
_TESTS = [{
|
||||
'url': 'https://saktv.ch/recordings?recording=193615508',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://saktv.ch/tc/ptc_recordings_all_recordings?recording=193615420',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class EWETVBaseIE(ZattooPlatformBaseIE):
|
||||
_NETRC_MACHINE = 'ewetv'
|
||||
_HOST = 'tvonline.ewe.de'
|
||||
_VALID_URL = _make_valid_url(_HOST)
|
||||
|
||||
|
||||
class EWETVIE(EWETVBaseIE):
|
||||
_VALID_URL = _create_valid_url(EWETVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
|
||||
_TYPE = 'video'
|
||||
_TESTS = [{
|
||||
'url': 'https://tvonline.ewe.de/watch/abc/123-abc',
|
||||
'url': 'https://tvonline.ewe.de/program/daserste/210177916',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tvonline.ewe.de/guide/german?channel=srf1&program=169860555',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class QuantumTVIE(ZattooPlatformBaseIE):
|
||||
class EWETVLiveIE(EWETVBaseIE):
|
||||
_VALID_URL = _create_valid_url(EWETVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
|
||||
_TYPE = 'live'
|
||||
_TESTS = [{
|
||||
'url': 'https://tvonline.ewe.de/channels/german?channel=srf_zwei',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tvonline.ewe.de/live/srf1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if EWETVIE.suitable(url) else super().suitable(url)
|
||||
|
||||
|
||||
class EWETVRecordingsIE(EWETVBaseIE):
|
||||
_VALID_URL = _create_valid_url(EWETVBaseIE._HOST, r'\d+', 'recording')
|
||||
_TYPE = 'record'
|
||||
_TESTS = [{
|
||||
'url': 'https://tvonline.ewe.de/recordings?recording=193615508',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tvonline.ewe.de/tc/ptc_recordings_all_recordings?recording=193615420',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class QuantumTVBaseIE(ZattooPlatformBaseIE):
|
||||
_NETRC_MACHINE = 'quantumtv'
|
||||
_HOST = 'quantum-tv.com'
|
||||
_API_HOST = 'www.%s' % _HOST
|
||||
_VALID_URL = _make_valid_url(_HOST)
|
||||
|
||||
|
||||
class QuantumTVIE(QuantumTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(QuantumTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
|
||||
_TYPE = 'video'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.quantum-tv.com/watch/abc/123-abc',
|
||||
'url': 'https://quantum-tv.com/program/daserste/210177916',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://quantum-tv.com/guide/german?channel=srf1&program=169860555',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class OsnatelTVIE(ZattooPlatformBaseIE):
|
||||
class QuantumTVLiveIE(QuantumTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(QuantumTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
|
||||
_TYPE = 'live'
|
||||
_TESTS = [{
|
||||
'url': 'https://quantum-tv.com/channels/german?channel=srf_zwei',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://quantum-tv.com/live/srf1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if QuantumTVIE.suitable(url) else super().suitable(url)
|
||||
|
||||
|
||||
class QuantumTVRecordingsIE(QuantumTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(QuantumTVBaseIE._HOST, r'\d+', 'recording')
|
||||
_TYPE = 'record'
|
||||
_TESTS = [{
|
||||
'url': 'https://quantum-tv.com/recordings?recording=193615508',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://quantum-tv.com/tc/ptc_recordings_all_recordings?recording=193615420',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class OsnatelTVBaseIE(ZattooPlatformBaseIE):
|
||||
_NETRC_MACHINE = 'osnateltv'
|
||||
_HOST = 'tvonline.osnatel.de'
|
||||
_VALID_URL = _make_valid_url(_HOST)
|
||||
|
||||
|
||||
class OsnatelTVIE(OsnatelTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(OsnatelTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
|
||||
_TYPE = 'video'
|
||||
_TESTS = [{
|
||||
'url': 'https://tvonline.osnatel.de/watch/abc/123-abc',
|
||||
'url': 'https://tvonline.osnatel.de/program/daserste/210177916',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tvonline.osnatel.de/guide/german?channel=srf1&program=169860555',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class EinsUndEinsTVIE(ZattooPlatformBaseIE):
|
||||
class OsnatelTVLiveIE(OsnatelTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(OsnatelTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
|
||||
_TYPE = 'live'
|
||||
_TESTS = [{
|
||||
'url': 'https://tvonline.osnatel.de/channels/german?channel=srf_zwei',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tvonline.osnatel.de/live/srf1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if OsnatelTVIE.suitable(url) else super().suitable(url)
|
||||
|
||||
|
||||
class OsnatelTVRecordingsIE(OsnatelTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(OsnatelTVBaseIE._HOST, r'\d+', 'recording')
|
||||
_TYPE = 'record'
|
||||
_TESTS = [{
|
||||
'url': 'https://tvonline.osnatel.de/recordings?recording=193615508',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tvonline.osnatel.de/tc/ptc_recordings_all_recordings?recording=193615420',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class EinsUndEinsTVBaseIE(ZattooPlatformBaseIE):
|
||||
_NETRC_MACHINE = '1und1tv'
|
||||
_HOST = '1und1.tv'
|
||||
_API_HOST = 'www.%s' % _HOST
|
||||
_VALID_URL = _make_valid_url(_HOST)
|
||||
|
||||
|
||||
class EinsUndEinsTVIE(EinsUndEinsTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(EinsUndEinsTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
|
||||
_TYPE = 'video'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.1und1.tv/watch/abc/123-abc',
|
||||
'url': 'https://1und1.tv/program/daserste/210177916',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://1und1.tv/guide/german?channel=srf1&program=169860555',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class SaltTVIE(ZattooPlatformBaseIE):
|
||||
class EinsUndEinsTVLiveIE(EinsUndEinsTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(EinsUndEinsTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
|
||||
_TYPE = 'live'
|
||||
_TESTS = [{
|
||||
'url': 'https://1und1.tv/channels/german?channel=srf_zwei',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://1und1.tv/live/srf1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if EinsUndEinsTVIE.suitable(url) else super().suitable(url)
|
||||
|
||||
|
||||
class EinsUndEinsTVRecordingsIE(EinsUndEinsTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(EinsUndEinsTVBaseIE._HOST, r'\d+', 'recording')
|
||||
_TYPE = 'record'
|
||||
_TESTS = [{
|
||||
'url': 'https://1und1.tv/recordings?recording=193615508',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://1und1.tv/tc/ptc_recordings_all_recordings?recording=193615420',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class SaltTVBaseIE(ZattooPlatformBaseIE):
|
||||
_NETRC_MACHINE = 'salttv'
|
||||
_HOST = 'tv.salt.ch'
|
||||
_VALID_URL = _make_valid_url(_HOST)
|
||||
|
||||
|
||||
class SaltTVIE(SaltTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(SaltTVBaseIE._HOST, r'\d+', 'program', '(?:program|watch)/[^/]+')
|
||||
_TYPE = 'video'
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.salt.ch/watch/abc/123-abc',
|
||||
'url': 'https://tv.salt.ch/program/daserste/210177916',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tv.salt.ch/guide/german?channel=srf1&program=169860555',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class SaltTVLiveIE(SaltTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(SaltTVBaseIE._HOST, r'[^/?&#]+', 'channel', 'live')
|
||||
_TYPE = 'live'
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.salt.ch/channels/german?channel=srf_zwei',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tv.salt.ch/live/srf1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if SaltTVIE.suitable(url) else super().suitable(url)
|
||||
|
||||
|
||||
class SaltTVRecordingsIE(SaltTVBaseIE):
|
||||
_VALID_URL = _create_valid_url(SaltTVBaseIE._HOST, r'\d+', 'recording')
|
||||
_TYPE = 'record'
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.salt.ch/recordings?recording=193615508',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tv.salt.ch/tc/ptc_recordings_all_recordings?recording=193615420',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@@ -1,27 +1,136 @@
|
||||
import collections
|
||||
import contextlib
|
||||
import itertools
|
||||
import json
|
||||
import math
|
||||
import operator
|
||||
import re
|
||||
|
||||
from .utils import ExtractorError, remove_quotes
|
||||
from .utils import (
|
||||
NO_DEFAULT,
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
remove_quotes,
|
||||
truncate_string,
|
||||
unified_timestamp,
|
||||
write_string,
|
||||
)
|
||||
|
||||
_NAME_RE = r'[a-zA-Z_$][\w$]*'
|
||||
_OPERATORS = {
|
||||
'|': operator.or_,
|
||||
'^': operator.xor,
|
||||
'&': operator.and_,
|
||||
'>>': operator.rshift,
|
||||
'<<': operator.lshift,
|
||||
'-': operator.sub,
|
||||
'+': operator.add,
|
||||
'%': operator.mod,
|
||||
'/': operator.truediv,
|
||||
'*': operator.mul,
|
||||
|
||||
def _js_bit_op(op):
|
||||
def zeroise(x):
|
||||
return 0 if x in (None, JS_Undefined) else x
|
||||
|
||||
def wrapped(a, b):
|
||||
return op(zeroise(a), zeroise(b)) & 0xffffffff
|
||||
|
||||
return wrapped
|
||||
|
||||
|
||||
def _js_arith_op(op):
|
||||
|
||||
def wrapped(a, b):
|
||||
if JS_Undefined in (a, b):
|
||||
return float('nan')
|
||||
return op(a or 0, b or 0)
|
||||
|
||||
return wrapped
|
||||
|
||||
|
||||
def _js_div(a, b):
|
||||
if JS_Undefined in (a, b) or not (a and b):
|
||||
return float('nan')
|
||||
return (a or 0) / b if b else float('inf')
|
||||
|
||||
|
||||
def _js_mod(a, b):
|
||||
if JS_Undefined in (a, b) or not b:
|
||||
return float('nan')
|
||||
return (a or 0) % b
|
||||
|
||||
|
||||
def _js_exp(a, b):
|
||||
if not b:
|
||||
return 1 # even 0 ** 0 !!
|
||||
elif JS_Undefined in (a, b):
|
||||
return float('nan')
|
||||
return (a or 0) ** b
|
||||
|
||||
|
||||
def _js_eq_op(op):
|
||||
|
||||
def wrapped(a, b):
|
||||
if {a, b} <= {None, JS_Undefined}:
|
||||
return op(a, a)
|
||||
return op(a, b)
|
||||
|
||||
return wrapped
|
||||
|
||||
|
||||
def _js_comp_op(op):
|
||||
|
||||
def wrapped(a, b):
|
||||
if JS_Undefined in (a, b):
|
||||
return False
|
||||
if isinstance(a, str) or isinstance(b, str):
|
||||
return op(str(a or 0), str(b or 0))
|
||||
return op(a or 0, b or 0)
|
||||
|
||||
return wrapped
|
||||
|
||||
|
||||
def _js_ternary(cndn, if_true=True, if_false=False):
|
||||
"""Simulate JS's ternary operator (cndn?if_true:if_false)"""
|
||||
if cndn in (False, None, 0, '', JS_Undefined):
|
||||
return if_false
|
||||
with contextlib.suppress(TypeError):
|
||||
if math.isnan(cndn): # NB: NaN cannot be checked by membership
|
||||
return if_false
|
||||
return if_true
|
||||
|
||||
|
||||
# Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
|
||||
_OPERATORS = { # None => Defined in JSInterpreter._operator
|
||||
'?': None,
|
||||
'??': None,
|
||||
'||': None,
|
||||
'&&': None,
|
||||
|
||||
'|': _js_bit_op(operator.or_),
|
||||
'^': _js_bit_op(operator.xor),
|
||||
'&': _js_bit_op(operator.and_),
|
||||
|
||||
'===': operator.is_,
|
||||
'!==': operator.is_not,
|
||||
'==': _js_eq_op(operator.eq),
|
||||
'!=': _js_eq_op(operator.ne),
|
||||
|
||||
'<=': _js_comp_op(operator.le),
|
||||
'>=': _js_comp_op(operator.ge),
|
||||
'<': _js_comp_op(operator.lt),
|
||||
'>': _js_comp_op(operator.gt),
|
||||
|
||||
'>>': _js_bit_op(operator.rshift),
|
||||
'<<': _js_bit_op(operator.lshift),
|
||||
|
||||
'+': _js_arith_op(operator.add),
|
||||
'-': _js_arith_op(operator.sub),
|
||||
|
||||
'*': _js_arith_op(operator.mul),
|
||||
'/': _js_div,
|
||||
'%': _js_mod,
|
||||
'**': _js_exp,
|
||||
}
|
||||
|
||||
_MATCHING_PARENS = dict(zip('({[', ')}]'))
|
||||
_QUOTES = '\'"'
|
||||
_COMP_OPERATORS = {'===', '!==', '==', '!=', '<=', '>=', '<', '>'}
|
||||
|
||||
_NAME_RE = r'[a-zA-Z_$][\w$]*'
|
||||
_MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]')))
|
||||
_QUOTES = '\'"/'
|
||||
|
||||
|
||||
class JS_Undefined:
|
||||
pass
|
||||
|
||||
|
||||
class JS_Break(ExtractorError):
|
||||
@@ -34,6 +143,12 @@ class JS_Continue(ExtractorError):
|
||||
ExtractorError.__init__(self, 'Invalid continue')
|
||||
|
||||
|
||||
class JS_Throw(ExtractorError):
|
||||
def __init__(self, e):
|
||||
self.error = e
|
||||
ExtractorError.__init__(self, f'Uncaught exception {e}')
|
||||
|
||||
|
||||
class LocalNameSpace(collections.ChainMap):
|
||||
def __setitem__(self, key, value):
|
||||
for scope in self.maps:
|
||||
@@ -46,34 +161,100 @@ class LocalNameSpace(collections.ChainMap):
|
||||
raise NotImplementedError('Deleting is not supported')
|
||||
|
||||
|
||||
class Debugger:
|
||||
import sys
|
||||
ENABLED = False and 'pytest' in sys.modules
|
||||
|
||||
@staticmethod
|
||||
def write(*args, level=100):
|
||||
write_string(f'[debug] JS: {" " * (100 - level)}'
|
||||
f'{" ".join(truncate_string(str(x), 50, 50) for x in args)}\n')
|
||||
|
||||
@classmethod
|
||||
def wrap_interpreter(cls, f):
|
||||
def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
|
||||
if cls.ENABLED and stmt.strip():
|
||||
cls.write(stmt, level=allow_recursion)
|
||||
try:
|
||||
ret, should_ret = f(self, stmt, local_vars, allow_recursion, *args, **kwargs)
|
||||
except Exception as e:
|
||||
if cls.ENABLED:
|
||||
if isinstance(e, ExtractorError):
|
||||
e = e.orig_msg
|
||||
cls.write('=> Raises:', e, '<-|', stmt, level=allow_recursion)
|
||||
raise
|
||||
if cls.ENABLED and stmt.strip():
|
||||
cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion)
|
||||
return ret, should_ret
|
||||
return interpret_statement
|
||||
|
||||
|
||||
class JSInterpreter:
|
||||
__named_object_counter = 0
|
||||
|
||||
_RE_FLAGS = {
|
||||
# special knowledge: Python's re flags are bitmask values, current max 128
|
||||
# invent new bitmask values well above that for literal parsing
|
||||
# TODO: new pattern class to execute matches with these flags
|
||||
'd': 1024, # Generate indices for substring matches
|
||||
'g': 2048, # Global search
|
||||
'i': re.I, # Case-insensitive search
|
||||
'm': re.M, # Multi-line search
|
||||
's': re.S, # Allows . to match newline characters
|
||||
'u': re.U, # Treat a pattern as a sequence of unicode code points
|
||||
'y': 4096, # Perform a "sticky" search that matches starting at the current position in the target string
|
||||
}
|
||||
|
||||
_EXC_NAME = '__yt_dlp_exception__'
|
||||
|
||||
def __init__(self, code, objects=None):
|
||||
self.code, self._functions = code, {}
|
||||
self._objects = {} if objects is None else objects
|
||||
|
||||
class Exception(ExtractorError):
|
||||
def __init__(self, msg, expr=None, *args, **kwargs):
|
||||
if expr is not None:
|
||||
msg = f'{msg.rstrip()} in: {truncate_string(expr, 50, 50)}'
|
||||
super().__init__(msg, *args, **kwargs)
|
||||
|
||||
def _named_object(self, namespace, obj):
|
||||
self.__named_object_counter += 1
|
||||
name = f'__yt_dlp_jsinterp_obj{self.__named_object_counter}'
|
||||
namespace[name] = obj
|
||||
return name
|
||||
|
||||
@classmethod
|
||||
def _regex_flags(cls, expr):
|
||||
flags = 0
|
||||
if not expr:
|
||||
return flags, expr
|
||||
for idx, ch in enumerate(expr):
|
||||
if ch not in cls._RE_FLAGS:
|
||||
break
|
||||
flags |= cls._RE_FLAGS[ch]
|
||||
return flags, expr[idx + 1:]
|
||||
|
||||
@staticmethod
|
||||
def _separate(expr, delim=',', max_split=None):
|
||||
OP_CHARS = '+-*/%&|^=<>!,;{}:'
|
||||
if not expr:
|
||||
return
|
||||
counters = {k: 0 for k in _MATCHING_PARENS.values()}
|
||||
start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
|
||||
in_quote, escaping = None, False
|
||||
in_quote, escaping, after_op, in_regex_char_group = None, False, True, False
|
||||
for idx, char in enumerate(expr):
|
||||
if char in _MATCHING_PARENS:
|
||||
if not in_quote and char in _MATCHING_PARENS:
|
||||
counters[_MATCHING_PARENS[char]] += 1
|
||||
elif char in counters:
|
||||
elif not in_quote and char in counters:
|
||||
counters[char] -= 1
|
||||
elif not escaping and char in _QUOTES and in_quote in (char, None):
|
||||
in_quote = None if in_quote else char
|
||||
elif not escaping:
|
||||
if char in _QUOTES and in_quote in (char, None):
|
||||
if in_quote or after_op or char != '/':
|
||||
in_quote = None if in_quote and not in_regex_char_group else char
|
||||
elif in_quote == '/' and char in '[]':
|
||||
in_regex_char_group = char == '['
|
||||
escaping = not escaping and in_quote and char == '\\'
|
||||
after_op = not in_quote and char in OP_CHARS or (char.isspace() and after_op)
|
||||
|
||||
if char != delim[pos] or any(counters.values()) or in_quote:
|
||||
pos = 0
|
||||
@@ -89,114 +270,205 @@ class JSInterpreter:
|
||||
yield expr[start:]
|
||||
|
||||
@classmethod
|
||||
def _separate_at_paren(cls, expr, delim):
|
||||
def _separate_at_paren(cls, expr, delim=None):
|
||||
if delim is None:
|
||||
delim = expr and _MATCHING_PARENS[expr[0]]
|
||||
separated = list(cls._separate(expr, delim, 1))
|
||||
if len(separated) < 2:
|
||||
raise ExtractorError(f'No terminating paren {delim} in {expr}')
|
||||
raise cls.Exception(f'No terminating paren {delim}', expr)
|
||||
return separated[0][1:].strip(), separated[1].strip()
|
||||
|
||||
def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
|
||||
if op in ('||', '&&'):
|
||||
if (op == '&&') ^ _js_ternary(left_val):
|
||||
return left_val # short circuiting
|
||||
elif op == '??':
|
||||
if left_val not in (None, JS_Undefined):
|
||||
return left_val
|
||||
elif op == '?':
|
||||
right_expr = _js_ternary(left_val, *self._separate(right_expr, ':', 1))
|
||||
|
||||
right_val = self.interpret_expression(right_expr, local_vars, allow_recursion)
|
||||
if not _OPERATORS.get(op):
|
||||
return right_val
|
||||
|
||||
try:
|
||||
return _OPERATORS[op](left_val, right_val)
|
||||
except Exception as e:
|
||||
raise self.Exception(f'Failed to evaluate {left_val!r} {op} {right_val!r}', expr, cause=e)
|
||||
|
||||
def _index(self, obj, idx, allow_undefined=False):
|
||||
if idx == 'length':
|
||||
return len(obj)
|
||||
try:
|
||||
return obj[int(idx)] if isinstance(obj, list) else obj[idx]
|
||||
except Exception as e:
|
||||
if allow_undefined:
|
||||
return JS_Undefined
|
||||
raise self.Exception(f'Cannot get index {idx}', repr(obj), cause=e)
|
||||
|
||||
def _dump(self, obj, namespace):
|
||||
try:
|
||||
return json.dumps(obj)
|
||||
except TypeError:
|
||||
return self._named_object(namespace, obj)
|
||||
|
||||
@Debugger.wrap_interpreter
|
||||
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
||||
if allow_recursion < 0:
|
||||
raise ExtractorError('Recursion limit reached')
|
||||
raise self.Exception('Recursion limit reached')
|
||||
allow_recursion -= 1
|
||||
|
||||
should_abort = False
|
||||
should_return = False
|
||||
sub_statements = list(self._separate(stmt, ';')) or ['']
|
||||
stmt = sub_statements.pop().lstrip()
|
||||
expr = stmt = sub_statements.pop().strip()
|
||||
|
||||
for sub_stmt in sub_statements:
|
||||
ret, should_abort = self.interpret_statement(sub_stmt, local_vars, allow_recursion - 1)
|
||||
if should_abort:
|
||||
return ret, should_abort
|
||||
ret, should_return = self.interpret_statement(sub_stmt, local_vars, allow_recursion)
|
||||
if should_return:
|
||||
return ret, should_return
|
||||
|
||||
m = re.match(r'(?P<var>var\s)|return(?:\s+|$)', stmt)
|
||||
if not m: # Try interpreting it as an expression
|
||||
expr = stmt
|
||||
elif m.group('var'):
|
||||
expr = stmt[len(m.group(0)):]
|
||||
else:
|
||||
expr = stmt[len(m.group(0)):]
|
||||
should_abort = True
|
||||
|
||||
return self.interpret_expression(expr, local_vars, allow_recursion), should_abort
|
||||
|
||||
def interpret_expression(self, expr, local_vars, allow_recursion):
|
||||
expr = expr.strip()
|
||||
m = re.match(r'(?P<var>(?:var|const|let)\s)|return(?:\s+|(?=["\'])|$)|(?P<throw>throw\s+)', stmt)
|
||||
if m:
|
||||
expr = stmt[len(m.group(0)):].strip()
|
||||
if m.group('throw'):
|
||||
raise JS_Throw(self.interpret_expression(expr, local_vars, allow_recursion))
|
||||
should_return = not m.group('var')
|
||||
if not expr:
|
||||
return None
|
||||
return None, should_return
|
||||
|
||||
if expr[0] in _QUOTES:
|
||||
inner, outer = self._separate(expr, expr[0], 1)
|
||||
if expr[0] == '/':
|
||||
flags, outer = self._regex_flags(outer)
|
||||
inner = re.compile(inner[1:], flags=flags)
|
||||
else:
|
||||
inner = json.loads(js_to_json(f'{inner}{expr[0]}', strict=True))
|
||||
if not outer:
|
||||
return inner, should_return
|
||||
expr = self._named_object(local_vars, inner) + outer
|
||||
|
||||
if expr.startswith('new '):
|
||||
obj = expr[4:]
|
||||
if obj.startswith('Date('):
|
||||
left, right = self._separate_at_paren(obj[4:])
|
||||
expr = unified_timestamp(
|
||||
self.interpret_expression(left, local_vars, allow_recursion), False)
|
||||
if not expr:
|
||||
raise self.Exception(f'Failed to parse date {left!r}', expr)
|
||||
expr = self._dump(int(expr * 1000), local_vars) + right
|
||||
else:
|
||||
raise self.Exception(f'Unsupported object {obj}', expr)
|
||||
|
||||
if expr.startswith('void '):
|
||||
left = self.interpret_expression(expr[5:], local_vars, allow_recursion)
|
||||
return None, should_return
|
||||
|
||||
if expr.startswith('{'):
|
||||
inner, outer = self._separate_at_paren(expr, '}')
|
||||
inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion - 1)
|
||||
inner, outer = self._separate_at_paren(expr)
|
||||
# try for object expression (Map)
|
||||
sub_expressions = [list(self._separate(sub_expr.strip(), ':', 1)) for sub_expr in self._separate(inner)]
|
||||
if all(len(sub_expr) == 2 for sub_expr in sub_expressions):
|
||||
def dict_item(key, val):
|
||||
val = self.interpret_expression(val, local_vars, allow_recursion)
|
||||
if re.match(_NAME_RE, key):
|
||||
return key, val
|
||||
return self.interpret_expression(key, local_vars, allow_recursion), val
|
||||
|
||||
return dict(dict_item(k, v) for k, v in sub_expressions), should_return
|
||||
|
||||
inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
|
||||
if not outer or should_abort:
|
||||
return inner
|
||||
return inner, should_abort or should_return
|
||||
else:
|
||||
expr = json.dumps(inner) + outer
|
||||
expr = self._dump(inner, local_vars) + outer
|
||||
|
||||
if expr.startswith('('):
|
||||
inner, outer = self._separate_at_paren(expr, ')')
|
||||
inner = self.interpret_expression(inner, local_vars, allow_recursion)
|
||||
if not outer:
|
||||
return inner
|
||||
inner, outer = self._separate_at_paren(expr)
|
||||
inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
|
||||
if not outer or should_abort:
|
||||
return inner, should_abort or should_return
|
||||
else:
|
||||
expr = json.dumps(inner) + outer
|
||||
expr = self._dump(inner, local_vars) + outer
|
||||
|
||||
if expr.startswith('['):
|
||||
inner, outer = self._separate_at_paren(expr, ']')
|
||||
inner, outer = self._separate_at_paren(expr)
|
||||
name = self._named_object(local_vars, [
|
||||
self.interpret_expression(item, local_vars, allow_recursion)
|
||||
for item in self._separate(inner)])
|
||||
expr = name + outer
|
||||
|
||||
m = re.match(r'(?P<try>try)\s*|(?:(?P<catch>catch)|(?P<for>for)|(?P<switch>switch))\s*\(', expr)
|
||||
if m and m.group('try'):
|
||||
if expr[m.end()] == '{':
|
||||
try_expr, expr = self._separate_at_paren(expr[m.end():], '}')
|
||||
else:
|
||||
try_expr, expr = expr[m.end() - 1:], ''
|
||||
ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion - 1)
|
||||
m = re.match(r'''(?x)
|
||||
(?P<try>try)\s*\{|
|
||||
(?P<switch>switch)\s*\(|
|
||||
(?P<for>for)\s*\(
|
||||
''', expr)
|
||||
md = m.groupdict() if m else {}
|
||||
if md.get('try'):
|
||||
try_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
|
||||
err = None
|
||||
try:
|
||||
ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion)
|
||||
if should_abort:
|
||||
return ret, True
|
||||
except Exception as e:
|
||||
# XXX: This works for now, but makes debugging future issues very hard
|
||||
err = e
|
||||
|
||||
pending = (None, False)
|
||||
m = re.match(r'catch\s*(?P<err>\(\s*{_NAME_RE}\s*\))?\{{'.format(**globals()), expr)
|
||||
if m:
|
||||
sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
|
||||
if err:
|
||||
catch_vars = {}
|
||||
if m.group('err'):
|
||||
catch_vars[m.group('err')] = err.error if isinstance(err, JS_Throw) else err
|
||||
catch_vars = local_vars.new_child(catch_vars)
|
||||
err, pending = None, self.interpret_statement(sub_expr, catch_vars, allow_recursion)
|
||||
|
||||
m = re.match(r'finally\s*\{', expr)
|
||||
if m:
|
||||
sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
|
||||
ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion)
|
||||
if should_abort:
|
||||
return ret, True
|
||||
|
||||
ret, should_abort = pending
|
||||
if should_abort:
|
||||
return ret
|
||||
return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
|
||||
return ret, True
|
||||
|
||||
elif m and m.group('catch'):
|
||||
# We ignore the catch block
|
||||
_, expr = self._separate_at_paren(expr, '}')
|
||||
return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
|
||||
if err:
|
||||
raise err
|
||||
|
||||
elif m and m.group('for'):
|
||||
constructor, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
|
||||
elif md.get('for'):
|
||||
constructor, remaining = self._separate_at_paren(expr[m.end() - 1:])
|
||||
if remaining.startswith('{'):
|
||||
body, expr = self._separate_at_paren(remaining, '}')
|
||||
body, expr = self._separate_at_paren(remaining)
|
||||
else:
|
||||
switch_m = re.match(r'switch\s*\(', remaining) # FIXME
|
||||
if switch_m:
|
||||
switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:], ')')
|
||||
switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:])
|
||||
body, expr = self._separate_at_paren(remaining, '}')
|
||||
body = 'switch(%s){%s}' % (switch_val, body)
|
||||
else:
|
||||
body, expr = remaining, ''
|
||||
start, cndn, increment = self._separate(constructor, ';')
|
||||
if self.interpret_statement(start, local_vars, allow_recursion - 1)[1]:
|
||||
raise ExtractorError(
|
||||
f'Premature return in the initialization of a for loop in {constructor!r}')
|
||||
self.interpret_expression(start, local_vars, allow_recursion)
|
||||
while True:
|
||||
if not self.interpret_expression(cndn, local_vars, allow_recursion):
|
||||
if not _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)):
|
||||
break
|
||||
try:
|
||||
ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion - 1)
|
||||
ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion)
|
||||
if should_abort:
|
||||
return ret
|
||||
return ret, True
|
||||
except JS_Break:
|
||||
break
|
||||
except JS_Continue:
|
||||
pass
|
||||
if self.interpret_statement(increment, local_vars, allow_recursion - 1)[1]:
|
||||
raise ExtractorError(
|
||||
f'Premature return in the initialization of a for loop in {constructor!r}')
|
||||
return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
|
||||
self.interpret_expression(increment, local_vars, allow_recursion)
|
||||
|
||||
elif m and m.group('switch'):
|
||||
switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
|
||||
elif md.get('switch'):
|
||||
switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:])
|
||||
switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion)
|
||||
body, expr = self._separate_at_paren(remaining, '}')
|
||||
items = body.replace('default:', 'case default:').split('case ')[1:]
|
||||
@@ -207,24 +479,31 @@ class JSInterpreter:
|
||||
if default:
|
||||
matched = matched or case == 'default'
|
||||
elif not matched:
|
||||
matched = case != 'default' and switch_val == self.interpret_expression(case, local_vars, allow_recursion)
|
||||
matched = (case != 'default'
|
||||
and switch_val == self.interpret_expression(case, local_vars, allow_recursion))
|
||||
if not matched:
|
||||
continue
|
||||
try:
|
||||
ret, should_abort = self.interpret_statement(stmt, local_vars, allow_recursion - 1)
|
||||
ret, should_abort = self.interpret_statement(stmt, local_vars, allow_recursion)
|
||||
if should_abort:
|
||||
return ret
|
||||
except JS_Break:
|
||||
break
|
||||
if matched:
|
||||
break
|
||||
return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
|
||||
|
||||
if md:
|
||||
ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
|
||||
return ret, should_abort or should_return
|
||||
|
||||
# Comma separated statements
|
||||
sub_expressions = list(self._separate(expr))
|
||||
expr = sub_expressions.pop().strip() if sub_expressions else ''
|
||||
for sub_expr in sub_expressions:
|
||||
self.interpret_expression(sub_expr, local_vars, allow_recursion)
|
||||
if len(sub_expressions) > 1:
|
||||
for sub_expr in sub_expressions:
|
||||
ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion)
|
||||
if should_abort:
|
||||
return ret, True
|
||||
return ret, False
|
||||
|
||||
for m in re.finditer(rf'''(?x)
|
||||
(?P<pre_sign>\+\+|--)(?P<var1>{_NAME_RE})|
|
||||
@@ -236,109 +515,123 @@ class JSInterpreter:
|
||||
local_vars[var] += 1 if sign[0] == '+' else -1
|
||||
if m.group('pre_sign'):
|
||||
ret = local_vars[var]
|
||||
expr = expr[:start] + json.dumps(ret) + expr[end:]
|
||||
expr = expr[:start] + self._dump(ret, local_vars) + expr[end:]
|
||||
|
||||
if not expr:
|
||||
return None
|
||||
return None, should_return
|
||||
|
||||
m = re.match(fr'''(?x)
|
||||
(?P<assign>
|
||||
(?P<out>{_NAME_RE})(?:\[(?P<index>[^\]]+?)\])?\s*
|
||||
(?P<op>{"|".join(map(re.escape, _OPERATORS))})?
|
||||
=(?P<expr>.*)$
|
||||
(?P<op>{"|".join(map(re.escape, set(_OPERATORS) - _COMP_OPERATORS))})?
|
||||
=(?!=)(?P<expr>.*)$
|
||||
)|(?P<return>
|
||||
(?!if|return|true|false|null)(?P<name>{_NAME_RE})$
|
||||
(?!if|return|true|false|null|undefined|NaN)(?P<name>{_NAME_RE})$
|
||||
)|(?P<indexing>
|
||||
(?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
|
||||
)|(?P<attribute>
|
||||
(?P<var>{_NAME_RE})(?:\.(?P<member>[^(]+)|\[(?P<member2>[^\]]+)\])\s*
|
||||
(?P<var>{_NAME_RE})(?:(?P<nullish>\?)?\.(?P<member>[^(]+)|\[(?P<member2>[^\]]+)\])\s*
|
||||
)|(?P<function>
|
||||
(?P<fname>{_NAME_RE})\((?P<args>[\w$,]*)\)$
|
||||
(?P<fname>{_NAME_RE})\((?P<args>.*)\)$
|
||||
)''', expr)
|
||||
if m and m.group('assign'):
|
||||
if not m.group('op'):
|
||||
opfunc = lambda curr, right: right
|
||||
else:
|
||||
opfunc = _OPERATORS[m.group('op')]
|
||||
right_val = self.interpret_expression(m.group('expr'), local_vars, allow_recursion)
|
||||
left_val = local_vars.get(m.group('out'))
|
||||
|
||||
if not m.group('index'):
|
||||
local_vars[m.group('out')] = opfunc(left_val, right_val)
|
||||
return local_vars[m.group('out')]
|
||||
elif left_val is None:
|
||||
raise ExtractorError(f'Cannot index undefined variable: {m.group("out")}')
|
||||
local_vars[m.group('out')] = self._operator(
|
||||
m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion)
|
||||
return local_vars[m.group('out')], should_return
|
||||
elif left_val in (None, JS_Undefined):
|
||||
raise self.Exception(f'Cannot index undefined variable {m.group("out")}', expr)
|
||||
|
||||
idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion)
|
||||
if not isinstance(idx, int):
|
||||
raise ExtractorError(f'List indices must be integers: {idx}')
|
||||
left_val[idx] = opfunc(left_val[idx], right_val)
|
||||
return left_val[idx]
|
||||
if not isinstance(idx, (int, float)):
|
||||
raise self.Exception(f'List index {idx} must be integer', expr)
|
||||
idx = int(idx)
|
||||
left_val[idx] = self._operator(
|
||||
m.group('op'), self._index(left_val, idx), m.group('expr'), expr, local_vars, allow_recursion)
|
||||
return left_val[idx], should_return
|
||||
|
||||
elif expr.isdigit():
|
||||
return int(expr)
|
||||
return int(expr), should_return
|
||||
|
||||
elif expr == 'break':
|
||||
raise JS_Break()
|
||||
elif expr == 'continue':
|
||||
raise JS_Continue()
|
||||
elif expr == 'undefined':
|
||||
return JS_Undefined, should_return
|
||||
elif expr == 'NaN':
|
||||
return float('NaN'), should_return
|
||||
|
||||
elif m and m.group('return'):
|
||||
return local_vars[m.group('name')]
|
||||
return local_vars.get(m.group('name'), JS_Undefined), should_return
|
||||
|
||||
with contextlib.suppress(ValueError):
|
||||
return json.loads(expr)
|
||||
return json.loads(js_to_json(expr, strict=True)), should_return
|
||||
|
||||
if m and m.group('indexing'):
|
||||
val = local_vars[m.group('in')]
|
||||
idx = self.interpret_expression(m.group('idx'), local_vars, allow_recursion)
|
||||
return val[idx]
|
||||
return self._index(val, idx), should_return
|
||||
|
||||
for op, opfunc in _OPERATORS.items():
|
||||
for op in _OPERATORS:
|
||||
separated = list(self._separate(expr, op))
|
||||
if len(separated) < 2:
|
||||
right_expr = separated.pop()
|
||||
while True:
|
||||
if op in '?<>*-' and len(separated) > 1 and not separated[-1].strip():
|
||||
separated.pop()
|
||||
elif not (separated and op == '?' and right_expr.startswith('.')):
|
||||
break
|
||||
right_expr = f'{op}{right_expr}'
|
||||
if op != '-':
|
||||
right_expr = f'{separated.pop()}{op}{right_expr}'
|
||||
if not separated:
|
||||
continue
|
||||
right_val = separated.pop()
|
||||
left_val = op.join(separated)
|
||||
left_val, should_abort = self.interpret_statement(
|
||||
left_val, local_vars, allow_recursion - 1)
|
||||
if should_abort:
|
||||
raise ExtractorError(f'Premature left-side return of {op} in {expr!r}')
|
||||
right_val, should_abort = self.interpret_statement(
|
||||
right_val, local_vars, allow_recursion - 1)
|
||||
if should_abort:
|
||||
raise ExtractorError(f'Premature right-side return of {op} in {expr!r}')
|
||||
return opfunc(left_val or 0, right_val)
|
||||
left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
|
||||
return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), should_return
|
||||
|
||||
if m and m.group('attribute'):
|
||||
variable = m.group('var')
|
||||
member = remove_quotes(m.group('member') or m.group('member2'))
|
||||
variable, member, nullish = m.group('var', 'member', 'nullish')
|
||||
if not member:
|
||||
member = self.interpret_expression(m.group('member2'), local_vars, allow_recursion)
|
||||
arg_str = expr[m.end():]
|
||||
if arg_str.startswith('('):
|
||||
arg_str, remaining = self._separate_at_paren(arg_str, ')')
|
||||
arg_str, remaining = self._separate_at_paren(arg_str)
|
||||
else:
|
||||
arg_str, remaining = None, arg_str
|
||||
|
||||
def assertion(cndn, msg):
|
||||
""" assert, but without risk of getting optimized out """
|
||||
if not cndn:
|
||||
raise ExtractorError(f'{member} {msg}: {expr}')
|
||||
raise self.Exception(f'{member} {msg}', expr)
|
||||
|
||||
def eval_method():
|
||||
if variable == 'String':
|
||||
obj = str
|
||||
elif variable in local_vars:
|
||||
obj = local_vars[variable]
|
||||
else:
|
||||
if (variable, member) == ('console', 'debug'):
|
||||
if Debugger.ENABLED:
|
||||
Debugger.write(self.interpret_expression(f'[{arg_str}]', local_vars, allow_recursion))
|
||||
return
|
||||
|
||||
types = {
|
||||
'String': str,
|
||||
'Math': float,
|
||||
}
|
||||
obj = local_vars.get(variable, types.get(variable, NO_DEFAULT))
|
||||
if obj is NO_DEFAULT:
|
||||
if variable not in self._objects:
|
||||
self._objects[variable] = self.extract_object(variable)
|
||||
obj = self._objects[variable]
|
||||
try:
|
||||
self._objects[variable] = self.extract_object(variable)
|
||||
except self.Exception:
|
||||
if not nullish:
|
||||
raise
|
||||
obj = self._objects.get(variable, JS_Undefined)
|
||||
|
||||
if nullish and obj is JS_Undefined:
|
||||
return JS_Undefined
|
||||
|
||||
# Member access
|
||||
if arg_str is None:
|
||||
if member == 'length':
|
||||
return len(obj)
|
||||
return obj[member]
|
||||
return self._index(obj, member, nullish)
|
||||
|
||||
# Function call
|
||||
argvals = [
|
||||
@@ -349,12 +642,17 @@ class JSInterpreter:
|
||||
if member == 'fromCharCode':
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
return ''.join(map(chr, argvals))
|
||||
raise ExtractorError(f'Unsupported string method {member}')
|
||||
raise self.Exception(f'Unsupported String method {member}', expr)
|
||||
elif obj == float:
|
||||
if member == 'pow':
|
||||
assertion(len(argvals) == 2, 'takes two arguments')
|
||||
return argvals[0] ** argvals[1]
|
||||
raise self.Exception(f'Unsupported Math method {member}', expr)
|
||||
|
||||
if member == 'split':
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
assertion(argvals == [''], 'with arguments is not implemented')
|
||||
return list(obj)
|
||||
assertion(len(argvals) == 1, 'with limit argument is not implemented')
|
||||
return obj.split(argvals[0]) if argvals[0] else list(obj)
|
||||
elif member == 'join':
|
||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||
assertion(len(argvals) == 1, 'takes exactly one argument')
|
||||
@@ -400,7 +698,7 @@ class JSInterpreter:
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
|
||||
f, this = (argvals + [''])[:2]
|
||||
return [f((item, idx, obj), this=this) for idx, item in enumerate(obj)]
|
||||
return [f((item, idx, obj), {'this': this}, allow_recursion) for idx, item in enumerate(obj)]
|
||||
elif member == 'indexOf':
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
|
||||
@@ -409,28 +707,43 @@ class JSInterpreter:
|
||||
return obj.index(idx, start)
|
||||
except ValueError:
|
||||
return -1
|
||||
elif member == 'charCodeAt':
|
||||
assertion(isinstance(obj, str), 'must be applied on a string')
|
||||
assertion(len(argvals) == 1, 'takes exactly one argument')
|
||||
idx = argvals[0] if isinstance(argvals[0], int) else 0
|
||||
if idx >= len(obj):
|
||||
return None
|
||||
return ord(obj[idx])
|
||||
|
||||
return obj[int(member) if isinstance(obj, list) else member](argvals)
|
||||
idx = int(member) if isinstance(obj, list) else member
|
||||
return obj[idx](argvals, allow_recursion=allow_recursion)
|
||||
|
||||
if remaining:
|
||||
return self.interpret_expression(
|
||||
ret, should_abort = self.interpret_statement(
|
||||
self._named_object(local_vars, eval_method()) + remaining,
|
||||
local_vars, allow_recursion)
|
||||
return ret, should_return or should_abort
|
||||
else:
|
||||
return eval_method()
|
||||
return eval_method(), should_return
|
||||
|
||||
elif m and m.group('function'):
|
||||
fname = m.group('fname')
|
||||
argvals = tuple(
|
||||
int(v) if v.isdigit() else local_vars[v]
|
||||
for v in self._separate(m.group('args')))
|
||||
argvals = [self.interpret_expression(v, local_vars, allow_recursion)
|
||||
for v in self._separate(m.group('args'))]
|
||||
if fname in local_vars:
|
||||
return local_vars[fname](argvals)
|
||||
return local_vars[fname](argvals, allow_recursion=allow_recursion), should_return
|
||||
elif fname not in self._functions:
|
||||
self._functions[fname] = self.extract_function(fname)
|
||||
return self._functions[fname](argvals)
|
||||
return self._functions[fname](argvals, allow_recursion=allow_recursion), should_return
|
||||
|
||||
raise ExtractorError(f'Unsupported JS expression {expr!r}')
|
||||
raise self.Exception(
|
||||
f'Unsupported JS expression {truncate_string(expr, 20, 20) if expr != stmt else ""}', stmt)
|
||||
|
||||
def interpret_expression(self, expr, local_vars, allow_recursion):
|
||||
ret, should_return = self.interpret_statement(expr, local_vars, allow_recursion)
|
||||
if should_return:
|
||||
raise self.Exception('Cannot return from an expression', expr)
|
||||
return ret
|
||||
|
||||
def extract_object(self, objname):
|
||||
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
|
||||
@@ -442,12 +755,14 @@ class JSInterpreter:
|
||||
}\s*;
|
||||
''' % (re.escape(objname), _FUNC_NAME_RE),
|
||||
self.code)
|
||||
if not obj_m:
|
||||
raise self.Exception(f'Could not find object {objname}')
|
||||
fields = obj_m.group('fields')
|
||||
# Currently, it only supports function definitions
|
||||
fields_m = re.finditer(
|
||||
r'''(?x)
|
||||
(?P<key>%s)\s*:\s*function\s*\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}
|
||||
''' % _FUNC_NAME_RE,
|
||||
(?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)}
|
||||
''' % (_FUNC_NAME_RE, _NAME_RE),
|
||||
fields)
|
||||
for f in fields_m:
|
||||
argnames = f.group('args').split(',')
|
||||
@@ -458,19 +773,19 @@ class JSInterpreter:
|
||||
def extract_function_code(self, funcname):
|
||||
""" @returns argnames, code """
|
||||
func_m = re.search(
|
||||
r'''(?x)
|
||||
r'''(?xs)
|
||||
(?:
|
||||
function\s+%(name)s|
|
||||
[{;,]\s*%(name)s\s*=\s*function|
|
||||
var\s+%(name)s\s*=\s*function
|
||||
(?:var|const|let)\s+%(name)s\s*=\s*function
|
||||
)\s*
|
||||
\((?P<args>[^)]*)\)\s*
|
||||
(?P<code>{(?:(?!};)[^"]|"([^"]|\\")*")+})''' % {'name': re.escape(funcname)},
|
||||
(?P<code>{.+})''' % {'name': re.escape(funcname)},
|
||||
self.code)
|
||||
code, _ = self._separate_at_paren(func_m.group('code'), '}') # refine the match
|
||||
code, _ = self._separate_at_paren(func_m.group('code'))
|
||||
if func_m is None:
|
||||
raise ExtractorError(f'Could not find JS function "{funcname}"')
|
||||
return func_m.group('args').split(','), code
|
||||
raise self.Exception(f'Could not find JS function "{funcname}"')
|
||||
return [x.strip() for x in func_m.group('args').split(',')], code
|
||||
|
||||
def extract_function(self, funcname):
|
||||
return self.extract_function_from_code(*self.extract_function_code(funcname))
|
||||
@@ -482,7 +797,7 @@ class JSInterpreter:
|
||||
if mobj is None:
|
||||
break
|
||||
start, body_start = mobj.span()
|
||||
body, remaining = self._separate_at_paren(code[body_start - 1:], '}')
|
||||
body, remaining = self._separate_at_paren(code[body_start - 1:])
|
||||
name = self._named_object(local_vars, self.extract_function_from_code(
|
||||
[x.strip() for x in mobj.group('args').split(',')],
|
||||
body, local_vars, *global_stack))
|
||||
@@ -494,16 +809,13 @@ class JSInterpreter:
|
||||
|
||||
def build_function(self, argnames, code, *global_stack):
|
||||
global_stack = list(global_stack) or [{}]
|
||||
argnames = tuple(argnames)
|
||||
|
||||
def resf(args, **kwargs):
|
||||
global_stack[0].update({
|
||||
**dict(zip(argnames, args)),
|
||||
**kwargs
|
||||
})
|
||||
def resf(args, kwargs={}, allow_recursion=100):
|
||||
global_stack[0].update(itertools.zip_longest(argnames, args, fillvalue=None))
|
||||
global_stack[0].update(kwargs)
|
||||
var_stack = LocalNameSpace(*global_stack)
|
||||
for stmt in self._separate(code.replace('\n', ''), ';'):
|
||||
ret, should_abort = self.interpret_statement(stmt, var_stack)
|
||||
if should_abort:
|
||||
break
|
||||
return ret
|
||||
ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1)
|
||||
if should_abort:
|
||||
return ret
|
||||
return resf
|
||||
|
||||
@@ -34,7 +34,7 @@ def format_text(text, f):
|
||||
'''
|
||||
@param f String representation of formatting to apply in the form:
|
||||
[style] [light] font_color [on [light] bg_color]
|
||||
Eg: "red", "bold green on light blue"
|
||||
E.g. "red", "bold green on light blue"
|
||||
'''
|
||||
f = f.upper()
|
||||
tokens = f.strip().split()
|
||||
|
||||
@@ -25,10 +25,12 @@ from .utils import (
|
||||
OUTTMPL_TYPES,
|
||||
POSTPROCESS_WHEN,
|
||||
Config,
|
||||
deprecation_warning,
|
||||
expand_path,
|
||||
format_field,
|
||||
get_executable_path,
|
||||
join_nonempty,
|
||||
orderedSet_from_options,
|
||||
remove_end,
|
||||
write_string,
|
||||
)
|
||||
@@ -77,7 +79,7 @@ def parseOpts(overrideArguments=None, ignore_config_files='if_override'):
|
||||
if root.parse_known_args()[0].ignoreconfig:
|
||||
return False
|
||||
# Multiple package names can be given here
|
||||
# Eg: ('yt-dlp', 'youtube-dlc', 'youtube-dl') will look for
|
||||
# E.g. ('yt-dlp', 'youtube-dlc', 'youtube-dl') will look for
|
||||
# the configuration file of any of these three packages
|
||||
for package in ('yt-dlp',):
|
||||
if user:
|
||||
@@ -163,6 +165,7 @@ class _YoutubeDLHelpFormatter(optparse.IndentedHelpFormatter):
|
||||
|
||||
class _YoutubeDLOptionParser(optparse.OptionParser):
|
||||
# optparse is deprecated since python 3.2. So assume a stable interface even for private methods
|
||||
ALIAS_DEST = '_triggered_aliases'
|
||||
ALIAS_TRIGGER_LIMIT = 100
|
||||
|
||||
def __init__(self):
|
||||
@@ -174,6 +177,7 @@ class _YoutubeDLOptionParser(optparse.OptionParser):
|
||||
formatter=_YoutubeDLHelpFormatter(),
|
||||
conflict_handler='resolve',
|
||||
)
|
||||
self.set_default(self.ALIAS_DEST, collections.defaultdict(int))
|
||||
|
||||
_UNKNOWN_OPTION = (optparse.BadOptionError, optparse.AmbiguousOptionError)
|
||||
_BAD_OPTION = optparse.OptionValueError
|
||||
@@ -232,30 +236,16 @@ def create_parser():
|
||||
current + value if append is True else value + current)
|
||||
|
||||
def _set_from_options_callback(
|
||||
option, opt_str, value, parser, delim=',', allowed_values=None, aliases={},
|
||||
option, opt_str, value, parser, allowed_values, delim=',', aliases={},
|
||||
process=lambda x: x.lower().strip()):
|
||||
current = set(getattr(parser.values, option.dest))
|
||||
values = [process(value)] if delim is None else list(map(process, value.split(delim)[::-1]))
|
||||
while values:
|
||||
actual_val = val = values.pop()
|
||||
if not val:
|
||||
raise optparse.OptionValueError(f'Invalid {option.metavar} for {opt_str}: {value}')
|
||||
if val == 'all':
|
||||
current.update(allowed_values)
|
||||
elif val == '-all':
|
||||
current = set()
|
||||
elif val in aliases:
|
||||
values.extend(aliases[val])
|
||||
else:
|
||||
if val[0] == '-':
|
||||
val = val[1:]
|
||||
current.discard(val)
|
||||
else:
|
||||
current.update([val])
|
||||
if allowed_values is not None and val not in allowed_values:
|
||||
raise optparse.OptionValueError(f'wrong {option.metavar} for {opt_str}: {actual_val}')
|
||||
values = [process(value)] if delim is None else map(process, value.split(delim))
|
||||
try:
|
||||
requested = orderedSet_from_options(values, collections.ChainMap(aliases, {'all': allowed_values}),
|
||||
start=getattr(parser.values, option.dest))
|
||||
except ValueError as e:
|
||||
raise optparse.OptionValueError(f'wrong {option.metavar} for {opt_str}: {e.args[0]}')
|
||||
|
||||
setattr(parser.values, option.dest, current)
|
||||
setattr(parser.values, option.dest, set(requested))
|
||||
|
||||
def _dict_from_options_callback(
|
||||
option, opt_str, value, parser,
|
||||
@@ -305,8 +295,7 @@ def create_parser():
|
||||
aliases = (x if x.startswith('-') else f'--{x}' for x in map(str.strip, aliases.split(',')))
|
||||
try:
|
||||
alias_group.add_option(
|
||||
*aliases, help=opts, nargs=nargs, type='str' if nargs else None,
|
||||
dest='_triggered_aliases', default=collections.defaultdict(int),
|
||||
*aliases, help=opts, nargs=nargs, dest=parser.ALIAS_DEST, type='str' if nargs else None,
|
||||
metavar=' '.join(f'ARG{i}' for i in range(nargs)), action='callback',
|
||||
callback=_alias_callback, callback_kwargs={'opts': opts, 'nargs': nargs})
|
||||
except Exception as err:
|
||||
@@ -365,16 +354,26 @@ def create_parser():
|
||||
'--extractor-descriptions',
|
||||
action='store_true', dest='list_extractor_descriptions', default=False,
|
||||
help='Output descriptions of all supported extractors and exit')
|
||||
general.add_option(
|
||||
'--use-extractors', '--ies',
|
||||
action='callback', dest='allowed_extractors', metavar='NAMES', type='str',
|
||||
default=[], callback=_list_from_options_callback,
|
||||
help=(
|
||||
'Extractor names to use separated by commas. '
|
||||
'You can also use regexes, "all", "default" and "end" (end URL matching); '
|
||||
'e.g. --ies "holodex.*,end,youtube". '
|
||||
'Prefix the name with a "-" to exclude it, e.g. --ies default,-generic. '
|
||||
'Use --list-extractors for a list of extractor names. (Alias: --ies)'))
|
||||
general.add_option(
|
||||
'--force-generic-extractor',
|
||||
action='store_true', dest='force_generic_extractor', default=False,
|
||||
help='Force extraction to use the generic extractor')
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
general.add_option(
|
||||
'--default-search',
|
||||
dest='default_search', metavar='PREFIX',
|
||||
help=(
|
||||
'Use this prefix for unqualified URLs. '
|
||||
'Eg: "gvsearch2:python" downloads two videos from google videos for the search term "python". '
|
||||
'E.g. "gvsearch2:python" downloads two videos from google videos for the search term "python". '
|
||||
'Use the value "auto" to let yt-dlp guess ("auto_warning" to emit a warning when guessing). '
|
||||
'"error" just throws an error. The default value "fixup_error" repairs broken URLs, '
|
||||
'but emits an error if this is not possible instead of searching'))
|
||||
@@ -443,11 +442,12 @@ def create_parser():
|
||||
'allowed_values': {
|
||||
'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
|
||||
'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge',
|
||||
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-attach-info-json', 'embed-metadata',
|
||||
'embed-thumbnail-atomicparsley', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
|
||||
'no-attach-info-json', 'embed-metadata', 'embed-thumbnail-atomicparsley',
|
||||
'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
|
||||
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
|
||||
}, 'aliases': {
|
||||
'youtube-dl': ['-multistreams', 'all'],
|
||||
'youtube-dlc': ['-no-youtube-channel-redirect', '-no-live-chat', 'all'],
|
||||
'youtube-dl': ['all', '-multistreams'],
|
||||
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat'],
|
||||
}
|
||||
}, help=(
|
||||
'Options that can help keep compatibility with youtube-dl or youtube-dlc '
|
||||
@@ -459,7 +459,7 @@ def create_parser():
|
||||
help=(
|
||||
'Create aliases for an option string. Unless an alias starts with a dash "-", it is prefixed with "--". '
|
||||
'Arguments are parsed according to the Python string formatting mini-language. '
|
||||
'Eg: --alias get-audio,-X "-S=aext:{0},abr -x --audio-format {0}" creates options '
|
||||
'E.g. --alias get-audio,-X "-S=aext:{0},abr -x --audio-format {0}" creates options '
|
||||
'"--get-audio" and "-X" that takes an argument (ARG0) and expands to '
|
||||
'"-S=aext:ARG0,abr -x --audio-format ARG0". All defined aliases are listed in the --help output. '
|
||||
'Alias options can trigger more aliases; so be careful to avoid defining recursive options. '
|
||||
@@ -471,8 +471,8 @@ def create_parser():
|
||||
'--proxy', dest='proxy',
|
||||
default=None, metavar='URL',
|
||||
help=(
|
||||
'Use the specified HTTP/HTTPS/SOCKS proxy. To enable SOCKS proxy, specify a proper scheme. '
|
||||
'Eg: socks5://user:pass@127.0.0.1:1080/. Pass in an empty string (--proxy "") for direct connection'))
|
||||
'Use the specified HTTP/HTTPS/SOCKS proxy. To enable SOCKS proxy, specify a proper scheme, '
|
||||
'e.g. socks5://user:pass@127.0.0.1:1080/. Pass in an empty string (--proxy "") for direct connection'))
|
||||
network.add_option(
|
||||
'--socket-timeout',
|
||||
dest='socket_timeout', type=float, default=None, metavar='SECONDS',
|
||||
@@ -537,7 +537,7 @@ def create_parser():
|
||||
'Comma separated playlist_index of the videos to download. '
|
||||
'You can specify a range using "[START]:[STOP][:STEP]". For backward compatibility, START-STOP is also supported. '
|
||||
'Use negative indices to count from the right and negative STEP to download in reverse order. '
|
||||
'Eg: "-I 1:3,7,-5::2" used on a playlist of size 15 will download the videos at index 1,2,3,7,11,13,15'))
|
||||
'E.g. "-I 1:3,7,-5::2" used on a playlist of size 15 will download the videos at index 1,2,3,7,11,13,15'))
|
||||
selection.add_option(
|
||||
'--match-title',
|
||||
dest='matchtitle', metavar='REGEX',
|
||||
@@ -549,17 +549,17 @@ def create_parser():
|
||||
selection.add_option(
|
||||
'--min-filesize',
|
||||
metavar='SIZE', dest='min_filesize', default=None,
|
||||
help='Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)')
|
||||
help='Do not download any videos smaller than SIZE, e.g. 50k or 44.6M')
|
||||
selection.add_option(
|
||||
'--max-filesize',
|
||||
metavar='SIZE', dest='max_filesize', default=None,
|
||||
help='Do not download any videos larger than SIZE (e.g. 50k or 44.6m)')
|
||||
help='Do not download any videos larger than SIZE, e.g. 50k or 44.6M')
|
||||
selection.add_option(
|
||||
'--date',
|
||||
metavar='DATE', dest='date', default=None,
|
||||
help=(
|
||||
'Download only videos uploaded on this date. The date can be "YYYYMMDD" or in the format '
|
||||
'[now|today|yesterday][-N[day|week|month|year]]. Eg: --date today-2weeks'))
|
||||
'[now|today|yesterday][-N[day|week|month|year]]. E.g. --date today-2weeks'))
|
||||
selection.add_option(
|
||||
'--datebefore',
|
||||
metavar='DATE', dest='datebefore', default=None,
|
||||
@@ -589,7 +589,7 @@ def create_parser():
|
||||
'You can also simply specify a field to match if the field is present, '
|
||||
'use "!field" to check if the field is not present, and "&" to check multiple conditions. '
|
||||
'Use a "\\" to escape "&" or quotes if needed. If used multiple times, '
|
||||
'the filter matches if atleast one of the conditions are met. Eg: --match-filter '
|
||||
'the filter matches if atleast one of the conditions are met. E.g. --match-filter '
|
||||
'!is_live --match-filter "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" '
|
||||
'matches only videos that are not live OR those that have a like count more than 100 '
|
||||
'(or the like field is not available) and also has a description '
|
||||
@@ -634,7 +634,7 @@ def create_parser():
|
||||
selection.add_option(
|
||||
'--break-per-input',
|
||||
action='store_true', dest='break_per_url', default=False,
|
||||
help='Make --break-on-existing, --break-on-reject and --max-downloads act only on the current input URL')
|
||||
help='--break-on-existing, --break-on-reject, --max-downloads, and autonumber resets per input URL')
|
||||
selection.add_option(
|
||||
'--no-break-per-input',
|
||||
action='store_false', dest='break_per_url',
|
||||
@@ -785,7 +785,7 @@ def create_parser():
|
||||
'--merge-output-format',
|
||||
action='store', dest='merge_output_format', metavar='FORMAT', default=None,
|
||||
help=(
|
||||
'Containers that may be used when merging formats, separated by "/" (Eg: "mp4/mkv"). '
|
||||
'Containers that may be used when merging formats, separated by "/", e.g. "mp4/mkv". '
|
||||
'Ignored if no merge is required. '
|
||||
f'(currently supported: {", ".join(sorted(FFmpegMergerPP.SUPPORTED_EXTS))})'))
|
||||
video_format.add_option(
|
||||
@@ -825,14 +825,14 @@ def create_parser():
|
||||
subtitles.add_option(
|
||||
'--sub-format',
|
||||
action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
|
||||
help='Subtitle format; accepts formats preference, Eg: "srt" or "ass/srt/best"')
|
||||
help='Subtitle format; accepts formats preference, e.g. "srt" or "ass/srt/best"')
|
||||
subtitles.add_option(
|
||||
'--sub-langs', '--srt-langs',
|
||||
action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
|
||||
default=[], callback=_list_from_options_callback,
|
||||
help=(
|
||||
'Languages of the subtitles to download (can be regex) or "all" separated by commas. (Eg: --sub-langs "en.*,ja") '
|
||||
'You can prefix the language code with a "-" to exclude it from the requested languages. (Eg: --sub-langs all,-live_chat) '
|
||||
'Languages of the subtitles to download (can be regex) or "all" separated by commas, e.g. --sub-langs "en.*,ja". '
|
||||
'You can prefix the language code with a "-" to exclude it from the requested languages, e.g. --sub-langs all,-live_chat. '
|
||||
'Use --list-subs for a list of available language tags'))
|
||||
|
||||
downloader = optparse.OptionGroup(parser, 'Download Options')
|
||||
@@ -843,11 +843,11 @@ def create_parser():
|
||||
downloader.add_option(
|
||||
'-r', '--limit-rate', '--rate-limit',
|
||||
dest='ratelimit', metavar='RATE',
|
||||
help='Maximum download rate in bytes per second (e.g. 50K or 4.2M)')
|
||||
help='Maximum download rate in bytes per second, e.g. 50K or 4.2M')
|
||||
downloader.add_option(
|
||||
'--throttled-rate',
|
||||
dest='throttledratelimit', metavar='RATE',
|
||||
help='Minimum download rate in bytes per second below which throttling is assumed and the video data is re-extracted (e.g. 100K)')
|
||||
help='Minimum download rate in bytes per second below which throttling is assumed and the video data is re-extracted, e.g. 100K')
|
||||
downloader.add_option(
|
||||
'-R', '--retries',
|
||||
dest='retries', metavar='RETRIES', default=10,
|
||||
@@ -871,8 +871,8 @@ def create_parser():
|
||||
'Time to sleep between retries in seconds (optionally) prefixed by the type of retry '
|
||||
'(http (default), fragment, file_access, extractor) to apply the sleep to. '
|
||||
'EXPR can be a number, linear=START[:END[:STEP=1]] or exp=START[:END[:BASE=2]]. '
|
||||
'This option can be used multiple times to set the sleep for the different retry types. '
|
||||
'Eg: --retry-sleep linear=1::2 --retry-sleep fragment:exp=1:20'))
|
||||
'This option can be used multiple times to set the sleep for the different retry types, '
|
||||
'e.g. --retry-sleep linear=1::2 --retry-sleep fragment:exp=1:20'))
|
||||
downloader.add_option(
|
||||
'--skip-unavailable-fragments', '--no-abort-on-unavailable-fragment',
|
||||
action='store_true', dest='skip_unavailable_fragments', default=True,
|
||||
@@ -892,7 +892,7 @@ def create_parser():
|
||||
downloader.add_option(
|
||||
'--buffer-size',
|
||||
dest='buffersize', metavar='SIZE', default='1024',
|
||||
help='Size of download buffer (e.g. 1024 or 16K) (default is %default)')
|
||||
help='Size of download buffer, e.g. 1024 or 16K (default is %default)')
|
||||
downloader.add_option(
|
||||
'--resize-buffer',
|
||||
action='store_false', dest='noresizebuffer',
|
||||
@@ -905,7 +905,7 @@ def create_parser():
|
||||
'--http-chunk-size',
|
||||
dest='http_chunk_size', metavar='SIZE', default=None,
|
||||
help=(
|
||||
'Size of a chunk for chunk-based HTTP downloading (e.g. 10485760 or 10M) (default is disabled). '
|
||||
'Size of a chunk for chunk-based HTTP downloading, e.g. 10485760 or 10M (default is disabled). '
|
||||
'May be useful for bypassing bandwidth throttling imposed by a webserver (experimental)'))
|
||||
downloader.add_option(
|
||||
'--test',
|
||||
@@ -963,8 +963,8 @@ def create_parser():
|
||||
help=(
|
||||
'Download only chapters whose title matches the given regular expression. '
|
||||
'Time ranges prefixed by a "*" can also be used in place of chapters to download the specified range. '
|
||||
'Eg: --download-sections "*10:15-15:00" --download-sections "intro". '
|
||||
'Needs ffmpeg. This option can be used multiple times to download multiple sections'))
|
||||
'Needs ffmpeg. This option can be used multiple times to download multiple sections, '
|
||||
'e.g. --download-sections "*10:15-15:00" --download-sections "intro"'))
|
||||
downloader.add_option(
|
||||
'--downloader', '--external-downloader',
|
||||
dest='external_downloader', metavar='[PROTO:]NAME', default={}, type='str',
|
||||
@@ -978,7 +978,7 @@ def create_parser():
|
||||
'the protocols (http, ftp, m3u8, dash, rstp, rtmp, mms) to use it for. '
|
||||
f'Currently supports native, {", ".join(sorted(list_external_downloaders()))}. '
|
||||
'You can use this option multiple times to set different downloaders for different protocols. '
|
||||
'For example, --downloader aria2c --downloader "dash,m3u8:native" will use '
|
||||
'E.g. --downloader aria2c --downloader "dash,m3u8:native" will use '
|
||||
'aria2c for http/ftp downloads, and the native downloader for dash/m3u8 downloads '
|
||||
'(Alias: --external-downloader)'))
|
||||
downloader.add_option(
|
||||
@@ -1188,7 +1188,7 @@ def create_parser():
|
||||
'Template for progress outputs, optionally prefixed with one of "download:" (default), '
|
||||
'"download-title:" (the console title), "postprocess:", or "postprocess-title:". '
|
||||
'The video\'s fields are accessible under the "info" key and '
|
||||
'the progress attributes are accessible under "progress" key. E.g.: '
|
||||
'the progress attributes are accessible under "progress" key. E.g. '
|
||||
# TODO: Document the fields inside "progress"
|
||||
'--console-title --progress-template "download-title:%(info.id)s-%(progress.eta)s"'))
|
||||
verbosity.add_option(
|
||||
@@ -1401,14 +1401,15 @@ def create_parser():
|
||||
help='Do not read/dump cookies from/to file (default)')
|
||||
filesystem.add_option(
|
||||
'--cookies-from-browser',
|
||||
dest='cookiesfrombrowser', metavar='BROWSER[+KEYRING][:PROFILE]',
|
||||
dest='cookiesfrombrowser', metavar='BROWSER[+KEYRING][:PROFILE][::CONTAINER]',
|
||||
help=(
|
||||
'The name of the browser and (optionally) the name/path of '
|
||||
'the profile to load cookies from, separated by a ":". '
|
||||
'The name of the browser to load cookies from. '
|
||||
f'Currently supported browsers are: {", ".join(sorted(SUPPORTED_BROWSERS))}. '
|
||||
'By default, the most recently accessed profile is used. '
|
||||
'The keyring used for decrypting Chromium cookies on Linux can be '
|
||||
'(optionally) specified after the browser name separated by a "+". '
|
||||
'Optionally, the KEYRING used for decrypting Chromium cookies on Linux, '
|
||||
'the name/path of the PROFILE to load cookies from, '
|
||||
'and the CONTAINER name (if Firefox) ("none" for no container) '
|
||||
'can be given with their respective seperators. '
|
||||
'By default, all containers of the most recently accessed profile are used. '
|
||||
f'Currently supported keyrings are: {", ".join(map(str.lower, sorted(SUPPORTED_KEYRINGS)))}'))
|
||||
filesystem.add_option(
|
||||
'--no-cookies-from-browser',
|
||||
@@ -1488,7 +1489,7 @@ def create_parser():
|
||||
'Remux the video into another container if necessary '
|
||||
f'(currently supported: {", ".join(FFmpegVideoRemuxerPP.SUPPORTED_EXTS)}). '
|
||||
'If target container does not support the video/audio codec, remuxing will fail. You can specify multiple rules; '
|
||||
'Eg. "aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 and anything else to mkv'))
|
||||
'e.g. "aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 and anything else to mkv'))
|
||||
postproc.add_option(
|
||||
'--recode-video',
|
||||
metavar='FORMAT', dest='recodevideo', default=None,
|
||||
@@ -1513,7 +1514,7 @@ def create_parser():
|
||||
'You can also specify "PP+EXE:ARGS" to give the arguments to the specified executable '
|
||||
'only when being used by the specified postprocessor. Additionally, for ffmpeg/ffprobe, '
|
||||
'"_i"/"_o" can be appended to the prefix optionally followed by a number to pass the argument '
|
||||
'before the specified input/output file. Eg: --ppa "Merger+ffmpeg_i1:-v quiet". '
|
||||
'before the specified input/output file, e.g. --ppa "Merger+ffmpeg_i1:-v quiet". '
|
||||
'You can use this option multiple times to give different arguments to different '
|
||||
'postprocessors. (Alias: --ppa)'))
|
||||
postproc.add_option(
|
||||
@@ -1729,7 +1730,7 @@ def create_parser():
|
||||
'SponsorBlock categories to create chapters for, separated by commas. '
|
||||
f'Available categories are {", ".join(SponsorBlockPP.CATEGORIES.keys())}, all and default (=all). '
|
||||
'You can prefix the category with a "-" to exclude it. See [1] for description of the categories. '
|
||||
'Eg: --sponsorblock-mark all,-preview [1] https://wiki.sponsor.ajay.app/w/Segment_Categories'))
|
||||
'E.g. --sponsorblock-mark all,-preview [1] https://wiki.sponsor.ajay.app/w/Segment_Categories'))
|
||||
sponsorblock.add_option(
|
||||
'--sponsorblock-remove', metavar='CATS',
|
||||
dest='sponsorblock_remove', default=set(), action='callback', type='str',
|
||||
@@ -1866,7 +1867,6 @@ def create_parser():
|
||||
|
||||
|
||||
def _hide_login_info(opts):
|
||||
write_string(
|
||||
'DeprecationWarning: "yt_dlp.options._hide_login_info" is deprecated and may be removed in a future version. '
|
||||
'Use "yt_dlp.utils.Config.hide_login_info" instead\n')
|
||||
deprecation_warning(f'"{__name__}._hide_login_info" is deprecated and may be removed '
|
||||
'in a future version. Use "yt_dlp.utils.Config.hide_login_info" instead')
|
||||
return Config.hide_login_info(opts)
|
||||
|
||||
@@ -7,10 +7,10 @@ from ..utils import (
|
||||
PostProcessingError,
|
||||
RetryManager,
|
||||
_configuration_args,
|
||||
deprecation_warning,
|
||||
encodeFilename,
|
||||
network_exceptions,
|
||||
sanitized_Request,
|
||||
write_string,
|
||||
)
|
||||
|
||||
|
||||
@@ -73,10 +73,14 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
|
||||
if self._downloader:
|
||||
return self._downloader.report_warning(text, *args, **kwargs)
|
||||
|
||||
def deprecation_warning(self, text):
|
||||
def deprecation_warning(self, msg):
|
||||
warn = getattr(self._downloader, 'deprecation_warning', deprecation_warning)
|
||||
return warn(msg, stacklevel=1)
|
||||
|
||||
def deprecated_feature(self, msg):
|
||||
if self._downloader:
|
||||
return self._downloader.deprecation_warning(text)
|
||||
write_string(f'DeprecationWarning: {text}')
|
||||
return self._downloader.deprecated_feature(msg)
|
||||
return deprecation_warning(msg, stacklevel=1)
|
||||
|
||||
def report_error(self, text, *args, **kwargs):
|
||||
self.deprecation_warning('"yt_dlp.postprocessor.PostProcessor.report_error" is deprecated. '
|
||||
|
||||
@@ -139,7 +139,8 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
|
||||
if not success:
|
||||
success = True
|
||||
atomicparsley = next((
|
||||
x for x in ['AtomicParsley', 'atomicparsley']
|
||||
# libatomicparsley.so : See https://github.com/xibr/ytdlp-lazy/issues/1
|
||||
x for x in ['AtomicParsley', 'atomicparsley', 'libatomicparsley.so']
|
||||
if check_executable(x, ['-v'])), None)
|
||||
if atomicparsley is None:
|
||||
self.to_screen('Neither mutagen nor AtomicParsley was found. Falling back to ffmpeg')
|
||||
|
||||
@@ -15,6 +15,7 @@ from ..utils import (
|
||||
Popen,
|
||||
PostProcessingError,
|
||||
_get_exe_version_output,
|
||||
deprecation_warning,
|
||||
detect_exe_version,
|
||||
determine_ext,
|
||||
dfxp2srt,
|
||||
@@ -30,7 +31,6 @@ from ..utils import (
|
||||
traverse_obj,
|
||||
variadic,
|
||||
write_json_file,
|
||||
write_string,
|
||||
)
|
||||
|
||||
EXT_TO_OUT_FORMATS = {
|
||||
@@ -109,18 +109,24 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
return {p: p for p in programs}
|
||||
|
||||
if not os.path.exists(location):
|
||||
self.report_warning(f'ffmpeg-location {location} does not exist! Continuing without ffmpeg')
|
||||
self.report_warning(
|
||||
f'ffmpeg-location {location} does not exist! Continuing without ffmpeg', only_once=True)
|
||||
return {}
|
||||
elif os.path.isdir(location):
|
||||
dirname, basename = location, None
|
||||
dirname, basename, filename = location, None, None
|
||||
else:
|
||||
basename = os.path.splitext(os.path.basename(location))[0]
|
||||
basename = next((p for p in programs if basename.startswith(p)), 'ffmpeg')
|
||||
filename = os.path.basename(location)
|
||||
basename = next((p for p in programs if p in filename), 'ffmpeg')
|
||||
dirname = os.path.dirname(os.path.abspath(location))
|
||||
if basename in self._ffmpeg_to_avconv.keys():
|
||||
self._prefer_ffmpeg = True
|
||||
|
||||
paths = {p: os.path.join(dirname, p) for p in programs}
|
||||
if basename and basename in filename:
|
||||
for p in programs:
|
||||
path = os.path.join(dirname, filename.replace(basename, p))
|
||||
if os.path.exists(path):
|
||||
paths[p] = path
|
||||
if basename:
|
||||
paths[basename] = location
|
||||
return paths
|
||||
@@ -171,9 +177,9 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
return self.probe_basename
|
||||
|
||||
def _get_version(self, kind):
|
||||
executables = (kind, self._ffmpeg_to_avconv[kind])
|
||||
executables = (kind, )
|
||||
if not self._prefer_ffmpeg:
|
||||
executables = reversed(executables)
|
||||
executables = (kind, self._ffmpeg_to_avconv[kind])
|
||||
basename, version, features = next(filter(
|
||||
lambda x: x[1], ((p, *self._get_ffmpeg_version(p)) for p in executables)), (None, None, {}))
|
||||
if kind == 'ffmpeg':
|
||||
@@ -181,8 +187,8 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
else:
|
||||
self.probe_basename = basename
|
||||
if basename == self._ffmpeg_to_avconv[kind]:
|
||||
self.deprecation_warning(
|
||||
f'Support for {self._ffmpeg_to_avconv[kind]} is deprecated and may be removed in a future version. Use {kind} instead')
|
||||
self.deprecated_feature(f'Support for {self._ffmpeg_to_avconv[kind]} is deprecated and '
|
||||
f'may be removed in a future version. Use {kind} instead')
|
||||
return version
|
||||
|
||||
@functools.cached_property
|
||||
@@ -1058,7 +1064,7 @@ class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
|
||||
|
||||
@classmethod
|
||||
def is_webp(cls, path):
|
||||
write_string(f'DeprecationWarning: {cls.__module__}.{cls.__name__}.is_webp is deprecated')
|
||||
deprecation_warning(f'{cls.__module__}.{cls.__name__}.is_webp is deprecated')
|
||||
return imghdr.what(path) == 'webp'
|
||||
|
||||
def fixup_webp(self, info, idx=-1):
|
||||
@@ -1099,6 +1105,7 @@ class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
|
||||
continue
|
||||
has_thumbnail = True
|
||||
self.fixup_webp(info, idx)
|
||||
original_thumbnail = thumbnail_dict['filepath'] # Path can change during fixup
|
||||
thumbnail_ext = os.path.splitext(original_thumbnail)[1][1:].lower()
|
||||
if thumbnail_ext == 'jpeg':
|
||||
thumbnail_ext = 'jpg'
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import atexit
|
||||
import contextlib
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
@@ -9,10 +10,11 @@ import sys
|
||||
from zipimport import zipimporter
|
||||
|
||||
from .compat import functools # isort: split
|
||||
from .compat import compat_realpath
|
||||
from .compat import compat_realpath, compat_shlex_quote
|
||||
from .utils import (
|
||||
Popen,
|
||||
cached_method,
|
||||
deprecation_warning,
|
||||
shell_quote,
|
||||
system_identifier,
|
||||
traverse_obj,
|
||||
@@ -50,6 +52,19 @@ def detect_variant():
|
||||
return VARIANT or _get_variant_and_executable_path()[0]
|
||||
|
||||
|
||||
@functools.cache
|
||||
def current_git_head():
|
||||
if detect_variant() != 'source':
|
||||
return
|
||||
with contextlib.suppress(Exception):
|
||||
stdout, _, _ = Popen.run(
|
||||
['git', 'rev-parse', '--short', 'HEAD'],
|
||||
text=True, cwd=os.path.dirname(os.path.abspath(__file__)),
|
||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
if re.fullmatch('[0-9a-f]+', stdout.strip()):
|
||||
return stdout.strip()
|
||||
|
||||
|
||||
_FILE_SUFFIXES = {
|
||||
'zip': '',
|
||||
'py2exe': '_min.exe',
|
||||
@@ -229,24 +244,33 @@ class Updater:
|
||||
except OSError:
|
||||
return self._report_permission_error(new_filename)
|
||||
|
||||
try:
|
||||
if old_filename:
|
||||
if old_filename:
|
||||
mask = os.stat(self.filename).st_mode
|
||||
try:
|
||||
os.rename(self.filename, old_filename)
|
||||
except OSError:
|
||||
return self._report_error('Unable to move current version')
|
||||
try:
|
||||
if old_filename:
|
||||
os.rename(new_filename, self.filename)
|
||||
except OSError:
|
||||
self._report_error('Unable to overwrite current version')
|
||||
return os.rename(old_filename, self.filename)
|
||||
except OSError:
|
||||
return self._report_error('Unable to move current version')
|
||||
|
||||
if detect_variant() not in ('win32_exe', 'py2exe'):
|
||||
if old_filename:
|
||||
os.remove(old_filename)
|
||||
else:
|
||||
try:
|
||||
os.rename(new_filename, self.filename)
|
||||
except OSError:
|
||||
self._report_error('Unable to overwrite current version')
|
||||
return os.rename(old_filename, self.filename)
|
||||
|
||||
if detect_variant() in ('win32_exe', 'py2exe'):
|
||||
atexit.register(Popen, f'ping 127.0.0.1 -n 5 -w 1000 & del /F "{old_filename}"',
|
||||
shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
||||
elif old_filename:
|
||||
try:
|
||||
os.remove(old_filename)
|
||||
except OSError:
|
||||
self._report_error('Unable to remove the old version')
|
||||
|
||||
try:
|
||||
os.chmod(self.filename, mask)
|
||||
except OSError:
|
||||
return self._report_error(
|
||||
f'Unable to set permissions. Run: sudo chmod a+rx {compat_shlex_quote(self.filename)}')
|
||||
|
||||
self.ydl.to_screen(f'Updated yt-dlp to version {self.new_version}')
|
||||
return True
|
||||
@@ -279,11 +303,8 @@ def run_update(ydl):
|
||||
def update_self(to_screen, verbose, opener):
|
||||
import traceback
|
||||
|
||||
from .utils import write_string
|
||||
|
||||
write_string(
|
||||
'DeprecationWarning: "yt_dlp.update.update_self" is deprecated and may be removed in a future version. '
|
||||
'Use "yt_dlp.update.run_update(ydl)" instead\n')
|
||||
deprecation_warning(f'"{__name__}.update_self" is deprecated and may be removed '
|
||||
f'in a future version. Use "{__name__}.run_update(ydl)" instead')
|
||||
|
||||
printfn = to_screen
|
||||
|
||||
|
||||
140
yt_dlp/utils.py
140
yt_dlp/utils.py
@@ -150,6 +150,16 @@ MONTH_NAMES = {
|
||||
'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
|
||||
}
|
||||
|
||||
# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
|
||||
TIMEZONE_NAMES = {
|
||||
'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
|
||||
'AST': -4, 'ADT': -3, # Atlantic (used in Canada)
|
||||
'EST': -5, 'EDT': -4, # Eastern
|
||||
'CST': -6, 'CDT': -5, # Central
|
||||
'MST': -7, 'MDT': -6, # Mountain
|
||||
'PST': -8, 'PDT': -7 # Pacific
|
||||
}
|
||||
|
||||
# needed for sanitizing filenames in restricted mode
|
||||
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
|
||||
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
|
||||
@@ -600,7 +610,7 @@ def sanitize_open(filename, open_mode):
|
||||
if sys.platform == 'win32':
|
||||
import msvcrt
|
||||
|
||||
# stdout may be any IO stream. Eg, when using contextlib.redirect_stdout
|
||||
# stdout may be any IO stream, e.g. when using contextlib.redirect_stdout
|
||||
with contextlib.suppress(io.UnsupportedOperation):
|
||||
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
|
||||
return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
|
||||
@@ -776,8 +786,8 @@ def _htmlentity_transform(entity_with_semicolon):
|
||||
if entity in html.entities.name2codepoint:
|
||||
return chr(html.entities.name2codepoint[entity])
|
||||
|
||||
# TODO: HTML5 allows entities without a semicolon. For example,
|
||||
# 'Éric' should be decoded as 'Éric'.
|
||||
# TODO: HTML5 allows entities without a semicolon.
|
||||
# E.g. 'Éric' should be decoded as 'Éric'.
|
||||
if entity_with_semicolon in html.entities.html5:
|
||||
return html.entities.html5[entity_with_semicolon]
|
||||
|
||||
@@ -818,8 +828,8 @@ def escapeHTML(text):
|
||||
|
||||
|
||||
def process_communicate_or_kill(p, *args, **kwargs):
|
||||
write_string('DeprecationWarning: yt_dlp.utils.process_communicate_or_kill is deprecated '
|
||||
'and may be removed in a future version. Use yt_dlp.utils.Popen.communicate_or_kill instead')
|
||||
deprecation_warning(f'"{__name__}.process_communicate_or_kill" is deprecated and may be removed '
|
||||
f'in a future version. Use "{__name__}.Popen.communicate_or_kill" instead')
|
||||
return Popen.communicate_or_kill(p, *args, **kwargs)
|
||||
|
||||
|
||||
@@ -830,12 +840,35 @@ class Popen(subprocess.Popen):
|
||||
else:
|
||||
_startupinfo = None
|
||||
|
||||
def __init__(self, *args, text=False, **kwargs):
|
||||
@staticmethod
|
||||
def _fix_pyinstaller_ld_path(env):
|
||||
"""Restore LD_LIBRARY_PATH when using PyInstaller
|
||||
Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
|
||||
https://github.com/yt-dlp/yt-dlp/issues/4573
|
||||
"""
|
||||
if not hasattr(sys, '_MEIPASS'):
|
||||
return
|
||||
|
||||
def _fix(key):
|
||||
orig = env.get(f'{key}_ORIG')
|
||||
if orig is None:
|
||||
env.pop(key, None)
|
||||
else:
|
||||
env[key] = orig
|
||||
|
||||
_fix('LD_LIBRARY_PATH') # Linux
|
||||
_fix('DYLD_LIBRARY_PATH') # macOS
|
||||
|
||||
def __init__(self, *args, env=None, text=False, **kwargs):
|
||||
if env is None:
|
||||
env = os.environ.copy()
|
||||
self._fix_pyinstaller_ld_path(env)
|
||||
|
||||
if text is True:
|
||||
kwargs['universal_newlines'] = True # For 3.6 compatibility
|
||||
kwargs.setdefault('encoding', 'utf-8')
|
||||
kwargs.setdefault('errors', 'replace')
|
||||
super().__init__(*args, **kwargs, startupinfo=self._startupinfo)
|
||||
super().__init__(*args, env=env, **kwargs, startupinfo=self._startupinfo)
|
||||
|
||||
def communicate_or_kill(self, *args, **kwargs):
|
||||
try:
|
||||
@@ -850,9 +883,9 @@ class Popen(subprocess.Popen):
|
||||
self.wait(timeout=timeout)
|
||||
|
||||
@classmethod
|
||||
def run(cls, *args, **kwargs):
|
||||
def run(cls, *args, timeout=None, **kwargs):
|
||||
with cls(*args, **kwargs) as proc:
|
||||
stdout, stderr = proc.communicate_or_kill()
|
||||
stdout, stderr = proc.communicate_or_kill(timeout=timeout)
|
||||
return stdout or '', stderr or '', proc.returncode
|
||||
|
||||
|
||||
@@ -1684,7 +1717,11 @@ def extract_timezone(date_str):
|
||||
$)
|
||||
''', date_str)
|
||||
if not m:
|
||||
timezone = datetime.timedelta()
|
||||
m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
|
||||
timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
|
||||
if timezone is not None:
|
||||
date_str = date_str[:-len(m.group('tz'))]
|
||||
timezone = datetime.timedelta(hours=timezone or 0)
|
||||
else:
|
||||
date_str = date_str[:-len(m.group('tz'))]
|
||||
if not m.group('sign'):
|
||||
@@ -1746,7 +1783,8 @@ def unified_timestamp(date_str, day_first=True):
|
||||
if date_str is None:
|
||||
return None
|
||||
|
||||
date_str = re.sub(r'[,|]', '', date_str)
|
||||
date_str = re.sub(r'\s+', ' ', re.sub(
|
||||
r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))
|
||||
|
||||
pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
|
||||
timezone, date_str = extract_timezone(date_str)
|
||||
@@ -1768,9 +1806,10 @@ def unified_timestamp(date_str, day_first=True):
|
||||
with contextlib.suppress(ValueError):
|
||||
dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
|
||||
return calendar.timegm(dt.timetuple())
|
||||
|
||||
timetuple = email.utils.parsedate_tz(date_str)
|
||||
if timetuple:
|
||||
return calendar.timegm(timetuple) + pm_delta * 3600
|
||||
return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
|
||||
|
||||
|
||||
def determine_ext(url, default_ext='unknown_video'):
|
||||
@@ -1918,7 +1957,7 @@ class DateRange:
|
||||
|
||||
def platform_name():
|
||||
""" Returns the platform name as a str """
|
||||
write_string('DeprecationWarning: yt_dlp.utils.platform_name is deprecated, use platform.platform instead')
|
||||
deprecation_warning(f'"{__name__}.platform_name" is deprecated, use "platform.platform" instead')
|
||||
return platform.platform()
|
||||
|
||||
|
||||
@@ -1964,6 +2003,23 @@ def write_string(s, out=None, encoding=None):
|
||||
out.flush()
|
||||
|
||||
|
||||
def deprecation_warning(msg, *, printer=None, stacklevel=0, **kwargs):
|
||||
from . import _IN_CLI
|
||||
if _IN_CLI:
|
||||
if msg in deprecation_warning._cache:
|
||||
return
|
||||
deprecation_warning._cache.add(msg)
|
||||
if printer:
|
||||
return printer(f'{msg}{bug_reports_message()}', **kwargs)
|
||||
return write_string(f'ERROR: {msg}{bug_reports_message()}\n', **kwargs)
|
||||
else:
|
||||
import warnings
|
||||
warnings.warn(DeprecationWarning(msg), stacklevel=stacklevel + 3)
|
||||
|
||||
|
||||
deprecation_warning._cache = set()
|
||||
|
||||
|
||||
def bytes_to_intlist(bs):
|
||||
if not bs:
|
||||
return []
|
||||
@@ -3199,7 +3255,7 @@ def strip_jsonp(code):
|
||||
r'\g<callback_data>', code)
|
||||
|
||||
|
||||
def js_to_json(code, vars={}):
|
||||
def js_to_json(code, vars={}, *, strict=False):
|
||||
# vars is a dict of var, val pairs to substitute
|
||||
COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
|
||||
SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
|
||||
@@ -3233,14 +3289,17 @@ def js_to_json(code, vars={}):
|
||||
|
||||
if v in vars:
|
||||
return vars[v]
|
||||
if strict:
|
||||
raise ValueError(f'Unknown value: {v}')
|
||||
|
||||
return '"%s"' % v
|
||||
|
||||
def create_map(mobj):
|
||||
return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
|
||||
|
||||
code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
|
||||
code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
|
||||
if not strict:
|
||||
code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
|
||||
|
||||
return re.sub(r'''(?sx)
|
||||
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
|
||||
@@ -3482,8 +3541,8 @@ def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
|
||||
},
|
||||
}
|
||||
|
||||
sanitize_codec = functools.partial(try_get, getter=lambda x: x.split('.')[0].replace('0', ''))
|
||||
vcodec, acodec = sanitize_codec(vcodecs[0]), sanitize_codec(acodecs[0])
|
||||
sanitize_codec = functools.partial(try_get, getter=lambda x: x[0].split('.')[0].replace('0', ''))
|
||||
vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)
|
||||
|
||||
for ext in preferences or COMPATIBLE_CODECS.keys():
|
||||
codec_set = COMPATIBLE_CODECS.get(ext, set())
|
||||
@@ -4843,8 +4902,8 @@ def decode_base_n(string, n=None, table=None):
|
||||
|
||||
|
||||
def decode_base(value, digits):
|
||||
write_string('DeprecationWarning: yt_dlp.utils.decode_base is deprecated '
|
||||
'and may be removed in a future version. Use yt_dlp.decode_base_n instead')
|
||||
deprecation_warning(f'{__name__}.decode_base is deprecated and may be removed '
|
||||
f'in a future version. Use {__name__}.decode_base_n instead')
|
||||
return decode_base_n(value, table=digits)
|
||||
|
||||
|
||||
@@ -5313,8 +5372,8 @@ def traverse_obj(
|
||||
|
||||
|
||||
def traverse_dict(dictn, keys, casesense=True):
|
||||
write_string('DeprecationWarning: yt_dlp.utils.traverse_dict is deprecated '
|
||||
'and may be removed in a future version. Use yt_dlp.utils.traverse_obj instead')
|
||||
deprecation_warning(f'"{__name__}.traverse_dict" is deprecated and may be removed '
|
||||
f'in a future version. Use "{__name__}.traverse_obj" instead')
|
||||
return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
|
||||
|
||||
|
||||
@@ -5745,7 +5804,7 @@ class RetryManager:
|
||||
if not count:
|
||||
return warn(e)
|
||||
elif isinstance(e, ExtractorError):
|
||||
e = remove_end(str(e.cause) or e.orig_msg, '.')
|
||||
e = remove_end(str_or_none(e.cause) or e.orig_msg, '.')
|
||||
warn(f'{e}. Retrying{format_field(suffix, None, " %s")} ({count}/{retries})...')
|
||||
|
||||
delay = float_or_none(sleep_func(n=count - 1)) if callable(sleep_func) else sleep_func
|
||||
@@ -5759,6 +5818,43 @@ def make_archive_id(ie, video_id):
|
||||
return f'{ie_key.lower()} {video_id}'
|
||||
|
||||
|
||||
def truncate_string(s, left, right=0):
|
||||
assert left > 3 and right >= 0
|
||||
if s is None or len(s) <= left + right:
|
||||
return s
|
||||
return f'{s[:left-3]}...{s[-right:]}'
|
||||
|
||||
|
||||
def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None):
|
||||
assert 'all' in alias_dict, '"all" alias is required'
|
||||
requested = list(start or [])
|
||||
for val in options:
|
||||
discard = val.startswith('-')
|
||||
if discard:
|
||||
val = val[1:]
|
||||
|
||||
if val in alias_dict:
|
||||
val = alias_dict[val] if not discard else [
|
||||
i[1:] if i.startswith('-') else f'-{i}' for i in alias_dict[val]]
|
||||
# NB: Do not allow regex in aliases for performance
|
||||
requested = orderedSet_from_options(val, alias_dict, start=requested)
|
||||
continue
|
||||
|
||||
current = (filter(re.compile(val, re.I).fullmatch, alias_dict['all']) if use_regex
|
||||
else [val] if val in alias_dict['all'] else None)
|
||||
if current is None:
|
||||
raise ValueError(val)
|
||||
|
||||
if discard:
|
||||
for item in current:
|
||||
while item in requested:
|
||||
requested.remove(item)
|
||||
else:
|
||||
requested.extend(current)
|
||||
|
||||
return orderedSet(requested)
|
||||
|
||||
|
||||
# Deprecated
|
||||
has_certifi = bool(certifi)
|
||||
has_websockets = bool(websockets)
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
# Autogenerated by devscripts/update-version.py
|
||||
|
||||
__version__ = '2022.08.08'
|
||||
__version__ = '2022.09.01'
|
||||
|
||||
RELEASE_GIT_HEAD = '3157158f7'
|
||||
RELEASE_GIT_HEAD = '5d7c7d656'
|
||||
|
||||
VARIANT = None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user